net/unix/af_unix.c

   1 /*
   2  * NET4:        Implementation of BSD Unix domain sockets.
   3  *
   4  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Fixes:
  12  *              Linus Torvalds  :       Assorted bug cures.
  13  *              Niibe Yutaka    :       async I/O support.
  14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15  *              Alan Cox        :       Limit size of allocated blocks.
  16  *              Alan Cox        :       Fixed the stupid socketpair bug.
  17  *              Alan Cox        :       BSD compatibility fine tuning.
  18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19  *              Alan Cox        :       Sorted out a proper draft version of
  20  *                                      file descriptor passing hacked up from
  21  *                                      Mike Shaver's work.
  22  *              Marty Leisner   :       Fixes to fd passing
  23  *              Nick Nevin      :       recvmsg bugfix.
  24  *              Alan Cox        :       Started proper garbage collector
  25  *              Heiko EiBfeldt  :       Missing verify_area check
  26  *              Alan Cox        :       Started POSIXisms
  27  *              Andreas Schwab  :       Replace inode by dentry for proper
  28  *                                      reference counting
  29  *              Kirk Petersen   :       Made this a module
  30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31  *                                      Lots of bug fixes.
  32  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
  33  *                                      by above two patches.
  34  *           Andrea Arcangeli   :       If possible we block in connect(2)
  35  *                                      if the max backlog of the listen socket
  36  *                                      has been reached. This won't break
  37  *                                      old apps and it will avoid huge amount
  38  *                                      of socks hashed (this for unix_gc()
  39  *                                      performance reasons).
  40  *                                      Security fix that limits the max
  41  *                                      number of socks to 2*max_files and
  42  *                                      the number of skb queueable in the
  43  *                                      dgram receiver.
  44  *              Artur Skawina   :       Hash function optimizations
  45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46  *            Malcolm Beattie   :       Set peercred for socketpair
  47  *           Michal Ostrowski   :       Module initialization cleanup.
  48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49  *                                      the core infrastructure is doing that
  50  *                                      for all net proto families now (2.5.69+)
  51  *
  52  *
  53  * Known differences from reference BSD that was tested:
  54  *
  55  *      [TO FIX]
  56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57  *              other the moment one end closes.
  58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60  *      [NOT TO FIX]
  61  *      accept() returns a path name even if the connecting socket has closed
  62  *              in the meantime (BSD loses the path and gives up).
  63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66  *      BSD af_unix apparently has connect forgetting to block properly.
  67  *              (need to check this with the POSIX spec in detail)
  68  *
  69  * Differences from 2.0.0-11-... (ANK)
  70  *      Bug fixes and improvements.
  71  *              - client shutdown killed server socket.
  72  *              - removed all useless cli/sti pairs.
  73  *
  74  *      Semantic changes/extensions.
  75  *              - generic control message passing.
  76  *              - SCM_CREDENTIALS control message.
  77  *              - "Abstract" (not FS based) socket bindings.
  78  *                Abstract names are sequences of bytes (not zero terminated)
  79  *                started by 0, so that this name space does not intersect
  80  *                with BSD names.
  81  */
  82
  83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  84
  85 #include <linux/module.h>
  86 #include <linux/kernel.h>
  87 #include <linux/signal.h>
  88 #include <linux/sched.h>
  89 #include <linux/errno.h>
  90 #include <linux/string.h>
  91 #include <linux/stat.h>
  92 #include <linux/dcache.h>
  93 #include <linux/namei.h>
  94 #include <linux/socket.h>
  95 #include <linux/un.h>
  96 #include <linux/fcntl.h>
  97 #include <linux/termios.h>
  98 #include <linux/sockios.h>
  99 #include <linux/net.h>
 100 #include <linux/in.h>
 101 #include <linux/fs.h>
 102 #include <linux/slab.h>
 103 #include <asm/uaccess.h>
 104 #include <linux/skbuff.h>
 105 #include <linux/netdevice.h>
 106 #include <net/net_namespace.h>
 107 #include <net/sock.h>
 108 #include <net/tcp_states.h>
 109 #include <net/af_unix.h>
 110 #include <linux/proc_fs.h>
 111 #include <linux/seq_file.h>
 112 #include <net/scm.h>
 113 #include <linux/init.h>
 114 #include <linux/poll.h>
 115 #include <linux/rtnetlink.h>
 116 #include <linux/mount.h>
 117 #include <net/checksum.h>
 118 #include <linux/security.h>
 119 #include <linux/freezer.h>
 120
 121 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 122 EXPORT_SYMBOL_GPL(unix_socket_table);
 123 DEFINE_SPINLOCK(unix_table_lock);
 124 EXPORT_SYMBOL_GPL(unix_table_lock);
 125 static atomic_long_t unix_nr_socks;
 126
 127
 128 static struct hlist_head *unix_sockets_unbound(void *addr)
 129 {
 130         unsigned long hash = (unsigned long)addr;
 131
 132         hash ^= hash >> 16;
 133         hash ^= hash >> 8;
 134         hash %= UNIX_HASH_SIZE;
 135         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 136 }
 137
 138 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 139
 140 #ifdef CONFIG_SECURITY_NETWORK
 141 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 142 {
 143         UNIXCB(skb).secid = scm->secid;
 144 }
 145
 146 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 147 {
 148         scm->secid = UNIXCB(skb).secid;
 149 }
 150
 151 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 152 {
 153         return (scm->secid == UNIXCB(skb).secid);
 154 }
 155 #else
 156 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 157 { }
 158
 159 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 160 { }
 161
 162 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 163 {
 164         return true;
 165 }
 166 #endif /* CONFIG_SECURITY_NETWORK */
 167
 168 /*
 169  *  SMP locking strategy:
 170  *    hash table is protected with spinlock unix_table_lock
 171  *    each socket state is protected by separate spin lock.
 172  */
 173
 174 static inline unsigned int unix_hash_fold(__wsum n)
 175 {
 176         unsigned int hash = (__force unsigned int)csum_fold(n);
 177
 178         hash ^= hash>>8;
 179         return hash&(UNIX_HASH_SIZE-1);
 180 }
 181
 182 #define unix_peer(sk) (unix_sk(sk)->peer)
 183
 184 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 185 {
 186         return unix_peer(osk) == sk;
 187 }
 188
 189 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 190 {
 191         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 192 }
 193
 194 static inline int unix_recvq_full(struct sock const *sk)
 195 {
 196         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 197 }
 198
 199 struct sock *unix_peer_get(struct sock *s)
 200 {
 201         struct sock *peer;
 202
 203         unix_state_lock(s);
 204         peer = unix_peer(s);
 205         if (peer)
 206                 sock_hold(peer);
 207         unix_state_unlock(s);
 208         return peer;
 209 }
 210 EXPORT_SYMBOL_GPL(unix_peer_get);
 211
 212 static inline void unix_release_addr(struct unix_address *addr)
 213 {
 214         if (atomic_dec_and_test(&addr->refcnt))
 215                 kfree(addr);
 216 }
 217
 218 /*
 219  *      Check unix socket name:
 220  *              - should be not zero length.
 221  *              - if started by not zero, should be NULL terminated (FS object)
 222  *              - if started by zero, it is abstract name.
 223  */
 224
 225 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 226 {
 227         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 228                 return -EINVAL;
 229         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 230                 return -EINVAL;
 231         if (sunaddr->sun_path[0]) {
 232                 /*
 233                  * This may look like an off by one error but it is a bit more
 234                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 235                  * sun_path[108] doesn't as such exist.  However in kernel space
 236                  * we are guaranteed that it is a valid memory location in our
 237                  * kernel address buffer.
 238                  */
 239                 ((char *)sunaddr)[len] = 0;
 240                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 241                 return len;
 242         }
 243
 244         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 245         return len;
 246 }
 247
 248 static void __unix_remove_socket(struct sock *sk)
 249 {
 250         sk_del_node_init(sk);
 251 }
 252
 253 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 254 {
 255         WARN_ON(!sk_unhashed(sk));
 256         sk_add_node(sk, list);
 257 }
 258
 259 static inline void unix_remove_socket(struct sock *sk)
 260 {
 261         spin_lock(&unix_table_lock);
 262         __unix_remove_socket(sk);
 263         spin_unlock(&unix_table_lock);
 264 }
 265
 266 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 267 {
 268         spin_lock(&unix_table_lock);
 269         __unix_insert_socket(list, sk);
 270         spin_unlock(&unix_table_lock);
 271 }
 272
 273 static struct sock *__unix_find_socket_byname(struct net *net,
 274                                               struct sockaddr_un *sunname,
 275                                               int len, int type, unsigned int hash)
 276 {
 277         struct sock *s;
 278
 279         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 280                 struct unix_sock *u = unix_sk(s);
 281
 282                 if (!net_eq(sock_net(s), net))
 283                         continue;
 284
 285                 if (u->addr->len == len &&
 286                     !memcmp(u->addr->name, sunname, len))
 287                         goto found;
 288         }
 289         s = NULL;
 290 found:
 291         return s;
 292 }
 293
 294 static inline struct sock *unix_find_socket_byname(struct net *net,
 295                                                    struct sockaddr_un *sunname,
 296                                                    int len, int type,
 297                                                    unsigned int hash)
 298 {
 299         struct sock *s;
 300
 301         spin_lock(&unix_table_lock);
 302         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 303         if (s)
 304                 sock_hold(s);
 305         spin_unlock(&unix_table_lock);
 306         return s;
 307 }
 308
 309 static struct sock *unix_find_socket_byinode(struct inode *i)
 310 {
 311         struct sock *s;
 312
 313         spin_lock(&unix_table_lock);
 314         sk_for_each(s,
 315                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 316                 struct dentry *dentry = unix_sk(s)->path.dentry;
 317
 318                 if (dentry && d_real_inode(dentry) == i) {
 319                         sock_hold(s);
 320                         goto found;
 321                 }
 322         }
 323         s = NULL;
 324 found:
 325         spin_unlock(&unix_table_lock);
 326         return s;
 327 }
 328
 329 /* Support code for asymmetrically connected dgram sockets
 330  *
 331  * If a datagram socket is connected to a socket not itself connected
 332  * to the first socket (eg, /dev/log), clients may only enqueue more
 333  * messages if the present receive queue of the server socket is not
 334  * "too large". This means there's a second writeability condition
 335  * poll and sendmsg need to test. The dgram recv code will do a wake
 336  * up on the peer_wait wait queue of a socket upon reception of a
 337  * datagram which needs to be propagated to sleeping would-be writers
 338  * since these might not have sent anything so far. This can't be
 339  * accomplished via poll_wait because the lifetime of the server
 340  * socket might be less than that of its clients if these break their
 341  * association with it or if the server socket is closed while clients
 342  * are still connected to it and there's no way to inform "a polling
 343  * implementation" that it should let go of a certain wait queue
 344  *
 345  * In order to propagate a wake up, a wait_queue_t of the client
 346  * socket is enqueued on the peer_wait queue of the server socket
 347  * whose wake function does a wake_up on the ordinary client socket
 348  * wait queue. This connection is established whenever a write (or
 349  * poll for write) hit the flow control condition and broken when the
 350  * association to the server socket is dissolved or after a wake up
 351  * was relayed.
 352  */
 353
 354 static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
 355                                       void *key)
 356 {
 357         struct unix_sock *u;
 358         wait_queue_head_t *u_sleep;
 359
 360         u = container_of(q, struct unix_sock, peer_wake);
 361
 362         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 363                             q);
 364         u->peer_wake.private = NULL;
 365
 366         /* relaying can only happen while the wq still exists */
 367         u_sleep = sk_sleep(&u->sk);
 368         if (u_sleep)
 369                 wake_up_interruptible_poll(u_sleep, key);
 370
 371         return 0;
 372 }
 373
 374 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 375 {
 376         struct unix_sock *u, *u_other;
 377         int rc;
 378
 379         u = unix_sk(sk);
 380         u_other = unix_sk(other);
 381         rc = 0;
 382         spin_lock(&u_other->peer_wait.lock);
 383
 384         if (!u->peer_wake.private) {
 385                 u->peer_wake.private = other;
 386                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 387
 388                 rc = 1;
 389         }
 390
 391         spin_unlock(&u_other->peer_wait.lock);
 392         return rc;
 393 }
 394
 395 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 396                                             struct sock *other)
 397 {
 398         struct unix_sock *u, *u_other;
 399
 400         u = unix_sk(sk);
 401         u_other = unix_sk(other);
 402         spin_lock(&u_other->peer_wait.lock);
 403
 404         if (u->peer_wake.private == other) {
 405                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 406                 u->peer_wake.private = NULL;
 407         }
 408
 409         spin_unlock(&u_other->peer_wait.lock);
 410 }
 411
 412 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 413                                                    struct sock *other)
 414 {
 415         unix_dgram_peer_wake_disconnect(sk, other);
 416         wake_up_interruptible_poll(sk_sleep(sk),
 417                                    POLLOUT |
 418                                    POLLWRNORM |
 419                                    POLLWRBAND);
 420 }
 421
 422 /* preconditions:
 423  *      - unix_peer(sk) == other
 424  *      - association is stable
 425  */
 426 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 427 {
 428         int connected;
 429
 430         connected = unix_dgram_peer_wake_connect(sk, other);
 431
 432         if (unix_recvq_full(other))
 433                 return 1;
 434
 435         if (connected)
 436                 unix_dgram_peer_wake_disconnect(sk, other);
 437
 438         return 0;
 439 }
 440
 441 static int unix_writable(const struct sock *sk)
 442 {
 443         return sk->sk_state != TCP_LISTEN &&
 444                (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 445 }
 446
 447 static void unix_write_space(struct sock *sk)
 448 {
 449         struct socket_wq *wq;
 450
 451         rcu_read_lock();
 452         if (unix_writable(sk)) {
 453                 wq = rcu_dereference(sk->sk_wq);
 454                 if (skwq_has_sleeper(wq))
 455                         wake_up_interruptible_sync_poll(&wq->wait,
 456                                 POLLOUT | POLLWRNORM | POLLWRBAND);
 457                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 458         }
 459         rcu_read_unlock();
 460 }
 461
 462 /* When dgram socket disconnects (or changes its peer), we clear its receive
 463  * queue of packets arrived from previous peer. First, it allows to do
 464  * flow control based only on wmem_alloc; second, sk connected to peer
 465  * may receive messages only from that peer. */
 466 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 467 {
 468         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 469                 skb_queue_purge(&sk->sk_receive_queue);
 470                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 471
 472                 /* If one link of bidirectional dgram pipe is disconnected,
 473                  * we signal error. Messages are lost. Do not make this,
 474                  * when peer was not connected to us.
 475                  */
 476                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 477                         other->sk_err = ECONNRESET;
 478                         other->sk_error_report(other);
 479                 }
 480         }
 481 }
 482
 483 static void unix_sock_destructor(struct sock *sk)
 484 {
 485         struct unix_sock *u = unix_sk(sk);
 486
 487         skb_queue_purge(&sk->sk_receive_queue);
 488
 489         WARN_ON(atomic_read(&sk->sk_wmem_alloc));
 490         WARN_ON(!sk_unhashed(sk));
 491         WARN_ON(sk->sk_socket);
 492         if (!sock_flag(sk, SOCK_DEAD)) {
 493                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 494                 return;
 495         }
 496
 497         if (u->addr)
 498                 unix_release_addr(u->addr);
 499
 500         atomic_long_dec(&unix_nr_socks);
 501         local_bh_disable();
 502         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 503         local_bh_enable();
 504 #ifdef UNIX_REFCNT_DEBUG
 505         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 506                 atomic_long_read(&unix_nr_socks));
 507 #endif
 508 }
 509
 510 static void unix_release_sock(struct sock *sk, int embrion)
 511 {
 512         struct unix_sock *u = unix_sk(sk);
 513         struct path path;
 514         struct sock *skpair;
 515         struct sk_buff *skb;
 516         int state;
 517
 518         unix_remove_socket(sk);
 519
 520         /* Clear state */
 521         unix_state_lock(sk);
 522         sock_orphan(sk);
 523         sk->sk_shutdown = SHUTDOWN_MASK;
 524         path         = u->path;
 525         u->path.dentry = NULL;
 526         u->path.mnt = NULL;
 527         state = sk->sk_state;
 528         sk->sk_state = TCP_CLOSE;
 529         unix_state_unlock(sk);
 530
 531         wake_up_interruptible_all(&u->peer_wait);
 532
 533         skpair = unix_peer(sk);
 534
 535         if (skpair != NULL) {
 536                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 537                         unix_state_lock(skpair);
 538                         /* No more writes */
 539                         skpair->sk_shutdown = SHUTDOWN_MASK;
 540                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 541                                 skpair->sk_err = ECONNRESET;
 542                         unix_state_unlock(skpair);
 543                         skpair->sk_state_change(skpair);
 544                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 545                 }
 546
 547                 unix_dgram_peer_wake_disconnect(sk, skpair);
 548                 sock_put(skpair); /* It may now die */
 549                 unix_peer(sk) = NULL;
 550         }
 551
 552         /* Try to flush out this socket. Throw out buffers at least */
 553
 554         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 555                 if (state == TCP_LISTEN)
 556                         unix_release_sock(skb->sk, 1);
 557                 /* passed fds are erased in the kfree_skb hook        */
 558                 UNIXCB(skb).consumed = skb->len;
 559                 kfree_skb(skb);
 560         }
 561
 562         if (path.dentry)
 563                 path_put(&path);
 564
 565         sock_put(sk);
 566
 567         /* ---- Socket is dead now and most probably destroyed ---- */
 568
 569         /*
 570          * Fixme: BSD difference: In BSD all sockets connected to us get
 571          *        ECONNRESET and we die on the spot. In Linux we behave
 572          *        like files and pipes do and wait for the last
 573          *        dereference.
 574          *
 575          * Can't we simply set sock->err?
 576          *
 577          *        What the above comment does talk about? --ANK(980817)
 578          */
 579
 580         if (unix_tot_inflight)
 581                 unix_gc();              /* Garbage collect fds */
 582 }
 583
 584 static void init_peercred(struct sock *sk)
 585 {
 586         put_pid(sk->sk_peer_pid);
 587         if (sk->sk_peer_cred)
 588                 put_cred(sk->sk_peer_cred);
 589         sk->sk_peer_pid  = get_pid(task_tgid(current));
 590         sk->sk_peer_cred = get_current_cred();
 591 }
 592
 593 static void copy_peercred(struct sock *sk, struct sock *peersk)
 594 {
 595         put_pid(sk->sk_peer_pid);
 596         if (sk->sk_peer_cred)
 597                 put_cred(sk->sk_peer_cred);
 598         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 599         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 600 }
 601
 602 static int unix_listen(struct socket *sock, int backlog)
 603 {
 604         int err;
 605         struct sock *sk = sock->sk;
 606         struct unix_sock *u = unix_sk(sk);
 607         struct pid *old_pid = NULL;
 608
 609         err = -EOPNOTSUPP;
 610         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 611                 goto out;       /* Only stream/seqpacket sockets accept */
 612         err = -EINVAL;
 613         if (!u->addr)
 614                 goto out;       /* No listens on an unbound socket */
 615         unix_state_lock(sk);
 616         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 617                 goto out_unlock;
 618         if (backlog > sk->sk_max_ack_backlog)
 619                 wake_up_interruptible_all(&u->peer_wait);
 620         sk->sk_max_ack_backlog  = backlog;
 621         sk->sk_state            = TCP_LISTEN;
 622         /* set credentials so connect can copy them */
 623         init_peercred(sk);
 624         err = 0;
 625
 626 out_unlock:
 627         unix_state_unlock(sk);
 628         put_pid(old_pid);
 629 out:
 630         return err;
 631 }
 632
 633 static int unix_release(struct socket *);
 634 static int unix_bind(struct socket *, struct sockaddr *, int);
 635 static int unix_stream_connect(struct socket *, struct sockaddr *,
 636                                int addr_len, int flags);
 637 static int unix_socketpair(struct socket *, struct socket *);
 638 static int unix_accept(struct socket *, struct socket *, int);
 639 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 640 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
 641 static unsigned int unix_dgram_poll(struct file *, struct socket *,
 642                                     poll_table *);
 643 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 644 static int unix_shutdown(struct socket *, int);
 645 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 646 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 647 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 648                                     size_t size, int flags);
 649 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 650                                        struct pipe_inode_info *, size_t size,
 651                                        unsigned int flags);
 652 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 653 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 654 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 655                               int, int);
 656 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 657 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 658                                   int);
 659
 660 static int unix_set_peek_off(struct sock *sk, int val)
 661 {
 662         struct unix_sock *u = unix_sk(sk);
 663
 664         if (mutex_lock_interruptible(&u->readlock))
 665                 return -EINTR;
 666
 667         sk->sk_peek_off = val;
 668         mutex_unlock(&u->readlock);
 669
 670         return 0;
 671 }
 672
 673
 674 static const struct proto_ops unix_stream_ops = {
 675         .family =       PF_UNIX,
 676         .owner =        THIS_MODULE,
 677         .release =      unix_release,
 678         .bind =         unix_bind,
 679         .connect =      unix_stream_connect,
 680         .socketpair =   unix_socketpair,
 681         .accept =       unix_accept,
 682         .getname =      unix_getname,
 683         .poll =         unix_poll,
 684         .ioctl =        unix_ioctl,
 685         .listen =       unix_listen,
 686         .shutdown =     unix_shutdown,
 687         .setsockopt =   sock_no_setsockopt,
 688         .getsockopt =   sock_no_getsockopt,
 689         .sendmsg =      unix_stream_sendmsg,
 690         .recvmsg =      unix_stream_recvmsg,
 691         .mmap =         sock_no_mmap,
 692         .sendpage =     unix_stream_sendpage,
 693         .splice_read =  unix_stream_splice_read,
 694         .set_peek_off = unix_set_peek_off,
 695 };
 696
 697 static const struct proto_ops unix_dgram_ops = {
 698         .family =       PF_UNIX,
 699         .owner =        THIS_MODULE,
 700         .release =      unix_release,
 701         .bind =         unix_bind,
 702         .connect =      unix_dgram_connect,
 703         .socketpair =   unix_socketpair,
 704         .accept =       sock_no_accept,
 705         .getname =      unix_getname,
 706         .poll =         unix_dgram_poll,
 707         .ioctl =        unix_ioctl,
 708         .listen =       sock_no_listen,
 709         .shutdown =     unix_shutdown,
 710         .setsockopt =   sock_no_setsockopt,
 711         .getsockopt =   sock_no_getsockopt,
 712         .sendmsg =      unix_dgram_sendmsg,
 713         .recvmsg =      unix_dgram_recvmsg,
 714         .mmap =         sock_no_mmap,
 715         .sendpage =     sock_no_sendpage,
 716         .set_peek_off = unix_set_peek_off,
 717 };
 718
 719 static const struct proto_ops unix_seqpacket_ops = {
 720         .family =       PF_UNIX,
 721         .owner =        THIS_MODULE,
 722         .release =      unix_release,
 723         .bind =         unix_bind,
 724         .connect =      unix_stream_connect,
 725         .socketpair =   unix_socketpair,
 726         .accept =       unix_accept,
 727         .getname =      unix_getname,
 728         .poll =         unix_dgram_poll,
 729         .ioctl =        unix_ioctl,
 730         .listen =       unix_listen,
 731         .shutdown =     unix_shutdown,
 732         .setsockopt =   sock_no_setsockopt,
 733         .getsockopt =   sock_no_getsockopt,
 734         .sendmsg =      unix_seqpacket_sendmsg,
 735         .recvmsg =      unix_seqpacket_recvmsg,
 736         .mmap =         sock_no_mmap,
 737         .sendpage =     sock_no_sendpage,
 738         .set_peek_off = unix_set_peek_off,
 739 };
 740
 741 static struct proto unix_proto = {
 742         .name                   = "UNIX",
 743         .owner                  = THIS_MODULE,
 744         .obj_size               = sizeof(struct unix_sock),
 745 };
 746
 747 /*
 748  * AF_UNIX sockets do not interact with hardware, hence they
 749  * dont trigger interrupts - so it's safe for them to have
 750  * bh-unsafe locking for their sk_receive_queue.lock. Split off
 751  * this special lock-class by reinitializing the spinlock key:
 752  */
 753 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
 754
 755 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 756 {
 757         struct sock *sk = NULL;
 758         struct unix_sock *u;
 759
 760         atomic_long_inc(&unix_nr_socks);
 761         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 762                 goto out;
 763
 764         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 765         if (!sk)
 766                 goto out;
 767
 768         sock_init_data(sock, sk);
 769         lockdep_set_class(&sk->sk_receive_queue.lock,
 770                                 &af_unix_sk_receive_queue_lock_key);
 771
 772         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 773         sk->sk_write_space      = unix_write_space;
 774         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 775         sk->sk_destruct         = unix_sock_destructor;
 776         u         = unix_sk(sk);
 777         u->path.dentry = NULL;
 778         u->path.mnt = NULL;
 779         spin_lock_init(&u->lock);
 780         atomic_long_set(&u->inflight, 0);
 781         INIT_LIST_HEAD(&u->link);
 782         mutex_init(&u->readlock); /* single task reading lock */
 783         init_waitqueue_head(&u->peer_wait);
 784         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 785         unix_insert_socket(unix_sockets_unbound(sk), sk);
 786 out:
 787         if (sk == NULL)
 788                 atomic_long_dec(&unix_nr_socks);
 789         else {
 790                 local_bh_disable();
 791                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 792                 local_bh_enable();
 793         }
 794         return sk;
 795 }
 796
 797 static int unix_create(struct net *net, struct socket *sock, int protocol,
 798                        int kern)
 799 {
 800         if (protocol && protocol != PF_UNIX)
 801                 return -EPROTONOSUPPORT;
 802
 803         sock->state = SS_UNCONNECTED;
 804
 805         switch (sock->type) {
 806         case SOCK_STREAM:
 807                 sock->ops = &unix_stream_ops;
 808                 break;
 809                 /*
 810                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 811                  *      nothing uses it.
 812                  */
 813         case SOCK_RAW:
 814                 sock->type = SOCK_DGRAM;
 815         case SOCK_DGRAM:
 816                 sock->ops = &unix_dgram_ops;
 817                 break;
 818         case SOCK_SEQPACKET:
 819                 sock->ops = &unix_seqpacket_ops;
 820                 break;
 821         default:
 822                 return -ESOCKTNOSUPPORT;
 823         }
 824
 825         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 826 }
 827
 828 static int unix_release(struct socket *sock)
 829 {
 830         struct sock *sk = sock->sk;
 831
 832         if (!sk)
 833                 return 0;
 834
 835         unix_release_sock(sk, 0);
 836         sock->sk = NULL;
 837
 838         return 0;
 839 }
 840
/*
 * Give an unbound socket an automatically chosen address.
 *
 * The name is built at sun_path + 1 from a hexadecimal rendering of the
 * file-wide "ordernum" counter; sun_path[0] stays 0 because the address
 * is allocated with kzalloc().  Candidate names are tried until one is
 * not found by __unix_find_socket_byname().
 *
 * Returns 0 on success or if the socket already has an address, the
 * error from mutex_lock_interruptible() if taking u->readlock fails,
 * -ENOMEM on allocation failure, or -ENOSPC when no free name was found
 * after 0xFFFFF retries.
 */
static int unix_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        static u32 ordernum = 1;
        struct unix_address *addr;
        int err;
        unsigned int retries = 0;

        err = mutex_lock_interruptible(&u->readlock);
        if (err)
                return err;

        /* Already bound: nothing to do. */
        err = 0;
        if (u->addr)
                goto out;

        err = -ENOMEM;
        /* Room for sun_family plus 16 bytes of sun_path. */
        addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
        if (!addr)
                goto out;

        addr->name->sun_family = AF_UNIX;
        atomic_set(&addr->refcnt, 1);

retry:
        /* Length = printed digits + the leading NUL byte + sun_family. */
        addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
        addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

        spin_lock(&unix_table_lock);
        /* Advance the counter, wrapping at 20 bits. */
        ordernum = (ordernum+1)&0xFFFFF;

        if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
                                      addr->hash)) {
                spin_unlock(&unix_table_lock);
                /*
                 * __unix_find_socket_byname() may take a long time if many
                 * names are already in use.
                 */
                cond_resched();
                /* Give up if all names seem to be in use. */
                if (retries++ == 0xFFFFF) {
                        err = -ENOSPC;
                        kfree(addr);
                        goto out;
                }
                goto retry;
        }
        /* Fold the socket type into the hash to pick the final bucket. */
        addr->hash ^= sk->sk_type;

        /* Move the socket from its current chain to the bucket for the name. */
        __unix_remove_socket(sk);
        u->addr = addr;
        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
        spin_unlock(&unix_table_lock);
        err = 0;

out:    mutex_unlock(&u->readlock);
        return err;
}
 901
 902 static struct sock *unix_find_other(struct net *net,
 903                                     struct sockaddr_un *sunname, int len,
 904                                     int type, unsigned int hash, int *error)
 905 {
 906         struct sock *u;
 907         struct path path;
 908         int err = 0;
 909
 910         if (sunname->sun_path[0]) {
 911                 struct inode *inode;
 912                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 913                 if (err)
 914                         goto fail;
 915                 inode = d_real_inode(path.dentry);
 916                 err = inode_permission(inode, MAY_WRITE);
 917                 if (err)
 918                         goto put_fail;
 919
 920                 err = -ECONNREFUSED;
 921                 if (!S_ISSOCK(inode->i_mode))
 922                         goto put_fail;
 923                 u = unix_find_socket_byinode(inode);
 924                 if (!u)
 925                         goto put_fail;
 926
 927                 if (u->sk_type == type)
 928                         touch_atime(&path);
 929
 930                 path_put(&path);
 931
 932                 err = -EPROTOTYPE;
 933                 if (u->sk_type != type) {
 934                         sock_put(u);
 935                         goto fail;
 936                 }
 937         } else {
 938                 err = -ECONNREFUSED;
 939                 u = unix_find_socket_byname(net, sunname, len, type, hash);
 940                 if (u) {
 941                         struct dentry *dentry;
 942                         dentry = unix_sk(u)->path.dentry;
 943                         if (dentry)
 944                                 touch_atime(&unix_sk(u)->path);
 945                 } else
 946                         goto fail;
 947         }
 948         return u;
 949
 950 put_fail:
 951         path_put(&path);
 952 fail:
 953         *error = err;
 954         return NULL;
 955 }
 956
 957 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 958 {
 959         struct dentry *dentry;
 960         struct path path;
 961         int err = 0;
 962         /*
 963          * Get the parent directory, calculate the hash for last
 964          * component.
 965          */
 966         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 967         err = PTR_ERR(dentry);
 968         if (IS_ERR(dentry))
 969                 return err;
 970
 971         /*
 972          * All right, let's create it.
 973          */
 974         err = security_path_mknod(&path, dentry, mode, 0);
 975         if (!err) {
 976                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 977                 if (!err) {
 978                         res->mnt = mntget(path.mnt);
 979                         res->dentry = dget(dentry);
 980                 }
 981         }
 982         done_path_create(&path, dentry);
 983         return err;
 984 }
 985
 986 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 987 {
 988         struct sock *sk = sock->sk;
 989         struct net *net = sock_net(sk);
 990         struct unix_sock *u = unix_sk(sk);
 991         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 992         char *sun_path = sunaddr->sun_path;
 993         int err;
 994         unsigned int hash;
 995         struct unix_address *addr;
 996         struct hlist_head *list;
 997
 998         err = -EINVAL;
 999         if (sunaddr->sun_family != AF_UNIX)
1000                 goto out;
1001
1002         if (addr_len == sizeof(short)) {
1003                 err = unix_autobind(sock);
1004                 goto out;
1005         }
1006
1007         err = unix_mkname(sunaddr, addr_len, &hash);
1008         if (err < 0)
1009                 goto out;
1010         addr_len = err;
1011
1012         err = mutex_lock_interruptible(&u->readlock);
1013         if (err)
1014                 goto out;
1015
1016         err = -EINVAL;
1017         if (u->addr)
1018                 goto out_up;
1019
1020         err = -ENOMEM;
1021         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1022         if (!addr)
1023                 goto out_up;
1024
1025         memcpy(addr->name, sunaddr, addr_len);
1026         addr->len = addr_len;
1027         addr->hash = hash ^ sk->sk_type;
1028         atomic_set(&addr->refcnt, 1);
1029
1030         if (sun_path[0]) {
1031                 struct path path;
1032                 umode_t mode = S_IFSOCK |
1033                        (SOCK_INODE(sock)->i_mode & ~current_umask());
1034                 err = unix_mknod(sun_path, mode, &path);
1035                 if (err) {
1036                         if (err == -EEXIST)
1037                                 err = -EADDRINUSE;
1038                         unix_release_addr(addr);
1039                         goto out_up;
1040                 }
1041                 addr->hash = UNIX_HASH_SIZE;
1042                 hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1043                 spin_lock(&unix_table_lock);
1044                 u->path = path;
1045                 list = &unix_socket_table[hash];
1046         } else {
1047                 spin_lock(&unix_table_lock);
1048                 err = -EADDRINUSE;
1049                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1050                                               sk->sk_type, hash)) {
1051                         unix_release_addr(addr);
1052                         goto out_unlock;
1053                 }
1054
1055                 list = &unix_socket_table[addr->hash];
1056         }
1057
1058         err = 0;
1059         __unix_remove_socket(sk);
1060         u->addr = addr;
1061         __unix_insert_socket(list, sk);
1062
1063 out_unlock:
1064         spin_unlock(&unix_table_lock);
1065 out_up:
1066         mutex_unlock(&u->readlock);
1067 out:
1068         return err;
1069 }
1070
/*
 * Take the state locks of @sk1 and @sk2.
 *
 * When @sk2 is NULL or the same socket as @sk1, only @sk1 is locked.
 * Otherwise the socket at the lower address is locked first and the
 * other one is taken with the nested lock variant.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first, *second;

        if (!sk2 || unlikely(sk1 == sk2)) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 < sk2) {
                first = sk1;
                second = sk2;
        } else {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1085
/*
 * Release the state locks of @sk1 and @sk2.  Only @sk1 is unlocked when
 * @sk2 is NULL or the same socket as @sk1.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        if (unlikely(sk1 == sk2) || !sk2)
                return;

        unix_state_unlock(sk2);
}
1095
1096 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1097                               int alen, int flags)
1098 {
1099         struct sock *sk = sock->sk;
1100         struct net *net = sock_net(sk);
1101         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1102         struct sock *other;
1103         unsigned int hash;
1104         int err;
1105
1106         if (addr->sa_family != AF_UNSPEC) {
1107                 err = unix_mkname(sunaddr, alen, &hash);
1108                 if (err < 0)
1109                         goto out;
1110                 alen = err;
1111
1112                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1113                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1114                         goto out;
1115
1116 restart:
1117                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1118                 if (!other)
1119                         goto out;
1120
1121                 unix_state_double_lock(sk, other);
1122
1123                 /* Apparently VFS overslept socket death. Retry. */
1124                 if (sock_flag(other, SOCK_DEAD)) {
1125                         unix_state_double_unlock(sk, other);
1126                         sock_put(other);
1127                         goto restart;
1128                 }
1129
1130                 err = -EPERM;
1131                 if (!unix_may_send(sk, other))
1132                         goto out_unlock;
1133
1134                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1135                 if (err)
1136                         goto out_unlock;
1137
1138         } else {
1139                 /*
1140                  *      1003.1g breaking connected state with AF_UNSPEC
1141                  */
1142                 other = NULL;
1143                 unix_state_double_lock(sk, other);
1144         }
1145
1146         /*
1147          * If it was connected, reconnect.
1148          */
1149         if (unix_peer(sk)) {
1150                 struct sock *old_peer = unix_peer(sk);
1151                 unix_peer(sk) = other;
1152                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1153
1154                 unix_state_double_unlock(sk, other);
1155
1156                 if (other != old_peer)
1157                         unix_dgram_disconnected(sk, old_peer);
1158                 sock_put(old_peer);
1159         } else {
1160                 unix_peer(sk) = other;
1161                 unix_state_double_unlock(sk, other);
1162         }
1163         return 0;
1164
1165 out_unlock:
1166         unix_state_double_unlock(sk, other);
1167         sock_put(other);
1168 out:
1169         return err;
1170 }
1171
/*
 * Wait on @other's peer_wait queue for up to @timeo.
 *
 * This function releases @other's state lock (unix_state_unlock()) but
 * does not take it, so the caller must hold it on entry.  The task
 * sleeps only if @other is not dead, has not shut down receiving and
 * its receive queue is still full.
 *
 * Returns the value from schedule_timeout() if the task slept, otherwise
 * @timeo unchanged.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
        struct unix_sock *u = unix_sk(other);
        int sched;
        DEFINE_WAIT(wait);

        /* Queue ourselves before testing the condition and dropping the lock. */
        prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

        sched = !sock_flag(other, SOCK_DEAD) &&
                !(other->sk_shutdown & RCV_SHUTDOWN) &&
                unix_recvq_full(other);

        unix_state_unlock(other);

        if (sched)
                timeo = schedule_timeout(timeo);

        finish_wait(&u->peer_wait, &wait);
        return timeo;
}
1192
/*
 * connect() for connection-oriented AF_UNIX sockets.
 *
 * A new sock (newsk) that will become the server-side end of the
 * connection and a one-byte skb are allocated up front.  The listening
 * socket is then looked up and locked; if its receive queue is full the
 * caller waits (subject to the send timeout) and retries.  Once both
 * state locks are held, newsk is set up as the peer of @sock's sk and
 * the skb is queued on the listener's receive queue.
 *
 * Returns 0 on success or a negative errno (-ENOMEM, -ECONNREFUSED,
 * -EAGAIN, -EISCONN, -EINVAL, a signal-related error from
 * sock_intr_errno(), or an error from unix_mkname(), unix_autobind(),
 * unix_find_other() or the security hook).
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned int hash;
        int st;
        int err;
        long timeo;

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        /* Sockets passing credentials get a name before connecting. */
        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
            (err = unix_autobind(sock)) != 0)
                goto out;

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /* First of all allocate resources.
           If we will make it after state is locked,
           we will have to recheck all again in any case.
         */

        err = -ENOMEM;

        /* create new sock for complete connection */
        newsk = unix_create1(sock_net(sk), NULL, 0);
        if (newsk == NULL)
                goto out;

        /* Allocate skb for sending to listening sock */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (skb == NULL)
                goto out;

restart:
        /*  Find listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Latch state of peer */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (unix_recvq_full(other)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                /* unix_wait_for_peer() drops other's state lock. */
                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /* Latch our state.

           It is tricky place. We need to grab our state lock and cannot
           drop lock on peer. It is dangerous because deadlock is
           possible. Connect to self case and simultaneous
           attempt to connect are eliminated by checking socket
           state. other is TCP_LISTEN, if sk is TCP_LISTEN we
           check this before attempt to grab lock.

           Well, and we have to recheck the state after socket locked.
         */
        st = sk->sk_state;

        switch (st) {
        case TCP_CLOSE:
                /* This is ok... continue with connect */
                break;
        case TCP_ESTABLISHED:
                /* Socket is already connected */
                err = -EISCONN;
                goto out_unlock;
        default:
                err = -EINVAL;
                goto out_unlock;
        }

        unix_state_lock_nested(sk);

        /* Our state changed while we were taking the lock: start over. */
        if (sk->sk_state != st) {
                unix_state_unlock(sk);
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = security_unix_stream_connect(sk, other, newsk);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* The way is open! Fastly set all the necessary fields... */

        /* newsk's peer pointer holds a reference on sk. */
        sock_hold(sk);
        unix_peer(newsk)        = sk;
        newsk->sk_state         = TCP_ESTABLISHED;
        newsk->sk_type          = sk->sk_type;
        init_peercred(newsk);
        newu = unix_sk(newsk);
        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
        otheru = unix_sk(other);

        /* copy address information from listening to new sock*/
        if (otheru->addr) {
                atomic_inc(&otheru->addr->refcnt);
                newu->addr = otheru->addr;
        }
        if (otheru->path.dentry) {
                path_get(&otheru->path);
                newu->path = otheru->path;
        }

        /* Set credentials */
        copy_peercred(sk, other);

        sock->state     = SS_CONNECTED;
        sk->sk_state    = TCP_ESTABLISHED;
        /* sk's peer pointer holds a reference on newsk. */
        sock_hold(newsk);

        smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
        unix_peer(sk)   = newsk;

        unix_state_unlock(sk);

        /* take ten and send info to listening sock */
        spin_lock(&other->sk_receive_queue.lock);
        __skb_queue_tail(&other->sk_receive_queue, skb);
        spin_unlock(&other->sk_receive_queue.lock);
        unix_state_unlock(other);
        other->sk_data_ready(other);
        sock_put(other);
        return 0;

out_unlock:
        if (other)
                unix_state_unlock(other);

out:
        /* Common cleanup: free whatever was allocated or looked up so far. */
        kfree_skb(skb);
        if (newsk)
                unix_release_sock(newsk, 0);
        if (other)
                sock_put(other);
        return err;
}
1367
1368 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1369 {
1370         struct sock *ska = socka->sk, *skb = sockb->sk;
1371
1372         /* Join our sockets back to back */
1373         sock_hold(ska);
1374         sock_hold(skb);
1375         unix_peer(ska) = skb;
1376         unix_peer(skb) = ska;
1377         init_peercred(ska);
1378         init_peercred(skb);
1379
1380         if (ska->sk_type != SOCK_DGRAM) {
1381                 ska->sk_state = TCP_ESTABLISHED;
1382                 skb->sk_state = TCP_ESTABLISHED;
1383                 socka->state  = SS_CONNECTED;
1384                 sockb->state  = SS_CONNECTED;
1385         }
1386         return 0;
1387 }
1388
1389 static void unix_sock_inherit_flags(const struct socket *old,
1390                                     struct socket *new)
1391 {
1392         if (test_bit(SOCK_PASSCRED, &old->flags))
1393                 set_bit(SOCK_PASSCRED, &new->flags);
1394         if (test_bit(SOCK_PASSSEC, &old->flags))
1395                 set_bit(SOCK_PASSSEC, &new->flags);
1396 }
1397
1398 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1399 {
1400         struct sock *sk = sock->sk;
1401         struct sock *tsk;
1402         struct sk_buff *skb;
1403         int err;
1404
1405         err = -EOPNOTSUPP;
1406         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1407                 goto out;
1408
1409         err = -EINVAL;
1410         if (sk->sk_state != TCP_LISTEN)
1411                 goto out;
1412
1413         /* If socket state is TCP_LISTEN it cannot change (for now...),
1414          * so that no locks are necessary.
1415          */
1416
1417         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1418         if (!skb) {
1419                 /* This means receive shutdown. */
1420                 if (err == 0)
1421                         err = -EINVAL;
1422                 goto out;
1423         }
1424
1425         tsk = skb->sk;
1426         skb_free_datagram(sk, skb);
1427         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1428
1429         /* attach accepted sock to socket */
1430         unix_state_lock(tsk);
1431         newsock->state = SS_CONNECTED;
1432         unix_sock_inherit_flags(sock, newsock);
1433         sock_graft(tsk, newsock);
1434         unix_state_unlock(tsk);
1435         return 0;
1436
1437 out:
1438         return err;
1439 }
1440
1441
1442 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1443 {
1444         struct sock *sk = sock->sk;
1445         struct unix_sock *u;
1446         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1447         int err = 0;
1448
1449         if (peer) {
1450                 sk = unix_peer_get(sk);
1451
1452                 err = -ENOTCONN;
1453                 if (!sk)
1454                         goto out;
1455                 err = 0;
1456         } else {
1457                 sock_hold(sk);
1458         }
1459
1460         u = unix_sk(sk);
1461         unix_state_lock(sk);
1462         if (!u->addr) {
1463                 sunaddr->sun_family = AF_UNIX;
1464                 sunaddr->sun_path[0] = 0;
1465                 *uaddr_len = sizeof(short);
1466         } else {
1467                 struct unix_address *addr = u->addr;
1468
1469                 *uaddr_len = addr->len;
1470                 memcpy(sunaddr, addr->name, *uaddr_len);
1471         }
1472         unix_state_unlock(sk);
1473         sock_put(sk);
1474 out:
1475         return err;
1476 }
1477
1478 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1479 {
1480         int i;
1481
1482         scm->fp = UNIXCB(skb).fp;
1483         UNIXCB(skb).fp = NULL;
1484
1485         for (i = scm->fp->count-1; i >= 0; i--)
1486                 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1487 }
1488
1489 static void unix_destruct_scm(struct sk_buff *skb)
1490 {
1491         struct scm_cookie scm;
1492         memset(&scm, 0, sizeof(scm));
1493         scm.pid  = UNIXCB(skb).pid;
1494         if (UNIXCB(skb).fp)
1495                 unix_detach_fds(&scm, skb);
1496
1497         /* Alas, it calls VFS */
1498         /* So fscking what? fput() had been SMP-safe since the last Summer */
1499         scm_destroy(&scm);
1500         sock_wfree(skb);
1501 }
1502
1503 /*
1504  * The "user->unix_inflight" variable is protected by the garbage
1505  * collection lock, and we just read it locklessly here. If you go
1506  * over the limit, there might be a tiny race in actually noticing
1507  * it across threads. Tough.
1508  */
1509 static inline bool too_many_unix_fds(struct task_struct *p)
1510 {
1511         struct user_struct *user = current_user();
1512
1513         if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1514                 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1515         return false;
1516 }
1517
1518 #define MAX_RECURSION_LEVEL 4
1519
1520 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1521 {
1522         int i;
1523         unsigned char max_level = 0;
1524
1525         if (too_many_unix_fds(current))
1526                 return -ETOOMANYREFS;
1527
1528         for (i = scm->fp->count - 1; i >= 0; i--) {
1529                 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1530
1531                 if (sk)
1532                         max_level = max(max_level,
1533                                         unix_sk(sk)->recursion_level);
1534         }
1535         if (unlikely(max_level > MAX_RECURSION_LEVEL))
1536                 return -ETOOMANYREFS;
1537
1538         /*
1539          * Need to duplicate file references for the sake of garbage
1540          * collection.  Otherwise a socket in the fps might become a
1541          * candidate for GC while the skb is not yet queued.
1542          */
1543         UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1544         if (!UNIXCB(skb).fp)
1545                 return -ENOMEM;
1546
1547         for (i = scm->fp->count - 1; i >= 0; i--)
1548                 unix_inflight(scm->fp->user, scm->fp->fp[i]);
1549         return max_level;
1550 }
1551
1552 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1553 {
1554         int err = 0;
1555
1556         UNIXCB(skb).pid  = get_pid(scm->pid);
1557         UNIXCB(skb).uid = scm->creds.uid;
1558         UNIXCB(skb).gid = scm->creds.gid;
1559         UNIXCB(skb).fp = NULL;
1560         unix_get_secdata(scm, skb);
1561         if (scm->fp && send_fds)
1562                 err = unix_attach_fds(scm, skb);
1563
1564         skb->destructor = unix_destruct_scm;
1565         return err;
1566 }
1567
1568 static bool unix_passcred_enabled(const struct socket *sock,
1569                                   const struct sock *other)
1570 {
1571         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1572                !other->sk_socket ||
1573                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1574 }
1575
1576 /*
1577  * Some apps rely on write() giving SCM_CREDENTIALS
1578  * We include credentials if source or destination socket
1579  * asserted SOCK_PASSCRED.
1580  */
1581 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1582                             const struct sock *other)
1583 {
1584         if (UNIXCB(skb).pid)
1585                 return;
1586         if (unix_passcred_enabled(sock, other)) {
1587                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1588                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1589         }
1590 }
1591
1592 static int maybe_init_creds(struct scm_cookie *scm,
1593                             struct socket *socket,
1594                             const struct sock *other)
1595 {
1596         int err;
1597         struct msghdr msg = { .msg_controllen = 0 };
1598
1599         err = scm_send(socket, &msg, scm, false);
1600         if (err)
1601                 return err;
1602
1603         if (unix_passcred_enabled(socket, other)) {
1604                 scm->pid = get_pid(task_tgid(current));
1605                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1606         }
1607         return err;
1608 }
1609
1610 static bool unix_skb_scm_eq(struct sk_buff *skb,
1611                             struct scm_cookie *scm)
1612 {
1613         const struct unix_skb_parms *u = &UNIXCB(skb);
1614
1615         return u->pid == scm->pid &&
1616                uid_eq(u->uid, scm->creds.uid) &&
1617                gid_eq(u->gid, scm->creds.gid) &&
1618                unix_secdata_eq(scm, skb);
1619 }
1620
1621 /*
1622  *      Send AF_UNIX data.
1623  */
1624
1625 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1626                               size_t len)
1627 {
1628         struct sock *sk = sock->sk;
1629         struct net *net = sock_net(sk);
1630         struct unix_sock *u = unix_sk(sk);
1631         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1632         struct sock *other = NULL;
1633         int namelen = 0; /* fake GCC */
1634         int err;
1635         unsigned int hash;
1636         struct sk_buff *skb;
1637         long timeo;
1638         struct scm_cookie scm;
1639         int max_level;
1640         int data_len = 0;
1641         int sk_locked;
1642
1643         wait_for_unix_gc();
1644         err = scm_send(sock, msg, &scm, false);
1645         if (err < 0)
1646                 return err;
1647
1648         err = -EOPNOTSUPP;
1649         if (msg->msg_flags&MSG_OOB)
1650                 goto out;
1651
1652         if (msg->msg_namelen) {
1653                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1654                 if (err < 0)
1655                         goto out;
1656                 namelen = err;
1657         } else {
1658                 sunaddr = NULL;
1659                 err = -ENOTCONN;
1660                 other = unix_peer_get(sk);
1661                 if (!other)
1662                         goto out;
1663         }
1664
1665         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1666             && (err = unix_autobind(sock)) != 0)
1667                 goto out;
1668
1669         err = -EMSGSIZE;
1670         if (len > sk->sk_sndbuf - 32)
1671                 goto out;
1672
1673         if (len > SKB_MAX_ALLOC) {
1674                 data_len = min_t(size_t,
1675                                  len - SKB_MAX_ALLOC,
1676                                  MAX_SKB_FRAGS * PAGE_SIZE);
1677                 data_len = PAGE_ALIGN(data_len);
1678
1679                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1680         }
1681
1682         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1683                                    msg->msg_flags & MSG_DONTWAIT, &err,
1684                                    PAGE_ALLOC_COSTLY_ORDER);
1685         if (skb == NULL)
1686                 goto out;
1687
1688         err = unix_scm_to_skb(&scm, skb, true);
1689         if (err < 0)
1690                 goto out_free;
1691         max_level = err + 1;
1692
1693         skb_put(skb, len - data_len);
1694         skb->data_len = data_len;
1695         skb->len = len;
1696         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1697         if (err)
1698                 goto out_free;
1699
1700         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1701
1702 restart:
1703         if (!other) {
1704                 err = -ECONNRESET;
1705                 if (sunaddr == NULL)
1706                         goto out_free;
1707
1708                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1709                                         hash, &err);
1710                 if (other == NULL)
1711                         goto out_free;
1712         }
1713
1714         if (sk_filter(other, skb) < 0) {
1715                 /* Toss the packet but do not return any error to the sender */
1716                 err = len;
1717                 goto out_free;
1718         }
1719
1720         sk_locked = 0;
1721         unix_state_lock(other);
1722 restart_locked:
1723         err = -EPERM;
1724         if (!unix_may_send(sk, other))
1725                 goto out_unlock;
1726
1727         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1728                 /*
1729                  *      Check with 1003.1g - what should
1730                  *      datagram error
1731                  */
1732                 unix_state_unlock(other);
1733                 sock_put(other);
1734
1735                 if (!sk_locked)
1736                         unix_state_lock(sk);
1737
1738                 err = 0;
1739                 if (unix_peer(sk) == other) {
1740                         unix_peer(sk) = NULL;
1741                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1742
1743                         unix_state_unlock(sk);
1744
1745                         unix_dgram_disconnected(sk, other);
1746                         sock_put(other);
1747                         err = -ECONNREFUSED;
1748                 } else {
1749                         unix_state_unlock(sk);
1750                 }
1751
1752                 other = NULL;
1753                 if (err)
1754                         goto out_free;
1755                 goto restart;
1756         }
1757
1758         err = -EPIPE;
1759         if (other->sk_shutdown & RCV_SHUTDOWN)
1760                 goto out_unlock;
1761
1762         if (sk->sk_type != SOCK_SEQPACKET) {
1763                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1764                 if (err)
1765                         goto out_unlock;
1766         }
1767
1768         /* other == sk && unix_peer(other) != sk if
1769          * - unix_peer(sk) == NULL, destination address bound to sk
1770          * - unix_peer(sk) == sk by time of get but disconnected before lock
1771          */
1772         if (other != sk &&
1773             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1774                 if (timeo) {
1775                         timeo = unix_wait_for_peer(other, timeo);
1776
1777                         err = sock_intr_errno(timeo);
1778                         if (signal_pending(current))
1779                                 goto out_free;
1780
1781                         goto restart;
1782                 }
1783
1784                 if (!sk_locked) {
1785                         unix_state_unlock(other);
1786                         unix_state_double_lock(sk, other);
1787                 }
1788
1789                 if (unix_peer(sk) != other ||
1790                     unix_dgram_peer_wake_me(sk, other)) {
1791                         err = -EAGAIN;
1792                         sk_locked = 1;
1793                         goto out_unlock;
1794                 }
1795
1796                 if (!sk_locked) {
1797                         sk_locked = 1;
1798                         goto restart_locked;
1799                 }
1800         }
1801
1802         if (unlikely(sk_locked))
1803                 unix_state_unlock(sk);
1804
1805         if (sock_flag(other, SOCK_RCVTSTAMP))
1806                 __net_timestamp(skb);
1807         maybe_add_creds(skb, sock, other);
1808         skb_queue_tail(&other->sk_receive_queue, skb);
1809         if (max_level > unix_sk(other)->recursion_level)
1810                 unix_sk(other)->recursion_level = max_level;
1811         unix_state_unlock(other);
1812         other->sk_data_ready(other);
1813         sock_put(other);
1814         scm_destroy(&scm);
1815         return len;
1816
1817 out_unlock:
1818         if (sk_locked)
1819                 unix_state_unlock(sk);
1820         unix_state_unlock(other);
1821 out_free:
1822         kfree_skb(skb);
1823 out:
1824         if (other)
1825                 sock_put(other);
1826         scm_destroy(&scm);
1827         return err;
1828 }
1829
1830 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1831  * bytes, and a minimum of a full page.
1832  */
1833 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1834
/*
 * Send @len bytes from @msg to the peer of a stream socket.
 *
 * The payload is cut into chunks.  Each chunk is copied into a newly
 * allocated skb which is then appended to the peer's receive queue while
 * the peer's state lock is held.  The scm data (including passed fds) is
 * attached to the first chunk only.
 *
 * Returns the number of bytes queued when that is non-zero, otherwise a
 * negative errno:
 *   -EOPNOTSUPP  MSG_OOB was requested, or a destination address was given
 *                on a socket that is not in TCP_ESTABLISHED state
 *   -EISCONN     a destination address was given on an established socket
 *   -ENOTCONN    the socket has no peer
 *   -EPIPE       this side is shut down for sending, or the peer is dead or
 *                shut down for receiving; SIGPIPE is raised as well when
 *                nothing was sent and MSG_NOSIGNAL is not set
 * Errors from scm_send(), sock_alloc_send_pskb(), unix_scm_to_skb() and
 * skb_copy_datagram_from_iter() are passed through in the same way.
 */
1835 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1836                                size_t len)
1837 {
1838         struct sock *sk = sock->sk;
1839         struct sock *other = NULL;
1840         int err, size;
1841         struct sk_buff *skb;
1842         int sent = 0;
1843         struct scm_cookie scm;
1844         bool fds_sent = false;
1845         int max_level;
1846         int data_len;
1847
1848         wait_for_unix_gc();
1849         err = scm_send(sock, msg, &scm, false);
1850         if (err < 0)
1851                 return err;
1852
1853         err = -EOPNOTSUPP;
1854         if (msg->msg_flags&MSG_OOB)
1855                 goto out_err;
1856
             /* A caller-supplied destination address is always an error here;
              * the destination is the socket's own peer.
              */
1857         if (msg->msg_namelen) {
1858                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1859                 goto out_err;
1860         } else {
1861                 err = -ENOTCONN;
1862                 other = unix_peer(sk);
1863                 if (!other)
1864                         goto out_err;
1865         }
1866
1867         if (sk->sk_shutdown & SEND_SHUTDOWN)
1868                 goto pipe_err;
1869
1870         while (sent < len) {
1871                 size = len - sent;
1872
1873                 /* Keep two messages in the pipe so it schedules better */
1874                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1875
1876                 /* allow fallback to order-0 allocations */
1877                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1878
                     /* data_len is whatever exceeds SKB_MAX_HEAD(0), rounded up
                      * to a page multiple but capped at the chunk size; the
                      * remainder (size - data_len) is requested as header length.
                      */
1879                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1880
1881                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1882
1883                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1884                                            msg->msg_flags & MSG_DONTWAIT, &err,
1885                                            get_order(UNIX_SKB_FRAGS_SZ));
1886                 if (!skb)
1887                         goto out_err;
1888
1889                 /* Only send the fds in the first buffer */
1890                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1891                 if (err < 0) {
1892                         kfree_skb(skb);
1893                         goto out_err;
1894                 }
                     /* err is non-negative here; err + 1 is compared against
                      * the peer's recursion_level once the skb is queued.
                      */
1895                 max_level = err + 1;
1896                 fds_sent = true;
1897
1898                 skb_put(skb, size - data_len);
1899                 skb->data_len = data_len;
1900                 skb->len = size;
1901                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1902                 if (err) {
1903                         kfree_skb(skb);
1904                         goto out_err;
1905                 }
1906
1907                 unix_state_lock(other);
1908
                     /* The peer may have gone away or stopped reading while the
                      * chunk was being built; checked under its state lock.
                      */
1909                 if (sock_flag(other, SOCK_DEAD) ||
1910                     (other->sk_shutdown & RCV_SHUTDOWN))
1911                         goto pipe_err_free;
1912
1913                 maybe_add_creds(skb, sock, other);
1914                 skb_queue_tail(&other->sk_receive_queue, skb);
1915                 if (max_level > unix_sk(other)->recursion_level)
1916                         unix_sk(other)->recursion_level = max_level;
1917                 unix_state_unlock(other);
1918                 other->sk_data_ready(other);
1919                 sent += size;
1920         }
1921
1922         scm_destroy(&scm);
1923
1924         return sent;
1925
     /* Reached with the peer's state lock held and an unqueued skb in hand. */
1926 pipe_err_free:
1927         unix_state_unlock(other);
1928         kfree_skb(skb);
1929 pipe_err:
1930         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1931                 send_sig(SIGPIPE, current, 0);
1932         err = -EPIPE;
1933 out_err:
1934         scm_destroy(&scm);
             /* A partial send takes precedence over the error code. */
1935         return sent ? : err;
1936 }
1937
/*
 * Append one page fragment (@page, @offset, @size) to the receive queue of
 * the peer of an established stream socket.
 *
 * The fragment is added to the skb at the tail of the peer's queue when
 * there is one and unix_skb_scm_eq() accepts it for the sender's scm data.
 * Otherwise, or when skb_append_pagefrags() fails, both locks are dropped,
 * an empty skb is allocated and the locked section is retried with it.
 *
 * Returns @size on success or a negative errno:
 *   -EOPNOTSUPP            MSG_OOB was requested
 *   -ENOTCONN              no peer, or the socket is not TCP_ESTABLISHED
 *   -EAGAIN / -ERESTARTSYS taking the peer's readlock was interrupted
 *                          (-EAGAIN when MSG_DONTWAIT is set)
 *   -EPIPE                 this side is shut down for sending, or the peer
 *                          is dead or shut down for receiving; SIGPIPE is
 *                          raised as well unless MSG_NOSIGNAL is set
 * Errors from sock_alloc_send_pskb(), maybe_init_creds() and
 * unix_scm_to_skb() are passed through.
 */
1938 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1939                                     int offset, size_t size, int flags)
1940 {
1941         int err;
1942         bool send_sigpipe = false;
1943         bool init_scm = true;
1944         struct scm_cookie scm;
1945         struct sock *other, *sk = socket->sk;
1946         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1947
1948         if (flags & MSG_OOB)
1949                 return -EOPNOTSUPP;
1950
1951         other = unix_peer(sk);
1952         if (!other || sk->sk_state != TCP_ESTABLISHED)
1953                 return -ENOTCONN;
1954
             /* Never entered from the top: this block is only reached through
              * "goto alloc_skb" further down, where both the peer's state lock
              * and its readlock are held.  Both are released before the
              * allocation and the readlock is re-taken right after the block.
              */
1955         if (false) {
1956 alloc_skb:
1957                 unix_state_unlock(other);
1958                 mutex_unlock(&unix_sk(other)->readlock);
1959                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1960                                               &err, 0);
1961                 if (!newskb)
1962                         goto err;
1963         }
1964
1965         /* we must acquire readlock as we modify already present
1966          * skbs in the sk_receive_queue and mess with skb->len
1967          */
1968         err = mutex_lock_interruptible(&unix_sk(other)->readlock);
1969         if (err) {
1970                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1971                 goto err;
1972         }
1973
1974         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1975                 err = -EPIPE;
1976                 send_sigpipe = true;
1977                 goto err_unlock;
1978         }
1979
1980         unix_state_lock(other);
1981
1982         if (sock_flag(other, SOCK_DEAD) ||
1983             other->sk_shutdown & RCV_SHUTDOWN) {
1984                 err = -EPIPE;
1985                 send_sigpipe = true;
1986                 goto err_state_unlock;
1987         }
1988
             /* Done once only, even when the locked section is retried;
              * init_scm also tells the error path whether scm needs destroying.
              */
1989         if (init_scm) {
1990                 err = maybe_init_creds(&scm, socket, other);
1991                 if (err)
1992                         goto err_state_unlock;
1993                 init_scm = false;
1994         }
1995
             /* Pick the skb to extend.  "tail" is the queue tail that was
              * rejected before the locks were dropped; if it is still the tail,
              * or there is no usable tail skb, the newly allocated skb is used
              * (allocating one first if that has not happened yet).
              */
1996         skb = skb_peek_tail(&other->sk_receive_queue);
1997         if (tail && tail == skb) {
1998                 skb = newskb;
1999         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2000                 if (newskb) {
2001                         skb = newskb;
2002                 } else {
2003                         tail = skb;
2004                         goto alloc_skb;
2005                 }
2006         } else if (newskb) {
2007                 /* this is fast path, we don't necessarily need to
2008                  * call to kfree_skb even though with newskb == NULL
2009                  * this - does no harm
2010                  */
2011                 consume_skb(newskb);
2012                 newskb = NULL;
2013         }
2014
2015         if (skb_append_pagefrags(skb, page, offset, size)) {
2016                 tail = skb;
2017                 goto alloc_skb;
2018         }
2019
             /* Account the appended bytes on the skb and on the sender. */
2020         skb->len += size;
2021         skb->data_len += size;
2022         skb->truesize += size;
2023         atomic_add(size, &sk->sk_wmem_alloc);
2024
             /* If newskb is still set it is the skb that was extended above
              * and is not on the queue yet: attach the scm data and queue it.
              */
2025         if (newskb) {
2026                 err = unix_scm_to_skb(&scm, skb, false);
2027                 if (err)
2028                         goto err_state_unlock;
2029                 spin_lock(&other->sk_receive_queue.lock);
2030                 __skb_queue_tail(&other->sk_receive_queue, newskb);
2031                 spin_unlock(&other->sk_receive_queue.lock);
2032         }
2033
2034         unix_state_unlock(other);
2035         mutex_unlock(&unix_sk(other)->readlock);
2036
2037         other->sk_data_ready(other);
2038         scm_destroy(&scm);
2039         return size;
2040
2041 err_state_unlock:
2042         unix_state_unlock(other);
2043 err_unlock:
2044         mutex_unlock(&unix_sk(other)->readlock);
2045 err:
2046         kfree_skb(newskb);
2047         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2048                 send_sig(SIGPIPE, current, 0);
2049         if (!init_scm)
2050                 scm_destroy(&scm);
2051         return err;
2052 }
2053
2054 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2055                                   size_t len)
2056 {
2057         int err;
2058         struct sock *sk = sock->sk;
2059
2060         err = sock_error(sk);
2061         if (err)
2062                 return err;
2063
2064         if (sk->sk_state != TCP_ESTABLISHED)
2065                 return -ENOTCONN;
2066
2067         if (msg->msg_namelen)
2068                 msg->msg_namelen = 0;
2069
2070         return unix_dgram_sendmsg(sock, msg, len);
2071 }
2072
2073 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2074                                   size_t size, int flags)
2075 {
2076         struct sock *sk = sock->sk;
2077
2078         if (sk->sk_state != TCP_ESTABLISHED)
2079                 return -ENOTCONN;
2080
2081         return unix_dgram_recvmsg(sock, msg, size, flags);
2082 }
2083
2084 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2085 {
2086         struct unix_sock *u = unix_sk(sk);
2087
2088         if (u->addr) {
2089                 msg->msg_namelen = u->addr->len;
2090                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
2091         }
2092 }
2093
/*
 * Receive one datagram from the socket's receive queue into @msg.
 *
 * Returns the number of bytes copied, or, when MSG_TRUNC is set in @flags,
 * the length of the datagram beyond the peek offset.  On failure a negative
 * errno is returned (-EOPNOTSUPP for MSG_OOB).  A SOCK_SEQPACKET socket
 * whose receive side is shut down returns 0 where it would otherwise
 * return -EAGAIN.
 */
2094 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2095                               size_t size, int flags)
2096 {
2097         struct scm_cookie scm;
2098         struct sock *sk = sock->sk;
2099         struct unix_sock *u = unix_sk(sk);
2100         struct sk_buff *skb, *last;
2101         long timeo;
2102         int err;
2103         int peeked, skip;
2104
2105         err = -EOPNOTSUPP;
2106         if (flags&MSG_OOB)
2107                 goto out;
2108
2109         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2110
             /* Try to take a datagram under readlock.  On success the loop is
              * left with readlock still held (released at out_free).  On
              * -EAGAIN the lock is dropped and, while time remains and
              * __skb_wait_for_more_packets() returns 0, the attempt is repeated.
              */
2111         do {
2112                 mutex_lock(&u->readlock);
2113
2114                 skip = sk_peek_offset(sk, flags);
2115                 skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
2116                                               &last);
2117                 if (skb)
2118                         break;
2119
2120                 mutex_unlock(&u->readlock);
2121
2122                 if (err != -EAGAIN)
2123                         break;
2124         } while (timeo &&
2125                  !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2126
2127         if (!skb) { /* implies readlock unlocked */
2128                 unix_state_lock(sk);
2129                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2130                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2131                     (sk->sk_shutdown & RCV_SHUTDOWN))
2132                         err = 0;
2133                 unix_state_unlock(sk);
2134                 goto out;
2135         }
2136
             /* Wake anything sleeping on this socket's peer_wait queue with
              * write-ready poll flags.
              */
2137         if (wq_has_sleeper(&u->peer_wait))
2138                 wake_up_interruptible_sync_poll(&u->peer_wait,
2139                                                 POLLOUT | POLLWRNORM |
2140                                                 POLLWRBAND);
2141
2142         if (msg->msg_name)
2143                 unix_copy_addr(msg, skb->sk);
2144
             /* Clamp the copy to what is left of the datagram after the peek
              * offset; flag truncation when the buffer is too small.
              */
2145         if (size > skb->len - skip)
2146                 size = skb->len - skip;
2147         else if (size < skb->len - skip)
2148                 msg->msg_flags |= MSG_TRUNC;
2149
2150         err = skb_copy_datagram_msg(skb, skip, msg, size);
2151         if (err)
2152                 goto out_free;
2153
2154         if (sock_flag(sk, SOCK_RCVTSTAMP))
2155                 __sock_recv_timestamp(msg, sk, skb);
2156
2157         memset(&scm, 0, sizeof(scm));
2158
2159         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2160         unix_set_secdata(&scm, skb);
2161
2162         if (!(flags & MSG_PEEK)) {
2163                 if (UNIXCB(skb).fp)
2164                         unix_detach_fds(&scm, skb);
2165
2166                 sk_peek_offset_bwd(sk, skb->len);
2167         } else {
2168                 /* It is questionable: on PEEK we could:
2169                    - do not return fds - good, but too simple 8)
2170                    - return fds, and do not return them on read (old strategy,
2171                      apparently wrong)
2172                    - clone fds (I chose it for now, it is the most universal
2173                      solution)
2174
2175                    POSIX 1003.1g does not actually define this clearly
2176                    at all. POSIX 1003.1g doesn't define a lot of things
2177                    clearly however!
2178
2179                 */
2180
2181                 sk_peek_offset_fwd(sk, size);
2182
2183                 if (UNIXCB(skb).fp)
2184                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2185         }
             /* Note: the test is on the caller's flags, not on msg->msg_flags. */
2186         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2187
2188         scm_recv(sock, msg, &scm, flags);
2189
2190 out_free:
2191         skb_free_datagram(sk, skb);
2192         mutex_unlock(&u->readlock);
2193 out:
2194         return err;
2195 }
2196
2197 /*
2198  *      Sleep until more data has arrived. But check for races..
      *
      *      @last and @last_len are the tail skb of the receive queue and its
      *      length as last seen by the caller (NULL/0 for an empty queue).
      *      The wait ends as soon as the current tail or its length differs
      *      from that snapshot, the socket has an error, the receive side is
      *      shut down, a signal is pending, the timeout has run out, or the
      *      socket is found dead after a wakeup.
      *
      *      Returns the remaining timeout.
2199  */
2200 static long unix_stream_data_wait(struct sock *sk, long timeo,
2201                                   struct sk_buff *last, unsigned int last_len)
2202 {
2203         struct sk_buff *tail;
2204         DEFINE_WAIT(wait);
2205
2206         unix_state_lock(sk);
2207
2208         for (;;) {
                     /* Register on the wait queue before testing the conditions,
                      * all under the state lock.
                      */
2209                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2210
2211                 tail = skb_peek_tail(&sk->sk_receive_queue);
2212                 if (tail != last ||
2213                     (tail && tail->len != last_len) ||
2214                     sk->sk_err ||
2215                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2216                     signal_pending(current) ||
2217                     !timeo)
2218                         break;
2219
2220                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
                     /* The state lock is not held while sleeping. */
2221                 unix_state_unlock(sk);
2222                 timeo = freezable_schedule_timeout(timeo);
2223                 unix_state_lock(sk);
2224
                     /* Leaves SOCKWQ_ASYNC_WAITDATA set on this exit. */
2225                 if (sock_flag(sk, SOCK_DEAD))
2226                         break;
2227
2228                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2229         }
2230
2231         finish_wait(sk_sleep(sk), &wait);
2232         unix_state_unlock(sk);
2233         return timeo;
2234 }
2235
2236 static unsigned int unix_skb_len(const struct sk_buff *skb)
2237 {
2238         return skb->len - UNIXCB(skb).consumed;
2239 }
2240
/*
 * Parameters for one call of unix_stream_read_generic().
 *
 * recv_actor   - called for each chunk as (skb, skip, chunk, state); a
 *                negative return ends the read, any other value is added to
 *                the running byte count
 * socket       - socket being read from
 * msg          - destination message for the recvmsg path; left NULL by the
 *                splice path
 * pipe         - destination pipe, used by the splice actor
 * size         - maximum number of bytes to transfer
 * flags        - MSG_* receive flags
 * splice_flags - flags the splice actor passes to skb_splice_bits()
 */
2241 struct unix_stream_read_state {
2242         int (*recv_actor)(struct sk_buff *, int, int,
2243                           struct unix_stream_read_state *);
2244         struct socket *socket;
2245         struct msghdr *msg;
2246         struct pipe_inode_info *pipe;
2247         size_t size;
2248         int flags;
2249         unsigned int splice_flags;
2250 };
2251
/*
 * Common receive loop for stream sockets; the caller supplies the per-chunk
 * work through state->recv_actor.
 *
 * With u->readlock held, the receive queue is walked and each chunk is
 * handed to the actor.  Without MSG_PEEK the handled bytes are added to
 * UNIXCB(skb).consumed and an skb is unlinked once nothing is left of it;
 * with MSG_PEEK the queue is left alone and only the peek offset moves.
 * When the queue runs dry before the low-water target is met, the function
 * sleeps in unix_stream_data_wait() with readlock released.
 *
 * Returns the byte count when it is non-zero (it is set to -EFAULT if the
 * actor fails before anything was transferred), otherwise err: 0 or a
 * negative errno such as -EINVAL (socket not established), -EOPNOTSUPP
 * (MSG_OOB), -ECONNRESET (socket dead) or -EAGAIN (no data, no time left).
 */
2252 static int unix_stream_read_generic(struct unix_stream_read_state *state)
2253 {
2254         struct scm_cookie scm;
2255         struct socket *sock = state->socket;
2256         struct sock *sk = sock->sk;
2257         struct unix_sock *u = unix_sk(sk);
2258         int copied = 0;
2259         int flags = state->flags;
2260         int noblock = flags & MSG_DONTWAIT;
2261         bool check_creds = false;
2262         int target;
2263         int err = 0;
2264         long timeo;
2265         int skip;
2266         size_t size = state->size;
2267         unsigned int last_len;
2268
2269         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2270                 err = -EINVAL;
2271                 goto out;
2272         }
2273
2274         if (unlikely(flags & MSG_OOB)) {
2275                 err = -EOPNOTSUPP;
2276                 goto out;
2277         }
2278
2279         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2280         timeo = sock_rcvtimeo(sk, noblock);
2281
2282         memset(&scm, 0, sizeof(scm));
2283
2284         /* Lock the socket to prevent queue disordering
2285          * while sleeps in memcpy_tomsg
2286          */
2287         mutex_lock(&u->readlock);
2288
2289         if (flags & MSG_PEEK)
2290                 skip = sk_peek_offset(sk, flags);
2291         else
2292                 skip = 0;
2293
2294         do {
2295                 int chunk;
2296                 bool drop_skb;
2297                 struct sk_buff *skb, *last;
2298
2299 redo:
2300                 unix_state_lock(sk);
2301                 if (sock_flag(sk, SOCK_DEAD)) {
2302                         err = -ECONNRESET;
2303                         goto unlock;
2304                 }
2305                 last = skb = skb_peek(&sk->sk_receive_queue);
2306                 last_len = last ? last->len : 0;
2307 again:
                     /* Nothing (more) to read in the queue.  Entered with the
                      * state lock held.  The "unlock" label at the end of this
                      * block is the shared exit that drops the state lock and
                      * leaves the loop; it is also jumped to from above.
                      */
2308                 if (skb == NULL) {
2309                         unix_sk(sk)->recursion_level = 0;
2310                         if (copied >= target)
2311                                 goto unlock;
2312
2313                         /*
2314                          *      POSIX 1003.1g mandates this order.
2315                          */
2316
2317                         err = sock_error(sk);
2318                         if (err)
2319                                 goto unlock;
2320                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2321                                 goto unlock;
2322
2323                         unix_state_unlock(sk);
2324                         if (!timeo) {
2325                                 err = -EAGAIN;
2326                                 break;
2327                         }
2328
                             /* readlock is released for the duration of the sleep */
2329                         mutex_unlock(&u->readlock);
2330
2331                         timeo = unix_stream_data_wait(sk, timeo, last,
2332                                                       last_len);
2333
                             /* Neither lock is held here, hence the jump past the
                              * common unlock/scm_recv code straight to "out".
                              */
2334                         if (signal_pending(current)) {
2335                                 err = sock_intr_errno(timeo);
2336                                 scm_destroy(&scm);
2337                                 goto out;
2338                         }
2339
2340                         mutex_lock(&u->readlock);
2341                         goto redo;
2342 unlock:
2343                         unix_state_unlock(sk);
2344                         break;
2345                 }
2346
                     /* Step over skbs that lie entirely before the peek offset. */
2347                 while (skip >= unix_skb_len(skb)) {
2348                         skip -= unix_skb_len(skb);
2349                         last = skb;
2350                         last_len = skb->len;
2351                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2352                         if (!skb)
2353                                 goto again;
2354                 }
2355
2356                 unix_state_unlock(sk);
2357
2358                 if (check_creds) {
2359                         /* Never glue messages from different writers */
2360                         if (!unix_skb_scm_eq(skb, &scm))
2361                                 break;
2362                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2363                         /* Copy credentials */
2364                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2365                         unix_set_secdata(&scm, skb);
2366                         check_creds = true;
2367                 }
2368
2369                 /* Copy address just once */
                     /* NOTE(review): sunaddr appears to be declared by
                      * DECLARE_SOCKADDR inside this block, so clearing it would
                      * not stop later iterations from copying the address
                      * again - confirm against the macro definition.
                      */
2370                 if (state->msg && state->msg->msg_name) {
2371                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2372                                          state->msg->msg_name);
2373                         unix_copy_addr(state->msg, skb->sk);
2374                         sunaddr = NULL;
2375                 }
2376
                     /* An extra reference is held on the skb across the actor
                      * call and dropped right after it.
                      */
2377                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2378                 skb_get(skb);
2379                 chunk = state->recv_actor(skb, skip, chunk, state);
2380                 drop_skb = !unix_skb_len(skb);
2381                 /* skb is only safe to use if !drop_skb */
2382                 consume_skb(skb);
2383                 if (chunk < 0) {
2384                         if (copied == 0)
2385                                 copied = -EFAULT;
2386                         break;
2387                 }
2388                 copied += chunk;
2389                 size -= chunk;
2390
2391                 if (drop_skb) {
2392                         /* the skb was touched by a concurrent reader;
2393                          * we should not expect anything from this skb
2394                          * anymore and assume it invalid - we can be
2395                          * sure it was dropped from the socket queue
2396                          *
2397                          * let's report a short read
2398                          */
2399                         err = 0;
2400                         break;
2401                 }
2402
2403                 /* Mark read part of skb as used */
2404                 if (!(flags & MSG_PEEK)) {
2405                         UNIXCB(skb).consumed += chunk;
2406
2407                         sk_peek_offset_bwd(sk, chunk);
2408
2409                         if (UNIXCB(skb).fp)
2410                                 unix_detach_fds(&scm, skb);
2411
                             /* Part of the skb is still unread: stop here. */
2412                         if (unix_skb_len(skb))
2413                                 break;
2414
2415                         skb_unlink(skb, &sk->sk_receive_queue);
2416                         consume_skb(skb);
2417
                             /* Stop once file descriptors have been picked up. */
2418                         if (scm.fp)
2419                                 break;
2420                 } else {
2421                         /* It is questionable, see note in unix_dgram_recvmsg.
2422                          */
2423                         if (UNIXCB(skb).fp)
2424                                 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2425
2426                         sk_peek_offset_fwd(sk, chunk);
2427
2428                         if (UNIXCB(skb).fp)
2429                                 break;
2430
                             /* Peek on into the next skb, from its start; "again"
                              * expects the state lock to be held.
                              */
2431                         skip = 0;
2432                         last = skb;
2433                         last_len = skb->len;
2434                         unix_state_lock(sk);
2435                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2436                         if (skb)
2437                                 goto again;
2438                         unix_state_unlock(sk);
2439                         break;
2440                 }
2441         } while (size);
2442
2443         mutex_unlock(&u->readlock);
             /* Without a msghdr there is nowhere to deliver the scm data. */
2444         if (state->msg)
2445                 scm_recv(sock, state->msg, &scm, flags);
2446         else
2447                 scm_destroy(&scm);
2448 out:
2449         return copied ? : err;
2450 }
2451
2452 static int unix_stream_read_actor(struct sk_buff *skb,
2453                                   int skip, int chunk,
2454                                   struct unix_stream_read_state *state)
2455 {
2456         int ret;
2457
2458         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2459                                     state->msg, chunk);
2460         return ret ?: chunk;
2461 }
2462
2463 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2464                                size_t size, int flags)
2465 {
2466         struct unix_stream_read_state state = {
2467                 .recv_actor = unix_stream_read_actor,
2468                 .socket = sock,
2469                 .msg = msg,
2470                 .size = size,
2471                 .flags = flags
2472         };
2473
2474         return unix_stream_read_generic(&state);
2475 }
2476
2477 static ssize_t skb_unix_socket_splice(struct sock *sk,
2478                                       struct pipe_inode_info *pipe,
2479                                       struct splice_pipe_desc *spd)
2480 {
2481         int ret;
2482         struct unix_sock *u = unix_sk(sk);
2483
2484         mutex_unlock(&u->readlock);
2485         ret = splice_to_pipe(pipe, spd);
2486         mutex_lock(&u->readlock);
2487
2488         return ret;
2489 }
2490
2491 static int unix_stream_splice_actor(struct sk_buff *skb,
2492                                     int skip, int chunk,
2493                                     struct unix_stream_read_state *state)
2494 {
2495         return skb_splice_bits(skb, state->socket->sk,
2496                                UNIXCB(skb).consumed + skip,
2497                                state->pipe, chunk, state->splice_flags,
2498                                skb_unix_socket_splice);
2499 }
2500
2501 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2502                                        struct pipe_inode_info *pipe,
2503                                        size_t size, unsigned int flags)
2504 {
2505         struct unix_stream_read_state state = {
2506                 .recv_actor = unix_stream_splice_actor,
2507                 .socket = sock,
2508                 .pipe = pipe,
2509                 .size = size,
2510                 .splice_flags = flags,
2511         };
2512
2513         if (unlikely(*ppos))
2514                 return -ESPIPE;
2515
2516         if (sock->file->f_flags & O_NONBLOCK ||
2517             flags & SPLICE_F_NONBLOCK)
2518                 state.flags = MSG_DONTWAIT;
2519
2520         return unix_stream_read_generic(&state);
2521 }
2522
2523 static int unix_shutdown(struct socket *sock, int mode)
2524 {
2525         struct sock *sk = sock->sk;
2526         struct sock *other;
2527
2528         if (mode < SHUT_RD || mode > SHUT_RDWR)
2529                 return -EINVAL;
2530         /* This maps:
2531          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2532          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2533          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2534          */
2535         ++mode;
2536
2537         unix_state_lock(sk);
2538         sk->sk_shutdown |= mode;
2539         other = unix_peer(sk);
2540         if (other)
2541                 sock_hold(other);
2542         unix_state_unlock(sk);
2543         sk->sk_state_change(sk);
2544
2545         if (other &&
2546                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2547
2548                 int peer_mode = 0;
2549
2550                 if (mode&RCV_SHUTDOWN)
2551                         peer_mode |= SEND_SHUTDOWN;
2552                 if (mode&SEND_SHUTDOWN)
2553                         peer_mode |= RCV_SHUTDOWN;
2554                 unix_state_lock(other);
2555                 other->sk_shutdown |= peer_mode;
2556                 unix_state_unlock(other);
2557                 other->sk_state_change(other);
2558                 if (peer_mode == SHUTDOWN_MASK)
2559                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2560                 else if (peer_mode & RCV_SHUTDOWN)
2561                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2562         }
2563         if (other)
2564                 sock_put(other);
2565
2566         return 0;
2567 }
2568
2569 long unix_inq_len(struct sock *sk)
2570 {
2571         struct sk_buff *skb;
2572         long amount = 0;
2573
2574         if (sk->sk_state == TCP_LISTEN)
2575                 return -EINVAL;
2576
2577         spin_lock(&sk->sk_receive_queue.lock);
2578         if (sk->sk_type == SOCK_STREAM ||
2579             sk->sk_type == SOCK_SEQPACKET) {
2580                 skb_queue_walk(&sk->sk_receive_queue, skb)
2581                         amount += unix_skb_len(skb);
2582         } else {
2583                 skb = skb_peek(&sk->sk_receive_queue);
2584                 if (skb)
2585                         amount = skb->len;
2586         }
2587         spin_unlock(&sk->sk_receive_queue.lock);
2588
2589         return amount;
2590 }
2591 EXPORT_SYMBOL_GPL(unix_inq_len);
2592
/*
 * Amount of outgoing data charged to @sk, as reported by
 * sk_wmem_alloc_get().
 */
long unix_outq_len(struct sock *sk)
{
        long queued = sk_wmem_alloc_get(sk);

        return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2598
2599 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2600 {
2601         struct sock *sk = sock->sk;
2602         long amount = 0;
2603         int err;
2604
2605         switch (cmd) {
2606         case SIOCOUTQ:
2607                 amount = unix_outq_len(sk);
2608                 err = put_user(amount, (int __user *)arg);
2609                 break;
2610         case SIOCINQ:
2611                 amount = unix_inq_len(sk);
2612                 if (amount < 0)
2613                         err = amount;
2614                 else
2615                         err = put_user(amount, (int __user *)arg);
2616                 break;
2617         default:
2618                 err = -ENOIOCTLCMD;
2619                 break;
2620         }
2621         return err;
2622 }
2623
2624 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2625 {
2626         struct sock *sk = sock->sk;
2627         unsigned int mask;
2628
2629         sock_poll_wait(file, sk_sleep(sk), wait);
2630         mask = 0;
2631
2632         /* exceptional events? */
2633         if (sk->sk_err)
2634                 mask |= POLLERR;
2635         if (sk->sk_shutdown == SHUTDOWN_MASK)
2636                 mask |= POLLHUP;
2637         if (sk->sk_shutdown & RCV_SHUTDOWN)
2638                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2639
2640         /* readable? */
2641         if (!skb_queue_empty(&sk->sk_receive_queue))
2642                 mask |= POLLIN | POLLRDNORM;
2643
2644         /* Connection-based need to check for termination and startup */
2645         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2646             sk->sk_state == TCP_CLOSE)
2647                 mask |= POLLHUP;
2648
2649         /*
2650          * we set writable also when the other side has shut down the
2651          * connection. This prevents stuck sockets.
2652          */
2653         if (unix_writable(sk))
2654                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2655
2656         return mask;
2657 }
2658
2659 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2660                                     poll_table *wait)
2661 {
2662         struct sock *sk = sock->sk, *other;
2663         unsigned int mask, writable;
2664
2665         sock_poll_wait(file, sk_sleep(sk), wait);
2666         mask = 0;
2667
2668         /* exceptional events? */
2669         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2670                 mask |= POLLERR |
2671                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2672
2673         if (sk->sk_shutdown & RCV_SHUTDOWN)
2674                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2675         if (sk->sk_shutdown == SHUTDOWN_MASK)
2676                 mask |= POLLHUP;
2677
2678         /* readable? */
2679         if (!skb_queue_empty(&sk->sk_receive_queue))
2680                 mask |= POLLIN | POLLRDNORM;
2681
2682         /* Connection-based need to check for termination and startup */
2683         if (sk->sk_type == SOCK_SEQPACKET) {
2684                 if (sk->sk_state == TCP_CLOSE)
2685                         mask |= POLLHUP;
2686                 /* connection hasn't started yet? */
2687                 if (sk->sk_state == TCP_SYN_SENT)
2688                         return mask;
2689         }
2690
2691         /* No write status requested, avoid expensive OUT tests. */
2692         if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2693                 return mask;
2694
2695         writable = unix_writable(sk);
2696         if (writable) {
2697                 unix_state_lock(sk);
2698
2699                 other = unix_peer(sk);
2700                 if (other && unix_peer(other) != sk &&
2701                     unix_recvq_full(other) &&
2702                     unix_dgram_peer_wake_me(sk, other))
2703                         writable = 0;
2704
2705                 unix_state_unlock(sk);
2706         }
2707
2708         if (writable)
2709                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2710         else
2711                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2712
2713         return mask;
2714 }
2715
2716 #ifdef CONFIG_PROC_FS
2717
/*
 * A position value packs two fields into one long: the bucket index lives in
 * the bits above BUCKET_SPACE and the offset within that bucket in the
 * BUCKET_SPACE bits below it.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(pos) ((pos) >> BUCKET_SPACE)
#define get_offset(pos) ((pos) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(bucket, offset) (((bucket) << BUCKET_SPACE) | (offset))
2723
2724 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2725 {
2726         unsigned long offset = get_offset(*pos);
2727         unsigned long bucket = get_bucket(*pos);
2728         struct sock *sk;
2729         unsigned long count = 0;
2730
2731         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2732                 if (sock_net(sk) != seq_file_net(seq))
2733                         continue;
2734                 if (++count == offset)
2735                         break;
2736         }
2737
2738         return sk;
2739 }
2740
2741 static struct sock *unix_next_socket(struct seq_file *seq,
2742                                      struct sock *sk,
2743                                      loff_t *pos)
2744 {
2745         unsigned long bucket;
2746
2747         while (sk > (struct sock *)SEQ_START_TOKEN) {
2748                 sk = sk_next(sk);
2749                 if (!sk)
2750                         goto next_bucket;
2751                 if (sock_net(sk) == seq_file_net(seq))
2752                         return sk;
2753         }
2754
2755         do {
2756                 sk = unix_from_bucket(seq, pos);
2757                 if (sk)
2758                         return sk;
2759
2760 next_bucket:
2761                 bucket = get_bucket(*pos) + 1;
2762                 *pos = set_bucket_offset(bucket, 1);
2763         } while (bucket < ARRAY_SIZE(unix_socket_table));
2764
2765         return NULL;
2766 }
2767
2768 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2769         __acquires(unix_table_lock)
2770 {
2771         spin_lock(&unix_table_lock);
2772
2773         if (!*pos)
2774                 return SEQ_START_TOKEN;
2775
2776         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2777                 return NULL;
2778
2779         return unix_next_socket(seq, NULL, pos);
2780 }
2781
2782 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2783 {
2784         ++*pos;
2785         return unix_next_socket(seq, v, pos);
2786 }
2787
2788 static void unix_seq_stop(struct seq_file *seq, void *v)
2789         __releases(unix_table_lock)
2790 {
2791         spin_unlock(&unix_table_lock);
2792 }
2793
2794 static int unix_seq_show(struct seq_file *seq, void *v)
2795 {
2796
2797         if (v == SEQ_START_TOKEN)
2798                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2799                          "Inode Path\n");
2800         else {
2801                 struct sock *s = v;
2802                 struct unix_sock *u = unix_sk(s);
2803                 unix_state_lock(s);
2804
2805                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2806                         s,
2807                         atomic_read(&s->sk_refcnt),
2808                         0,
2809                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2810                         s->sk_type,
2811                         s->sk_socket ?
2812                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2813                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2814                         sock_i_ino(s));
2815
2816                 if (u->addr) {
2817                         int i, len;
2818                         seq_putc(seq, ' ');
2819
2820                         i = 0;
2821                         len = u->addr->len - sizeof(short);
2822                         if (!UNIX_ABSTRACT(s))
2823                                 len--;
2824                         else {
2825                                 seq_putc(seq, '@');
2826                                 i++;
2827                         }
2828                         for ( ; i < len; i++)
2829                                 seq_putc(seq, u->addr->name->sun_path[i]);
2830                 }
2831                 unix_state_unlock(s);
2832                 seq_putc(seq, '\n');
2833         }
2834
2835         return 0;
2836 }
2837
2838 static const struct seq_operations unix_seq_ops = {
2839         .start  = unix_seq_start,
2840         .next   = unix_seq_next,
2841         .stop   = unix_seq_stop,
2842         .show   = unix_seq_show,
2843 };
2844
2845 static int unix_seq_open(struct inode *inode, struct file *file)
2846 {
2847         return seq_open_net(inode, file, &unix_seq_ops,
2848                             sizeof(struct seq_net_private));
2849 }
2850
2851 static const struct file_operations unix_seq_fops = {
2852         .owner          = THIS_MODULE,
2853         .open           = unix_seq_open,
2854         .read           = seq_read,
2855         .llseek         = seq_lseek,
2856         .release        = seq_release_net,
2857 };
2858
2859 #endif
2860
2861 static const struct net_proto_family unix_family_ops = {
2862         .family = PF_UNIX,
2863         .create = unix_create,
2864         .owner  = THIS_MODULE,
2865 };
2866
2867
2868 static int __net_init unix_net_init(struct net *net)
2869 {
2870         int error = -ENOMEM;
2871
2872         net->unx.sysctl_max_dgram_qlen = 10;
2873         if (unix_sysctl_register(net))
2874                 goto out;
2875
2876 #ifdef CONFIG_PROC_FS
2877         if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2878                 unix_sysctl_unregister(net);
2879                 goto out;
2880         }
2881 #endif
2882         error = 0;
2883 out:
2884         return error;
2885 }
2886
2887 static void __net_exit unix_net_exit(struct net *net)
2888 {
2889         unix_sysctl_unregister(net);
2890         remove_proc_entry("unix", net->proc_net);
2891 }
2892
2893 static struct pernet_operations unix_net_ops = {
2894         .init = unix_net_init,
2895         .exit = unix_net_exit,
2896 };
2897
2898 static int __init af_unix_init(void)
2899 {
2900         int rc = -1;
2901
2902         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2903
2904         rc = proto_register(&unix_proto, 1);
2905         if (rc != 0) {
2906                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2907                 goto out;
2908         }
2909
2910         sock_register(&unix_family_ops);
2911         register_pernet_subsys(&unix_net_ops);
2912 out:
2913         return rc;
2914 }
2915
2916 static void __exit af_unix_exit(void)
2917 {
2918         sock_unregister(PF_UNIX);
2919         proto_unregister(&unix_proto);
2920         unregister_pernet_subsys(&unix_net_ops);
2921 }
2922
2923 /* Earlier than device_initcall() so that other drivers invoking
2924    request_module() don't end up in a loop when modprobe tries
2925    to use a UNIX socket. But later than subsys_initcall() because
2926    we depend on stuff initialised there */
2927 fs_initcall(af_unix_init);
2928 module_exit(af_unix_exit);
2929
2930 MODULE_LICENSE("GPL");
2931 MODULE_ALIAS_NETPROTO(PF_UNIX);