[SUNGEM]: Fix suspend regression due to NAPI changes.
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
93
94#include <net/sock.h>
95#include <linux/netfilter.h>
96
97static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
98static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
99 unsigned long nr_segs, loff_t pos);
100static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
101 unsigned long nr_segs, loff_t pos);
89bddce5 102static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
103
104static int sock_close(struct inode *inode, struct file *file);
105static unsigned int sock_poll(struct file *file,
106 struct poll_table_struct *wait);
89bddce5 107static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
108#ifdef CONFIG_COMPAT
109static long compat_sock_ioctl(struct file *file,
89bddce5 110 unsigned int cmd, unsigned long arg);
89bbfc95 111#endif
1da177e4 112static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
113static ssize_t sock_sendpage(struct file *file, struct page *page,
114 int offset, size_t size, loff_t *ppos, int more);
115
1da177e4
LT
116/*
117 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
118 * in the operation structures but are done directly via the socketcall() multiplexor.
119 */
120
da7071d7 121static const struct file_operations socket_file_ops = {
1da177e4
LT
122 .owner = THIS_MODULE,
123 .llseek = no_llseek,
124 .aio_read = sock_aio_read,
125 .aio_write = sock_aio_write,
126 .poll = sock_poll,
127 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
128#ifdef CONFIG_COMPAT
129 .compat_ioctl = compat_sock_ioctl,
130#endif
1da177e4
LT
131 .mmap = sock_mmap,
132 .open = sock_no_open, /* special open code to disallow open via /proc */
133 .release = sock_close,
134 .fasync = sock_fasync,
5274f052
JA
135 .sendpage = sock_sendpage,
136 .splice_write = generic_splice_sendpage,
1da177e4
LT
137};
138
139/*
140 * The protocol list. Each protocol is registered in here.
141 */
142
1da177e4 143static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 144static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 145
1da177e4
LT
146/*
147 * Statistics counters of the socket lists
148 */
149
150static DEFINE_PER_CPU(int, sockets_in_use) = 0;
151
/*
 * Support routines.
 * Move socket addresses back and forth across the kernel/user
 * divide and look after the messy bits.
 */

/*
 * Largest socket address, in bytes, that the helpers below will copy:
 * 108 for Unix domain, 16 for IP, 16 for IPX, 24 for IPv6, about 80
 * for AX.25.  Must be at least one bigger than the AF_UNIX size
 * (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 166
1da177e4
LT
167/**
168 * move_addr_to_kernel - copy a socket address into kernel space
169 * @uaddr: Address in user space
170 * @kaddr: Address in kernel space
171 * @ulen: Length in user space
172 *
173 * The address is copied into kernel space. If the provided address is
174 * too long an error code of -EINVAL is returned. If the copy gives
175 * invalid addresses -EFAULT is returned. On a success 0 is returned.
176 */
177
178int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
179{
89bddce5 180 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 181 return -EINVAL;
89bddce5 182 if (ulen == 0)
1da177e4 183 return 0;
89bddce5 184 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 185 return -EFAULT;
3ec3b2fb 186 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
187}
188
189/**
190 * move_addr_to_user - copy an address to user space
191 * @kaddr: kernel space address
192 * @klen: length of address in kernel
193 * @uaddr: user space address
194 * @ulen: pointer to user length field
195 *
196 * The value pointed to by ulen on entry is the buffer length available.
197 * This is overwritten with the buffer space used. -EINVAL is returned
198 * if an overlong buffer is specified or a negative buffer size. -EFAULT
199 * is returned if either the buffer or the length field are not
200 * accessible.
201 * After copying the data up to the limit the user specifies, the true
202 * length of the data is written over the length limit the user
203 * specified. Zero is returned for a success.
204 */
89bddce5
SH
205
206int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
207 int __user *ulen)
1da177e4
LT
208{
209 int err;
210 int len;
211
89bddce5
SH
212 err = get_user(len, ulen);
213 if (err)
1da177e4 214 return err;
89bddce5
SH
215 if (len > klen)
216 len = klen;
217 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 218 return -EINVAL;
89bddce5 219 if (len) {
d6fe3945
SG
220 if (audit_sockaddr(klen, kaddr))
221 return -ENOMEM;
89bddce5 222 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
223 return -EFAULT;
224 }
225 /*
89bddce5
SH
226 * "fromlen shall refer to the value before truncation.."
227 * 1003.1g
1da177e4
LT
228 */
229 return __put_user(klen, ulen);
230}
231
232#define SOCKFS_MAGIC 0x534F434B
233
e18b890b 234static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
235
236static struct inode *sock_alloc_inode(struct super_block *sb)
237{
238 struct socket_alloc *ei;
89bddce5 239
e94b1766 240 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
241 if (!ei)
242 return NULL;
243 init_waitqueue_head(&ei->socket.wait);
89bddce5 244
1da177e4
LT
245 ei->socket.fasync_list = NULL;
246 ei->socket.state = SS_UNCONNECTED;
247 ei->socket.flags = 0;
248 ei->socket.ops = NULL;
249 ei->socket.sk = NULL;
250 ei->socket.file = NULL;
1da177e4
LT
251
252 return &ei->vfs_inode;
253}
254
255static void sock_destroy_inode(struct inode *inode)
256{
257 kmem_cache_free(sock_inode_cachep,
258 container_of(inode, struct socket_alloc, vfs_inode));
259}
260
4ba9b9d0 261static void init_once(struct kmem_cache *cachep, void *foo)
1da177e4 262{
89bddce5 263 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 264
a35afb83 265 inode_init_once(&ei->vfs_inode);
1da177e4 266}
89bddce5 267
1da177e4
LT
268static int init_inodecache(void)
269{
270 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
271 sizeof(struct socket_alloc),
272 0,
273 (SLAB_HWCACHE_ALIGN |
274 SLAB_RECLAIM_ACCOUNT |
275 SLAB_MEM_SPREAD),
20c2df83 276 init_once);
1da177e4
LT
277 if (sock_inode_cachep == NULL)
278 return -ENOMEM;
279 return 0;
280}
281
282static struct super_operations sockfs_ops = {
283 .alloc_inode = sock_alloc_inode,
284 .destroy_inode =sock_destroy_inode,
285 .statfs = simple_statfs,
286};
287
454e2398 288static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
289 int flags, const char *dev_name, void *data,
290 struct vfsmount *mnt)
1da177e4 291{
454e2398
DH
292 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
293 mnt);
1da177e4
LT
294}
295
ba89966c 296static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
297
298static struct file_system_type sock_fs_type = {
299 .name = "sockfs",
300 .get_sb = sockfs_get_sb,
301 .kill_sb = kill_anon_super,
302};
89bddce5 303
1da177e4
LT
304static int sockfs_delete_dentry(struct dentry *dentry)
305{
304e61e6
ED
306 /*
307 * At creation time, we pretended this dentry was hashed
308 * (by clearing DCACHE_UNHASHED bit in d_flags)
309 * At delete time, we restore the truth : not hashed.
310 * (so that dput() can proceed correctly)
311 */
312 dentry->d_flags |= DCACHE_UNHASHED;
313 return 0;
1da177e4 314}
c23fbb6b
ED
315
316/*
317 * sockfs_dname() is called from d_path().
318 */
319static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
320{
321 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
322 dentry->d_inode->i_ino);
323}
324
1da177e4 325static struct dentry_operations sockfs_dentry_operations = {
89bddce5 326 .d_delete = sockfs_delete_dentry,
c23fbb6b 327 .d_dname = sockfs_dname,
1da177e4
LT
328};
329
330/*
331 * Obtains the first available file descriptor and sets it up for use.
332 *
39d8c1b6
DM
333 * These functions create file structures and maps them to fd space
334 * of the current process. On success it returns file descriptor
1da177e4
LT
335 * and file struct implicitly stored in sock->file.
336 * Note that another thread may close file descriptor before we return
337 * from this function. We use the fact that now we do not refer
338 * to socket after mapping. If one day we will need it, this
339 * function will increment ref. count on file by 1.
340 *
341 * In any case returned fd MAY BE not valid!
342 * This race condition is unavoidable
343 * with shared fd spaces, we cannot solve it inside kernel,
344 * but we take care of internal coherence yet.
345 */
346
39d8c1b6 347static int sock_alloc_fd(struct file **filep)
1da177e4
LT
348{
349 int fd;
1da177e4
LT
350
351 fd = get_unused_fd();
39d8c1b6 352 if (likely(fd >= 0)) {
1da177e4
LT
353 struct file *file = get_empty_filp();
354
39d8c1b6
DM
355 *filep = file;
356 if (unlikely(!file)) {
1da177e4 357 put_unused_fd(fd);
39d8c1b6 358 return -ENFILE;
1da177e4 359 }
39d8c1b6
DM
360 } else
361 *filep = NULL;
362 return fd;
363}
1da177e4 364
39d8c1b6
DM
365static int sock_attach_fd(struct socket *sock, struct file *file)
366{
ce8d2cdf 367 struct dentry *dentry;
c23fbb6b 368 struct qstr name = { .name = "" };
39d8c1b6 369
ce8d2cdf
DH
370 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
371 if (unlikely(!dentry))
39d8c1b6
DM
372 return -ENOMEM;
373
ce8d2cdf 374 dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
375 /*
376 * We dont want to push this dentry into global dentry hash table.
377 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
378 * This permits a working /proc/$pid/fd/XXX on sockets
379 */
ce8d2cdf
DH
380 dentry->d_flags &= ~DCACHE_UNHASHED;
381 d_instantiate(dentry, SOCK_INODE(sock));
39d8c1b6
DM
382
383 sock->file = file;
ce8d2cdf
DH
384 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
385 &socket_file_ops);
386 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6
DM
387 file->f_flags = O_RDWR;
388 file->f_pos = 0;
389 file->private_data = sock;
1da177e4 390
39d8c1b6
DM
391 return 0;
392}
393
/*
 * Allocate a descriptor and file for @sock, attach them and install the
 * file in the descriptor table.  Returns the new descriptor, or a
 * negative error from sock_alloc_fd()/sock_attach_fd(); when attaching
 * fails the file and the descriptor are released again.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *file;
	int err;
	int fd = sock_alloc_fd(&file);

	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, file);
	if (unlikely(err < 0)) {
		put_filp(file);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, file);
	return fd;
}
411
6cb153ca
BL
412static struct socket *sock_from_file(struct file *file, int *err)
413{
6cb153ca
BL
414 if (file->f_op == &socket_file_ops)
415 return file->private_data; /* set in sock_map_fd */
416
23bb80d2
ED
417 *err = -ENOTSOCK;
418 return NULL;
6cb153ca
BL
419}
420
1da177e4
LT
421/**
422 * sockfd_lookup - Go from a file number to its socket slot
423 * @fd: file handle
424 * @err: pointer to an error code return
425 *
426 * The file handle passed in is locked and the socket it is bound
427 * too is returned. If an error occurs the err pointer is overwritten
428 * with a negative errno code and NULL is returned. The function checks
429 * for both invalid handles and passing a handle which is not a socket.
430 *
431 * On a success the socket object pointer is returned.
432 */
433
434struct socket *sockfd_lookup(int fd, int *err)
435{
436 struct file *file;
1da177e4
LT
437 struct socket *sock;
438
89bddce5
SH
439 file = fget(fd);
440 if (!file) {
1da177e4
LT
441 *err = -EBADF;
442 return NULL;
443 }
89bddce5 444
6cb153ca
BL
445 sock = sock_from_file(file, err);
446 if (!sock)
1da177e4 447 fput(file);
6cb153ca
BL
448 return sock;
449}
1da177e4 450
6cb153ca
BL
451static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
452{
453 struct file *file;
454 struct socket *sock;
455
3672558c 456 *err = -EBADF;
6cb153ca
BL
457 file = fget_light(fd, fput_needed);
458 if (file) {
459 sock = sock_from_file(file, err);
460 if (sock)
461 return sock;
462 fput_light(file, *fput_needed);
1da177e4 463 }
6cb153ca 464 return NULL;
1da177e4
LT
465}
466
467/**
468 * sock_alloc - allocate a socket
89bddce5 469 *
1da177e4
LT
470 * Allocate a new inode and socket object. The two are bound together
471 * and initialised. The socket is then returned. If we are out of inodes
472 * NULL is returned.
473 */
474
475static struct socket *sock_alloc(void)
476{
89bddce5
SH
477 struct inode *inode;
478 struct socket *sock;
1da177e4
LT
479
480 inode = new_inode(sock_mnt->mnt_sb);
481 if (!inode)
482 return NULL;
483
484 sock = SOCKET_I(inode);
485
89bddce5 486 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
487 inode->i_uid = current->fsuid;
488 inode->i_gid = current->fsgid;
489
490 get_cpu_var(sockets_in_use)++;
491 put_cpu_var(sockets_in_use);
492 return sock;
493}
494
495/*
496 * In theory you can't get an open on this inode, but /proc provides
497 * a back door. Remember to keep it shut otherwise you'll let the
498 * creepy crawlies in.
499 */
89bddce5 500
1da177e4
LT
501static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
502{
503 return -ENXIO;
504}
505
4b6f5d20 506const struct file_operations bad_sock_fops = {
1da177e4
LT
507 .owner = THIS_MODULE,
508 .open = sock_no_open,
509};
510
511/**
512 * sock_release - close a socket
513 * @sock: socket to close
514 *
515 * The socket is released from the protocol stack if it has a release
516 * callback, and the inode is then released if the socket is bound to
89bddce5 517 * an inode not a file.
1da177e4 518 */
89bddce5 519
1da177e4
LT
520void sock_release(struct socket *sock)
521{
522 if (sock->ops) {
523 struct module *owner = sock->ops->owner;
524
525 sock->ops->release(sock);
526 sock->ops = NULL;
527 module_put(owner);
528 }
529
530 if (sock->fasync_list)
531 printk(KERN_ERR "sock_release: fasync list not empty!\n");
532
533 get_cpu_var(sockets_in_use)--;
534 put_cpu_var(sockets_in_use);
535 if (!sock->file) {
536 iput(SOCK_INODE(sock));
537 return;
538 }
89bddce5 539 sock->file = NULL;
1da177e4
LT
540}
541
89bddce5 542static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
543 struct msghdr *msg, size_t size)
544{
545 struct sock_iocb *si = kiocb_to_siocb(iocb);
546 int err;
547
548 si->sock = sock;
549 si->scm = NULL;
550 si->msg = msg;
551 si->size = size;
552
553 err = security_socket_sendmsg(sock, msg, size);
554 if (err)
555 return err;
556
557 return sock->ops->sendmsg(iocb, sock, msg, size);
558}
559
560int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
561{
562 struct kiocb iocb;
563 struct sock_iocb siocb;
564 int ret;
565
566 init_sync_kiocb(&iocb, NULL);
567 iocb.private = &siocb;
568 ret = __sock_sendmsg(&iocb, sock, msg, size);
569 if (-EIOCBQUEUED == ret)
570 ret = wait_on_sync_kiocb(&iocb);
571 return ret;
572}
573
574int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
575 struct kvec *vec, size_t num, size_t size)
576{
577 mm_segment_t oldfs = get_fs();
578 int result;
579
580 set_fs(KERNEL_DS);
581 /*
582 * the following is safe, since for compiler definitions of kvec and
583 * iovec are identical, yielding the same in-core layout and alignment
584 */
89bddce5 585 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
586 msg->msg_iovlen = num;
587 result = sock_sendmsg(sock, msg, size);
588 set_fs(oldfs);
589 return result;
590}
591
92f37fd2
ED
592/*
593 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
594 */
595void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
596 struct sk_buff *skb)
597{
598 ktime_t kt = skb->tstamp;
599
600 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
601 struct timeval tv;
602 /* Race occurred between timestamp enabling and packet
603 receiving. Fill in the current time for now. */
604 if (kt.tv64 == 0)
605 kt = ktime_get_real();
606 skb->tstamp = kt;
607 tv = ktime_to_timeval(kt);
608 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
609 } else {
610 struct timespec ts;
611 /* Race occurred between timestamp enabling and packet
612 receiving. Fill in the current time for now. */
613 if (kt.tv64 == 0)
614 kt = ktime_get_real();
615 skb->tstamp = kt;
616 ts = ktime_to_timespec(kt);
617 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
618 }
619}
620
7c81fd8b
ACM
621EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
622
89bddce5 623static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
624 struct msghdr *msg, size_t size, int flags)
625{
626 int err;
627 struct sock_iocb *si = kiocb_to_siocb(iocb);
628
629 si->sock = sock;
630 si->scm = NULL;
631 si->msg = msg;
632 si->size = size;
633 si->flags = flags;
634
635 err = security_socket_recvmsg(sock, msg, size, flags);
636 if (err)
637 return err;
638
639 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
640}
641
89bddce5 642int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
643 size_t size, int flags)
644{
645 struct kiocb iocb;
646 struct sock_iocb siocb;
647 int ret;
648
89bddce5 649 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
650 iocb.private = &siocb;
651 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
652 if (-EIOCBQUEUED == ret)
653 ret = wait_on_sync_kiocb(&iocb);
654 return ret;
655}
656
89bddce5
SH
657int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
658 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
659{
660 mm_segment_t oldfs = get_fs();
661 int result;
662
663 set_fs(KERNEL_DS);
664 /*
665 * the following is safe, since for compiler definitions of kvec and
666 * iovec are identical, yielding the same in-core layout and alignment
667 */
89bddce5 668 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
669 result = sock_recvmsg(sock, msg, size, flags);
670 set_fs(oldfs);
671 return result;
672}
673
674static void sock_aio_dtor(struct kiocb *iocb)
675{
676 kfree(iocb->private);
677}
678
ce1d4d3e
CH
679static ssize_t sock_sendpage(struct file *file, struct page *page,
680 int offset, size_t size, loff_t *ppos, int more)
1da177e4 681{
1da177e4
LT
682 struct socket *sock;
683 int flags;
684
ce1d4d3e
CH
685 sock = file->private_data;
686
687 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
688 if (more)
689 flags |= MSG_MORE;
690
691 return sock->ops->sendpage(sock, page, offset, size, flags);
692}
1da177e4 693
ce1d4d3e 694static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 695 struct sock_iocb *siocb)
ce1d4d3e
CH
696{
697 if (!is_sync_kiocb(iocb)) {
698 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
699 if (!siocb)
700 return NULL;
1da177e4
LT
701 iocb->ki_dtor = sock_aio_dtor;
702 }
1da177e4 703
ce1d4d3e 704 siocb->kiocb = iocb;
ce1d4d3e
CH
705 iocb->private = siocb;
706 return siocb;
1da177e4
LT
707}
708
ce1d4d3e 709static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
710 struct file *file, const struct iovec *iov,
711 unsigned long nr_segs)
ce1d4d3e
CH
712{
713 struct socket *sock = file->private_data;
714 size_t size = 0;
715 int i;
1da177e4 716
89bddce5
SH
717 for (i = 0; i < nr_segs; i++)
718 size += iov[i].iov_len;
1da177e4 719
ce1d4d3e
CH
720 msg->msg_name = NULL;
721 msg->msg_namelen = 0;
722 msg->msg_control = NULL;
723 msg->msg_controllen = 0;
89bddce5 724 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
725 msg->msg_iovlen = nr_segs;
726 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
727
728 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
729}
730
027445c3
BP
731static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
732 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
733{
734 struct sock_iocb siocb, *x;
735
1da177e4
LT
736 if (pos != 0)
737 return -ESPIPE;
027445c3
BP
738
739 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
740 return 0;
741
027445c3
BP
742
743 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
744 if (!x)
745 return -ENOMEM;
027445c3 746 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
747}
748
ce1d4d3e 749static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
750 struct file *file, const struct iovec *iov,
751 unsigned long nr_segs)
1da177e4 752{
ce1d4d3e
CH
753 struct socket *sock = file->private_data;
754 size_t size = 0;
755 int i;
1da177e4 756
89bddce5
SH
757 for (i = 0; i < nr_segs; i++)
758 size += iov[i].iov_len;
1da177e4 759
ce1d4d3e
CH
760 msg->msg_name = NULL;
761 msg->msg_namelen = 0;
762 msg->msg_control = NULL;
763 msg->msg_controllen = 0;
89bddce5 764 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
765 msg->msg_iovlen = nr_segs;
766 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
767 if (sock->type == SOCK_SEQPACKET)
768 msg->msg_flags |= MSG_EOR;
1da177e4 769
ce1d4d3e 770 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
771}
772
027445c3
BP
773static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
774 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
775{
776 struct sock_iocb siocb, *x;
1da177e4 777
ce1d4d3e
CH
778 if (pos != 0)
779 return -ESPIPE;
027445c3 780
027445c3 781 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
782 if (!x)
783 return -ENOMEM;
1da177e4 784
027445c3 785 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
786}
787
1da177e4
LT
788/*
789 * Atomic setting of ioctl hooks to avoid race
790 * with module unload.
791 */
792
4a3e2f71 793static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 794static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 795
881d966b 796void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 797{
4a3e2f71 798 mutex_lock(&br_ioctl_mutex);
1da177e4 799 br_ioctl_hook = hook;
4a3e2f71 800 mutex_unlock(&br_ioctl_mutex);
1da177e4 801}
89bddce5 802
1da177e4
LT
803EXPORT_SYMBOL(brioctl_set);
804
4a3e2f71 805static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 806static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 807
881d966b 808void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 809{
4a3e2f71 810 mutex_lock(&vlan_ioctl_mutex);
1da177e4 811 vlan_ioctl_hook = hook;
4a3e2f71 812 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 813}
89bddce5 814
1da177e4
LT
815EXPORT_SYMBOL(vlan_ioctl_set);
816
4a3e2f71 817static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 818static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 819
89bddce5 820void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 821{
4a3e2f71 822 mutex_lock(&dlci_ioctl_mutex);
1da177e4 823 dlci_ioctl_hook = hook;
4a3e2f71 824 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 825}
89bddce5 826
1da177e4
LT
827EXPORT_SYMBOL(dlci_ioctl_set);
828
829/*
830 * With an ioctl, arg may well be a user mode pointer, but we don't know
831 * what to do with it - that's up to the protocol still.
832 */
833
834static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
835{
836 struct socket *sock;
881d966b 837 struct sock *sk;
1da177e4
LT
838 void __user *argp = (void __user *)arg;
839 int pid, err;
881d966b 840 struct net *net;
1da177e4 841
b69aee04 842 sock = file->private_data;
881d966b
EB
843 sk = sock->sk;
844 net = sk->sk_net;
1da177e4 845 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 846 err = dev_ioctl(net, cmd, argp);
1da177e4 847 } else
d86b5e0e 848#ifdef CONFIG_WIRELESS_EXT
1da177e4 849 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 850 err = dev_ioctl(net, cmd, argp);
1da177e4 851 } else
89bddce5
SH
852#endif /* CONFIG_WIRELESS_EXT */
853 switch (cmd) {
1da177e4
LT
854 case FIOSETOWN:
855 case SIOCSPGRP:
856 err = -EFAULT;
857 if (get_user(pid, (int __user *)argp))
858 break;
859 err = f_setown(sock->file, pid, 1);
860 break;
861 case FIOGETOWN:
862 case SIOCGPGRP:
609d7fa9 863 err = put_user(f_getown(sock->file),
89bddce5 864 (int __user *)argp);
1da177e4
LT
865 break;
866 case SIOCGIFBR:
867 case SIOCSIFBR:
868 case SIOCBRADDBR:
869 case SIOCBRDELBR:
870 err = -ENOPKG;
871 if (!br_ioctl_hook)
872 request_module("bridge");
873
4a3e2f71 874 mutex_lock(&br_ioctl_mutex);
89bddce5 875 if (br_ioctl_hook)
881d966b 876 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 877 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
878 break;
879 case SIOCGIFVLAN:
880 case SIOCSIFVLAN:
881 err = -ENOPKG;
882 if (!vlan_ioctl_hook)
883 request_module("8021q");
884
4a3e2f71 885 mutex_lock(&vlan_ioctl_mutex);
1da177e4 886 if (vlan_ioctl_hook)
881d966b 887 err = vlan_ioctl_hook(net, argp);
4a3e2f71 888 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 889 break;
1da177e4
LT
890 case SIOCADDDLCI:
891 case SIOCDELDLCI:
892 err = -ENOPKG;
893 if (!dlci_ioctl_hook)
894 request_module("dlci");
895
896 if (dlci_ioctl_hook) {
4a3e2f71 897 mutex_lock(&dlci_ioctl_mutex);
1da177e4 898 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 899 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
900 }
901 break;
902 default:
903 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
904
905 /*
906 * If this ioctl is unknown try to hand it down
907 * to the NIC driver.
908 */
909 if (err == -ENOIOCTLCMD)
881d966b 910 err = dev_ioctl(net, cmd, argp);
1da177e4 911 break;
89bddce5 912 }
1da177e4
LT
913 return err;
914}
915
916int sock_create_lite(int family, int type, int protocol, struct socket **res)
917{
918 int err;
919 struct socket *sock = NULL;
89bddce5 920
1da177e4
LT
921 err = security_socket_create(family, type, protocol, 1);
922 if (err)
923 goto out;
924
925 sock = sock_alloc();
926 if (!sock) {
927 err = -ENOMEM;
928 goto out;
929 }
930
1da177e4 931 sock->type = type;
7420ed23
VY
932 err = security_socket_post_create(sock, family, type, protocol, 1);
933 if (err)
934 goto out_release;
935
1da177e4
LT
936out:
937 *res = sock;
938 return err;
7420ed23
VY
939out_release:
940 sock_release(sock);
941 sock = NULL;
942 goto out;
1da177e4
LT
943}
944
945/* No kernel lock held - perfect */
89bddce5 946static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
947{
948 struct socket *sock;
949
950 /*
89bddce5 951 * We can't return errors to poll, so it's either yes or no.
1da177e4 952 */
b69aee04 953 sock = file->private_data;
1da177e4
LT
954 return sock->ops->poll(file, sock, wait);
955}
956
89bddce5 957static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 958{
b69aee04 959 struct socket *sock = file->private_data;
1da177e4
LT
960
961 return sock->ops->mmap(file, sock, vma);
962}
963
20380731 964static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
965{
966 /*
89bddce5
SH
967 * It was possible the inode is NULL we were
968 * closing an unfinished socket.
1da177e4
LT
969 */
970
89bddce5 971 if (!inode) {
1da177e4
LT
972 printk(KERN_DEBUG "sock_close: NULL inode\n");
973 return 0;
974 }
975 sock_fasync(-1, filp, 0);
976 sock_release(SOCKET_I(inode));
977 return 0;
978}
979
980/*
981 * Update the socket async list
982 *
983 * Fasync_list locking strategy.
984 *
985 * 1. fasync_list is modified only under process context socket lock
986 * i.e. under semaphore.
987 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
988 * or under socket lock.
989 * 3. fasync_list can be used from softirq context, so that
990 * modification under socket lock have to be enhanced with
991 * write_lock_bh(&sk->sk_callback_lock).
992 * --ANK (990710)
993 */
994
995static int sock_fasync(int fd, struct file *filp, int on)
996{
89bddce5 997 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
998 struct socket *sock;
999 struct sock *sk;
1000
89bddce5 1001 if (on) {
8b3a7005 1002 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1003 if (fna == NULL)
1da177e4
LT
1004 return -ENOMEM;
1005 }
1006
b69aee04 1007 sock = filp->private_data;
1da177e4 1008
89bddce5
SH
1009 sk = sock->sk;
1010 if (sk == NULL) {
1da177e4
LT
1011 kfree(fna);
1012 return -EINVAL;
1013 }
1014
1015 lock_sock(sk);
1016
89bddce5 1017 prev = &(sock->fasync_list);
1da177e4 1018
89bddce5
SH
1019 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1020 if (fa->fa_file == filp)
1da177e4
LT
1021 break;
1022
89bddce5
SH
1023 if (on) {
1024 if (fa != NULL) {
1da177e4 1025 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1026 fa->fa_fd = fd;
1da177e4
LT
1027 write_unlock_bh(&sk->sk_callback_lock);
1028
1029 kfree(fna);
1030 goto out;
1031 }
89bddce5
SH
1032 fna->fa_file = filp;
1033 fna->fa_fd = fd;
1034 fna->magic = FASYNC_MAGIC;
1035 fna->fa_next = sock->fasync_list;
1da177e4 1036 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1037 sock->fasync_list = fna;
1da177e4 1038 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1039 } else {
1040 if (fa != NULL) {
1da177e4 1041 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1042 *prev = fa->fa_next;
1da177e4
LT
1043 write_unlock_bh(&sk->sk_callback_lock);
1044 kfree(fa);
1045 }
1046 }
1047
1048out:
1049 release_sock(sock->sk);
1050 return 0;
1051}
1052
1053/* This function may be called only under socket lock or callback_lock */
1054
1055int sock_wake_async(struct socket *sock, int how, int band)
1056{
1057 if (!sock || !sock->fasync_list)
1058 return -1;
89bddce5 1059 switch (how) {
1da177e4 1060 case 1:
89bddce5 1061
1da177e4
LT
1062 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1063 break;
1064 goto call_kill;
1065 case 2:
1066 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1067 break;
1068 /* fall through */
1069 case 0:
89bddce5 1070call_kill:
1da177e4
LT
1071 __kill_fasync(sock->fasync_list, SIGIO, band);
1072 break;
1073 case 3:
1074 __kill_fasync(sock->fasync_list, SIGURG, band);
1075 }
1076 return 0;
1077}
1078
1b8d7ae4 1079static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1080 struct socket **res, int kern)
1da177e4
LT
1081{
1082 int err;
1083 struct socket *sock;
55737fda 1084 const struct net_proto_family *pf;
1da177e4
LT
1085
1086 /*
89bddce5 1087 * Check protocol is in range
1da177e4
LT
1088 */
1089 if (family < 0 || family >= NPROTO)
1090 return -EAFNOSUPPORT;
1091 if (type < 0 || type >= SOCK_MAX)
1092 return -EINVAL;
1093
1094 /* Compatibility.
1095
1096 This uglymoron is moved from INET layer to here to avoid
1097 deadlock in module load.
1098 */
1099 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1100 static int warned;
1da177e4
LT
1101 if (!warned) {
1102 warned = 1;
89bddce5
SH
1103 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1104 current->comm);
1da177e4
LT
1105 }
1106 family = PF_PACKET;
1107 }
1108
1109 err = security_socket_create(family, type, protocol, kern);
1110 if (err)
1111 return err;
89bddce5 1112
55737fda
SH
1113 /*
1114 * Allocate the socket and allow the family to set things up. if
1115 * the protocol is 0, the family is instructed to select an appropriate
1116 * default.
1117 */
1118 sock = sock_alloc();
1119 if (!sock) {
1120 if (net_ratelimit())
1121 printk(KERN_WARNING "socket: no more sockets\n");
1122 return -ENFILE; /* Not exactly a match, but its the
1123 closest posix thing */
1124 }
1125
1126 sock->type = type;
1127
1da177e4 1128#if defined(CONFIG_KMOD)
89bddce5
SH
1129 /* Attempt to load a protocol module if the find failed.
1130 *
1131 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1132 * requested real, full-featured networking support upon configuration.
1133 * Otherwise module support will break!
1134 */
55737fda 1135 if (net_families[family] == NULL)
89bddce5 1136 request_module("net-pf-%d", family);
1da177e4
LT
1137#endif
1138
55737fda
SH
1139 rcu_read_lock();
1140 pf = rcu_dereference(net_families[family]);
1141 err = -EAFNOSUPPORT;
1142 if (!pf)
1143 goto out_release;
1da177e4
LT
1144
1145 /*
1146 * We will call the ->create function, that possibly is in a loadable
1147 * module, so we have to bump that loadable module refcnt first.
1148 */
55737fda 1149 if (!try_module_get(pf->owner))
1da177e4
LT
1150 goto out_release;
1151
55737fda
SH
1152 /* Now protected by module ref count */
1153 rcu_read_unlock();
1154
1b8d7ae4 1155 err = pf->create(net, sock, protocol);
55737fda 1156 if (err < 0)
1da177e4 1157 goto out_module_put;
a79af59e 1158
1da177e4
LT
1159 /*
1160 * Now to bump the refcnt of the [loadable] module that owns this
1161 * socket at sock_release time we decrement its refcnt.
1162 */
55737fda
SH
1163 if (!try_module_get(sock->ops->owner))
1164 goto out_module_busy;
1165
1da177e4
LT
1166 /*
1167 * Now that we're done with the ->create function, the [loadable]
1168 * module can have its refcnt decremented
1169 */
55737fda 1170 module_put(pf->owner);
7420ed23
VY
1171 err = security_socket_post_create(sock, family, type, protocol, kern);
1172 if (err)
3b185525 1173 goto out_sock_release;
55737fda 1174 *res = sock;
1da177e4 1175
55737fda
SH
1176 return 0;
1177
1178out_module_busy:
1179 err = -EAFNOSUPPORT;
1da177e4 1180out_module_put:
55737fda
SH
1181 sock->ops = NULL;
1182 module_put(pf->owner);
1183out_sock_release:
1da177e4 1184 sock_release(sock);
55737fda
SH
1185 return err;
1186
1187out_release:
1188 rcu_read_unlock();
1189 goto out_sock_release;
1da177e4
LT
1190}
1191
1192int sock_create(int family, int type, int protocol, struct socket **res)
1193{
1b8d7ae4 1194 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1195}
1196
1197int sock_create_kern(int family, int type, int protocol, struct socket **res)
1198{
1b8d7ae4 1199 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1200}
1201
1202asmlinkage long sys_socket(int family, int type, int protocol)
1203{
1204 int retval;
1205 struct socket *sock;
1206
1207 retval = sock_create(family, type, protocol, &sock);
1208 if (retval < 0)
1209 goto out;
1210
1211 retval = sock_map_fd(sock);
1212 if (retval < 0)
1213 goto out_release;
1214
1215out:
1216 /* It may be already another descriptor 8) Not kernel problem. */
1217 return retval;
1218
1219out_release:
1220 sock_release(sock);
1221 return retval;
1222}
1223
1224/*
1225 * Create a pair of connected sockets.
1226 */
1227
89bddce5
SH
1228asmlinkage long sys_socketpair(int family, int type, int protocol,
1229 int __user *usockvec)
1da177e4
LT
1230{
1231 struct socket *sock1, *sock2;
1232 int fd1, fd2, err;
db349509 1233 struct file *newfile1, *newfile2;
1da177e4
LT
1234
1235 /*
1236 * Obtain the first socket and check if the underlying protocol
1237 * supports the socketpair call.
1238 */
1239
1240 err = sock_create(family, type, protocol, &sock1);
1241 if (err < 0)
1242 goto out;
1243
1244 err = sock_create(family, type, protocol, &sock2);
1245 if (err < 0)
1246 goto out_release_1;
1247
1248 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1249 if (err < 0)
1da177e4
LT
1250 goto out_release_both;
1251
db349509 1252 fd1 = sock_alloc_fd(&newfile1);
bf3c23d1
DM
1253 if (unlikely(fd1 < 0)) {
1254 err = fd1;
db349509 1255 goto out_release_both;
bf3c23d1 1256 }
1da177e4 1257
db349509
AV
1258 fd2 = sock_alloc_fd(&newfile2);
1259 if (unlikely(fd2 < 0)) {
bf3c23d1 1260 err = fd2;
db349509
AV
1261 put_filp(newfile1);
1262 put_unused_fd(fd1);
1da177e4 1263 goto out_release_both;
db349509 1264 }
1da177e4 1265
db349509
AV
1266 err = sock_attach_fd(sock1, newfile1);
1267 if (unlikely(err < 0)) {
1268 goto out_fd2;
1269 }
1270
1271 err = sock_attach_fd(sock2, newfile2);
1272 if (unlikely(err < 0)) {
1273 fput(newfile1);
1274 goto out_fd1;
1275 }
1276
1277 err = audit_fd_pair(fd1, fd2);
1278 if (err < 0) {
1279 fput(newfile1);
1280 fput(newfile2);
1281 goto out_fd;
1282 }
1da177e4 1283
db349509
AV
1284 fd_install(fd1, newfile1);
1285 fd_install(fd2, newfile2);
1da177e4
LT
1286 /* fd1 and fd2 may be already another descriptors.
1287 * Not kernel problem.
1288 */
1289
89bddce5 1290 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1291 if (!err)
1292 err = put_user(fd2, &usockvec[1]);
1293 if (!err)
1294 return 0;
1295
1296 sys_close(fd2);
1297 sys_close(fd1);
1298 return err;
1299
1da177e4 1300out_release_both:
89bddce5 1301 sock_release(sock2);
1da177e4 1302out_release_1:
89bddce5 1303 sock_release(sock1);
1da177e4
LT
1304out:
1305 return err;
db349509
AV
1306
1307out_fd2:
1308 put_filp(newfile1);
1309 sock_release(sock1);
1310out_fd1:
1311 put_filp(newfile2);
1312 sock_release(sock2);
1313out_fd:
1314 put_unused_fd(fd1);
1315 put_unused_fd(fd2);
1316 goto out;
1da177e4
LT
1317}
1318
1da177e4
LT
1319/*
1320 * Bind a name to a socket. Nothing much to do here since it's
1321 * the protocol's responsibility to handle the local address.
1322 *
1323 * We move the socket address to kernel space before we call
1324 * the protocol layer (having also checked the address is ok).
1325 */
1326
1327asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1328{
1329 struct socket *sock;
1330 char address[MAX_SOCK_ADDR];
6cb153ca 1331 int err, fput_needed;
1da177e4 1332
89bddce5 1333 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1334 if (sock) {
89bddce5
SH
1335 err = move_addr_to_kernel(umyaddr, addrlen, address);
1336 if (err >= 0) {
1337 err = security_socket_bind(sock,
1338 (struct sockaddr *)address,
1339 addrlen);
6cb153ca
BL
1340 if (!err)
1341 err = sock->ops->bind(sock,
89bddce5
SH
1342 (struct sockaddr *)
1343 address, addrlen);
1da177e4 1344 }
6cb153ca 1345 fput_light(sock->file, fput_needed);
89bddce5 1346 }
1da177e4
LT
1347 return err;
1348}
1349
1da177e4
LT
1350/*
1351 * Perform a listen. Basically, we allow the protocol to do anything
1352 * necessary for a listen, and if that works, we mark the socket as
1353 * ready for listening.
1354 */
1355
7a42c217 1356int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1357
1358asmlinkage long sys_listen(int fd, int backlog)
1359{
1360 struct socket *sock;
6cb153ca 1361 int err, fput_needed;
89bddce5
SH
1362
1363 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1364 if (sock) {
1365 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1366 backlog = sysctl_somaxconn;
1367
1368 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1369 if (!err)
1370 err = sock->ops->listen(sock, backlog);
1da177e4 1371
6cb153ca 1372 fput_light(sock->file, fput_needed);
1da177e4
LT
1373 }
1374 return err;
1375}
1376
1da177e4
LT
1377/*
1378 * For accept, we attempt to create a new socket, set up the link
1379 * with the client, wake up the client, then return the new
1380 * connected fd. We collect the address of the connector in kernel
1381 * space and move it to user at the very end. This is unclean because
1382 * we open the socket then return an error.
1383 *
1384 * 1003.1g adds the ability to recvmsg() to query connection pending
1385 * status to recvmsg. We need to add that support in a way thats
1386 * clean when we restucture accept also.
1387 */
1388
89bddce5
SH
1389asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1390 int __user *upeer_addrlen)
1da177e4
LT
1391{
1392 struct socket *sock, *newsock;
39d8c1b6 1393 struct file *newfile;
6cb153ca 1394 int err, len, newfd, fput_needed;
1da177e4
LT
1395 char address[MAX_SOCK_ADDR];
1396
6cb153ca 1397 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1398 if (!sock)
1399 goto out;
1400
1401 err = -ENFILE;
89bddce5 1402 if (!(newsock = sock_alloc()))
1da177e4
LT
1403 goto out_put;
1404
1405 newsock->type = sock->type;
1406 newsock->ops = sock->ops;
1407
1da177e4
LT
1408 /*
1409 * We don't need try_module_get here, as the listening socket (sock)
1410 * has the protocol module (sock->ops->owner) held.
1411 */
1412 __module_get(newsock->ops->owner);
1413
39d8c1b6
DM
1414 newfd = sock_alloc_fd(&newfile);
1415 if (unlikely(newfd < 0)) {
1416 err = newfd;
9a1875e6
DM
1417 sock_release(newsock);
1418 goto out_put;
39d8c1b6
DM
1419 }
1420
1421 err = sock_attach_fd(newsock, newfile);
1422 if (err < 0)
79f4f642 1423 goto out_fd_simple;
39d8c1b6 1424
a79af59e
FF
1425 err = security_socket_accept(sock, newsock);
1426 if (err)
39d8c1b6 1427 goto out_fd;
a79af59e 1428
1da177e4
LT
1429 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1430 if (err < 0)
39d8c1b6 1431 goto out_fd;
1da177e4
LT
1432
1433 if (upeer_sockaddr) {
89bddce5
SH
1434 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1435 &len, 2) < 0) {
1da177e4 1436 err = -ECONNABORTED;
39d8c1b6 1437 goto out_fd;
1da177e4 1438 }
89bddce5
SH
1439 err = move_addr_to_user(address, len, upeer_sockaddr,
1440 upeer_addrlen);
1da177e4 1441 if (err < 0)
39d8c1b6 1442 goto out_fd;
1da177e4
LT
1443 }
1444
1445 /* File flags are not inherited via accept() unlike another OSes. */
1446
39d8c1b6
DM
1447 fd_install(newfd, newfile);
1448 err = newfd;
1da177e4
LT
1449
1450 security_socket_post_accept(sock, newsock);
1451
1452out_put:
6cb153ca 1453 fput_light(sock->file, fput_needed);
1da177e4
LT
1454out:
1455 return err;
79f4f642
AD
1456out_fd_simple:
1457 sock_release(newsock);
1458 put_filp(newfile);
1459 put_unused_fd(newfd);
1460 goto out_put;
39d8c1b6 1461out_fd:
9606a216 1462 fput(newfile);
39d8c1b6 1463 put_unused_fd(newfd);
1da177e4
LT
1464 goto out_put;
1465}
1466
1da177e4
LT
1467/*
1468 * Attempt to connect to a socket with the server address. The address
1469 * is in user space so we verify it is OK and move it to kernel space.
1470 *
1471 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1472 * break bindings
1473 *
1474 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1475 * other SEQPACKET protocols that take time to connect() as it doesn't
1476 * include the -EINPROGRESS status for such sockets.
1477 */
1478
89bddce5
SH
1479asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1480 int addrlen)
1da177e4
LT
1481{
1482 struct socket *sock;
1483 char address[MAX_SOCK_ADDR];
6cb153ca 1484 int err, fput_needed;
1da177e4 1485
6cb153ca 1486 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1487 if (!sock)
1488 goto out;
1489 err = move_addr_to_kernel(uservaddr, addrlen, address);
1490 if (err < 0)
1491 goto out_put;
1492
89bddce5
SH
1493 err =
1494 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1495 if (err)
1496 goto out_put;
1497
89bddce5 1498 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1499 sock->file->f_flags);
1500out_put:
6cb153ca 1501 fput_light(sock->file, fput_needed);
1da177e4
LT
1502out:
1503 return err;
1504}
1505
1506/*
1507 * Get the local address ('name') of a socket object. Move the obtained
1508 * name to user space.
1509 */
1510
89bddce5
SH
1511asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1512 int __user *usockaddr_len)
1da177e4
LT
1513{
1514 struct socket *sock;
1515 char address[MAX_SOCK_ADDR];
6cb153ca 1516 int len, err, fput_needed;
89bddce5 1517
6cb153ca 1518 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1519 if (!sock)
1520 goto out;
1521
1522 err = security_socket_getsockname(sock);
1523 if (err)
1524 goto out_put;
1525
1526 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1527 if (err)
1528 goto out_put;
1529 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1530
1531out_put:
6cb153ca 1532 fput_light(sock->file, fput_needed);
1da177e4
LT
1533out:
1534 return err;
1535}
1536
1537/*
1538 * Get the remote address ('name') of a socket object. Move the obtained
1539 * name to user space.
1540 */
1541
89bddce5
SH
1542asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1543 int __user *usockaddr_len)
1da177e4
LT
1544{
1545 struct socket *sock;
1546 char address[MAX_SOCK_ADDR];
6cb153ca 1547 int len, err, fput_needed;
1da177e4 1548
89bddce5
SH
1549 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1550 if (sock != NULL) {
1da177e4
LT
1551 err = security_socket_getpeername(sock);
1552 if (err) {
6cb153ca 1553 fput_light(sock->file, fput_needed);
1da177e4
LT
1554 return err;
1555 }
1556
89bddce5
SH
1557 err =
1558 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1559 1);
1da177e4 1560 if (!err)
89bddce5
SH
1561 err = move_addr_to_user(address, len, usockaddr,
1562 usockaddr_len);
6cb153ca 1563 fput_light(sock->file, fput_needed);
1da177e4
LT
1564 }
1565 return err;
1566}
1567
1568/*
1569 * Send a datagram to a given address. We move the address into kernel
1570 * space and check the user space data area is readable before invoking
1571 * the protocol.
1572 */
1573
89bddce5
SH
1574asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1575 unsigned flags, struct sockaddr __user *addr,
1576 int addr_len)
1da177e4
LT
1577{
1578 struct socket *sock;
1579 char address[MAX_SOCK_ADDR];
1580 int err;
1581 struct msghdr msg;
1582 struct iovec iov;
6cb153ca
BL
1583 int fput_needed;
1584 struct file *sock_file;
1585
1586 sock_file = fget_light(fd, &fput_needed);
4387ff75 1587 err = -EBADF;
6cb153ca 1588 if (!sock_file)
4387ff75 1589 goto out;
6cb153ca
BL
1590
1591 sock = sock_from_file(sock_file, &err);
1da177e4 1592 if (!sock)
6cb153ca 1593 goto out_put;
89bddce5
SH
1594 iov.iov_base = buff;
1595 iov.iov_len = len;
1596 msg.msg_name = NULL;
1597 msg.msg_iov = &iov;
1598 msg.msg_iovlen = 1;
1599 msg.msg_control = NULL;
1600 msg.msg_controllen = 0;
1601 msg.msg_namelen = 0;
6cb153ca 1602 if (addr) {
1da177e4
LT
1603 err = move_addr_to_kernel(addr, addr_len, address);
1604 if (err < 0)
1605 goto out_put;
89bddce5
SH
1606 msg.msg_name = address;
1607 msg.msg_namelen = addr_len;
1da177e4
LT
1608 }
1609 if (sock->file->f_flags & O_NONBLOCK)
1610 flags |= MSG_DONTWAIT;
1611 msg.msg_flags = flags;
1612 err = sock_sendmsg(sock, &msg, len);
1613
89bddce5 1614out_put:
6cb153ca 1615 fput_light(sock_file, fput_needed);
4387ff75 1616out:
1da177e4
LT
1617 return err;
1618}
1619
1620/*
89bddce5 1621 * Send a datagram down a socket.
1da177e4
LT
1622 */
1623
89bddce5 1624asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1625{
1626 return sys_sendto(fd, buff, len, flags, NULL, 0);
1627}
1628
1629/*
89bddce5 1630 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1631 * sender. We verify the buffers are writable and if needed move the
1632 * sender address from kernel to user space.
1633 */
1634
89bddce5
SH
1635asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1636 unsigned flags, struct sockaddr __user *addr,
1637 int __user *addr_len)
1da177e4
LT
1638{
1639 struct socket *sock;
1640 struct iovec iov;
1641 struct msghdr msg;
1642 char address[MAX_SOCK_ADDR];
89bddce5 1643 int err, err2;
6cb153ca
BL
1644 struct file *sock_file;
1645 int fput_needed;
1646
1647 sock_file = fget_light(fd, &fput_needed);
4387ff75 1648 err = -EBADF;
6cb153ca 1649 if (!sock_file)
4387ff75 1650 goto out;
1da177e4 1651
6cb153ca 1652 sock = sock_from_file(sock_file, &err);
1da177e4 1653 if (!sock)
4387ff75 1654 goto out_put;
1da177e4 1655
89bddce5
SH
1656 msg.msg_control = NULL;
1657 msg.msg_controllen = 0;
1658 msg.msg_iovlen = 1;
1659 msg.msg_iov = &iov;
1660 iov.iov_len = size;
1661 iov.iov_base = ubuf;
1662 msg.msg_name = address;
1663 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1664 if (sock->file->f_flags & O_NONBLOCK)
1665 flags |= MSG_DONTWAIT;
89bddce5 1666 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1667
89bddce5
SH
1668 if (err >= 0 && addr != NULL) {
1669 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1670 if (err2 < 0)
1671 err = err2;
1da177e4 1672 }
4387ff75 1673out_put:
6cb153ca 1674 fput_light(sock_file, fput_needed);
4387ff75 1675out:
1da177e4
LT
1676 return err;
1677}
1678
1679/*
89bddce5 1680 * Receive a datagram from a socket.
1da177e4
LT
1681 */
1682
89bddce5
SH
1683asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1684 unsigned flags)
1da177e4
LT
1685{
1686 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1687}
1688
1689/*
1690 * Set a socket option. Because we don't know the option lengths we have
1691 * to pass the user mode parameter for the protocols to sort out.
1692 */
1693
89bddce5
SH
1694asmlinkage long sys_setsockopt(int fd, int level, int optname,
1695 char __user *optval, int optlen)
1da177e4 1696{
6cb153ca 1697 int err, fput_needed;
1da177e4
LT
1698 struct socket *sock;
1699
1700 if (optlen < 0)
1701 return -EINVAL;
89bddce5
SH
1702
1703 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1704 if (sock != NULL) {
1705 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1706 if (err)
1707 goto out_put;
1da177e4
LT
1708
1709 if (level == SOL_SOCKET)
89bddce5
SH
1710 err =
1711 sock_setsockopt(sock, level, optname, optval,
1712 optlen);
1da177e4 1713 else
89bddce5
SH
1714 err =
1715 sock->ops->setsockopt(sock, level, optname, optval,
1716 optlen);
6cb153ca
BL
1717out_put:
1718 fput_light(sock->file, fput_needed);
1da177e4
LT
1719 }
1720 return err;
1721}
1722
1723/*
1724 * Get a socket option. Because we don't know the option lengths we have
1725 * to pass a user mode parameter for the protocols to sort out.
1726 */
1727
89bddce5
SH
1728asmlinkage long sys_getsockopt(int fd, int level, int optname,
1729 char __user *optval, int __user *optlen)
1da177e4 1730{
6cb153ca 1731 int err, fput_needed;
1da177e4
LT
1732 struct socket *sock;
1733
89bddce5
SH
1734 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1735 if (sock != NULL) {
6cb153ca
BL
1736 err = security_socket_getsockopt(sock, level, optname);
1737 if (err)
1738 goto out_put;
1da177e4
LT
1739
1740 if (level == SOL_SOCKET)
89bddce5
SH
1741 err =
1742 sock_getsockopt(sock, level, optname, optval,
1743 optlen);
1da177e4 1744 else
89bddce5
SH
1745 err =
1746 sock->ops->getsockopt(sock, level, optname, optval,
1747 optlen);
6cb153ca
BL
1748out_put:
1749 fput_light(sock->file, fput_needed);
1da177e4
LT
1750 }
1751 return err;
1752}
1753
1da177e4
LT
1754/*
1755 * Shutdown a socket.
1756 */
1757
1758asmlinkage long sys_shutdown(int fd, int how)
1759{
6cb153ca 1760 int err, fput_needed;
1da177e4
LT
1761 struct socket *sock;
1762
89bddce5
SH
1763 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1764 if (sock != NULL) {
1da177e4 1765 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1766 if (!err)
1767 err = sock->ops->shutdown(sock, how);
1768 fput_light(sock->file, fput_needed);
1da177e4
LT
1769 }
1770 return err;
1771}
1772
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * msghdr fields which are the same type (int / unsigned) on our platforms.
 *
 * They are not self-contained: the expansion refers to a variable named
 * `flags` and to `<msg>_compat`, both of which must be in scope where the
 * macro is used.
 */
#define COMPAT_MSG(msg, member)						\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member		\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1779
1da177e4
LT
1780/*
1781 * BSD sendmsg interface
1782 */
1783
1784asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1785{
89bddce5
SH
1786 struct compat_msghdr __user *msg_compat =
1787 (struct compat_msghdr __user *)msg;
1da177e4
LT
1788 struct socket *sock;
1789 char address[MAX_SOCK_ADDR];
1790 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1791 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1792 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1793 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1794 unsigned char *ctl_buf = ctl;
1795 struct msghdr msg_sys;
1796 int err, ctl_len, iov_size, total_len;
6cb153ca 1797 int fput_needed;
89bddce5 1798
1da177e4
LT
1799 err = -EFAULT;
1800 if (MSG_CMSG_COMPAT & flags) {
1801 if (get_compat_msghdr(&msg_sys, msg_compat))
1802 return -EFAULT;
89bddce5
SH
1803 }
1804 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1805 return -EFAULT;
1806
6cb153ca 1807 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1808 if (!sock)
1da177e4
LT
1809 goto out;
1810
1811 /* do not move before msg_sys is valid */
1812 err = -EMSGSIZE;
1813 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1814 goto out_put;
1815
89bddce5 1816 /* Check whether to allocate the iovec area */
1da177e4
LT
1817 err = -ENOMEM;
1818 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1819 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1820 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1821 if (!iov)
1822 goto out_put;
1823 }
1824
1825 /* This will also move the address data into kernel space */
1826 if (MSG_CMSG_COMPAT & flags) {
1827 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1828 } else
1829 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1830 if (err < 0)
1da177e4
LT
1831 goto out_freeiov;
1832 total_len = err;
1833
1834 err = -ENOBUFS;
1835
1836 if (msg_sys.msg_controllen > INT_MAX)
1837 goto out_freeiov;
89bddce5 1838 ctl_len = msg_sys.msg_controllen;
1da177e4 1839 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1840 err =
1841 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1842 sizeof(ctl));
1da177e4
LT
1843 if (err)
1844 goto out_freeiov;
1845 ctl_buf = msg_sys.msg_control;
8920e8f9 1846 ctl_len = msg_sys.msg_controllen;
1da177e4 1847 } else if (ctl_len) {
89bddce5 1848 if (ctl_len > sizeof(ctl)) {
1da177e4 1849 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1850 if (ctl_buf == NULL)
1da177e4
LT
1851 goto out_freeiov;
1852 }
1853 err = -EFAULT;
1854 /*
1855 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1856 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1857 * checking falls down on this.
1858 */
89bddce5
SH
1859 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1860 ctl_len))
1da177e4
LT
1861 goto out_freectl;
1862 msg_sys.msg_control = ctl_buf;
1863 }
1864 msg_sys.msg_flags = flags;
1865
1866 if (sock->file->f_flags & O_NONBLOCK)
1867 msg_sys.msg_flags |= MSG_DONTWAIT;
1868 err = sock_sendmsg(sock, &msg_sys, total_len);
1869
1870out_freectl:
89bddce5 1871 if (ctl_buf != ctl)
1da177e4
LT
1872 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1873out_freeiov:
1874 if (iov != iovstack)
1875 sock_kfree_s(sock->sk, iov, iov_size);
1876out_put:
6cb153ca 1877 fput_light(sock->file, fput_needed);
89bddce5 1878out:
1da177e4
LT
1879 return err;
1880}
1881
1882/*
1883 * BSD recvmsg interface
1884 */
1885
89bddce5
SH
1886asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1887 unsigned int flags)
1da177e4 1888{
89bddce5
SH
1889 struct compat_msghdr __user *msg_compat =
1890 (struct compat_msghdr __user *)msg;
1da177e4
LT
1891 struct socket *sock;
1892 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1893 struct iovec *iov = iovstack;
1da177e4
LT
1894 struct msghdr msg_sys;
1895 unsigned long cmsg_ptr;
1896 int err, iov_size, total_len, len;
6cb153ca 1897 int fput_needed;
1da177e4
LT
1898
1899 /* kernel mode address */
1900 char addr[MAX_SOCK_ADDR];
1901
1902 /* user mode address pointers */
1903 struct sockaddr __user *uaddr;
1904 int __user *uaddr_len;
89bddce5 1905
1da177e4
LT
1906 if (MSG_CMSG_COMPAT & flags) {
1907 if (get_compat_msghdr(&msg_sys, msg_compat))
1908 return -EFAULT;
89bddce5
SH
1909 }
1910 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1911 return -EFAULT;
1da177e4 1912
6cb153ca 1913 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1914 if (!sock)
1915 goto out;
1916
1917 err = -EMSGSIZE;
1918 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1919 goto out_put;
89bddce5
SH
1920
1921 /* Check whether to allocate the iovec area */
1da177e4
LT
1922 err = -ENOMEM;
1923 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1924 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1925 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1926 if (!iov)
1927 goto out_put;
1928 }
1929
1930 /*
89bddce5
SH
1931 * Save the user-mode address (verify_iovec will change the
1932 * kernel msghdr to use the kernel address space)
1da177e4 1933 */
89bddce5 1934
cfcabdcc 1935 uaddr = (__force void __user *)msg_sys.msg_name;
1da177e4
LT
1936 uaddr_len = COMPAT_NAMELEN(msg);
1937 if (MSG_CMSG_COMPAT & flags) {
1938 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1939 } else
1940 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1941 if (err < 0)
1942 goto out_freeiov;
89bddce5 1943 total_len = err;
1da177e4
LT
1944
1945 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 1946 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 1947
1da177e4
LT
1948 if (sock->file->f_flags & O_NONBLOCK)
1949 flags |= MSG_DONTWAIT;
1950 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1951 if (err < 0)
1952 goto out_freeiov;
1953 len = err;
1954
1955 if (uaddr != NULL) {
89bddce5
SH
1956 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1957 uaddr_len);
1da177e4
LT
1958 if (err < 0)
1959 goto out_freeiov;
1960 }
37f7f421
DM
1961 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1962 COMPAT_FLAGS(msg));
1da177e4
LT
1963 if (err)
1964 goto out_freeiov;
1965 if (MSG_CMSG_COMPAT & flags)
89bddce5 1966 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1967 &msg_compat->msg_controllen);
1968 else
89bddce5 1969 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1970 &msg->msg_controllen);
1971 if (err)
1972 goto out_freeiov;
1973 err = len;
1974
1975out_freeiov:
1976 if (iov != iovstack)
1977 sock_kfree_s(sock->sk, iov, iov_size);
1978out_put:
6cb153ca 1979 fput_light(sock->file, fput_needed);
1da177e4
LT
1980out:
1981 return err;
1982}
1983
#ifdef __ARCH_WANT_SYS_SOCKETCALL

/*
 * Size in bytes of the argument block for each sys_socketcall sub-call,
 * indexed by call number (entry 0 is unused).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3)
};

#undef AL

/*
 *	System call vectors.
 *
 *	Multiplexes the socket system calls: @call selects the operation
 *	and @args points at its argument block in user space.
 *
 *	Argument checking cleaned up. Saved 20% in size.
 *	This function doesn't need to set the kernel lock because
 *	it is set by the callees.
 *
 *	Returns the selected call's result, -EINVAL for an unknown call
 *	number, or -EFAULT if the argument block cannot be read.
 */

asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
	unsigned long a[6];
	unsigned int arg_bytes;
	int err;

	if (call < 1 || call > SYS_RECVMSG)
		return -EINVAL;

	/* copy_from_user should be SMP safe. */
	arg_bytes = nargs[call];
	if (copy_from_user(a, args, arg_bytes))
		return -EFAULT;

	err = audit_socketcall(arg_bytes / sizeof(unsigned long), a);
	if (err)
		return err;

	switch (call) {
	case SYS_SOCKET:
		err = sys_socket(a[0], a[1], a[2]);
		break;
	case SYS_BIND:
		err = sys_bind(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_CONNECT:
		err = sys_connect(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_LISTEN:
		err = sys_listen(a[0], a[1]);
		break;
	case SYS_ACCEPT:
		err = sys_accept(a[0], (struct sockaddr __user *)a[1],
				 (int __user *)a[2]);
		break;
	case SYS_GETSOCKNAME:
		err = sys_getsockname(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_GETPEERNAME:
		err = sys_getpeername(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_SOCKETPAIR:
		err = sys_socketpair(a[0], a[1], a[2], (int __user *)a[3]);
		break;
	case SYS_SEND:
		err = sys_send(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_SENDTO:
		err = sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				 (struct sockaddr __user *)a[4], a[5]);
		break;
	case SYS_RECV:
		err = sys_recv(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_RECVFROM:
		err = sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				   (struct sockaddr __user *)a[4],
				   (int __user *)a[5]);
		break;
	case SYS_SHUTDOWN:
		err = sys_shutdown(a[0], a[1]);
		break;
	case SYS_SETSOCKOPT:
		err = sys_setsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     a[4]);
		break;
	case SYS_GETSOCKOPT:
		err = sys_getsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     (int __user *)a[4]);
		break;
	case SYS_SENDMSG:
		err = sys_sendmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	case SYS_RECVMSG:
		err = sys_recvmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	default:
		err = -EINVAL;
		break;
	}
	return err;
}

#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2095
55737fda
SH
2096/**
2097 * sock_register - add a socket protocol handler
2098 * @ops: description of protocol
2099 *
1da177e4
LT
2100 * This function is called by a protocol handler that wants to
2101 * advertise its address family, and have it linked into the
55737fda
SH
2102 * socket interface. The value ops->family coresponds to the
2103 * socket system call protocol family.
1da177e4 2104 */
f0fd27d4 2105int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2106{
2107 int err;
2108
2109 if (ops->family >= NPROTO) {
89bddce5
SH
2110 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2111 NPROTO);
1da177e4
LT
2112 return -ENOBUFS;
2113 }
55737fda
SH
2114
2115 spin_lock(&net_family_lock);
2116 if (net_families[ops->family])
2117 err = -EEXIST;
2118 else {
89bddce5 2119 net_families[ops->family] = ops;
1da177e4
LT
2120 err = 0;
2121 }
55737fda
SH
2122 spin_unlock(&net_family_lock);
2123
89bddce5 2124 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2125 return err;
2126}
2127
55737fda
SH
2128/**
2129 * sock_unregister - remove a protocol handler
2130 * @family: protocol family to remove
2131 *
1da177e4
LT
2132 * This function is called by a protocol handler that wants to
2133 * remove its address family, and have it unlinked from the
55737fda
SH
2134 * new socket creation.
2135 *
2136 * If protocol handler is a module, then it can use module reference
2137 * counts to protect against new references. If protocol handler is not
2138 * a module then it needs to provide its own protection in
2139 * the ops->create routine.
1da177e4 2140 */
f0fd27d4 2141void sock_unregister(int family)
1da177e4 2142{
f0fd27d4 2143 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2144
55737fda 2145 spin_lock(&net_family_lock);
89bddce5 2146 net_families[family] = NULL;
55737fda
SH
2147 spin_unlock(&net_family_lock);
2148
2149 synchronize_rcu();
2150
89bddce5 2151 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2152}
2153
77d76ea3 2154static int __init sock_init(void)
1da177e4
LT
2155{
2156 /*
89bddce5 2157 * Initialize sock SLAB cache.
1da177e4 2158 */
89bddce5 2159
1da177e4
LT
2160 sk_init();
2161
1da177e4 2162 /*
89bddce5 2163 * Initialize skbuff SLAB cache
1da177e4
LT
2164 */
2165 skb_init();
1da177e4
LT
2166
2167 /*
89bddce5 2168 * Initialize the protocols module.
1da177e4
LT
2169 */
2170
2171 init_inodecache();
2172 register_filesystem(&sock_fs_type);
2173 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2174
2175 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2176 */
2177
2178#ifdef CONFIG_NETFILTER
2179 netfilter_init();
2180#endif
cbeb321a
DM
2181
2182 return 0;
1da177e4
LT
2183}
2184
77d76ea3
AK
2185core_initcall(sock_init); /* early initcall */
2186
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * socket_seq_show - emit the "sockets: used" line into a seq_file.
 * @seq: seq_file to print into
 *
 * Sums the per-CPU sockets_in_use counters over every possible CPU and
 * prints the total, clamped so that a negative sum is shown as 0.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif /* CONFIG_PROC_FS */
1da177e4 2203
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * compat_sock_ioctl - forward a compat ioctl to the socket's ops.
 * @file: file whose private_data holds the struct socket
 * @cmd:  ioctl command
 * @arg:  ioctl argument
 *
 * Returns the result of the socket's compat_ioctl operation, or
 * -ENOIOCTLCMD when the socket's ops do not provide one.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	int ret;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	ret = sock->ops->compat_ioctl(sock, cmd, arg);
	return ret;
}
#endif
2217
ac5a488e
SS
2218int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2219{
2220 return sock->ops->bind(sock, addr, addrlen);
2221}
2222
2223int kernel_listen(struct socket *sock, int backlog)
2224{
2225 return sock->ops->listen(sock, backlog);
2226}
2227
2228int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2229{
2230 struct sock *sk = sock->sk;
2231 int err;
2232
2233 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2234 newsock);
2235 if (err < 0)
2236 goto done;
2237
2238 err = sock->ops->accept(sock, *newsock, flags);
2239 if (err < 0) {
2240 sock_release(*newsock);
fa8705b0 2241 *newsock = NULL;
ac5a488e
SS
2242 goto done;
2243 }
2244
2245 (*newsock)->ops = sock->ops;
2246
2247done:
2248 return err;
2249}
2250
2251int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2252 int flags)
ac5a488e
SS
2253{
2254 return sock->ops->connect(sock, addr, addrlen, flags);
2255}
2256
2257int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2258 int *addrlen)
2259{
2260 return sock->ops->getname(sock, addr, addrlen, 0);
2261}
2262
2263int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2264 int *addrlen)
2265{
2266 return sock->ops->getname(sock, addr, addrlen, 1);
2267}
2268
2269int kernel_getsockopt(struct socket *sock, int level, int optname,
2270 char *optval, int *optlen)
2271{
2272 mm_segment_t oldfs = get_fs();
2273 int err;
2274
2275 set_fs(KERNEL_DS);
2276 if (level == SOL_SOCKET)
2277 err = sock_getsockopt(sock, level, optname, optval, optlen);
2278 else
2279 err = sock->ops->getsockopt(sock, level, optname, optval,
2280 optlen);
2281 set_fs(oldfs);
2282 return err;
2283}
2284
2285int kernel_setsockopt(struct socket *sock, int level, int optname,
2286 char *optval, int optlen)
2287{
2288 mm_segment_t oldfs = get_fs();
2289 int err;
2290
2291 set_fs(KERNEL_DS);
2292 if (level == SOL_SOCKET)
2293 err = sock_setsockopt(sock, level, optname, optval, optlen);
2294 else
2295 err = sock->ops->setsockopt(sock, level, optname, optval,
2296 optlen);
2297 set_fs(oldfs);
2298 return err;
2299}
2300
2301int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2302 size_t size, int flags)
2303{
2304 if (sock->ops->sendpage)
2305 return sock->ops->sendpage(sock, page, offset, size, flags);
2306
2307 return sock_no_sendpage(sock, page, offset, size, flags);
2308}
2309
2310int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2311{
2312 mm_segment_t oldfs = get_fs();
2313 int err;
2314
2315 set_fs(KERNEL_DS);
2316 err = sock->ops->ioctl(sock, cmd, arg);
2317 set_fs(oldfs);
2318
2319 return err;
2320}
2321
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket creation, registration and generic socket helpers. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_* wrappers for in-kernel socket users. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
2347EXPORT_SYMBOL(kernel_sock_ioctl);