[NET]: socket family using RCU
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
80#include <linux/divert.h>
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
93
94#include <net/sock.h>
95#include <linux/netfilter.h>
96
97static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
98static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
89bddce5 99 size_t size, loff_t pos);
1da177e4 100static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
89bddce5
SH
101 size_t size, loff_t pos);
102static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
103
104static int sock_close(struct inode *inode, struct file *file);
105static unsigned int sock_poll(struct file *file,
106 struct poll_table_struct *wait);
89bddce5 107static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
108#ifdef CONFIG_COMPAT
109static long compat_sock_ioctl(struct file *file,
89bddce5 110 unsigned int cmd, unsigned long arg);
89bbfc95 111#endif
1da177e4
LT
112static int sock_fasync(int fd, struct file *filp, int on);
113static ssize_t sock_readv(struct file *file, const struct iovec *vector,
114 unsigned long count, loff_t *ppos);
115static ssize_t sock_writev(struct file *file, const struct iovec *vector,
89bddce5 116 unsigned long count, loff_t *ppos);
1da177e4
LT
117static ssize_t sock_sendpage(struct file *file, struct page *page,
118 int offset, size_t size, loff_t *ppos, int more);
119
1da177e4
LT
120/*
121 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
122 * in the operation structures but are done directly via the socketcall() multiplexor.
123 */
124
125static struct file_operations socket_file_ops = {
126 .owner = THIS_MODULE,
127 .llseek = no_llseek,
128 .aio_read = sock_aio_read,
129 .aio_write = sock_aio_write,
130 .poll = sock_poll,
131 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
132#ifdef CONFIG_COMPAT
133 .compat_ioctl = compat_sock_ioctl,
134#endif
1da177e4
LT
135 .mmap = sock_mmap,
136 .open = sock_no_open, /* special open code to disallow open via /proc */
137 .release = sock_close,
138 .fasync = sock_fasync,
139 .readv = sock_readv,
140 .writev = sock_writev,
5274f052
JA
141 .sendpage = sock_sendpage,
142 .splice_write = generic_splice_sendpage,
1da177e4
LT
143};
144
145/*
146 * The protocol list. Each protocol is registered in here.
147 */
148
1da177e4 149static DEFINE_SPINLOCK(net_family_lock);
55737fda 150static const struct net_proto_family *net_families[NPROTO];
1da177e4 151
1da177e4
LT
152/*
153 * Statistics counters of the socket lists
154 */
155
156static DEFINE_PER_CPU(int, sockets_in_use) = 0;
157
158/*
89bddce5
SH
159 * Support routines.
160 * Move socket addresses back and forth across the kernel/user
161 * divide and look after the messy bits.
1da177e4
LT
162 */
163
89bddce5 164#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
165 16 for IP, 16 for IPX,
166 24 for IPv6,
89bddce5 167 about 80 for AX.25
1da177e4
LT
168 must be at least one bigger than
169 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 170 :unix_mkname()).
1da177e4 171 */
89bddce5 172
1da177e4
LT
173/**
174 * move_addr_to_kernel - copy a socket address into kernel space
175 * @uaddr: Address in user space
176 * @kaddr: Address in kernel space
177 * @ulen: Length in user space
178 *
179 * The address is copied into kernel space. If the provided address is
180 * too long an error code of -EINVAL is returned. If the copy gives
181 * invalid addresses -EFAULT is returned. On a success 0 is returned.
182 */
183
184int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
185{
89bddce5 186 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 187 return -EINVAL;
89bddce5 188 if (ulen == 0)
1da177e4 189 return 0;
89bddce5 190 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 191 return -EFAULT;
3ec3b2fb 192 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
193}
194
195/**
196 * move_addr_to_user - copy an address to user space
197 * @kaddr: kernel space address
198 * @klen: length of address in kernel
199 * @uaddr: user space address
200 * @ulen: pointer to user length field
201 *
202 * The value pointed to by ulen on entry is the buffer length available.
203 * This is overwritten with the buffer space used. -EINVAL is returned
204 * if an overlong buffer is specified or a negative buffer size. -EFAULT
205 * is returned if either the buffer or the length field are not
206 * accessible.
207 * After copying the data up to the limit the user specifies, the true
208 * length of the data is written over the length limit the user
209 * specified. Zero is returned for a success.
210 */
89bddce5
SH
211
212int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
213 int __user *ulen)
1da177e4
LT
214{
215 int err;
216 int len;
217
89bddce5
SH
218 err = get_user(len, ulen);
219 if (err)
1da177e4 220 return err;
89bddce5
SH
221 if (len > klen)
222 len = klen;
223 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 224 return -EINVAL;
89bddce5 225 if (len) {
d6fe3945
SG
226 if (audit_sockaddr(klen, kaddr))
227 return -ENOMEM;
89bddce5 228 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
229 return -EFAULT;
230 }
231 /*
89bddce5
SH
232 * "fromlen shall refer to the value before truncation.."
233 * 1003.1g
1da177e4
LT
234 */
235 return __put_user(klen, ulen);
236}
237
238#define SOCKFS_MAGIC 0x534F434B
239
89bddce5 240static kmem_cache_t *sock_inode_cachep __read_mostly;
1da177e4
LT
241
242static struct inode *sock_alloc_inode(struct super_block *sb)
243{
244 struct socket_alloc *ei;
89bddce5
SH
245
246 ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
1da177e4
LT
247 if (!ei)
248 return NULL;
249 init_waitqueue_head(&ei->socket.wait);
89bddce5 250
1da177e4
LT
251 ei->socket.fasync_list = NULL;
252 ei->socket.state = SS_UNCONNECTED;
253 ei->socket.flags = 0;
254 ei->socket.ops = NULL;
255 ei->socket.sk = NULL;
256 ei->socket.file = NULL;
257 ei->socket.flags = 0;
258
259 return &ei->vfs_inode;
260}
261
262static void sock_destroy_inode(struct inode *inode)
263{
264 kmem_cache_free(sock_inode_cachep,
265 container_of(inode, struct socket_alloc, vfs_inode));
266}
267
89bddce5 268static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
1da177e4 269{
89bddce5 270 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 271
89bddce5
SH
272 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
273 == SLAB_CTOR_CONSTRUCTOR)
1da177e4
LT
274 inode_init_once(&ei->vfs_inode);
275}
89bddce5 276
1da177e4
LT
277static int init_inodecache(void)
278{
279 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
280 sizeof(struct socket_alloc),
281 0,
282 (SLAB_HWCACHE_ALIGN |
283 SLAB_RECLAIM_ACCOUNT |
284 SLAB_MEM_SPREAD),
285 init_once,
286 NULL);
1da177e4
LT
287 if (sock_inode_cachep == NULL)
288 return -ENOMEM;
289 return 0;
290}
291
292static struct super_operations sockfs_ops = {
293 .alloc_inode = sock_alloc_inode,
294 .destroy_inode =sock_destroy_inode,
295 .statfs = simple_statfs,
296};
297
454e2398 298static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
299 int flags, const char *dev_name, void *data,
300 struct vfsmount *mnt)
1da177e4 301{
454e2398
DH
302 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
303 mnt);
1da177e4
LT
304}
305
ba89966c 306static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
307
308static struct file_system_type sock_fs_type = {
309 .name = "sockfs",
310 .get_sb = sockfs_get_sb,
311 .kill_sb = kill_anon_super,
312};
89bddce5 313
1da177e4
LT
/*
 * d_delete hook for sockfs dentries: unconditionally answers 1.
 * NOTE(review): presumably this asks the dcache not to keep unused
 * socket dentries around - confirm against the VFS d_delete contract.
 */
static int sockfs_delete_dentry(struct dentry *dentry)
{
	const int always = 1;

	return always;
}
318static struct dentry_operations sockfs_dentry_operations = {
89bddce5 319 .d_delete = sockfs_delete_dentry,
1da177e4
LT
320};
321
322/*
323 * Obtains the first available file descriptor and sets it up for use.
324 *
39d8c1b6
DM
325 * These functions create file structures and maps them to fd space
326 * of the current process. On success it returns file descriptor
1da177e4
LT
327 * and file struct implicitly stored in sock->file.
328 * Note that another thread may close file descriptor before we return
329 * from this function. We use the fact that now we do not refer
330 * to socket after mapping. If one day we will need it, this
331 * function will increment ref. count on file by 1.
332 *
333 * In any case returned fd MAY BE not valid!
334 * This race condition is unavoidable
335 * with shared fd spaces, we cannot solve it inside kernel,
336 * but we take care of internal coherence yet.
337 */
338
39d8c1b6 339static int sock_alloc_fd(struct file **filep)
1da177e4
LT
340{
341 int fd;
1da177e4
LT
342
343 fd = get_unused_fd();
39d8c1b6 344 if (likely(fd >= 0)) {
1da177e4
LT
345 struct file *file = get_empty_filp();
346
39d8c1b6
DM
347 *filep = file;
348 if (unlikely(!file)) {
1da177e4 349 put_unused_fd(fd);
39d8c1b6 350 return -ENFILE;
1da177e4 351 }
39d8c1b6
DM
352 } else
353 *filep = NULL;
354 return fd;
355}
1da177e4 356
39d8c1b6
DM
357static int sock_attach_fd(struct socket *sock, struct file *file)
358{
359 struct qstr this;
360 char name[32];
361
362 this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
363 this.name = name;
364 this.hash = SOCK_INODE(sock)->i_ino;
365
366 file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
367 if (unlikely(!file->f_dentry))
368 return -ENOMEM;
369
370 file->f_dentry->d_op = &sockfs_dentry_operations;
371 d_add(file->f_dentry, SOCK_INODE(sock));
372 file->f_vfsmnt = mntget(sock_mnt);
373 file->f_mapping = file->f_dentry->d_inode->i_mapping;
374
375 sock->file = file;
376 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
377 file->f_mode = FMODE_READ | FMODE_WRITE;
378 file->f_flags = O_RDWR;
379 file->f_pos = 0;
380 file->private_data = sock;
1da177e4 381
39d8c1b6
DM
382 return 0;
383}
384
/*
 * Allocate a descriptor plus file for @sock, attach the socket to the
 * file and install it in the descriptor table.
 *
 * Returns the new descriptor, or a negative errno. If attaching fails
 * both the file and the descriptor are released again.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
402
6cb153ca
BL
403static struct socket *sock_from_file(struct file *file, int *err)
404{
405 struct inode *inode;
406 struct socket *sock;
407
408 if (file->f_op == &socket_file_ops)
409 return file->private_data; /* set in sock_map_fd */
410
411 inode = file->f_dentry->d_inode;
412 if (!S_ISSOCK(inode->i_mode)) {
413 *err = -ENOTSOCK;
414 return NULL;
415 }
416
417 sock = SOCKET_I(inode);
418 if (sock->file != file) {
419 printk(KERN_ERR "socki_lookup: socket file changed!\n");
420 sock->file = file;
421 }
422 return sock;
423}
424
1da177e4
LT
425/**
426 * sockfd_lookup - Go from a file number to its socket slot
427 * @fd: file handle
428 * @err: pointer to an error code return
429 *
430 * The file handle passed in is locked and the socket it is bound
431 * too is returned. If an error occurs the err pointer is overwritten
432 * with a negative errno code and NULL is returned. The function checks
433 * for both invalid handles and passing a handle which is not a socket.
434 *
435 * On a success the socket object pointer is returned.
436 */
437
438struct socket *sockfd_lookup(int fd, int *err)
439{
440 struct file *file;
1da177e4
LT
441 struct socket *sock;
442
89bddce5
SH
443 file = fget(fd);
444 if (!file) {
1da177e4
LT
445 *err = -EBADF;
446 return NULL;
447 }
89bddce5 448
6cb153ca
BL
449 sock = sock_from_file(file, err);
450 if (!sock)
1da177e4 451 fput(file);
6cb153ca
BL
452 return sock;
453}
1da177e4 454
6cb153ca
BL
455static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
456{
457 struct file *file;
458 struct socket *sock;
459
3672558c 460 *err = -EBADF;
6cb153ca
BL
461 file = fget_light(fd, fput_needed);
462 if (file) {
463 sock = sock_from_file(file, err);
464 if (sock)
465 return sock;
466 fput_light(file, *fput_needed);
1da177e4 467 }
6cb153ca 468 return NULL;
1da177e4
LT
469}
470
471/**
472 * sock_alloc - allocate a socket
89bddce5 473 *
1da177e4
LT
474 * Allocate a new inode and socket object. The two are bound together
475 * and initialised. The socket is then returned. If we are out of inodes
476 * NULL is returned.
477 */
478
479static struct socket *sock_alloc(void)
480{
89bddce5
SH
481 struct inode *inode;
482 struct socket *sock;
1da177e4
LT
483
484 inode = new_inode(sock_mnt->mnt_sb);
485 if (!inode)
486 return NULL;
487
488 sock = SOCKET_I(inode);
489
89bddce5 490 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
491 inode->i_uid = current->fsuid;
492 inode->i_gid = current->fsgid;
493
494 get_cpu_var(sockets_in_use)++;
495 put_cpu_var(sockets_in_use);
496 return sock;
497}
498
499/*
500 * In theory you can't get an open on this inode, but /proc provides
501 * a back door. Remember to keep it shut otherwise you'll let the
502 * creepy crawlies in.
503 */
89bddce5 504
1da177e4
LT
/* Refuse every open attempt: both arguments are ignored, -ENXIO always. */
static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	const int refused = -ENXIO;

	return refused;
}
509
4b6f5d20 510const struct file_operations bad_sock_fops = {
1da177e4
LT
511 .owner = THIS_MODULE,
512 .open = sock_no_open,
513};
514
515/**
516 * sock_release - close a socket
517 * @sock: socket to close
518 *
519 * The socket is released from the protocol stack if it has a release
520 * callback, and the inode is then released if the socket is bound to
89bddce5 521 * an inode not a file.
1da177e4 522 */
89bddce5 523
1da177e4
LT
524void sock_release(struct socket *sock)
525{
526 if (sock->ops) {
527 struct module *owner = sock->ops->owner;
528
529 sock->ops->release(sock);
530 sock->ops = NULL;
531 module_put(owner);
532 }
533
534 if (sock->fasync_list)
535 printk(KERN_ERR "sock_release: fasync list not empty!\n");
536
537 get_cpu_var(sockets_in_use)--;
538 put_cpu_var(sockets_in_use);
539 if (!sock->file) {
540 iput(SOCK_INODE(sock));
541 return;
542 }
89bddce5 543 sock->file = NULL;
1da177e4
LT
544}
545
89bddce5 546static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
547 struct msghdr *msg, size_t size)
548{
549 struct sock_iocb *si = kiocb_to_siocb(iocb);
550 int err;
551
552 si->sock = sock;
553 si->scm = NULL;
554 si->msg = msg;
555 si->size = size;
556
557 err = security_socket_sendmsg(sock, msg, size);
558 if (err)
559 return err;
560
561 return sock->ops->sendmsg(iocb, sock, msg, size);
562}
563
564int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
565{
566 struct kiocb iocb;
567 struct sock_iocb siocb;
568 int ret;
569
570 init_sync_kiocb(&iocb, NULL);
571 iocb.private = &siocb;
572 ret = __sock_sendmsg(&iocb, sock, msg, size);
573 if (-EIOCBQUEUED == ret)
574 ret = wait_on_sync_kiocb(&iocb);
575 return ret;
576}
577
578int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
579 struct kvec *vec, size_t num, size_t size)
580{
581 mm_segment_t oldfs = get_fs();
582 int result;
583
584 set_fs(KERNEL_DS);
585 /*
586 * the following is safe, since for compiler definitions of kvec and
587 * iovec are identical, yielding the same in-core layout and alignment
588 */
89bddce5 589 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
590 msg->msg_iovlen = num;
591 result = sock_sendmsg(sock, msg, size);
592 set_fs(oldfs);
593 return result;
594}
595
89bddce5 596static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
597 struct msghdr *msg, size_t size, int flags)
598{
599 int err;
600 struct sock_iocb *si = kiocb_to_siocb(iocb);
601
602 si->sock = sock;
603 si->scm = NULL;
604 si->msg = msg;
605 si->size = size;
606 si->flags = flags;
607
608 err = security_socket_recvmsg(sock, msg, size, flags);
609 if (err)
610 return err;
611
612 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
613}
614
89bddce5 615int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
616 size_t size, int flags)
617{
618 struct kiocb iocb;
619 struct sock_iocb siocb;
620 int ret;
621
89bddce5 622 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
623 iocb.private = &siocb;
624 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
625 if (-EIOCBQUEUED == ret)
626 ret = wait_on_sync_kiocb(&iocb);
627 return ret;
628}
629
89bddce5
SH
630int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
631 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
632{
633 mm_segment_t oldfs = get_fs();
634 int result;
635
636 set_fs(KERNEL_DS);
637 /*
638 * the following is safe, since for compiler definitions of kvec and
639 * iovec are identical, yielding the same in-core layout and alignment
640 */
89bddce5 641 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
642 result = sock_recvmsg(sock, msg, size, flags);
643 set_fs(oldfs);
644 return result;
645}
646
647static void sock_aio_dtor(struct kiocb *iocb)
648{
649 kfree(iocb->private);
650}
651
ce1d4d3e
CH
652static ssize_t sock_sendpage(struct file *file, struct page *page,
653 int offset, size_t size, loff_t *ppos, int more)
1da177e4 654{
1da177e4
LT
655 struct socket *sock;
656 int flags;
657
ce1d4d3e
CH
658 sock = file->private_data;
659
660 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
661 if (more)
662 flags |= MSG_MORE;
663
664 return sock->ops->sendpage(sock, page, offset, size, flags);
665}
1da177e4 666
ce1d4d3e 667static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5
SH
668 char __user *ubuf, size_t size,
669 struct sock_iocb *siocb)
ce1d4d3e
CH
670{
671 if (!is_sync_kiocb(iocb)) {
672 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
673 if (!siocb)
674 return NULL;
1da177e4
LT
675 iocb->ki_dtor = sock_aio_dtor;
676 }
1da177e4 677
ce1d4d3e
CH
678 siocb->kiocb = iocb;
679 siocb->async_iov.iov_base = ubuf;
680 siocb->async_iov.iov_len = size;
1da177e4 681
ce1d4d3e
CH
682 iocb->private = siocb;
683 return siocb;
1da177e4
LT
684}
685
ce1d4d3e 686static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
89bddce5
SH
687 struct file *file, struct iovec *iov,
688 unsigned long nr_segs)
ce1d4d3e
CH
689{
690 struct socket *sock = file->private_data;
691 size_t size = 0;
692 int i;
1da177e4 693
89bddce5
SH
694 for (i = 0; i < nr_segs; i++)
695 size += iov[i].iov_len;
1da177e4 696
ce1d4d3e
CH
697 msg->msg_name = NULL;
698 msg->msg_namelen = 0;
699 msg->msg_control = NULL;
700 msg->msg_controllen = 0;
89bddce5 701 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
702 msg->msg_iovlen = nr_segs;
703 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
704
705 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
706}
707
708static ssize_t sock_readv(struct file *file, const struct iovec *iov,
709 unsigned long nr_segs, loff_t *ppos)
1da177e4 710{
ce1d4d3e
CH
711 struct kiocb iocb;
712 struct sock_iocb siocb;
713 struct msghdr msg;
714 int ret;
715
89bddce5 716 init_sync_kiocb(&iocb, NULL);
ce1d4d3e
CH
717 iocb.private = &siocb;
718
719 ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
720 if (-EIOCBQUEUED == ret)
721 ret = wait_on_sync_kiocb(&iocb);
722 return ret;
723}
724
725static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
89bddce5 726 size_t count, loff_t pos)
ce1d4d3e
CH
727{
728 struct sock_iocb siocb, *x;
729
1da177e4
LT
730 if (pos != 0)
731 return -ESPIPE;
ce1d4d3e 732 if (count == 0) /* Match SYS5 behaviour */
1da177e4
LT
733 return 0;
734
ce1d4d3e
CH
735 x = alloc_sock_iocb(iocb, ubuf, count, &siocb);
736 if (!x)
737 return -ENOMEM;
738 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
89bddce5 739 &x->async_iov, 1);
1da177e4
LT
740}
741
ce1d4d3e 742static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
89bddce5
SH
743 struct file *file, struct iovec *iov,
744 unsigned long nr_segs)
1da177e4 745{
ce1d4d3e
CH
746 struct socket *sock = file->private_data;
747 size_t size = 0;
748 int i;
1da177e4 749
89bddce5
SH
750 for (i = 0; i < nr_segs; i++)
751 size += iov[i].iov_len;
1da177e4 752
ce1d4d3e
CH
753 msg->msg_name = NULL;
754 msg->msg_namelen = 0;
755 msg->msg_control = NULL;
756 msg->msg_controllen = 0;
89bddce5 757 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
758 msg->msg_iovlen = nr_segs;
759 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
760 if (sock->type == SOCK_SEQPACKET)
761 msg->msg_flags |= MSG_EOR;
1da177e4 762
ce1d4d3e 763 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
764}
765
ce1d4d3e
CH
766static ssize_t sock_writev(struct file *file, const struct iovec *iov,
767 unsigned long nr_segs, loff_t *ppos)
1da177e4
LT
768{
769 struct msghdr msg;
ce1d4d3e
CH
770 struct kiocb iocb;
771 struct sock_iocb siocb;
772 int ret;
1da177e4 773
ce1d4d3e
CH
774 init_sync_kiocb(&iocb, NULL);
775 iocb.private = &siocb;
1da177e4 776
ce1d4d3e
CH
777 ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
778 if (-EIOCBQUEUED == ret)
779 ret = wait_on_sync_kiocb(&iocb);
780 return ret;
781}
1da177e4 782
ce1d4d3e 783static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
89bddce5 784 size_t count, loff_t pos)
ce1d4d3e
CH
785{
786 struct sock_iocb siocb, *x;
1da177e4 787
ce1d4d3e
CH
788 if (pos != 0)
789 return -ESPIPE;
790 if (count == 0) /* Match SYS5 behaviour */
791 return 0;
1da177e4 792
ce1d4d3e
CH
793 x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb);
794 if (!x)
795 return -ENOMEM;
1da177e4 796
ce1d4d3e 797 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
89bddce5 798 &x->async_iov, 1);
1da177e4
LT
799}
800
1da177e4
LT
801/*
802 * Atomic setting of ioctl hooks to avoid race
803 * with module unload.
804 */
805
4a3e2f71 806static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 807static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 808
89bddce5 809void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 810{
4a3e2f71 811 mutex_lock(&br_ioctl_mutex);
1da177e4 812 br_ioctl_hook = hook;
4a3e2f71 813 mutex_unlock(&br_ioctl_mutex);
1da177e4 814}
89bddce5 815
1da177e4
LT
816EXPORT_SYMBOL(brioctl_set);
817
4a3e2f71 818static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 819static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 820
89bddce5 821void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 822{
4a3e2f71 823 mutex_lock(&vlan_ioctl_mutex);
1da177e4 824 vlan_ioctl_hook = hook;
4a3e2f71 825 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 826}
89bddce5 827
1da177e4
LT
828EXPORT_SYMBOL(vlan_ioctl_set);
829
4a3e2f71 830static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 831static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 832
89bddce5 833void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 834{
4a3e2f71 835 mutex_lock(&dlci_ioctl_mutex);
1da177e4 836 dlci_ioctl_hook = hook;
4a3e2f71 837 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 838}
89bddce5 839
1da177e4
LT
840EXPORT_SYMBOL(dlci_ioctl_set);
841
842/*
843 * With an ioctl, arg may well be a user mode pointer, but we don't know
844 * what to do with it - that's up to the protocol still.
845 */
846
847static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
848{
849 struct socket *sock;
850 void __user *argp = (void __user *)arg;
851 int pid, err;
852
b69aee04 853 sock = file->private_data;
1da177e4
LT
854 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
855 err = dev_ioctl(cmd, argp);
856 } else
d86b5e0e 857#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
858 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
859 err = dev_ioctl(cmd, argp);
860 } else
89bddce5
SH
861#endif /* CONFIG_WIRELESS_EXT */
862 switch (cmd) {
1da177e4
LT
863 case FIOSETOWN:
864 case SIOCSPGRP:
865 err = -EFAULT;
866 if (get_user(pid, (int __user *)argp))
867 break;
868 err = f_setown(sock->file, pid, 1);
869 break;
870 case FIOGETOWN:
871 case SIOCGPGRP:
89bddce5
SH
872 err = put_user(sock->file->f_owner.pid,
873 (int __user *)argp);
1da177e4
LT
874 break;
875 case SIOCGIFBR:
876 case SIOCSIFBR:
877 case SIOCBRADDBR:
878 case SIOCBRDELBR:
879 err = -ENOPKG;
880 if (!br_ioctl_hook)
881 request_module("bridge");
882
4a3e2f71 883 mutex_lock(&br_ioctl_mutex);
89bddce5 884 if (br_ioctl_hook)
1da177e4 885 err = br_ioctl_hook(cmd, argp);
4a3e2f71 886 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
887 break;
888 case SIOCGIFVLAN:
889 case SIOCSIFVLAN:
890 err = -ENOPKG;
891 if (!vlan_ioctl_hook)
892 request_module("8021q");
893
4a3e2f71 894 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
895 if (vlan_ioctl_hook)
896 err = vlan_ioctl_hook(argp);
4a3e2f71 897 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
898 break;
899 case SIOCGIFDIVERT:
900 case SIOCSIFDIVERT:
89bddce5 901 /* Convert this to call through a hook */
1da177e4
LT
902 err = divert_ioctl(cmd, argp);
903 break;
904 case SIOCADDDLCI:
905 case SIOCDELDLCI:
906 err = -ENOPKG;
907 if (!dlci_ioctl_hook)
908 request_module("dlci");
909
910 if (dlci_ioctl_hook) {
4a3e2f71 911 mutex_lock(&dlci_ioctl_mutex);
1da177e4 912 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 913 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
914 }
915 break;
916 default:
917 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
918
919 /*
920 * If this ioctl is unknown try to hand it down
921 * to the NIC driver.
922 */
923 if (err == -ENOIOCTLCMD)
924 err = dev_ioctl(cmd, argp);
1da177e4 925 break;
89bddce5 926 }
1da177e4
LT
927 return err;
928}
929
930int sock_create_lite(int family, int type, int protocol, struct socket **res)
931{
932 int err;
933 struct socket *sock = NULL;
89bddce5 934
1da177e4
LT
935 err = security_socket_create(family, type, protocol, 1);
936 if (err)
937 goto out;
938
939 sock = sock_alloc();
940 if (!sock) {
941 err = -ENOMEM;
942 goto out;
943 }
944
1da177e4 945 sock->type = type;
7420ed23
VY
946 err = security_socket_post_create(sock, family, type, protocol, 1);
947 if (err)
948 goto out_release;
949
1da177e4
LT
950out:
951 *res = sock;
952 return err;
7420ed23
VY
953out_release:
954 sock_release(sock);
955 sock = NULL;
956 goto out;
1da177e4
LT
957}
958
959/* No kernel lock held - perfect */
89bddce5 960static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
961{
962 struct socket *sock;
963
964 /*
89bddce5 965 * We can't return errors to poll, so it's either yes or no.
1da177e4 966 */
b69aee04 967 sock = file->private_data;
1da177e4
LT
968 return sock->ops->poll(file, sock, wait);
969}
970
89bddce5 971static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 972{
b69aee04 973 struct socket *sock = file->private_data;
1da177e4
LT
974
975 return sock->ops->mmap(file, sock, vma);
976}
977
20380731 978static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
979{
980 /*
89bddce5
SH
981 * It was possible the inode is NULL we were
982 * closing an unfinished socket.
1da177e4
LT
983 */
984
89bddce5 985 if (!inode) {
1da177e4
LT
986 printk(KERN_DEBUG "sock_close: NULL inode\n");
987 return 0;
988 }
989 sock_fasync(-1, filp, 0);
990 sock_release(SOCKET_I(inode));
991 return 0;
992}
993
994/*
995 * Update the socket async list
996 *
997 * Fasync_list locking strategy.
998 *
999 * 1. fasync_list is modified only under process context socket lock
1000 * i.e. under semaphore.
1001 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1002 * or under socket lock.
1003 * 3. fasync_list can be used from softirq context, so that
1004 * modification under socket lock have to be enhanced with
1005 * write_lock_bh(&sk->sk_callback_lock).
1006 * --ANK (990710)
1007 */
1008
1009static int sock_fasync(int fd, struct file *filp, int on)
1010{
89bddce5 1011 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1012 struct socket *sock;
1013 struct sock *sk;
1014
89bddce5 1015 if (on) {
8b3a7005 1016 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1017 if (fna == NULL)
1da177e4
LT
1018 return -ENOMEM;
1019 }
1020
b69aee04 1021 sock = filp->private_data;
1da177e4 1022
89bddce5
SH
1023 sk = sock->sk;
1024 if (sk == NULL) {
1da177e4
LT
1025 kfree(fna);
1026 return -EINVAL;
1027 }
1028
1029 lock_sock(sk);
1030
89bddce5 1031 prev = &(sock->fasync_list);
1da177e4 1032
89bddce5
SH
1033 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1034 if (fa->fa_file == filp)
1da177e4
LT
1035 break;
1036
89bddce5
SH
1037 if (on) {
1038 if (fa != NULL) {
1da177e4 1039 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1040 fa->fa_fd = fd;
1da177e4
LT
1041 write_unlock_bh(&sk->sk_callback_lock);
1042
1043 kfree(fna);
1044 goto out;
1045 }
89bddce5
SH
1046 fna->fa_file = filp;
1047 fna->fa_fd = fd;
1048 fna->magic = FASYNC_MAGIC;
1049 fna->fa_next = sock->fasync_list;
1da177e4 1050 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1051 sock->fasync_list = fna;
1da177e4 1052 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1053 } else {
1054 if (fa != NULL) {
1da177e4 1055 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1056 *prev = fa->fa_next;
1da177e4
LT
1057 write_unlock_bh(&sk->sk_callback_lock);
1058 kfree(fa);
1059 }
1060 }
1061
1062out:
1063 release_sock(sock->sk);
1064 return 0;
1065}
1066
1067/* This function may be called only under socket lock or callback_lock */
1068
1069int sock_wake_async(struct socket *sock, int how, int band)
1070{
1071 if (!sock || !sock->fasync_list)
1072 return -1;
89bddce5 1073 switch (how) {
1da177e4 1074 case 1:
89bddce5 1075
1da177e4
LT
1076 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1077 break;
1078 goto call_kill;
1079 case 2:
1080 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1081 break;
1082 /* fall through */
1083 case 0:
89bddce5 1084call_kill:
1da177e4
LT
1085 __kill_fasync(sock->fasync_list, SIGIO, band);
1086 break;
1087 case 3:
1088 __kill_fasync(sock->fasync_list, SIGURG, band);
1089 }
1090 return 0;
1091}
1092
89bddce5
SH
1093static int __sock_create(int family, int type, int protocol,
1094 struct socket **res, int kern)
1da177e4
LT
1095{
1096 int err;
1097 struct socket *sock;
55737fda 1098 const struct net_proto_family *pf;
1da177e4
LT
1099
1100 /*
89bddce5 1101 * Check protocol is in range
1da177e4
LT
1102 */
1103 if (family < 0 || family >= NPROTO)
1104 return -EAFNOSUPPORT;
1105 if (type < 0 || type >= SOCK_MAX)
1106 return -EINVAL;
1107
1108 /* Compatibility.
1109
1110 This uglymoron is moved from INET layer to here to avoid
1111 deadlock in module load.
1112 */
1113 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1114 static int warned;
1da177e4
LT
1115 if (!warned) {
1116 warned = 1;
89bddce5
SH
1117 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1118 current->comm);
1da177e4
LT
1119 }
1120 family = PF_PACKET;
1121 }
1122
1123 err = security_socket_create(family, type, protocol, kern);
1124 if (err)
1125 return err;
89bddce5 1126
55737fda
SH
1127 /*
1128 * Allocate the socket and allow the family to set things up. if
1129 * the protocol is 0, the family is instructed to select an appropriate
1130 * default.
1131 */
1132 sock = sock_alloc();
1133 if (!sock) {
1134 if (net_ratelimit())
1135 printk(KERN_WARNING "socket: no more sockets\n");
1136 return -ENFILE; /* Not exactly a match, but its the
1137 closest posix thing */
1138 }
1139
1140 sock->type = type;
1141
1da177e4 1142#if defined(CONFIG_KMOD)
89bddce5
SH
1143 /* Attempt to load a protocol module if the find failed.
1144 *
1145 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1146 * requested real, full-featured networking support upon configuration.
1147 * Otherwise module support will break!
1148 */
55737fda 1149 if (net_families[family] == NULL)
89bddce5 1150 request_module("net-pf-%d", family);
1da177e4
LT
1151#endif
1152
55737fda
SH
1153 rcu_read_lock();
1154 pf = rcu_dereference(net_families[family]);
1155 err = -EAFNOSUPPORT;
1156 if (!pf)
1157 goto out_release;
1da177e4
LT
1158
1159 /*
1160 * We will call the ->create function, that possibly is in a loadable
1161 * module, so we have to bump that loadable module refcnt first.
1162 */
55737fda 1163 if (!try_module_get(pf->owner))
1da177e4
LT
1164 goto out_release;
1165
55737fda
SH
1166 /* Now protected by module ref count */
1167 rcu_read_unlock();
1168
1169 err = pf->create(sock, protocol);
1170 if (err < 0)
1da177e4 1171 goto out_module_put;
a79af59e 1172
1da177e4
LT
1173 /*
1174 * Now to bump the refcnt of the [loadable] module that owns this
1175 * socket at sock_release time we decrement its refcnt.
1176 */
55737fda
SH
1177 if (!try_module_get(sock->ops->owner))
1178 goto out_module_busy;
1179
1da177e4
LT
1180 /*
1181 * Now that we're done with the ->create function, the [loadable]
1182 * module can have its refcnt decremented
1183 */
55737fda 1184 module_put(pf->owner);
7420ed23
VY
1185 err = security_socket_post_create(sock, family, type, protocol, kern);
1186 if (err)
1187 goto out_release;
55737fda 1188 *res = sock;
1da177e4 1189
55737fda
SH
1190 return 0;
1191
1192out_module_busy:
1193 err = -EAFNOSUPPORT;
1da177e4 1194out_module_put:
55737fda
SH
1195 sock->ops = NULL;
1196 module_put(pf->owner);
1197out_sock_release:
1da177e4 1198 sock_release(sock);
55737fda
SH
1199 return err;
1200
1201out_release:
1202 rcu_read_unlock();
1203 goto out_sock_release;
1da177e4
LT
1204}
1205
int sock_create(int family, int type, int protocol, struct socket **res)
{
	/* Same as __sock_create() with its kern argument set to 0. */
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1210
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	/* Same as __sock_create() with its kern argument set to 1. */
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1215
1216asmlinkage long sys_socket(int family, int type, int protocol)
1217{
1218 int retval;
1219 struct socket *sock;
1220
1221 retval = sock_create(family, type, protocol, &sock);
1222 if (retval < 0)
1223 goto out;
1224
1225 retval = sock_map_fd(sock);
1226 if (retval < 0)
1227 goto out_release;
1228
1229out:
1230 /* It may be already another descriptor 8) Not kernel problem. */
1231 return retval;
1232
1233out_release:
1234 sock_release(sock);
1235 return retval;
1236}
1237
1238/*
1239 * Create a pair of connected sockets.
1240 */
1241
89bddce5
SH
1242asmlinkage long sys_socketpair(int family, int type, int protocol,
1243 int __user *usockvec)
1da177e4
LT
1244{
1245 struct socket *sock1, *sock2;
1246 int fd1, fd2, err;
1247
1248 /*
1249 * Obtain the first socket and check if the underlying protocol
1250 * supports the socketpair call.
1251 */
1252
1253 err = sock_create(family, type, protocol, &sock1);
1254 if (err < 0)
1255 goto out;
1256
1257 err = sock_create(family, type, protocol, &sock2);
1258 if (err < 0)
1259 goto out_release_1;
1260
1261 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1262 if (err < 0)
1da177e4
LT
1263 goto out_release_both;
1264
1265 fd1 = fd2 = -1;
1266
1267 err = sock_map_fd(sock1);
1268 if (err < 0)
1269 goto out_release_both;
1270 fd1 = err;
1271
1272 err = sock_map_fd(sock2);
1273 if (err < 0)
1274 goto out_close_1;
1275 fd2 = err;
1276
1277 /* fd1 and fd2 may be already another descriptors.
1278 * Not kernel problem.
1279 */
1280
89bddce5 1281 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1282 if (!err)
1283 err = put_user(fd2, &usockvec[1]);
1284 if (!err)
1285 return 0;
1286
1287 sys_close(fd2);
1288 sys_close(fd1);
1289 return err;
1290
1291out_close_1:
89bddce5 1292 sock_release(sock2);
1da177e4
LT
1293 sys_close(fd1);
1294 return err;
1295
1296out_release_both:
89bddce5 1297 sock_release(sock2);
1da177e4 1298out_release_1:
89bddce5 1299 sock_release(sock1);
1da177e4
LT
1300out:
1301 return err;
1302}
1303
1da177e4
LT
1304/*
1305 * Bind a name to a socket. Nothing much to do here since it's
1306 * the protocol's responsibility to handle the local address.
1307 *
1308 * We move the socket address to kernel space before we call
1309 * the protocol layer (having also checked the address is ok).
1310 */
1311
1312asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1313{
1314 struct socket *sock;
1315 char address[MAX_SOCK_ADDR];
6cb153ca 1316 int err, fput_needed;
1da177e4 1317
89bddce5
SH
1318 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1319 if(sock) {
1320 err = move_addr_to_kernel(umyaddr, addrlen, address);
1321 if (err >= 0) {
1322 err = security_socket_bind(sock,
1323 (struct sockaddr *)address,
1324 addrlen);
6cb153ca
BL
1325 if (!err)
1326 err = sock->ops->bind(sock,
89bddce5
SH
1327 (struct sockaddr *)
1328 address, addrlen);
1da177e4 1329 }
6cb153ca 1330 fput_light(sock->file, fput_needed);
89bddce5 1331 }
1da177e4
LT
1332 return err;
1333}
1334
1da177e4
LT
1335/*
1336 * Perform a listen. Basically, we allow the protocol to do anything
1337 * necessary for a listen, and if that works, we mark the socket as
1338 * ready for listening.
1339 */
1340
1341int sysctl_somaxconn = SOMAXCONN;
1342
1343asmlinkage long sys_listen(int fd, int backlog)
1344{
1345 struct socket *sock;
6cb153ca 1346 int err, fput_needed;
89bddce5
SH
1347
1348 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1349 if (sock) {
1350 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1351 backlog = sysctl_somaxconn;
1352
1353 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1354 if (!err)
1355 err = sock->ops->listen(sock, backlog);
1da177e4 1356
6cb153ca 1357 fput_light(sock->file, fput_needed);
1da177e4
LT
1358 }
1359 return err;
1360}
1361
1da177e4
LT
1362/*
1363 * For accept, we attempt to create a new socket, set up the link
1364 * with the client, wake up the client, then return the new
1365 * connected fd. We collect the address of the connector in kernel
1366 * space and move it to user at the very end. This is unclean because
1367 * we open the socket then return an error.
1368 *
1369 * 1003.1g adds the ability to recvmsg() to query connection pending
1370 * status to recvmsg. We need to add that support in a way that's
1371 * clean when we restructure accept also.
1372 */
1373
89bddce5
SH
1374asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1375 int __user *upeer_addrlen)
1da177e4
LT
1376{
1377 struct socket *sock, *newsock;
39d8c1b6 1378 struct file *newfile;
6cb153ca 1379 int err, len, newfd, fput_needed;
1da177e4
LT
1380 char address[MAX_SOCK_ADDR];
1381
6cb153ca 1382 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1383 if (!sock)
1384 goto out;
1385
1386 err = -ENFILE;
89bddce5 1387 if (!(newsock = sock_alloc()))
1da177e4
LT
1388 goto out_put;
1389
1390 newsock->type = sock->type;
1391 newsock->ops = sock->ops;
1392
1da177e4
LT
1393 /*
1394 * We don't need try_module_get here, as the listening socket (sock)
1395 * has the protocol module (sock->ops->owner) held.
1396 */
1397 __module_get(newsock->ops->owner);
1398
39d8c1b6
DM
1399 newfd = sock_alloc_fd(&newfile);
1400 if (unlikely(newfd < 0)) {
1401 err = newfd;
9a1875e6
DM
1402 sock_release(newsock);
1403 goto out_put;
39d8c1b6
DM
1404 }
1405
1406 err = sock_attach_fd(newsock, newfile);
1407 if (err < 0)
1408 goto out_fd;
1409
a79af59e
FF
1410 err = security_socket_accept(sock, newsock);
1411 if (err)
39d8c1b6 1412 goto out_fd;
a79af59e 1413
1da177e4
LT
1414 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1415 if (err < 0)
39d8c1b6 1416 goto out_fd;
1da177e4
LT
1417
1418 if (upeer_sockaddr) {
89bddce5
SH
1419 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1420 &len, 2) < 0) {
1da177e4 1421 err = -ECONNABORTED;
39d8c1b6 1422 goto out_fd;
1da177e4 1423 }
89bddce5
SH
1424 err = move_addr_to_user(address, len, upeer_sockaddr,
1425 upeer_addrlen);
1da177e4 1426 if (err < 0)
39d8c1b6 1427 goto out_fd;
1da177e4
LT
1428 }
1429
1430 /* File flags are not inherited via accept() unlike another OSes. */
1431
39d8c1b6
DM
1432 fd_install(newfd, newfile);
1433 err = newfd;
1da177e4
LT
1434
1435 security_socket_post_accept(sock, newsock);
1436
1437out_put:
6cb153ca 1438 fput_light(sock->file, fput_needed);
1da177e4
LT
1439out:
1440 return err;
39d8c1b6 1441out_fd:
9606a216 1442 fput(newfile);
39d8c1b6 1443 put_unused_fd(newfd);
1da177e4
LT
1444 goto out_put;
1445}
1446
1da177e4
LT
1447/*
1448 * Attempt to connect to a socket with the server address. The address
1449 * is in user space so we verify it is OK and move it to kernel space.
1450 *
1451 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1452 * break bindings
1453 *
1454 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1455 * other SEQPACKET protocols that take time to connect() as it doesn't
1456 * include the -EINPROGRESS status for such sockets.
1457 */
1458
89bddce5
SH
1459asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1460 int addrlen)
1da177e4
LT
1461{
1462 struct socket *sock;
1463 char address[MAX_SOCK_ADDR];
6cb153ca 1464 int err, fput_needed;
1da177e4 1465
6cb153ca 1466 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1467 if (!sock)
1468 goto out;
1469 err = move_addr_to_kernel(uservaddr, addrlen, address);
1470 if (err < 0)
1471 goto out_put;
1472
89bddce5
SH
1473 err =
1474 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1475 if (err)
1476 goto out_put;
1477
89bddce5 1478 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1479 sock->file->f_flags);
1480out_put:
6cb153ca 1481 fput_light(sock->file, fput_needed);
1da177e4
LT
1482out:
1483 return err;
1484}
1485
1486/*
1487 * Get the local address ('name') of a socket object. Move the obtained
1488 * name to user space.
1489 */
1490
89bddce5
SH
1491asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1492 int __user *usockaddr_len)
1da177e4
LT
1493{
1494 struct socket *sock;
1495 char address[MAX_SOCK_ADDR];
6cb153ca 1496 int len, err, fput_needed;
89bddce5 1497
6cb153ca 1498 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1499 if (!sock)
1500 goto out;
1501
1502 err = security_socket_getsockname(sock);
1503 if (err)
1504 goto out_put;
1505
1506 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1507 if (err)
1508 goto out_put;
1509 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1510
1511out_put:
6cb153ca 1512 fput_light(sock->file, fput_needed);
1da177e4
LT
1513out:
1514 return err;
1515}
1516
1517/*
1518 * Get the remote address ('name') of a socket object. Move the obtained
1519 * name to user space.
1520 */
1521
89bddce5
SH
1522asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1523 int __user *usockaddr_len)
1da177e4
LT
1524{
1525 struct socket *sock;
1526 char address[MAX_SOCK_ADDR];
6cb153ca 1527 int len, err, fput_needed;
1da177e4 1528
89bddce5
SH
1529 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1530 if (sock != NULL) {
1da177e4
LT
1531 err = security_socket_getpeername(sock);
1532 if (err) {
6cb153ca 1533 fput_light(sock->file, fput_needed);
1da177e4
LT
1534 return err;
1535 }
1536
89bddce5
SH
1537 err =
1538 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1539 1);
1da177e4 1540 if (!err)
89bddce5
SH
1541 err = move_addr_to_user(address, len, usockaddr,
1542 usockaddr_len);
6cb153ca 1543 fput_light(sock->file, fput_needed);
1da177e4
LT
1544 }
1545 return err;
1546}
1547
1548/*
1549 * Send a datagram to a given address. We move the address into kernel
1550 * space and check the user space data area is readable before invoking
1551 * the protocol.
1552 */
1553
89bddce5
SH
1554asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1555 unsigned flags, struct sockaddr __user *addr,
1556 int addr_len)
1da177e4
LT
1557{
1558 struct socket *sock;
1559 char address[MAX_SOCK_ADDR];
1560 int err;
1561 struct msghdr msg;
1562 struct iovec iov;
6cb153ca
BL
1563 int fput_needed;
1564 struct file *sock_file;
1565
1566 sock_file = fget_light(fd, &fput_needed);
1567 if (!sock_file)
1568 return -EBADF;
1569
1570 sock = sock_from_file(sock_file, &err);
1da177e4 1571 if (!sock)
6cb153ca 1572 goto out_put;
89bddce5
SH
1573 iov.iov_base = buff;
1574 iov.iov_len = len;
1575 msg.msg_name = NULL;
1576 msg.msg_iov = &iov;
1577 msg.msg_iovlen = 1;
1578 msg.msg_control = NULL;
1579 msg.msg_controllen = 0;
1580 msg.msg_namelen = 0;
6cb153ca 1581 if (addr) {
1da177e4
LT
1582 err = move_addr_to_kernel(addr, addr_len, address);
1583 if (err < 0)
1584 goto out_put;
89bddce5
SH
1585 msg.msg_name = address;
1586 msg.msg_namelen = addr_len;
1da177e4
LT
1587 }
1588 if (sock->file->f_flags & O_NONBLOCK)
1589 flags |= MSG_DONTWAIT;
1590 msg.msg_flags = flags;
1591 err = sock_sendmsg(sock, &msg, len);
1592
89bddce5 1593out_put:
6cb153ca 1594 fput_light(sock_file, fput_needed);
1da177e4
LT
1595 return err;
1596}
1597
1598/*
89bddce5 1599 * Send a datagram down a socket.
1da177e4
LT
1600 */
1601
89bddce5 1602asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1603{
1604 return sys_sendto(fd, buff, len, flags, NULL, 0);
1605}
1606
1607/*
89bddce5 1608 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1609 * sender. We verify the buffers are writable and if needed move the
1610 * sender address from kernel to user space.
1611 */
1612
89bddce5
SH
1613asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1614 unsigned flags, struct sockaddr __user *addr,
1615 int __user *addr_len)
1da177e4
LT
1616{
1617 struct socket *sock;
1618 struct iovec iov;
1619 struct msghdr msg;
1620 char address[MAX_SOCK_ADDR];
89bddce5 1621 int err, err2;
6cb153ca
BL
1622 struct file *sock_file;
1623 int fput_needed;
1624
1625 sock_file = fget_light(fd, &fput_needed);
1626 if (!sock_file)
1627 return -EBADF;
1da177e4 1628
6cb153ca 1629 sock = sock_from_file(sock_file, &err);
1da177e4
LT
1630 if (!sock)
1631 goto out;
1632
89bddce5
SH
1633 msg.msg_control = NULL;
1634 msg.msg_controllen = 0;
1635 msg.msg_iovlen = 1;
1636 msg.msg_iov = &iov;
1637 iov.iov_len = size;
1638 iov.iov_base = ubuf;
1639 msg.msg_name = address;
1640 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1641 if (sock->file->f_flags & O_NONBLOCK)
1642 flags |= MSG_DONTWAIT;
89bddce5 1643 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1644
89bddce5
SH
1645 if (err >= 0 && addr != NULL) {
1646 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1647 if (err2 < 0)
1648 err = err2;
1da177e4 1649 }
1da177e4 1650out:
6cb153ca 1651 fput_light(sock_file, fput_needed);
1da177e4
LT
1652 return err;
1653}
1654
1655/*
89bddce5 1656 * Receive a datagram from a socket.
1da177e4
LT
1657 */
1658
89bddce5
SH
1659asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1660 unsigned flags)
1da177e4
LT
1661{
1662 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1663}
1664
1665/*
1666 * Set a socket option. Because we don't know the option lengths we have
1667 * to pass the user mode parameter for the protocols to sort out.
1668 */
1669
89bddce5
SH
1670asmlinkage long sys_setsockopt(int fd, int level, int optname,
1671 char __user *optval, int optlen)
1da177e4 1672{
6cb153ca 1673 int err, fput_needed;
1da177e4
LT
1674 struct socket *sock;
1675
1676 if (optlen < 0)
1677 return -EINVAL;
89bddce5
SH
1678
1679 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1680 if (sock != NULL) {
1681 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1682 if (err)
1683 goto out_put;
1da177e4
LT
1684
1685 if (level == SOL_SOCKET)
89bddce5
SH
1686 err =
1687 sock_setsockopt(sock, level, optname, optval,
1688 optlen);
1da177e4 1689 else
89bddce5
SH
1690 err =
1691 sock->ops->setsockopt(sock, level, optname, optval,
1692 optlen);
6cb153ca
BL
1693out_put:
1694 fput_light(sock->file, fput_needed);
1da177e4
LT
1695 }
1696 return err;
1697}
1698
1699/*
1700 * Get a socket option. Because we don't know the option lengths we have
1701 * to pass a user mode parameter for the protocols to sort out.
1702 */
1703
89bddce5
SH
1704asmlinkage long sys_getsockopt(int fd, int level, int optname,
1705 char __user *optval, int __user *optlen)
1da177e4 1706{
6cb153ca 1707 int err, fput_needed;
1da177e4
LT
1708 struct socket *sock;
1709
89bddce5
SH
1710 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1711 if (sock != NULL) {
6cb153ca
BL
1712 err = security_socket_getsockopt(sock, level, optname);
1713 if (err)
1714 goto out_put;
1da177e4
LT
1715
1716 if (level == SOL_SOCKET)
89bddce5
SH
1717 err =
1718 sock_getsockopt(sock, level, optname, optval,
1719 optlen);
1da177e4 1720 else
89bddce5
SH
1721 err =
1722 sock->ops->getsockopt(sock, level, optname, optval,
1723 optlen);
6cb153ca
BL
1724out_put:
1725 fput_light(sock->file, fput_needed);
1da177e4
LT
1726 }
1727 return err;
1728}
1729
1da177e4
LT
1730/*
1731 * Shutdown a socket.
1732 */
1733
1734asmlinkage long sys_shutdown(int fd, int how)
1735{
6cb153ca 1736 int err, fput_needed;
1da177e4
LT
1737 struct socket *sock;
1738
89bddce5
SH
1739 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1740 if (sock != NULL) {
1da177e4 1741 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1742 if (!err)
1743 err = sock->ops->shutdown(sock, how);
1744 fput_light(sock->file, fput_needed);
1da177e4
LT
1745 }
1746 return err;
1747}
1748
89bddce5 1749/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1750 * fields which are the same type (int / unsigned) on our platforms.
1751 */
1752#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
1753#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1754#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1755
1da177e4
LT
1756/*
1757 * BSD sendmsg interface
1758 */
1759
1760asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1761{
89bddce5
SH
1762 struct compat_msghdr __user *msg_compat =
1763 (struct compat_msghdr __user *)msg;
1da177e4
LT
1764 struct socket *sock;
1765 char address[MAX_SOCK_ADDR];
1766 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1767 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1768 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1769 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1770 unsigned char *ctl_buf = ctl;
1771 struct msghdr msg_sys;
1772 int err, ctl_len, iov_size, total_len;
6cb153ca 1773 int fput_needed;
89bddce5 1774
1da177e4
LT
1775 err = -EFAULT;
1776 if (MSG_CMSG_COMPAT & flags) {
1777 if (get_compat_msghdr(&msg_sys, msg_compat))
1778 return -EFAULT;
89bddce5
SH
1779 }
1780 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1781 return -EFAULT;
1782
6cb153ca 1783 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1784 if (!sock)
1da177e4
LT
1785 goto out;
1786
1787 /* do not move before msg_sys is valid */
1788 err = -EMSGSIZE;
1789 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1790 goto out_put;
1791
89bddce5 1792 /* Check whether to allocate the iovec area */
1da177e4
LT
1793 err = -ENOMEM;
1794 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1795 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1796 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1797 if (!iov)
1798 goto out_put;
1799 }
1800
1801 /* This will also move the address data into kernel space */
1802 if (MSG_CMSG_COMPAT & flags) {
1803 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1804 } else
1805 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1806 if (err < 0)
1da177e4
LT
1807 goto out_freeiov;
1808 total_len = err;
1809
1810 err = -ENOBUFS;
1811
1812 if (msg_sys.msg_controllen > INT_MAX)
1813 goto out_freeiov;
89bddce5 1814 ctl_len = msg_sys.msg_controllen;
1da177e4 1815 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1816 err =
1817 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1818 sizeof(ctl));
1da177e4
LT
1819 if (err)
1820 goto out_freeiov;
1821 ctl_buf = msg_sys.msg_control;
8920e8f9 1822 ctl_len = msg_sys.msg_controllen;
1da177e4 1823 } else if (ctl_len) {
89bddce5 1824 if (ctl_len > sizeof(ctl)) {
1da177e4 1825 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1826 if (ctl_buf == NULL)
1da177e4
LT
1827 goto out_freeiov;
1828 }
1829 err = -EFAULT;
1830 /*
1831 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1832 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1833 * checking falls down on this.
1834 */
89bddce5
SH
1835 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1836 ctl_len))
1da177e4
LT
1837 goto out_freectl;
1838 msg_sys.msg_control = ctl_buf;
1839 }
1840 msg_sys.msg_flags = flags;
1841
1842 if (sock->file->f_flags & O_NONBLOCK)
1843 msg_sys.msg_flags |= MSG_DONTWAIT;
1844 err = sock_sendmsg(sock, &msg_sys, total_len);
1845
1846out_freectl:
89bddce5 1847 if (ctl_buf != ctl)
1da177e4
LT
1848 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1849out_freeiov:
1850 if (iov != iovstack)
1851 sock_kfree_s(sock->sk, iov, iov_size);
1852out_put:
6cb153ca 1853 fput_light(sock->file, fput_needed);
89bddce5 1854out:
1da177e4
LT
1855 return err;
1856}
1857
1858/*
1859 * BSD recvmsg interface
1860 */
1861
89bddce5
SH
1862asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1863 unsigned int flags)
1da177e4 1864{
89bddce5
SH
1865 struct compat_msghdr __user *msg_compat =
1866 (struct compat_msghdr __user *)msg;
1da177e4
LT
1867 struct socket *sock;
1868 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1869 struct iovec *iov = iovstack;
1da177e4
LT
1870 struct msghdr msg_sys;
1871 unsigned long cmsg_ptr;
1872 int err, iov_size, total_len, len;
6cb153ca 1873 int fput_needed;
1da177e4
LT
1874
1875 /* kernel mode address */
1876 char addr[MAX_SOCK_ADDR];
1877
1878 /* user mode address pointers */
1879 struct sockaddr __user *uaddr;
1880 int __user *uaddr_len;
89bddce5 1881
1da177e4
LT
1882 if (MSG_CMSG_COMPAT & flags) {
1883 if (get_compat_msghdr(&msg_sys, msg_compat))
1884 return -EFAULT;
89bddce5
SH
1885 }
1886 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1887 return -EFAULT;
1da177e4 1888
6cb153ca 1889 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1890 if (!sock)
1891 goto out;
1892
1893 err = -EMSGSIZE;
1894 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1895 goto out_put;
89bddce5
SH
1896
1897 /* Check whether to allocate the iovec area */
1da177e4
LT
1898 err = -ENOMEM;
1899 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1900 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1901 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1902 if (!iov)
1903 goto out_put;
1904 }
1905
1906 /*
89bddce5
SH
1907 * Save the user-mode address (verify_iovec will change the
1908 * kernel msghdr to use the kernel address space)
1da177e4 1909 */
89bddce5
SH
1910
1911 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1912 uaddr_len = COMPAT_NAMELEN(msg);
1913 if (MSG_CMSG_COMPAT & flags) {
1914 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1915 } else
1916 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1917 if (err < 0)
1918 goto out_freeiov;
89bddce5 1919 total_len = err;
1da177e4
LT
1920
1921 cmsg_ptr = (unsigned long)msg_sys.msg_control;
1922 msg_sys.msg_flags = 0;
1923 if (MSG_CMSG_COMPAT & flags)
1924 msg_sys.msg_flags = MSG_CMSG_COMPAT;
89bddce5 1925
1da177e4
LT
1926 if (sock->file->f_flags & O_NONBLOCK)
1927 flags |= MSG_DONTWAIT;
1928 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1929 if (err < 0)
1930 goto out_freeiov;
1931 len = err;
1932
1933 if (uaddr != NULL) {
89bddce5
SH
1934 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1935 uaddr_len);
1da177e4
LT
1936 if (err < 0)
1937 goto out_freeiov;
1938 }
37f7f421
DM
1939 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1940 COMPAT_FLAGS(msg));
1da177e4
LT
1941 if (err)
1942 goto out_freeiov;
1943 if (MSG_CMSG_COMPAT & flags)
89bddce5 1944 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1945 &msg_compat->msg_controllen);
1946 else
89bddce5 1947 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1948 &msg->msg_controllen);
1949 if (err)
1950 goto out_freeiov;
1951 err = len;
1952
1953out_freeiov:
1954 if (iov != iovstack)
1955 sock_kfree_s(sock->sk, iov, iov_size);
1956out_put:
6cb153ca 1957 fput_light(sock->file, fput_needed);
1da177e4
LT
1958out:
1959 return err;
1960}
1961
1962#ifdef __ARCH_WANT_SYS_SOCKETCALL
1963
1964/* Argument list sizes for sys_socketcall */
1965#define AL(x) ((x) * sizeof(unsigned long))
89bddce5
SH
1966static const unsigned char nargs[18]={
1967 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
1968 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
1969 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
1970};
1971
1da177e4
LT
1972#undef AL
1973
1974/*
89bddce5 1975 * System call vectors.
1da177e4
LT
1976 *
1977 * Argument checking cleaned up. Saved 20% in size.
1978 * This function doesn't need to set the kernel lock because
89bddce5 1979 * it is set by the callees.
1da177e4
LT
1980 */
1981
1982asmlinkage long sys_socketcall(int call, unsigned long __user *args)
1983{
1984 unsigned long a[6];
89bddce5 1985 unsigned long a0, a1;
1da177e4
LT
1986 int err;
1987
89bddce5 1988 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
1989 return -EINVAL;
1990
1991 /* copy_from_user should be SMP safe. */
1992 if (copy_from_user(a, args, nargs[call]))
1993 return -EFAULT;
3ec3b2fb 1994
89bddce5 1995 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
1996 if (err)
1997 return err;
1998
89bddce5
SH
1999 a0 = a[0];
2000 a1 = a[1];
2001
2002 switch (call) {
2003 case SYS_SOCKET:
2004 err = sys_socket(a0, a1, a[2]);
2005 break;
2006 case SYS_BIND:
2007 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2008 break;
2009 case SYS_CONNECT:
2010 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2011 break;
2012 case SYS_LISTEN:
2013 err = sys_listen(a0, a1);
2014 break;
2015 case SYS_ACCEPT:
2016 err =
2017 sys_accept(a0, (struct sockaddr __user *)a1,
2018 (int __user *)a[2]);
2019 break;
2020 case SYS_GETSOCKNAME:
2021 err =
2022 sys_getsockname(a0, (struct sockaddr __user *)a1,
2023 (int __user *)a[2]);
2024 break;
2025 case SYS_GETPEERNAME:
2026 err =
2027 sys_getpeername(a0, (struct sockaddr __user *)a1,
2028 (int __user *)a[2]);
2029 break;
2030 case SYS_SOCKETPAIR:
2031 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2032 break;
2033 case SYS_SEND:
2034 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2035 break;
2036 case SYS_SENDTO:
2037 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2038 (struct sockaddr __user *)a[4], a[5]);
2039 break;
2040 case SYS_RECV:
2041 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2042 break;
2043 case SYS_RECVFROM:
2044 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2045 (struct sockaddr __user *)a[4],
2046 (int __user *)a[5]);
2047 break;
2048 case SYS_SHUTDOWN:
2049 err = sys_shutdown(a0, a1);
2050 break;
2051 case SYS_SETSOCKOPT:
2052 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2053 break;
2054 case SYS_GETSOCKOPT:
2055 err =
2056 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2057 (int __user *)a[4]);
2058 break;
2059 case SYS_SENDMSG:
2060 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2061 break;
2062 case SYS_RECVMSG:
2063 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2064 break;
2065 default:
2066 err = -EINVAL;
2067 break;
1da177e4
LT
2068 }
2069 return err;
2070}
2071
89bddce5 2072#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2073
55737fda
SH
2074/**
2075 * sock_register - add a socket protocol handler
2076 * @ops: description of protocol
2077 *
1da177e4
LT
2078 * This function is called by a protocol handler that wants to
2079 * advertise its address family, and have it linked into the
55737fda
SH
2080 * socket interface. The value ops->family corresponds to the
2081 * socket system call protocol family.
1da177e4 2082 */
1da177e4
LT
2083int sock_register(struct net_proto_family *ops)
2084{
2085 int err;
2086
2087 if (ops->family >= NPROTO) {
89bddce5
SH
2088 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2089 NPROTO);
1da177e4
LT
2090 return -ENOBUFS;
2091 }
55737fda
SH
2092
2093 spin_lock(&net_family_lock);
2094 if (net_families[ops->family])
2095 err = -EEXIST;
2096 else {
89bddce5 2097 net_families[ops->family] = ops;
1da177e4
LT
2098 err = 0;
2099 }
55737fda
SH
2100 spin_unlock(&net_family_lock);
2101
89bddce5 2102 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2103 return err;
2104}
2105
55737fda
SH
2106/**
2107 * sock_unregister - remove a protocol handler
2108 * @family: protocol family to remove
2109 *
1da177e4
LT
2110 * This function is called by a protocol handler that wants to
2111 * remove its address family, and have it unlinked from the
55737fda
SH
2112 * new socket creation.
2113 *
2114 * If protocol handler is a module, then it can use module reference
2115 * counts to protect against new references. If protocol handler is not
2116 * a module then it needs to provide its own protection in
2117 * the ops->create routine.
1da177e4 2118 */
1da177e4
LT
2119int sock_unregister(int family)
2120{
2121 if (family < 0 || family >= NPROTO)
55737fda 2122 return -EINVAL;
1da177e4 2123
55737fda 2124 spin_lock(&net_family_lock);
89bddce5 2125 net_families[family] = NULL;
55737fda
SH
2126 spin_unlock(&net_family_lock);
2127
2128 synchronize_rcu();
2129
89bddce5 2130 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2131 return 0;
2132}
2133
77d76ea3 2134static int __init sock_init(void)
1da177e4
LT
2135{
2136 /*
89bddce5 2137 * Initialize sock SLAB cache.
1da177e4 2138 */
89bddce5 2139
1da177e4
LT
2140 sk_init();
2141
1da177e4 2142 /*
89bddce5 2143 * Initialize skbuff SLAB cache
1da177e4
LT
2144 */
2145 skb_init();
1da177e4
LT
2146
2147 /*
89bddce5 2148 * Initialize the protocols module.
1da177e4
LT
2149 */
2150
2151 init_inodecache();
2152 register_filesystem(&sock_fs_type);
2153 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2154
2155 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2156 */
2157
2158#ifdef CONFIG_NETFILTER
2159 netfilter_init();
2160#endif
cbeb321a
DM
2161
2162 return 0;
1da177e4
LT
2163}
2164
77d76ea3
AK
2165core_initcall(sock_init); /* early initcall */
2166
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * socket_seq_show - print the number of sockets in use to @seq.
 *
 * Adds up the per-CPU sockets_in_use counters over every possible CPU
 * and prints the total, reporting 0 if the sum comes out negative.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The summed value can be negative; clamp it for display. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif				/* CONFIG_PROC_FS */
1da177e4 2183
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * compat_sock_ioctl - forward a compat ioctl to the socket's handler.
 *
 * The socket is taken from file->private_data. Returns -ENOIOCTLCMD when
 * the socket's ops provide no compat_ioctl handler, otherwise whatever
 * the handler returns.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	int rc;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	rc = sock->ops->compat_ioctl(sock, cmd, arg);
	return rc;
}
#endif
2197
ac5a488e
SS
2198int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2199{
2200 return sock->ops->bind(sock, addr, addrlen);
2201}
2202
2203int kernel_listen(struct socket *sock, int backlog)
2204{
2205 return sock->ops->listen(sock, backlog);
2206}
2207
2208int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2209{
2210 struct sock *sk = sock->sk;
2211 int err;
2212
2213 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2214 newsock);
2215 if (err < 0)
2216 goto done;
2217
2218 err = sock->ops->accept(sock, *newsock, flags);
2219 if (err < 0) {
2220 sock_release(*newsock);
2221 goto done;
2222 }
2223
2224 (*newsock)->ops = sock->ops;
2225
2226done:
2227 return err;
2228}
2229
2230int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
2231 int flags)
2232{
2233 return sock->ops->connect(sock, addr, addrlen, flags);
2234}
2235
2236int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2237 int *addrlen)
2238{
2239 return sock->ops->getname(sock, addr, addrlen, 0);
2240}
2241
2242int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2243 int *addrlen)
2244{
2245 return sock->ops->getname(sock, addr, addrlen, 1);
2246}
2247
2248int kernel_getsockopt(struct socket *sock, int level, int optname,
2249 char *optval, int *optlen)
2250{
2251 mm_segment_t oldfs = get_fs();
2252 int err;
2253
2254 set_fs(KERNEL_DS);
2255 if (level == SOL_SOCKET)
2256 err = sock_getsockopt(sock, level, optname, optval, optlen);
2257 else
2258 err = sock->ops->getsockopt(sock, level, optname, optval,
2259 optlen);
2260 set_fs(oldfs);
2261 return err;
2262}
2263
2264int kernel_setsockopt(struct socket *sock, int level, int optname,
2265 char *optval, int optlen)
2266{
2267 mm_segment_t oldfs = get_fs();
2268 int err;
2269
2270 set_fs(KERNEL_DS);
2271 if (level == SOL_SOCKET)
2272 err = sock_setsockopt(sock, level, optname, optval, optlen);
2273 else
2274 err = sock->ops->setsockopt(sock, level, optname, optval,
2275 optlen);
2276 set_fs(oldfs);
2277 return err;
2278}
2279
2280int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2281 size_t size, int flags)
2282{
2283 if (sock->ops->sendpage)
2284 return sock->ops->sendpage(sock, page, offset, size, flags);
2285
2286 return sock_no_sendpage(sock, page, offset, size, flags);
2287}
2288
2289int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2290{
2291 mm_segment_t oldfs = get_fs();
2292 int err;
2293
2294 set_fs(KERNEL_DS);
2295 err = sock->ops->ioctl(sock, cmd, arg);
2296 set_fs(oldfs);
2297
2298 return err;
2299}
2300
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* sock_* and sockfd_* symbols */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_* symbols */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);