[NET]: drop unused elements from net_proto_family
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4
LT
61#include <linux/mm.h>
62#include <linux/smp_lock.h>
63#include <linux/socket.h>
64#include <linux/file.h>
65#include <linux/net.h>
66#include <linux/interrupt.h>
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
80#include <linux/divert.h>
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
93
94#include <net/sock.h>
95#include <linux/netfilter.h>
96
97static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
98static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
89bddce5 99 size_t size, loff_t pos);
1da177e4 100static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
89bddce5
SH
101 size_t size, loff_t pos);
102static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
103
104static int sock_close(struct inode *inode, struct file *file);
105static unsigned int sock_poll(struct file *file,
106 struct poll_table_struct *wait);
89bddce5 107static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
108#ifdef CONFIG_COMPAT
109static long compat_sock_ioctl(struct file *file,
89bddce5 110 unsigned int cmd, unsigned long arg);
89bbfc95 111#endif
1da177e4
LT
112static int sock_fasync(int fd, struct file *filp, int on);
113static ssize_t sock_readv(struct file *file, const struct iovec *vector,
114 unsigned long count, loff_t *ppos);
115static ssize_t sock_writev(struct file *file, const struct iovec *vector,
89bddce5 116 unsigned long count, loff_t *ppos);
1da177e4
LT
117static ssize_t sock_sendpage(struct file *file, struct page *page,
118 int offset, size_t size, loff_t *ppos, int more);
119
1da177e4
LT
120/*
121 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
122 * in the operation structures but are done directly via the socketcall() multiplexor.
123 */
124
125static struct file_operations socket_file_ops = {
126 .owner = THIS_MODULE,
127 .llseek = no_llseek,
128 .aio_read = sock_aio_read,
129 .aio_write = sock_aio_write,
130 .poll = sock_poll,
131 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
132#ifdef CONFIG_COMPAT
133 .compat_ioctl = compat_sock_ioctl,
134#endif
1da177e4
LT
135 .mmap = sock_mmap,
136 .open = sock_no_open, /* special open code to disallow open via /proc */
137 .release = sock_close,
138 .fasync = sock_fasync,
139 .readv = sock_readv,
140 .writev = sock_writev,
5274f052
JA
141 .sendpage = sock_sendpage,
142 .splice_write = generic_splice_sendpage,
1da177e4
LT
143};
144
/*
 *	The protocol list. Each protocol family is registered in here.
 *	The table has NPROTO slots.
 */

static struct net_proto_family *net_families[NPROTO];
150
#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
static atomic_t net_family_lockct = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(net_family_lock);

/*
 * Strategy: modifications of the net_family vector are short, do not
 * sleep and are very rare, while read access should be free of any
 * exclusive lock.  Readers therefore only bump a counter; a writer holds
 * the spinlock and waits for that counter to drop to zero.
 */

/* Take the spinlock, yielding and retrying for as long as readers exist. */
static void net_family_write_lock(void)
{
	for (;;) {
		spin_lock(&net_family_lock);
		if (atomic_read(&net_family_lockct) == 0)
			break;
		spin_unlock(&net_family_lock);

		yield();
	}
}

/* Drop the spinlock taken by net_family_write_lock(). */
static __inline__ void net_family_write_unlock(void)
{
	spin_unlock(&net_family_lock);
}

/* Announce a reader, then wait until the spinlock is not held. */
static __inline__ void net_family_read_lock(void)
{
	atomic_inc(&net_family_lockct);
	spin_unlock_wait(&net_family_lock);
}

/* Retire the reader announced by net_family_read_lock(). */
static __inline__ void net_family_read_unlock(void)
{
	atomic_dec(&net_family_lockct);
}

#else
/* Neither SMP nor preemption configured: the helpers compile to nothing. */
#define net_family_write_lock() do { } while(0)
#define net_family_write_unlock() do { } while(0)
#define net_family_read_lock() do { } while(0)
#define net_family_read_unlock() do { } while(0)
#endif
194
1da177e4
LT
/*
 *	Statistics counters of the socket lists.
 *	Per-CPU counter: incremented in sock_alloc(), decremented in
 *	sock_release().
 */

static DEFINE_PER_CPU(int, sockets_in_use) = 0;
200
201/*
89bddce5
SH
202 * Support routines.
203 * Move socket addresses back and forth across the kernel/user
204 * divide and look after the messy bits.
1da177e4
LT
205 */
206
#define MAX_SOCK_ADDR	128		/* 108 for Unix domain -
					   16 for IP, 16 for IPX,
					   24 for IPv6,
					   about 80 for AX.25
					   must be at least one bigger than
					   the AF_UNIX size (see net/unix/af_unix.c
					   :unix_mkname()).
					 */
89bddce5 215
1da177e4
LT
216/**
217 * move_addr_to_kernel - copy a socket address into kernel space
218 * @uaddr: Address in user space
219 * @kaddr: Address in kernel space
220 * @ulen: Length in user space
221 *
222 * The address is copied into kernel space. If the provided address is
223 * too long an error code of -EINVAL is returned. If the copy gives
224 * invalid addresses -EFAULT is returned. On a success 0 is returned.
225 */
226
227int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
228{
89bddce5 229 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 230 return -EINVAL;
89bddce5 231 if (ulen == 0)
1da177e4 232 return 0;
89bddce5 233 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 234 return -EFAULT;
3ec3b2fb 235 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
236}
237
238/**
239 * move_addr_to_user - copy an address to user space
240 * @kaddr: kernel space address
241 * @klen: length of address in kernel
242 * @uaddr: user space address
243 * @ulen: pointer to user length field
244 *
245 * The value pointed to by ulen on entry is the buffer length available.
246 * This is overwritten with the buffer space used. -EINVAL is returned
247 * if an overlong buffer is specified or a negative buffer size. -EFAULT
248 * is returned if either the buffer or the length field are not
249 * accessible.
250 * After copying the data up to the limit the user specifies, the true
251 * length of the data is written over the length limit the user
252 * specified. Zero is returned for a success.
253 */
89bddce5
SH
254
255int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
256 int __user *ulen)
1da177e4
LT
257{
258 int err;
259 int len;
260
89bddce5
SH
261 err = get_user(len, ulen);
262 if (err)
1da177e4 263 return err;
89bddce5
SH
264 if (len > klen)
265 len = klen;
266 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 267 return -EINVAL;
89bddce5 268 if (len) {
d6fe3945
SG
269 if (audit_sockaddr(klen, kaddr))
270 return -ENOMEM;
89bddce5 271 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
272 return -EFAULT;
273 }
274 /*
89bddce5
SH
275 * "fromlen shall refer to the value before truncation.."
276 * 1003.1g
1da177e4
LT
277 */
278 return __put_user(klen, ulen);
279}
280
/* Magic number passed to get_sb_pseudo(); the bytes spell "SOCK" in ASCII. */
#define SOCKFS_MAGIC 0x534F434B

/* Slab cache for struct socket_alloc, created in init_inodecache(). */
static kmem_cache_t *sock_inode_cachep __read_mostly;
1da177e4
LT
284
285static struct inode *sock_alloc_inode(struct super_block *sb)
286{
287 struct socket_alloc *ei;
89bddce5
SH
288
289 ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
1da177e4
LT
290 if (!ei)
291 return NULL;
292 init_waitqueue_head(&ei->socket.wait);
89bddce5 293
1da177e4
LT
294 ei->socket.fasync_list = NULL;
295 ei->socket.state = SS_UNCONNECTED;
296 ei->socket.flags = 0;
297 ei->socket.ops = NULL;
298 ei->socket.sk = NULL;
299 ei->socket.file = NULL;
300 ei->socket.flags = 0;
301
302 return &ei->vfs_inode;
303}
304
305static void sock_destroy_inode(struct inode *inode)
306{
307 kmem_cache_free(sock_inode_cachep,
308 container_of(inode, struct socket_alloc, vfs_inode));
309}
310
89bddce5 311static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
1da177e4 312{
89bddce5 313 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 314
89bddce5
SH
315 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
316 == SLAB_CTOR_CONSTRUCTOR)
1da177e4
LT
317 inode_init_once(&ei->vfs_inode);
318}
89bddce5 319
1da177e4
LT
320static int init_inodecache(void)
321{
322 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
323 sizeof(struct socket_alloc),
324 0,
325 (SLAB_HWCACHE_ALIGN |
326 SLAB_RECLAIM_ACCOUNT |
327 SLAB_MEM_SPREAD),
328 init_once,
329 NULL);
1da177e4
LT
330 if (sock_inode_cachep == NULL)
331 return -ENOMEM;
332 return 0;
333}
334
/* Superblock operations handed to get_sb_pseudo() by sockfs_get_sb(). */
static struct super_operations sockfs_ops = {
	.alloc_inode =	sock_alloc_inode,	/* inode embedded in a socket_alloc */
	.destroy_inode =sock_destroy_inode,
	.statfs =	simple_statfs,
};
340
454e2398 341static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
342 int flags, const char *dev_name, void *data,
343 struct vfsmount *mnt)
1da177e4 344{
454e2398
DH
345 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
346 mnt);
1da177e4
LT
347}
348
/* Mount of the socket pseudo filesystem; its superblock supplies socket inodes. */
static struct vfsmount *sock_mnt __read_mostly;

/* Filesystem type descriptor for the socket pseudo filesystem. */
static struct file_system_type sock_fs_type = {
	.name =		"sockfs",
	.get_sb =	sockfs_get_sb,
	.kill_sb =	kill_anon_super,
};
89bddce5 356
1da177e4
LT
357static int sockfs_delete_dentry(struct dentry *dentry)
358{
359 return 1;
360}
361static struct dentry_operations sockfs_dentry_operations = {
89bddce5 362 .d_delete = sockfs_delete_dentry,
1da177e4
LT
363};
364
365/*
366 * Obtains the first available file descriptor and sets it up for use.
367 *
39d8c1b6
DM
368 * These functions create file structures and maps them to fd space
369 * of the current process. On success it returns file descriptor
1da177e4
LT
370 * and file struct implicitly stored in sock->file.
371 * Note that another thread may close file descriptor before we return
372 * from this function. We use the fact that now we do not refer
373 * to socket after mapping. If one day we will need it, this
374 * function will increment ref. count on file by 1.
375 *
376 * In any case returned fd MAY BE not valid!
377 * This race condition is unavoidable
378 * with shared fd spaces, we cannot solve it inside kernel,
379 * but we take care of internal coherence yet.
380 */
381
39d8c1b6 382static int sock_alloc_fd(struct file **filep)
1da177e4
LT
383{
384 int fd;
1da177e4
LT
385
386 fd = get_unused_fd();
39d8c1b6 387 if (likely(fd >= 0)) {
1da177e4
LT
388 struct file *file = get_empty_filp();
389
39d8c1b6
DM
390 *filep = file;
391 if (unlikely(!file)) {
1da177e4 392 put_unused_fd(fd);
39d8c1b6 393 return -ENFILE;
1da177e4 394 }
39d8c1b6
DM
395 } else
396 *filep = NULL;
397 return fd;
398}
1da177e4 399
39d8c1b6
DM
400static int sock_attach_fd(struct socket *sock, struct file *file)
401{
402 struct qstr this;
403 char name[32];
404
405 this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
406 this.name = name;
407 this.hash = SOCK_INODE(sock)->i_ino;
408
409 file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
410 if (unlikely(!file->f_dentry))
411 return -ENOMEM;
412
413 file->f_dentry->d_op = &sockfs_dentry_operations;
414 d_add(file->f_dentry, SOCK_INODE(sock));
415 file->f_vfsmnt = mntget(sock_mnt);
416 file->f_mapping = file->f_dentry->d_inode->i_mapping;
417
418 sock->file = file;
419 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
420 file->f_mode = FMODE_READ | FMODE_WRITE;
421 file->f_flags = O_RDWR;
422 file->f_pos = 0;
423 file->private_data = sock;
1da177e4 424
39d8c1b6
DM
425 return 0;
426}
427
/*
 * Allocate a descriptor and file for @sock, attach them and install the
 * file in the descriptor table.
 *
 * Returns the new descriptor, or a negative error code; on an attach
 * failure both the file and the descriptor are released again.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int fd;
	int err;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
445
6cb153ca
BL
446static struct socket *sock_from_file(struct file *file, int *err)
447{
448 struct inode *inode;
449 struct socket *sock;
450
451 if (file->f_op == &socket_file_ops)
452 return file->private_data; /* set in sock_map_fd */
453
454 inode = file->f_dentry->d_inode;
455 if (!S_ISSOCK(inode->i_mode)) {
456 *err = -ENOTSOCK;
457 return NULL;
458 }
459
460 sock = SOCKET_I(inode);
461 if (sock->file != file) {
462 printk(KERN_ERR "socki_lookup: socket file changed!\n");
463 sock->file = file;
464 }
465 return sock;
466}
467
1da177e4
LT
468/**
469 * sockfd_lookup - Go from a file number to its socket slot
470 * @fd: file handle
471 * @err: pointer to an error code return
472 *
473 * The file handle passed in is locked and the socket it is bound
474 * too is returned. If an error occurs the err pointer is overwritten
475 * with a negative errno code and NULL is returned. The function checks
476 * for both invalid handles and passing a handle which is not a socket.
477 *
478 * On a success the socket object pointer is returned.
479 */
480
481struct socket *sockfd_lookup(int fd, int *err)
482{
483 struct file *file;
1da177e4
LT
484 struct socket *sock;
485
89bddce5
SH
486 file = fget(fd);
487 if (!file) {
1da177e4
LT
488 *err = -EBADF;
489 return NULL;
490 }
89bddce5 491
6cb153ca
BL
492 sock = sock_from_file(file, err);
493 if (!sock)
1da177e4 494 fput(file);
6cb153ca
BL
495 return sock;
496}
1da177e4 497
6cb153ca
BL
498static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
499{
500 struct file *file;
501 struct socket *sock;
502
3672558c 503 *err = -EBADF;
6cb153ca
BL
504 file = fget_light(fd, fput_needed);
505 if (file) {
506 sock = sock_from_file(file, err);
507 if (sock)
508 return sock;
509 fput_light(file, *fput_needed);
1da177e4 510 }
6cb153ca 511 return NULL;
1da177e4
LT
512}
513
514/**
515 * sock_alloc - allocate a socket
89bddce5 516 *
1da177e4
LT
517 * Allocate a new inode and socket object. The two are bound together
518 * and initialised. The socket is then returned. If we are out of inodes
519 * NULL is returned.
520 */
521
522static struct socket *sock_alloc(void)
523{
89bddce5
SH
524 struct inode *inode;
525 struct socket *sock;
1da177e4
LT
526
527 inode = new_inode(sock_mnt->mnt_sb);
528 if (!inode)
529 return NULL;
530
531 sock = SOCKET_I(inode);
532
89bddce5 533 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
534 inode->i_uid = current->fsuid;
535 inode->i_gid = current->fsgid;
536
537 get_cpu_var(sockets_in_use)++;
538 put_cpu_var(sockets_in_use);
539 return sock;
540}
541
542/*
543 * In theory you can't get an open on this inode, but /proc provides
544 * a back door. Remember to keep it shut otherwise you'll let the
545 * creepy crawlies in.
546 */
89bddce5 547
1da177e4
LT
548static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
549{
550 return -ENXIO;
551}
552
/* File operations whose only handler, open, always fails via sock_no_open(). */
const struct file_operations bad_sock_fops = {
	.owner = THIS_MODULE,
	.open = sock_no_open,
};
557
558/**
559 * sock_release - close a socket
560 * @sock: socket to close
561 *
562 * The socket is released from the protocol stack if it has a release
563 * callback, and the inode is then released if the socket is bound to
89bddce5 564 * an inode not a file.
1da177e4 565 */
89bddce5 566
1da177e4
LT
567void sock_release(struct socket *sock)
568{
569 if (sock->ops) {
570 struct module *owner = sock->ops->owner;
571
572 sock->ops->release(sock);
573 sock->ops = NULL;
574 module_put(owner);
575 }
576
577 if (sock->fasync_list)
578 printk(KERN_ERR "sock_release: fasync list not empty!\n");
579
580 get_cpu_var(sockets_in_use)--;
581 put_cpu_var(sockets_in_use);
582 if (!sock->file) {
583 iput(SOCK_INODE(sock));
584 return;
585 }
89bddce5 586 sock->file = NULL;
1da177e4
LT
587}
588
89bddce5 589static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
590 struct msghdr *msg, size_t size)
591{
592 struct sock_iocb *si = kiocb_to_siocb(iocb);
593 int err;
594
595 si->sock = sock;
596 si->scm = NULL;
597 si->msg = msg;
598 si->size = size;
599
600 err = security_socket_sendmsg(sock, msg, size);
601 if (err)
602 return err;
603
604 return sock->ops->sendmsg(iocb, sock, msg, size);
605}
606
607int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
608{
609 struct kiocb iocb;
610 struct sock_iocb siocb;
611 int ret;
612
613 init_sync_kiocb(&iocb, NULL);
614 iocb.private = &siocb;
615 ret = __sock_sendmsg(&iocb, sock, msg, size);
616 if (-EIOCBQUEUED == ret)
617 ret = wait_on_sync_kiocb(&iocb);
618 return ret;
619}
620
621int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
622 struct kvec *vec, size_t num, size_t size)
623{
624 mm_segment_t oldfs = get_fs();
625 int result;
626
627 set_fs(KERNEL_DS);
628 /*
629 * the following is safe, since for compiler definitions of kvec and
630 * iovec are identical, yielding the same in-core layout and alignment
631 */
89bddce5 632 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
633 msg->msg_iovlen = num;
634 result = sock_sendmsg(sock, msg, size);
635 set_fs(oldfs);
636 return result;
637}
638
89bddce5 639static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
640 struct msghdr *msg, size_t size, int flags)
641{
642 int err;
643 struct sock_iocb *si = kiocb_to_siocb(iocb);
644
645 si->sock = sock;
646 si->scm = NULL;
647 si->msg = msg;
648 si->size = size;
649 si->flags = flags;
650
651 err = security_socket_recvmsg(sock, msg, size, flags);
652 if (err)
653 return err;
654
655 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
656}
657
89bddce5 658int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
659 size_t size, int flags)
660{
661 struct kiocb iocb;
662 struct sock_iocb siocb;
663 int ret;
664
89bddce5 665 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
666 iocb.private = &siocb;
667 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
668 if (-EIOCBQUEUED == ret)
669 ret = wait_on_sync_kiocb(&iocb);
670 return ret;
671}
672
89bddce5
SH
673int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
674 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
675{
676 mm_segment_t oldfs = get_fs();
677 int result;
678
679 set_fs(KERNEL_DS);
680 /*
681 * the following is safe, since for compiler definitions of kvec and
682 * iovec are identical, yielding the same in-core layout and alignment
683 */
89bddce5 684 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
685 result = sock_recvmsg(sock, msg, size, flags);
686 set_fs(oldfs);
687 return result;
688}
689
690static void sock_aio_dtor(struct kiocb *iocb)
691{
692 kfree(iocb->private);
693}
694
ce1d4d3e
CH
695static ssize_t sock_sendpage(struct file *file, struct page *page,
696 int offset, size_t size, loff_t *ppos, int more)
1da177e4 697{
1da177e4
LT
698 struct socket *sock;
699 int flags;
700
ce1d4d3e
CH
701 sock = file->private_data;
702
703 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
704 if (more)
705 flags |= MSG_MORE;
706
707 return sock->ops->sendpage(sock, page, offset, size, flags);
708}
1da177e4 709
ce1d4d3e 710static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5
SH
711 char __user *ubuf, size_t size,
712 struct sock_iocb *siocb)
ce1d4d3e
CH
713{
714 if (!is_sync_kiocb(iocb)) {
715 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
716 if (!siocb)
717 return NULL;
1da177e4
LT
718 iocb->ki_dtor = sock_aio_dtor;
719 }
1da177e4 720
ce1d4d3e
CH
721 siocb->kiocb = iocb;
722 siocb->async_iov.iov_base = ubuf;
723 siocb->async_iov.iov_len = size;
1da177e4 724
ce1d4d3e
CH
725 iocb->private = siocb;
726 return siocb;
1da177e4
LT
727}
728
ce1d4d3e 729static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
89bddce5
SH
730 struct file *file, struct iovec *iov,
731 unsigned long nr_segs)
ce1d4d3e
CH
732{
733 struct socket *sock = file->private_data;
734 size_t size = 0;
735 int i;
1da177e4 736
89bddce5
SH
737 for (i = 0; i < nr_segs; i++)
738 size += iov[i].iov_len;
1da177e4 739
ce1d4d3e
CH
740 msg->msg_name = NULL;
741 msg->msg_namelen = 0;
742 msg->msg_control = NULL;
743 msg->msg_controllen = 0;
89bddce5 744 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
745 msg->msg_iovlen = nr_segs;
746 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
747
748 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
749}
750
751static ssize_t sock_readv(struct file *file, const struct iovec *iov,
752 unsigned long nr_segs, loff_t *ppos)
1da177e4 753{
ce1d4d3e
CH
754 struct kiocb iocb;
755 struct sock_iocb siocb;
756 struct msghdr msg;
757 int ret;
758
89bddce5 759 init_sync_kiocb(&iocb, NULL);
ce1d4d3e
CH
760 iocb.private = &siocb;
761
762 ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
763 if (-EIOCBQUEUED == ret)
764 ret = wait_on_sync_kiocb(&iocb);
765 return ret;
766}
767
768static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
89bddce5 769 size_t count, loff_t pos)
ce1d4d3e
CH
770{
771 struct sock_iocb siocb, *x;
772
1da177e4
LT
773 if (pos != 0)
774 return -ESPIPE;
ce1d4d3e 775 if (count == 0) /* Match SYS5 behaviour */
1da177e4
LT
776 return 0;
777
ce1d4d3e
CH
778 x = alloc_sock_iocb(iocb, ubuf, count, &siocb);
779 if (!x)
780 return -ENOMEM;
781 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
89bddce5 782 &x->async_iov, 1);
1da177e4
LT
783}
784
ce1d4d3e 785static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
89bddce5
SH
786 struct file *file, struct iovec *iov,
787 unsigned long nr_segs)
1da177e4 788{
ce1d4d3e
CH
789 struct socket *sock = file->private_data;
790 size_t size = 0;
791 int i;
1da177e4 792
89bddce5
SH
793 for (i = 0; i < nr_segs; i++)
794 size += iov[i].iov_len;
1da177e4 795
ce1d4d3e
CH
796 msg->msg_name = NULL;
797 msg->msg_namelen = 0;
798 msg->msg_control = NULL;
799 msg->msg_controllen = 0;
89bddce5 800 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
801 msg->msg_iovlen = nr_segs;
802 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
803 if (sock->type == SOCK_SEQPACKET)
804 msg->msg_flags |= MSG_EOR;
1da177e4 805
ce1d4d3e 806 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
807}
808
ce1d4d3e
CH
809static ssize_t sock_writev(struct file *file, const struct iovec *iov,
810 unsigned long nr_segs, loff_t *ppos)
1da177e4
LT
811{
812 struct msghdr msg;
ce1d4d3e
CH
813 struct kiocb iocb;
814 struct sock_iocb siocb;
815 int ret;
1da177e4 816
ce1d4d3e
CH
817 init_sync_kiocb(&iocb, NULL);
818 iocb.private = &siocb;
1da177e4 819
ce1d4d3e
CH
820 ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
821 if (-EIOCBQUEUED == ret)
822 ret = wait_on_sync_kiocb(&iocb);
823 return ret;
824}
1da177e4 825
ce1d4d3e 826static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
89bddce5 827 size_t count, loff_t pos)
ce1d4d3e
CH
828{
829 struct sock_iocb siocb, *x;
1da177e4 830
ce1d4d3e
CH
831 if (pos != 0)
832 return -ESPIPE;
833 if (count == 0) /* Match SYS5 behaviour */
834 return 0;
1da177e4 835
ce1d4d3e
CH
836 x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb);
837 if (!x)
838 return -ENOMEM;
1da177e4 839
ce1d4d3e 840 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
89bddce5 841 &x->async_iov, 1);
1da177e4
LT
842}
843
1da177e4
LT
844/*
845 * Atomic setting of ioctl hooks to avoid race
846 * with module unload.
847 */
848
4a3e2f71 849static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 850static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 851
89bddce5 852void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 853{
4a3e2f71 854 mutex_lock(&br_ioctl_mutex);
1da177e4 855 br_ioctl_hook = hook;
4a3e2f71 856 mutex_unlock(&br_ioctl_mutex);
1da177e4 857}
89bddce5 858
1da177e4
LT
859EXPORT_SYMBOL(brioctl_set);
860
4a3e2f71 861static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 862static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 863
89bddce5 864void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 865{
4a3e2f71 866 mutex_lock(&vlan_ioctl_mutex);
1da177e4 867 vlan_ioctl_hook = hook;
4a3e2f71 868 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 869}
89bddce5 870
1da177e4
LT
871EXPORT_SYMBOL(vlan_ioctl_set);
872
4a3e2f71 873static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 874static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 875
89bddce5 876void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 877{
4a3e2f71 878 mutex_lock(&dlci_ioctl_mutex);
1da177e4 879 dlci_ioctl_hook = hook;
4a3e2f71 880 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 881}
89bddce5 882
1da177e4
LT
883EXPORT_SYMBOL(dlci_ioctl_set);
884
885/*
886 * With an ioctl, arg may well be a user mode pointer, but we don't know
887 * what to do with it - that's up to the protocol still.
888 */
889
890static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
891{
892 struct socket *sock;
893 void __user *argp = (void __user *)arg;
894 int pid, err;
895
b69aee04 896 sock = file->private_data;
1da177e4
LT
897 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
898 err = dev_ioctl(cmd, argp);
899 } else
d86b5e0e 900#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
901 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
902 err = dev_ioctl(cmd, argp);
903 } else
89bddce5
SH
904#endif /* CONFIG_WIRELESS_EXT */
905 switch (cmd) {
1da177e4
LT
906 case FIOSETOWN:
907 case SIOCSPGRP:
908 err = -EFAULT;
909 if (get_user(pid, (int __user *)argp))
910 break;
911 err = f_setown(sock->file, pid, 1);
912 break;
913 case FIOGETOWN:
914 case SIOCGPGRP:
89bddce5
SH
915 err = put_user(sock->file->f_owner.pid,
916 (int __user *)argp);
1da177e4
LT
917 break;
918 case SIOCGIFBR:
919 case SIOCSIFBR:
920 case SIOCBRADDBR:
921 case SIOCBRDELBR:
922 err = -ENOPKG;
923 if (!br_ioctl_hook)
924 request_module("bridge");
925
4a3e2f71 926 mutex_lock(&br_ioctl_mutex);
89bddce5 927 if (br_ioctl_hook)
1da177e4 928 err = br_ioctl_hook(cmd, argp);
4a3e2f71 929 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
930 break;
931 case SIOCGIFVLAN:
932 case SIOCSIFVLAN:
933 err = -ENOPKG;
934 if (!vlan_ioctl_hook)
935 request_module("8021q");
936
4a3e2f71 937 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
938 if (vlan_ioctl_hook)
939 err = vlan_ioctl_hook(argp);
4a3e2f71 940 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
941 break;
942 case SIOCGIFDIVERT:
943 case SIOCSIFDIVERT:
89bddce5 944 /* Convert this to call through a hook */
1da177e4
LT
945 err = divert_ioctl(cmd, argp);
946 break;
947 case SIOCADDDLCI:
948 case SIOCDELDLCI:
949 err = -ENOPKG;
950 if (!dlci_ioctl_hook)
951 request_module("dlci");
952
953 if (dlci_ioctl_hook) {
4a3e2f71 954 mutex_lock(&dlci_ioctl_mutex);
1da177e4 955 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 956 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
957 }
958 break;
959 default:
960 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
961
962 /*
963 * If this ioctl is unknown try to hand it down
964 * to the NIC driver.
965 */
966 if (err == -ENOIOCTLCMD)
967 err = dev_ioctl(cmd, argp);
1da177e4 968 break;
89bddce5 969 }
1da177e4
LT
970 return err;
971}
972
973int sock_create_lite(int family, int type, int protocol, struct socket **res)
974{
975 int err;
976 struct socket *sock = NULL;
89bddce5 977
1da177e4
LT
978 err = security_socket_create(family, type, protocol, 1);
979 if (err)
980 goto out;
981
982 sock = sock_alloc();
983 if (!sock) {
984 err = -ENOMEM;
985 goto out;
986 }
987
1da177e4 988 sock->type = type;
7420ed23
VY
989 err = security_socket_post_create(sock, family, type, protocol, 1);
990 if (err)
991 goto out_release;
992
1da177e4
LT
993out:
994 *res = sock;
995 return err;
7420ed23
VY
996out_release:
997 sock_release(sock);
998 sock = NULL;
999 goto out;
1da177e4
LT
1000}
1001
1002/* No kernel lock held - perfect */
89bddce5 1003static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1004{
1005 struct socket *sock;
1006
1007 /*
89bddce5 1008 * We can't return errors to poll, so it's either yes or no.
1da177e4 1009 */
b69aee04 1010 sock = file->private_data;
1da177e4
LT
1011 return sock->ops->poll(file, sock, wait);
1012}
1013
89bddce5 1014static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1015{
b69aee04 1016 struct socket *sock = file->private_data;
1da177e4
LT
1017
1018 return sock->ops->mmap(file, sock, vma);
1019}
1020
20380731 1021static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1022{
1023 /*
89bddce5
SH
1024 * It was possible the inode is NULL we were
1025 * closing an unfinished socket.
1da177e4
LT
1026 */
1027
89bddce5 1028 if (!inode) {
1da177e4
LT
1029 printk(KERN_DEBUG "sock_close: NULL inode\n");
1030 return 0;
1031 }
1032 sock_fasync(-1, filp, 0);
1033 sock_release(SOCKET_I(inode));
1034 return 0;
1035}
1036
1037/*
1038 * Update the socket async list
1039 *
1040 * Fasync_list locking strategy.
1041 *
1042 * 1. fasync_list is modified only under process context socket lock
1043 * i.e. under semaphore.
1044 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1045 * or under socket lock.
1046 * 3. fasync_list can be used from softirq context, so that
1047 * modification under socket lock have to be enhanced with
1048 * write_lock_bh(&sk->sk_callback_lock).
1049 * --ANK (990710)
1050 */
1051
1052static int sock_fasync(int fd, struct file *filp, int on)
1053{
89bddce5 1054 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1055 struct socket *sock;
1056 struct sock *sk;
1057
89bddce5 1058 if (on) {
8b3a7005 1059 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1060 if (fna == NULL)
1da177e4
LT
1061 return -ENOMEM;
1062 }
1063
b69aee04 1064 sock = filp->private_data;
1da177e4 1065
89bddce5
SH
1066 sk = sock->sk;
1067 if (sk == NULL) {
1da177e4
LT
1068 kfree(fna);
1069 return -EINVAL;
1070 }
1071
1072 lock_sock(sk);
1073
89bddce5 1074 prev = &(sock->fasync_list);
1da177e4 1075
89bddce5
SH
1076 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1077 if (fa->fa_file == filp)
1da177e4
LT
1078 break;
1079
89bddce5
SH
1080 if (on) {
1081 if (fa != NULL) {
1da177e4 1082 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1083 fa->fa_fd = fd;
1da177e4
LT
1084 write_unlock_bh(&sk->sk_callback_lock);
1085
1086 kfree(fna);
1087 goto out;
1088 }
89bddce5
SH
1089 fna->fa_file = filp;
1090 fna->fa_fd = fd;
1091 fna->magic = FASYNC_MAGIC;
1092 fna->fa_next = sock->fasync_list;
1da177e4 1093 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1094 sock->fasync_list = fna;
1da177e4 1095 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1096 } else {
1097 if (fa != NULL) {
1da177e4 1098 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1099 *prev = fa->fa_next;
1da177e4
LT
1100 write_unlock_bh(&sk->sk_callback_lock);
1101 kfree(fa);
1102 }
1103 }
1104
1105out:
1106 release_sock(sock->sk);
1107 return 0;
1108}
1109
1110/* This function may be called only under socket lock or callback_lock */
1111
1112int sock_wake_async(struct socket *sock, int how, int band)
1113{
1114 if (!sock || !sock->fasync_list)
1115 return -1;
89bddce5 1116 switch (how) {
1da177e4 1117 case 1:
89bddce5 1118
1da177e4
LT
1119 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1120 break;
1121 goto call_kill;
1122 case 2:
1123 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1124 break;
1125 /* fall through */
1126 case 0:
89bddce5 1127call_kill:
1da177e4
LT
1128 __kill_fasync(sock->fasync_list, SIGIO, band);
1129 break;
1130 case 3:
1131 __kill_fasync(sock->fasync_list, SIGURG, band);
1132 }
1133 return 0;
1134}
1135
89bddce5
SH
1136static int __sock_create(int family, int type, int protocol,
1137 struct socket **res, int kern)
1da177e4
LT
1138{
1139 int err;
1140 struct socket *sock;
1141
1142 /*
89bddce5 1143 * Check protocol is in range
1da177e4
LT
1144 */
1145 if (family < 0 || family >= NPROTO)
1146 return -EAFNOSUPPORT;
1147 if (type < 0 || type >= SOCK_MAX)
1148 return -EINVAL;
1149
1150 /* Compatibility.
1151
1152 This uglymoron is moved from INET layer to here to avoid
1153 deadlock in module load.
1154 */
1155 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1156 static int warned;
1da177e4
LT
1157 if (!warned) {
1158 warned = 1;
89bddce5
SH
1159 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1160 current->comm);
1da177e4
LT
1161 }
1162 family = PF_PACKET;
1163 }
1164
1165 err = security_socket_create(family, type, protocol, kern);
1166 if (err)
1167 return err;
89bddce5 1168
1da177e4 1169#if defined(CONFIG_KMOD)
89bddce5
SH
1170 /* Attempt to load a protocol module if the find failed.
1171 *
1172 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1173 * requested real, full-featured networking support upon configuration.
1174 * Otherwise module support will break!
1175 */
89bddce5
SH
1176 if (net_families[family] == NULL) {
1177 request_module("net-pf-%d", family);
1da177e4
LT
1178 }
1179#endif
1180
1181 net_family_read_lock();
1182 if (net_families[family] == NULL) {
1183 err = -EAFNOSUPPORT;
1184 goto out;
1185 }
1186
1187/*
1188 * Allocate the socket and allow the family to set things up. if
1189 * the protocol is 0, the family is instructed to select an appropriate
1190 * default.
1191 */
1192
1193 if (!(sock = sock_alloc())) {
5991c844
AM
1194 if (net_ratelimit())
1195 printk(KERN_WARNING "socket: no more sockets\n");
89bddce5
SH
1196 err = -ENFILE; /* Not exactly a match, but its the
1197 closest posix thing */
1da177e4
LT
1198 goto out;
1199 }
1200
89bddce5 1201 sock->type = type;
1da177e4
LT
1202
1203 /*
1204 * We will call the ->create function, that possibly is in a loadable
1205 * module, so we have to bump that loadable module refcnt first.
1206 */
1207 err = -EAFNOSUPPORT;
1208 if (!try_module_get(net_families[family]->owner))
1209 goto out_release;
1210
a79af59e
FF
1211 if ((err = net_families[family]->create(sock, protocol)) < 0) {
1212 sock->ops = NULL;
1da177e4 1213 goto out_module_put;
a79af59e
FF
1214 }
1215
1da177e4
LT
1216 /*
1217 * Now to bump the refcnt of the [loadable] module that owns this
1218 * socket at sock_release time we decrement its refcnt.
1219 */
1220 if (!try_module_get(sock->ops->owner)) {
1221 sock->ops = NULL;
1222 goto out_module_put;
1223 }
1224 /*
1225 * Now that we're done with the ->create function, the [loadable]
1226 * module can have its refcnt decremented
1227 */
1228 module_put(net_families[family]->owner);
1229 *res = sock;
7420ed23
VY
1230 err = security_socket_post_create(sock, family, type, protocol, kern);
1231 if (err)
1232 goto out_release;
1da177e4
LT
1233
1234out:
1235 net_family_read_unlock();
1236 return err;
1237out_module_put:
1238 module_put(net_families[family]->owner);
1239out_release:
1240 sock_release(sock);
1241 goto out;
1242}
1243
/*
 * Create a socket; thin wrapper that calls __sock_create() with the
 * kern argument set to 0.  Returns whatever __sock_create() returns.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1248
/*
 * Create a socket; thin wrapper that calls __sock_create() with the
 * kern argument set to 1.  Returns whatever __sock_create() returns.
 */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1253
1254asmlinkage long sys_socket(int family, int type, int protocol)
1255{
1256 int retval;
1257 struct socket *sock;
1258
1259 retval = sock_create(family, type, protocol, &sock);
1260 if (retval < 0)
1261 goto out;
1262
1263 retval = sock_map_fd(sock);
1264 if (retval < 0)
1265 goto out_release;
1266
1267out:
1268 /* It may be already another descriptor 8) Not kernel problem. */
1269 return retval;
1270
1271out_release:
1272 sock_release(sock);
1273 return retval;
1274}
1275
1276/*
1277 * Create a pair of connected sockets.
1278 */
1279
89bddce5
SH
1280asmlinkage long sys_socketpair(int family, int type, int protocol,
1281 int __user *usockvec)
1da177e4
LT
1282{
1283 struct socket *sock1, *sock2;
1284 int fd1, fd2, err;
1285
1286 /*
1287 * Obtain the first socket and check if the underlying protocol
1288 * supports the socketpair call.
1289 */
1290
1291 err = sock_create(family, type, protocol, &sock1);
1292 if (err < 0)
1293 goto out;
1294
1295 err = sock_create(family, type, protocol, &sock2);
1296 if (err < 0)
1297 goto out_release_1;
1298
1299 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1300 if (err < 0)
1da177e4
LT
1301 goto out_release_both;
1302
1303 fd1 = fd2 = -1;
1304
1305 err = sock_map_fd(sock1);
1306 if (err < 0)
1307 goto out_release_both;
1308 fd1 = err;
1309
1310 err = sock_map_fd(sock2);
1311 if (err < 0)
1312 goto out_close_1;
1313 fd2 = err;
1314
1315 /* fd1 and fd2 may be already another descriptors.
1316 * Not kernel problem.
1317 */
1318
89bddce5 1319 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1320 if (!err)
1321 err = put_user(fd2, &usockvec[1]);
1322 if (!err)
1323 return 0;
1324
1325 sys_close(fd2);
1326 sys_close(fd1);
1327 return err;
1328
1329out_close_1:
89bddce5 1330 sock_release(sock2);
1da177e4
LT
1331 sys_close(fd1);
1332 return err;
1333
1334out_release_both:
89bddce5 1335 sock_release(sock2);
1da177e4 1336out_release_1:
89bddce5 1337 sock_release(sock1);
1da177e4
LT
1338out:
1339 return err;
1340}
1341
1da177e4
LT
1342/*
1343 * Bind a name to a socket. Nothing much to do here since it's
1344 * the protocol's responsibility to handle the local address.
1345 *
1346 * We move the socket address to kernel space before we call
1347 * the protocol layer (having also checked the address is ok).
1348 */
1349
1350asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1351{
1352 struct socket *sock;
1353 char address[MAX_SOCK_ADDR];
6cb153ca 1354 int err, fput_needed;
1da177e4 1355
89bddce5
SH
1356 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1357 if(sock) {
1358 err = move_addr_to_kernel(umyaddr, addrlen, address);
1359 if (err >= 0) {
1360 err = security_socket_bind(sock,
1361 (struct sockaddr *)address,
1362 addrlen);
6cb153ca
BL
1363 if (!err)
1364 err = sock->ops->bind(sock,
89bddce5
SH
1365 (struct sockaddr *)
1366 address, addrlen);
1da177e4 1367 }
6cb153ca 1368 fput_light(sock->file, fput_needed);
89bddce5 1369 }
1da177e4
LT
1370 return err;
1371}
1372
1da177e4
LT
1373/*
1374 * Perform a listen. Basically, we allow the protocol to do anything
1375 * necessary for a listen, and if that works, we mark the socket as
1376 * ready for listening.
1377 */
1378
1379int sysctl_somaxconn = SOMAXCONN;
1380
1381asmlinkage long sys_listen(int fd, int backlog)
1382{
1383 struct socket *sock;
6cb153ca 1384 int err, fput_needed;
89bddce5
SH
1385
1386 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1387 if (sock) {
1388 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1389 backlog = sysctl_somaxconn;
1390
1391 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1392 if (!err)
1393 err = sock->ops->listen(sock, backlog);
1da177e4 1394
6cb153ca 1395 fput_light(sock->file, fput_needed);
1da177e4
LT
1396 }
1397 return err;
1398}
1399
1da177e4
LT
1400/*
1401 * For accept, we attempt to create a new socket, set up the link
1402 * with the client, wake up the client, then return the new
1403 * connected fd. We collect the address of the connector in kernel
1404 * space and move it to user at the very end. This is unclean because
1405 * we open the socket then return an error.
1406 *
1407 * 1003.1g adds the ability to recvmsg() to query connection pending
1408 * status to recvmsg. We need to add that support in a way thats
1409 * clean when we restucture accept also.
1410 */
1411
89bddce5
SH
1412asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1413 int __user *upeer_addrlen)
1da177e4
LT
1414{
1415 struct socket *sock, *newsock;
39d8c1b6 1416 struct file *newfile;
6cb153ca 1417 int err, len, newfd, fput_needed;
1da177e4
LT
1418 char address[MAX_SOCK_ADDR];
1419
6cb153ca 1420 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1421 if (!sock)
1422 goto out;
1423
1424 err = -ENFILE;
89bddce5 1425 if (!(newsock = sock_alloc()))
1da177e4
LT
1426 goto out_put;
1427
1428 newsock->type = sock->type;
1429 newsock->ops = sock->ops;
1430
1da177e4
LT
1431 /*
1432 * We don't need try_module_get here, as the listening socket (sock)
1433 * has the protocol module (sock->ops->owner) held.
1434 */
1435 __module_get(newsock->ops->owner);
1436
39d8c1b6
DM
1437 newfd = sock_alloc_fd(&newfile);
1438 if (unlikely(newfd < 0)) {
1439 err = newfd;
9a1875e6
DM
1440 sock_release(newsock);
1441 goto out_put;
39d8c1b6
DM
1442 }
1443
1444 err = sock_attach_fd(newsock, newfile);
1445 if (err < 0)
1446 goto out_fd;
1447
a79af59e
FF
1448 err = security_socket_accept(sock, newsock);
1449 if (err)
39d8c1b6 1450 goto out_fd;
a79af59e 1451
1da177e4
LT
1452 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1453 if (err < 0)
39d8c1b6 1454 goto out_fd;
1da177e4
LT
1455
1456 if (upeer_sockaddr) {
89bddce5
SH
1457 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1458 &len, 2) < 0) {
1da177e4 1459 err = -ECONNABORTED;
39d8c1b6 1460 goto out_fd;
1da177e4 1461 }
89bddce5
SH
1462 err = move_addr_to_user(address, len, upeer_sockaddr,
1463 upeer_addrlen);
1da177e4 1464 if (err < 0)
39d8c1b6 1465 goto out_fd;
1da177e4
LT
1466 }
1467
1468 /* File flags are not inherited via accept() unlike another OSes. */
1469
39d8c1b6
DM
1470 fd_install(newfd, newfile);
1471 err = newfd;
1da177e4
LT
1472
1473 security_socket_post_accept(sock, newsock);
1474
1475out_put:
6cb153ca 1476 fput_light(sock->file, fput_needed);
1da177e4
LT
1477out:
1478 return err;
39d8c1b6 1479out_fd:
9606a216 1480 fput(newfile);
39d8c1b6 1481 put_unused_fd(newfd);
1da177e4
LT
1482 goto out_put;
1483}
1484
1da177e4
LT
1485/*
1486 * Attempt to connect to a socket with the server address. The address
1487 * is in user space so we verify it is OK and move it to kernel space.
1488 *
1489 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1490 * break bindings
1491 *
1492 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1493 * other SEQPACKET protocols that take time to connect() as it doesn't
1494 * include the -EINPROGRESS status for such sockets.
1495 */
1496
89bddce5
SH
1497asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1498 int addrlen)
1da177e4
LT
1499{
1500 struct socket *sock;
1501 char address[MAX_SOCK_ADDR];
6cb153ca 1502 int err, fput_needed;
1da177e4 1503
6cb153ca 1504 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1505 if (!sock)
1506 goto out;
1507 err = move_addr_to_kernel(uservaddr, addrlen, address);
1508 if (err < 0)
1509 goto out_put;
1510
89bddce5
SH
1511 err =
1512 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1513 if (err)
1514 goto out_put;
1515
89bddce5 1516 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1517 sock->file->f_flags);
1518out_put:
6cb153ca 1519 fput_light(sock->file, fput_needed);
1da177e4
LT
1520out:
1521 return err;
1522}
1523
1524/*
1525 * Get the local address ('name') of a socket object. Move the obtained
1526 * name to user space.
1527 */
1528
89bddce5
SH
1529asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1530 int __user *usockaddr_len)
1da177e4
LT
1531{
1532 struct socket *sock;
1533 char address[MAX_SOCK_ADDR];
6cb153ca 1534 int len, err, fput_needed;
89bddce5 1535
6cb153ca 1536 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1537 if (!sock)
1538 goto out;
1539
1540 err = security_socket_getsockname(sock);
1541 if (err)
1542 goto out_put;
1543
1544 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1545 if (err)
1546 goto out_put;
1547 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1548
1549out_put:
6cb153ca 1550 fput_light(sock->file, fput_needed);
1da177e4
LT
1551out:
1552 return err;
1553}
1554
1555/*
1556 * Get the remote address ('name') of a socket object. Move the obtained
1557 * name to user space.
1558 */
1559
89bddce5
SH
1560asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1561 int __user *usockaddr_len)
1da177e4
LT
1562{
1563 struct socket *sock;
1564 char address[MAX_SOCK_ADDR];
6cb153ca 1565 int len, err, fput_needed;
1da177e4 1566
89bddce5
SH
1567 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1568 if (sock != NULL) {
1da177e4
LT
1569 err = security_socket_getpeername(sock);
1570 if (err) {
6cb153ca 1571 fput_light(sock->file, fput_needed);
1da177e4
LT
1572 return err;
1573 }
1574
89bddce5
SH
1575 err =
1576 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1577 1);
1da177e4 1578 if (!err)
89bddce5
SH
1579 err = move_addr_to_user(address, len, usockaddr,
1580 usockaddr_len);
6cb153ca 1581 fput_light(sock->file, fput_needed);
1da177e4
LT
1582 }
1583 return err;
1584}
1585
1586/*
1587 * Send a datagram to a given address. We move the address into kernel
1588 * space and check the user space data area is readable before invoking
1589 * the protocol.
1590 */
1591
89bddce5
SH
1592asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1593 unsigned flags, struct sockaddr __user *addr,
1594 int addr_len)
1da177e4
LT
1595{
1596 struct socket *sock;
1597 char address[MAX_SOCK_ADDR];
1598 int err;
1599 struct msghdr msg;
1600 struct iovec iov;
6cb153ca
BL
1601 int fput_needed;
1602 struct file *sock_file;
1603
1604 sock_file = fget_light(fd, &fput_needed);
1605 if (!sock_file)
1606 return -EBADF;
1607
1608 sock = sock_from_file(sock_file, &err);
1da177e4 1609 if (!sock)
6cb153ca 1610 goto out_put;
89bddce5
SH
1611 iov.iov_base = buff;
1612 iov.iov_len = len;
1613 msg.msg_name = NULL;
1614 msg.msg_iov = &iov;
1615 msg.msg_iovlen = 1;
1616 msg.msg_control = NULL;
1617 msg.msg_controllen = 0;
1618 msg.msg_namelen = 0;
6cb153ca 1619 if (addr) {
1da177e4
LT
1620 err = move_addr_to_kernel(addr, addr_len, address);
1621 if (err < 0)
1622 goto out_put;
89bddce5
SH
1623 msg.msg_name = address;
1624 msg.msg_namelen = addr_len;
1da177e4
LT
1625 }
1626 if (sock->file->f_flags & O_NONBLOCK)
1627 flags |= MSG_DONTWAIT;
1628 msg.msg_flags = flags;
1629 err = sock_sendmsg(sock, &msg, len);
1630
89bddce5 1631out_put:
6cb153ca 1632 fput_light(sock_file, fput_needed);
1da177e4
LT
1633 return err;
1634}
1635
1636/*
89bddce5 1637 * Send a datagram down a socket.
1da177e4
LT
1638 */
1639
89bddce5 1640asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1641{
1642 return sys_sendto(fd, buff, len, flags, NULL, 0);
1643}
1644
1645/*
89bddce5 1646 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1647 * sender. We verify the buffers are writable and if needed move the
1648 * sender address from kernel to user space.
1649 */
1650
89bddce5
SH
1651asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1652 unsigned flags, struct sockaddr __user *addr,
1653 int __user *addr_len)
1da177e4
LT
1654{
1655 struct socket *sock;
1656 struct iovec iov;
1657 struct msghdr msg;
1658 char address[MAX_SOCK_ADDR];
89bddce5 1659 int err, err2;
6cb153ca
BL
1660 struct file *sock_file;
1661 int fput_needed;
1662
1663 sock_file = fget_light(fd, &fput_needed);
1664 if (!sock_file)
1665 return -EBADF;
1da177e4 1666
6cb153ca 1667 sock = sock_from_file(sock_file, &err);
1da177e4
LT
1668 if (!sock)
1669 goto out;
1670
89bddce5
SH
1671 msg.msg_control = NULL;
1672 msg.msg_controllen = 0;
1673 msg.msg_iovlen = 1;
1674 msg.msg_iov = &iov;
1675 iov.iov_len = size;
1676 iov.iov_base = ubuf;
1677 msg.msg_name = address;
1678 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1679 if (sock->file->f_flags & O_NONBLOCK)
1680 flags |= MSG_DONTWAIT;
89bddce5 1681 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1682
89bddce5
SH
1683 if (err >= 0 && addr != NULL) {
1684 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1685 if (err2 < 0)
1686 err = err2;
1da177e4 1687 }
1da177e4 1688out:
6cb153ca 1689 fput_light(sock_file, fput_needed);
1da177e4
LT
1690 return err;
1691}
1692
1693/*
89bddce5 1694 * Receive a datagram from a socket.
1da177e4
LT
1695 */
1696
89bddce5
SH
1697asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1698 unsigned flags)
1da177e4
LT
1699{
1700 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1701}
1702
1703/*
1704 * Set a socket option. Because we don't know the option lengths we have
1705 * to pass the user mode parameter for the protocols to sort out.
1706 */
1707
89bddce5
SH
1708asmlinkage long sys_setsockopt(int fd, int level, int optname,
1709 char __user *optval, int optlen)
1da177e4 1710{
6cb153ca 1711 int err, fput_needed;
1da177e4
LT
1712 struct socket *sock;
1713
1714 if (optlen < 0)
1715 return -EINVAL;
89bddce5
SH
1716
1717 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1718 if (sock != NULL) {
1719 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1720 if (err)
1721 goto out_put;
1da177e4
LT
1722
1723 if (level == SOL_SOCKET)
89bddce5
SH
1724 err =
1725 sock_setsockopt(sock, level, optname, optval,
1726 optlen);
1da177e4 1727 else
89bddce5
SH
1728 err =
1729 sock->ops->setsockopt(sock, level, optname, optval,
1730 optlen);
6cb153ca
BL
1731out_put:
1732 fput_light(sock->file, fput_needed);
1da177e4
LT
1733 }
1734 return err;
1735}
1736
1737/*
1738 * Get a socket option. Because we don't know the option lengths we have
1739 * to pass a user mode parameter for the protocols to sort out.
1740 */
1741
89bddce5
SH
1742asmlinkage long sys_getsockopt(int fd, int level, int optname,
1743 char __user *optval, int __user *optlen)
1da177e4 1744{
6cb153ca 1745 int err, fput_needed;
1da177e4
LT
1746 struct socket *sock;
1747
89bddce5
SH
1748 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1749 if (sock != NULL) {
6cb153ca
BL
1750 err = security_socket_getsockopt(sock, level, optname);
1751 if (err)
1752 goto out_put;
1da177e4
LT
1753
1754 if (level == SOL_SOCKET)
89bddce5
SH
1755 err =
1756 sock_getsockopt(sock, level, optname, optval,
1757 optlen);
1da177e4 1758 else
89bddce5
SH
1759 err =
1760 sock->ops->getsockopt(sock, level, optname, optval,
1761 optlen);
6cb153ca
BL
1762out_put:
1763 fput_light(sock->file, fput_needed);
1da177e4
LT
1764 }
1765 return err;
1766}
1767
1da177e4
LT
1768/*
1769 * Shutdown a socket.
1770 */
1771
1772asmlinkage long sys_shutdown(int fd, int how)
1773{
6cb153ca 1774 int err, fput_needed;
1da177e4
LT
1775 struct socket *sock;
1776
89bddce5
SH
1777 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1778 if (sock != NULL) {
1da177e4 1779 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1780 if (!err)
1781 err = sock->ops->shutdown(sock, how);
1782 fput_light(sock->file, fput_needed);
1da177e4
LT
1783 }
1784 return err;
1785}
1786
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG() expects two things in the scope where it is expanded: a
 * variable named 'flags', and next to the pointer 'msg' a second pointer
 * named 'msg_compat' (the ## paste builds that name).  When
 * MSG_CMSG_COMPAT is set in flags the member of the compat structure is
 * addressed, otherwise the member of the native one.
 */
#define COMPAT_MSG(msg, member) \
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1793
1da177e4
LT
1794/*
1795 * BSD sendmsg interface
1796 */
1797
1798asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1799{
89bddce5
SH
1800 struct compat_msghdr __user *msg_compat =
1801 (struct compat_msghdr __user *)msg;
1da177e4
LT
1802 struct socket *sock;
1803 char address[MAX_SOCK_ADDR];
1804 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1805 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1806 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1807 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1808 unsigned char *ctl_buf = ctl;
1809 struct msghdr msg_sys;
1810 int err, ctl_len, iov_size, total_len;
6cb153ca 1811 int fput_needed;
89bddce5 1812
1da177e4
LT
1813 err = -EFAULT;
1814 if (MSG_CMSG_COMPAT & flags) {
1815 if (get_compat_msghdr(&msg_sys, msg_compat))
1816 return -EFAULT;
89bddce5
SH
1817 }
1818 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1819 return -EFAULT;
1820
6cb153ca 1821 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1822 if (!sock)
1da177e4
LT
1823 goto out;
1824
1825 /* do not move before msg_sys is valid */
1826 err = -EMSGSIZE;
1827 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1828 goto out_put;
1829
89bddce5 1830 /* Check whether to allocate the iovec area */
1da177e4
LT
1831 err = -ENOMEM;
1832 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1833 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1834 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1835 if (!iov)
1836 goto out_put;
1837 }
1838
1839 /* This will also move the address data into kernel space */
1840 if (MSG_CMSG_COMPAT & flags) {
1841 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1842 } else
1843 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1844 if (err < 0)
1da177e4
LT
1845 goto out_freeiov;
1846 total_len = err;
1847
1848 err = -ENOBUFS;
1849
1850 if (msg_sys.msg_controllen > INT_MAX)
1851 goto out_freeiov;
89bddce5 1852 ctl_len = msg_sys.msg_controllen;
1da177e4 1853 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1854 err =
1855 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1856 sizeof(ctl));
1da177e4
LT
1857 if (err)
1858 goto out_freeiov;
1859 ctl_buf = msg_sys.msg_control;
8920e8f9 1860 ctl_len = msg_sys.msg_controllen;
1da177e4 1861 } else if (ctl_len) {
89bddce5 1862 if (ctl_len > sizeof(ctl)) {
1da177e4 1863 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1864 if (ctl_buf == NULL)
1da177e4
LT
1865 goto out_freeiov;
1866 }
1867 err = -EFAULT;
1868 /*
1869 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1870 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1871 * checking falls down on this.
1872 */
89bddce5
SH
1873 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1874 ctl_len))
1da177e4
LT
1875 goto out_freectl;
1876 msg_sys.msg_control = ctl_buf;
1877 }
1878 msg_sys.msg_flags = flags;
1879
1880 if (sock->file->f_flags & O_NONBLOCK)
1881 msg_sys.msg_flags |= MSG_DONTWAIT;
1882 err = sock_sendmsg(sock, &msg_sys, total_len);
1883
1884out_freectl:
89bddce5 1885 if (ctl_buf != ctl)
1da177e4
LT
1886 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1887out_freeiov:
1888 if (iov != iovstack)
1889 sock_kfree_s(sock->sk, iov, iov_size);
1890out_put:
6cb153ca 1891 fput_light(sock->file, fput_needed);
89bddce5 1892out:
1da177e4
LT
1893 return err;
1894}
1895
1896/*
1897 * BSD recvmsg interface
1898 */
1899
89bddce5
SH
1900asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1901 unsigned int flags)
1da177e4 1902{
89bddce5
SH
1903 struct compat_msghdr __user *msg_compat =
1904 (struct compat_msghdr __user *)msg;
1da177e4
LT
1905 struct socket *sock;
1906 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1907 struct iovec *iov = iovstack;
1da177e4
LT
1908 struct msghdr msg_sys;
1909 unsigned long cmsg_ptr;
1910 int err, iov_size, total_len, len;
6cb153ca 1911 int fput_needed;
1da177e4
LT
1912
1913 /* kernel mode address */
1914 char addr[MAX_SOCK_ADDR];
1915
1916 /* user mode address pointers */
1917 struct sockaddr __user *uaddr;
1918 int __user *uaddr_len;
89bddce5 1919
1da177e4
LT
1920 if (MSG_CMSG_COMPAT & flags) {
1921 if (get_compat_msghdr(&msg_sys, msg_compat))
1922 return -EFAULT;
89bddce5
SH
1923 }
1924 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1925 return -EFAULT;
1da177e4 1926
6cb153ca 1927 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1928 if (!sock)
1929 goto out;
1930
1931 err = -EMSGSIZE;
1932 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1933 goto out_put;
89bddce5
SH
1934
1935 /* Check whether to allocate the iovec area */
1da177e4
LT
1936 err = -ENOMEM;
1937 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1938 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1939 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1940 if (!iov)
1941 goto out_put;
1942 }
1943
1944 /*
89bddce5
SH
1945 * Save the user-mode address (verify_iovec will change the
1946 * kernel msghdr to use the kernel address space)
1da177e4 1947 */
89bddce5
SH
1948
1949 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1950 uaddr_len = COMPAT_NAMELEN(msg);
1951 if (MSG_CMSG_COMPAT & flags) {
1952 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1953 } else
1954 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1955 if (err < 0)
1956 goto out_freeiov;
89bddce5 1957 total_len = err;
1da177e4
LT
1958
1959 cmsg_ptr = (unsigned long)msg_sys.msg_control;
1960 msg_sys.msg_flags = 0;
1961 if (MSG_CMSG_COMPAT & flags)
1962 msg_sys.msg_flags = MSG_CMSG_COMPAT;
89bddce5 1963
1da177e4
LT
1964 if (sock->file->f_flags & O_NONBLOCK)
1965 flags |= MSG_DONTWAIT;
1966 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1967 if (err < 0)
1968 goto out_freeiov;
1969 len = err;
1970
1971 if (uaddr != NULL) {
89bddce5
SH
1972 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1973 uaddr_len);
1da177e4
LT
1974 if (err < 0)
1975 goto out_freeiov;
1976 }
37f7f421
DM
1977 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1978 COMPAT_FLAGS(msg));
1da177e4
LT
1979 if (err)
1980 goto out_freeiov;
1981 if (MSG_CMSG_COMPAT & flags)
89bddce5 1982 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1983 &msg_compat->msg_controllen);
1984 else
89bddce5 1985 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1986 &msg->msg_controllen);
1987 if (err)
1988 goto out_freeiov;
1989 err = len;
1990
1991out_freeiov:
1992 if (iov != iovstack)
1993 sock_kfree_s(sock->sk, iov, iov_size);
1994out_put:
6cb153ca 1995 fput_light(sock->file, fput_needed);
1da177e4
LT
1996out:
1997 return err;
1998}
1999
2000#ifdef __ARCH_WANT_SYS_SOCKETCALL
2001
/*
 * Argument list sizes for sys_socketcall.
 *
 * nargs[call] is the number of bytes of arguments to copy from user
 * space for socketcall number 'call': the argument count times
 * sizeof(unsigned long).  Entry 0 is a zero-sized placeholder.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0),		/*  0 */
	AL(3),		/*  1 */
	AL(3),		/*  2 */
	AL(3),		/*  3 */
	AL(2),		/*  4 */
	AL(3),		/*  5 */
	AL(3),		/*  6 */
	AL(3),		/*  7 */
	AL(4),		/*  8 */
	AL(4),		/*  9 */
	AL(4),		/* 10 */
	AL(6),		/* 11 */
	AL(6),		/* 12 */
	AL(2),		/* 13 */
	AL(5),		/* 14 */
	AL(5),		/* 15 */
	AL(3),		/* 16 */
	AL(3),		/* 17 */
};

#undef AL
2011
2012/*
89bddce5 2013 * System call vectors.
1da177e4
LT
2014 *
2015 * Argument checking cleaned up. Saved 20% in size.
2016 * This function doesn't need to set the kernel lock because
89bddce5 2017 * it is set by the callees.
1da177e4
LT
2018 */
2019
2020asmlinkage long sys_socketcall(int call, unsigned long __user *args)
2021{
2022 unsigned long a[6];
89bddce5 2023 unsigned long a0, a1;
1da177e4
LT
2024 int err;
2025
89bddce5 2026 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
2027 return -EINVAL;
2028
2029 /* copy_from_user should be SMP safe. */
2030 if (copy_from_user(a, args, nargs[call]))
2031 return -EFAULT;
3ec3b2fb 2032
89bddce5 2033 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
2034 if (err)
2035 return err;
2036
89bddce5
SH
2037 a0 = a[0];
2038 a1 = a[1];
2039
2040 switch (call) {
2041 case SYS_SOCKET:
2042 err = sys_socket(a0, a1, a[2]);
2043 break;
2044 case SYS_BIND:
2045 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2046 break;
2047 case SYS_CONNECT:
2048 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2049 break;
2050 case SYS_LISTEN:
2051 err = sys_listen(a0, a1);
2052 break;
2053 case SYS_ACCEPT:
2054 err =
2055 sys_accept(a0, (struct sockaddr __user *)a1,
2056 (int __user *)a[2]);
2057 break;
2058 case SYS_GETSOCKNAME:
2059 err =
2060 sys_getsockname(a0, (struct sockaddr __user *)a1,
2061 (int __user *)a[2]);
2062 break;
2063 case SYS_GETPEERNAME:
2064 err =
2065 sys_getpeername(a0, (struct sockaddr __user *)a1,
2066 (int __user *)a[2]);
2067 break;
2068 case SYS_SOCKETPAIR:
2069 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2070 break;
2071 case SYS_SEND:
2072 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2073 break;
2074 case SYS_SENDTO:
2075 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2076 (struct sockaddr __user *)a[4], a[5]);
2077 break;
2078 case SYS_RECV:
2079 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2080 break;
2081 case SYS_RECVFROM:
2082 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2083 (struct sockaddr __user *)a[4],
2084 (int __user *)a[5]);
2085 break;
2086 case SYS_SHUTDOWN:
2087 err = sys_shutdown(a0, a1);
2088 break;
2089 case SYS_SETSOCKOPT:
2090 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2091 break;
2092 case SYS_GETSOCKOPT:
2093 err =
2094 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2095 (int __user *)a[4]);
2096 break;
2097 case SYS_SENDMSG:
2098 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2099 break;
2100 case SYS_RECVMSG:
2101 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2102 break;
2103 default:
2104 err = -EINVAL;
2105 break;
1da177e4
LT
2106 }
2107 return err;
2108}
2109
89bddce5 2110#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4
LT
2111
2112/*
2113 * This function is called by a protocol handler that wants to
2114 * advertise its address family, and have it linked into the
2115 * SOCKET module.
2116 */
2117
2118int sock_register(struct net_proto_family *ops)
2119{
2120 int err;
2121
2122 if (ops->family >= NPROTO) {
89bddce5
SH
2123 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2124 NPROTO);
1da177e4
LT
2125 return -ENOBUFS;
2126 }
2127 net_family_write_lock();
2128 err = -EEXIST;
2129 if (net_families[ops->family] == NULL) {
89bddce5 2130 net_families[ops->family] = ops;
1da177e4
LT
2131 err = 0;
2132 }
2133 net_family_write_unlock();
89bddce5 2134 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2135 return err;
2136}
2137
2138/*
2139 * This function is called by a protocol handler that wants to
2140 * remove its address family, and have it unlinked from the
2141 * SOCKET module.
2142 */
2143
2144int sock_unregister(int family)
2145{
2146 if (family < 0 || family >= NPROTO)
2147 return -1;
2148
2149 net_family_write_lock();
89bddce5 2150 net_families[family] = NULL;
1da177e4 2151 net_family_write_unlock();
89bddce5 2152 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2153 return 0;
2154}
2155
77d76ea3 2156static int __init sock_init(void)
1da177e4
LT
2157{
2158 /*
89bddce5 2159 * Initialize sock SLAB cache.
1da177e4 2160 */
89bddce5 2161
1da177e4
LT
2162 sk_init();
2163
1da177e4 2164 /*
89bddce5 2165 * Initialize skbuff SLAB cache
1da177e4
LT
2166 */
2167 skb_init();
1da177e4
LT
2168
2169 /*
89bddce5 2170 * Initialize the protocols module.
1da177e4
LT
2171 */
2172
2173 init_inodecache();
2174 register_filesystem(&sock_fs_type);
2175 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2176
2177 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2178 */
2179
2180#ifdef CONFIG_NETFILTER
2181 netfilter_init();
2182#endif
cbeb321a
DM
2183
2184 return 0;
1da177e4
LT
2185}
2186
77d76ea3
AK
2187core_initcall(sock_init); /* early initcall */
2188
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 *	socket_seq_show - print the "sockets: used N" line into @seq.
 *
 *	N is the sum of the per-CPU sockets_in_use counters over every
 *	possible CPU, clamped so that it is never reported below zero.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The summed value can come out negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif /* CONFIG_PROC_FS */
1da177e4 2205
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 *	compat_sock_ioctl - hand an ioctl to the socket's compat_ioctl handler.
 *
 *	The socket is taken from file->private_data.  Returns whatever the
 *	handler returns, or -ENOIOCTLCMD when the socket's ops provide no
 *	compat_ioctl handler.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	return sock->ops->compat_ioctl(sock, cmd, arg);
}
#endif
2219
ac5a488e
SS
2220int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2221{
2222 return sock->ops->bind(sock, addr, addrlen);
2223}
2224
2225int kernel_listen(struct socket *sock, int backlog)
2226{
2227 return sock->ops->listen(sock, backlog);
2228}
2229
2230int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2231{
2232 struct sock *sk = sock->sk;
2233 int err;
2234
2235 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2236 newsock);
2237 if (err < 0)
2238 goto done;
2239
2240 err = sock->ops->accept(sock, *newsock, flags);
2241 if (err < 0) {
2242 sock_release(*newsock);
2243 goto done;
2244 }
2245
2246 (*newsock)->ops = sock->ops;
2247
2248done:
2249 return err;
2250}
2251
2252int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
2253 int flags)
2254{
2255 return sock->ops->connect(sock, addr, addrlen, flags);
2256}
2257
2258int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2259 int *addrlen)
2260{
2261 return sock->ops->getname(sock, addr, addrlen, 0);
2262}
2263
2264int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2265 int *addrlen)
2266{
2267 return sock->ops->getname(sock, addr, addrlen, 1);
2268}
2269
2270int kernel_getsockopt(struct socket *sock, int level, int optname,
2271 char *optval, int *optlen)
2272{
2273 mm_segment_t oldfs = get_fs();
2274 int err;
2275
2276 set_fs(KERNEL_DS);
2277 if (level == SOL_SOCKET)
2278 err = sock_getsockopt(sock, level, optname, optval, optlen);
2279 else
2280 err = sock->ops->getsockopt(sock, level, optname, optval,
2281 optlen);
2282 set_fs(oldfs);
2283 return err;
2284}
2285
2286int kernel_setsockopt(struct socket *sock, int level, int optname,
2287 char *optval, int optlen)
2288{
2289 mm_segment_t oldfs = get_fs();
2290 int err;
2291
2292 set_fs(KERNEL_DS);
2293 if (level == SOL_SOCKET)
2294 err = sock_setsockopt(sock, level, optname, optval, optlen);
2295 else
2296 err = sock->ops->setsockopt(sock, level, optname, optval,
2297 optlen);
2298 set_fs(oldfs);
2299 return err;
2300}
2301
2302int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2303 size_t size, int flags)
2304{
2305 if (sock->ops->sendpage)
2306 return sock->ops->sendpage(sock, page, offset, size, flags);
2307
2308 return sock_no_sendpage(sock, page, offset, size, flags);
2309}
2310
2311int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2312{
2313 mm_segment_t oldfs = get_fs();
2314 int err;
2315
2316 set_fs(KERNEL_DS);
2317 err = sock->ops->ioctl(sock, cmd, arg);
2318 set_fs(oldfs);
2319
2320 return err;
2321}
2322
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Remaining symbols exported from this file. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
2348EXPORT_SYMBOL(kernel_sock_ioctl);