[PATCH] reiserfs: eliminate minimum window size for bitmap searching
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
80#include <linux/divert.h>
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
93
94#include <net/sock.h>
95#include <linux/netfilter.h>
96
97static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
98static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
89bddce5 99 size_t size, loff_t pos);
1da177e4 100static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
89bddce5
SH
101 size_t size, loff_t pos);
102static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
103
104static int sock_close(struct inode *inode, struct file *file);
105static unsigned int sock_poll(struct file *file,
106 struct poll_table_struct *wait);
89bddce5 107static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
108#ifdef CONFIG_COMPAT
109static long compat_sock_ioctl(struct file *file,
89bddce5 110 unsigned int cmd, unsigned long arg);
89bbfc95 111#endif
1da177e4
LT
112static int sock_fasync(int fd, struct file *filp, int on);
113static ssize_t sock_readv(struct file *file, const struct iovec *vector,
114 unsigned long count, loff_t *ppos);
115static ssize_t sock_writev(struct file *file, const struct iovec *vector,
89bddce5 116 unsigned long count, loff_t *ppos);
1da177e4
LT
117static ssize_t sock_sendpage(struct file *file, struct page *page,
118 int offset, size_t size, loff_t *ppos, int more);
119
1da177e4
LT
120/*
121 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
122 * in the operation structures but are done directly via the socketcall() multiplexor.
123 */
124
125static struct file_operations socket_file_ops = {
126 .owner = THIS_MODULE,
127 .llseek = no_llseek,
128 .aio_read = sock_aio_read,
129 .aio_write = sock_aio_write,
130 .poll = sock_poll,
131 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
132#ifdef CONFIG_COMPAT
133 .compat_ioctl = compat_sock_ioctl,
134#endif
1da177e4
LT
135 .mmap = sock_mmap,
136 .open = sock_no_open, /* special open code to disallow open via /proc */
137 .release = sock_close,
138 .fasync = sock_fasync,
139 .readv = sock_readv,
140 .writev = sock_writev,
5274f052
JA
141 .sendpage = sock_sendpage,
142 .splice_write = generic_splice_sendpage,
1da177e4
LT
143};
144
145/*
146 * The protocol list. Each protocol is registered in here.
147 */
148
1da177e4 149static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 150static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 151
1da177e4
LT
152/*
153 * Statistics counters of the socket lists
154 */
155
156static DEFINE_PER_CPU(int, sockets_in_use) = 0;
157
158/*
89bddce5
SH
159 * Support routines.
160 * Move socket addresses back and forth across the kernel/user
161 * divide and look after the messy bits.
1da177e4
LT
162 */
163
89bddce5 164#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
165 16 for IP, 16 for IPX,
166 24 for IPv6,
89bddce5 167 about 80 for AX.25
1da177e4
LT
168 must be at least one bigger than
169 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 170 :unix_mkname()).
1da177e4 171 */
89bddce5 172
1da177e4
LT
173/**
174 * move_addr_to_kernel - copy a socket address into kernel space
175 * @uaddr: Address in user space
176 * @kaddr: Address in kernel space
177 * @ulen: Length in user space
178 *
179 * The address is copied into kernel space. If the provided address is
180 * too long an error code of -EINVAL is returned. If the copy gives
181 * invalid addresses -EFAULT is returned. On a success 0 is returned.
182 */
183
184int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
185{
89bddce5 186 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 187 return -EINVAL;
89bddce5 188 if (ulen == 0)
1da177e4 189 return 0;
89bddce5 190 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 191 return -EFAULT;
3ec3b2fb 192 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
193}
194
195/**
196 * move_addr_to_user - copy an address to user space
197 * @kaddr: kernel space address
198 * @klen: length of address in kernel
199 * @uaddr: user space address
200 * @ulen: pointer to user length field
201 *
202 * The value pointed to by ulen on entry is the buffer length available.
203 * This is overwritten with the buffer space used. -EINVAL is returned
204 * if an overlong buffer is specified or a negative buffer size. -EFAULT
205 * is returned if either the buffer or the length field are not
206 * accessible.
207 * After copying the data up to the limit the user specifies, the true
208 * length of the data is written over the length limit the user
209 * specified. Zero is returned for a success.
210 */
89bddce5
SH
211
212int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
213 int __user *ulen)
1da177e4
LT
214{
215 int err;
216 int len;
217
89bddce5
SH
218 err = get_user(len, ulen);
219 if (err)
1da177e4 220 return err;
89bddce5
SH
221 if (len > klen)
222 len = klen;
223 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 224 return -EINVAL;
89bddce5 225 if (len) {
d6fe3945
SG
226 if (audit_sockaddr(klen, kaddr))
227 return -ENOMEM;
89bddce5 228 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
229 return -EFAULT;
230 }
231 /*
89bddce5
SH
232 * "fromlen shall refer to the value before truncation.."
233 * 1003.1g
1da177e4
LT
234 */
235 return __put_user(klen, ulen);
236}
237
238#define SOCKFS_MAGIC 0x534F434B
239
89bddce5 240static kmem_cache_t *sock_inode_cachep __read_mostly;
1da177e4
LT
241
242static struct inode *sock_alloc_inode(struct super_block *sb)
243{
244 struct socket_alloc *ei;
89bddce5
SH
245
246 ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
1da177e4
LT
247 if (!ei)
248 return NULL;
249 init_waitqueue_head(&ei->socket.wait);
89bddce5 250
1da177e4
LT
251 ei->socket.fasync_list = NULL;
252 ei->socket.state = SS_UNCONNECTED;
253 ei->socket.flags = 0;
254 ei->socket.ops = NULL;
255 ei->socket.sk = NULL;
256 ei->socket.file = NULL;
1da177e4
LT
257
258 return &ei->vfs_inode;
259}
260
261static void sock_destroy_inode(struct inode *inode)
262{
263 kmem_cache_free(sock_inode_cachep,
264 container_of(inode, struct socket_alloc, vfs_inode));
265}
266
89bddce5 267static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
1da177e4 268{
89bddce5 269 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 270
89bddce5
SH
271 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
272 == SLAB_CTOR_CONSTRUCTOR)
1da177e4
LT
273 inode_init_once(&ei->vfs_inode);
274}
89bddce5 275
1da177e4
LT
276static int init_inodecache(void)
277{
278 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
279 sizeof(struct socket_alloc),
280 0,
281 (SLAB_HWCACHE_ALIGN |
282 SLAB_RECLAIM_ACCOUNT |
283 SLAB_MEM_SPREAD),
284 init_once,
285 NULL);
1da177e4
LT
286 if (sock_inode_cachep == NULL)
287 return -ENOMEM;
288 return 0;
289}
290
291static struct super_operations sockfs_ops = {
292 .alloc_inode = sock_alloc_inode,
293 .destroy_inode =sock_destroy_inode,
294 .statfs = simple_statfs,
295};
296
454e2398 297static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
298 int flags, const char *dev_name, void *data,
299 struct vfsmount *mnt)
1da177e4 300{
454e2398
DH
301 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
302 mnt);
1da177e4
LT
303}
304
ba89966c 305static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
306
307static struct file_system_type sock_fs_type = {
308 .name = "sockfs",
309 .get_sb = sockfs_get_sb,
310 .kill_sb = kill_anon_super,
311};
89bddce5 312
1da177e4
LT
/* d_delete hook for sockfs dentries: unconditionally returns 1. */
static int sockfs_delete_dentry(struct dentry *dentry)
{
	const int always = 1;

	return always;
}
317static struct dentry_operations sockfs_dentry_operations = {
89bddce5 318 .d_delete = sockfs_delete_dentry,
1da177e4
LT
319};
320
321/*
322 * Obtains the first available file descriptor and sets it up for use.
323 *
39d8c1b6
DM
324 * These functions create file structures and map them to fd space
325 * of the current process. On success it returns file descriptor
1da177e4
LT
326 * and file struct implicitly stored in sock->file.
327 * Note that another thread may close file descriptor before we return
328 * from this function. We use the fact that now we do not refer
329 * to socket after mapping. If one day we will need it, this
330 * function will increment ref. count on file by 1.
331 *
332 * In any case returned fd MAY BE not valid!
333 * This race condition is unavoidable
334 * with shared fd spaces, we cannot solve it inside kernel,
335 * but we take care of internal coherence yet.
336 */
337
39d8c1b6 338static int sock_alloc_fd(struct file **filep)
1da177e4
LT
339{
340 int fd;
1da177e4
LT
341
342 fd = get_unused_fd();
39d8c1b6 343 if (likely(fd >= 0)) {
1da177e4
LT
344 struct file *file = get_empty_filp();
345
39d8c1b6
DM
346 *filep = file;
347 if (unlikely(!file)) {
1da177e4 348 put_unused_fd(fd);
39d8c1b6 349 return -ENFILE;
1da177e4 350 }
39d8c1b6
DM
351 } else
352 *filep = NULL;
353 return fd;
354}
1da177e4 355
39d8c1b6
DM
356static int sock_attach_fd(struct socket *sock, struct file *file)
357{
358 struct qstr this;
359 char name[32];
360
361 this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
362 this.name = name;
363 this.hash = SOCK_INODE(sock)->i_ino;
364
365 file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
366 if (unlikely(!file->f_dentry))
367 return -ENOMEM;
368
369 file->f_dentry->d_op = &sockfs_dentry_operations;
370 d_add(file->f_dentry, SOCK_INODE(sock));
371 file->f_vfsmnt = mntget(sock_mnt);
372 file->f_mapping = file->f_dentry->d_inode->i_mapping;
373
374 sock->file = file;
375 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
376 file->f_mode = FMODE_READ | FMODE_WRITE;
377 file->f_flags = O_RDWR;
378 file->f_pos = 0;
379 file->private_data = sock;
1da177e4 380
39d8c1b6
DM
381 return 0;
382}
383
/*
 * Allocate a descriptor and file for @sock, attach them and install the
 * file in the descriptor table. Returns the descriptor or a negative errno.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *filp;
	int rc;
	int fd;

	fd = sock_alloc_fd(&filp);
	if (unlikely(fd < 0))
		return fd;

	rc = sock_attach_fd(sock, filp);
	if (unlikely(rc < 0)) {
		/* Undo both halves of sock_alloc_fd(). */
		put_filp(filp);
		put_unused_fd(fd);
		return rc;
	}

	fd_install(fd, filp);
	return fd;
}
401
6cb153ca
BL
402static struct socket *sock_from_file(struct file *file, int *err)
403{
404 struct inode *inode;
405 struct socket *sock;
406
407 if (file->f_op == &socket_file_ops)
408 return file->private_data; /* set in sock_map_fd */
409
410 inode = file->f_dentry->d_inode;
411 if (!S_ISSOCK(inode->i_mode)) {
412 *err = -ENOTSOCK;
413 return NULL;
414 }
415
416 sock = SOCKET_I(inode);
417 if (sock->file != file) {
418 printk(KERN_ERR "socki_lookup: socket file changed!\n");
419 sock->file = file;
420 }
421 return sock;
422}
423
1da177e4
LT
424/**
425 * sockfd_lookup - Go from a file number to its socket slot
426 * @fd: file handle
427 * @err: pointer to an error code return
428 *
429 * The file handle passed in is locked and the socket it is bound
430 * to is returned. If an error occurs the err pointer is overwritten
431 * with a negative errno code and NULL is returned. The function checks
432 * for both invalid handles and passing a handle which is not a socket.
433 *
434 * On a success the socket object pointer is returned.
435 */
436
437struct socket *sockfd_lookup(int fd, int *err)
438{
439 struct file *file;
1da177e4
LT
440 struct socket *sock;
441
89bddce5
SH
442 file = fget(fd);
443 if (!file) {
1da177e4
LT
444 *err = -EBADF;
445 return NULL;
446 }
89bddce5 447
6cb153ca
BL
448 sock = sock_from_file(file, err);
449 if (!sock)
1da177e4 450 fput(file);
6cb153ca
BL
451 return sock;
452}
1da177e4 453
6cb153ca
BL
454static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
455{
456 struct file *file;
457 struct socket *sock;
458
3672558c 459 *err = -EBADF;
6cb153ca
BL
460 file = fget_light(fd, fput_needed);
461 if (file) {
462 sock = sock_from_file(file, err);
463 if (sock)
464 return sock;
465 fput_light(file, *fput_needed);
1da177e4 466 }
6cb153ca 467 return NULL;
1da177e4
LT
468}
469
470/**
471 * sock_alloc - allocate a socket
89bddce5 472 *
1da177e4
LT
473 * Allocate a new inode and socket object. The two are bound together
474 * and initialised. The socket is then returned. If we are out of inodes
475 * NULL is returned.
476 */
477
478static struct socket *sock_alloc(void)
479{
89bddce5
SH
480 struct inode *inode;
481 struct socket *sock;
1da177e4
LT
482
483 inode = new_inode(sock_mnt->mnt_sb);
484 if (!inode)
485 return NULL;
486
487 sock = SOCKET_I(inode);
488
89bddce5 489 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
490 inode->i_uid = current->fsuid;
491 inode->i_gid = current->fsgid;
492
493 get_cpu_var(sockets_in_use)++;
494 put_cpu_var(sockets_in_use);
495 return sock;
496}
497
498/*
499 * In theory you can't get an open on this inode, but /proc provides
500 * a back door. Remember to keep it shut otherwise you'll let the
501 * creepy crawlies in.
502 */
89bddce5 503
1da177e4
LT
504static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
505{
506 return -ENXIO;
507}
508
4b6f5d20 509const struct file_operations bad_sock_fops = {
1da177e4
LT
510 .owner = THIS_MODULE,
511 .open = sock_no_open,
512};
513
514/**
515 * sock_release - close a socket
516 * @sock: socket to close
517 *
518 * The socket is released from the protocol stack if it has a release
519 * callback, and the inode is then released if the socket is bound to
89bddce5 520 * an inode not a file.
1da177e4 521 */
89bddce5 522
1da177e4
LT
523void sock_release(struct socket *sock)
524{
525 if (sock->ops) {
526 struct module *owner = sock->ops->owner;
527
528 sock->ops->release(sock);
529 sock->ops = NULL;
530 module_put(owner);
531 }
532
533 if (sock->fasync_list)
534 printk(KERN_ERR "sock_release: fasync list not empty!\n");
535
536 get_cpu_var(sockets_in_use)--;
537 put_cpu_var(sockets_in_use);
538 if (!sock->file) {
539 iput(SOCK_INODE(sock));
540 return;
541 }
89bddce5 542 sock->file = NULL;
1da177e4
LT
543}
544
89bddce5 545static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
546 struct msghdr *msg, size_t size)
547{
548 struct sock_iocb *si = kiocb_to_siocb(iocb);
549 int err;
550
551 si->sock = sock;
552 si->scm = NULL;
553 si->msg = msg;
554 si->size = size;
555
556 err = security_socket_sendmsg(sock, msg, size);
557 if (err)
558 return err;
559
560 return sock->ops->sendmsg(iocb, sock, msg, size);
561}
562
563int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
564{
565 struct kiocb iocb;
566 struct sock_iocb siocb;
567 int ret;
568
569 init_sync_kiocb(&iocb, NULL);
570 iocb.private = &siocb;
571 ret = __sock_sendmsg(&iocb, sock, msg, size);
572 if (-EIOCBQUEUED == ret)
573 ret = wait_on_sync_kiocb(&iocb);
574 return ret;
575}
576
577int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
578 struct kvec *vec, size_t num, size_t size)
579{
580 mm_segment_t oldfs = get_fs();
581 int result;
582
583 set_fs(KERNEL_DS);
584 /*
585 * the following is safe, since for compiler definitions of kvec and
586 * iovec are identical, yielding the same in-core layout and alignment
587 */
89bddce5 588 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
589 msg->msg_iovlen = num;
590 result = sock_sendmsg(sock, msg, size);
591 set_fs(oldfs);
592 return result;
593}
594
89bddce5 595static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
596 struct msghdr *msg, size_t size, int flags)
597{
598 int err;
599 struct sock_iocb *si = kiocb_to_siocb(iocb);
600
601 si->sock = sock;
602 si->scm = NULL;
603 si->msg = msg;
604 si->size = size;
605 si->flags = flags;
606
607 err = security_socket_recvmsg(sock, msg, size, flags);
608 if (err)
609 return err;
610
611 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
612}
613
89bddce5 614int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
615 size_t size, int flags)
616{
617 struct kiocb iocb;
618 struct sock_iocb siocb;
619 int ret;
620
89bddce5 621 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
622 iocb.private = &siocb;
623 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
624 if (-EIOCBQUEUED == ret)
625 ret = wait_on_sync_kiocb(&iocb);
626 return ret;
627}
628
89bddce5
SH
629int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
630 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
631{
632 mm_segment_t oldfs = get_fs();
633 int result;
634
635 set_fs(KERNEL_DS);
636 /*
637 * the following is safe, since for compiler definitions of kvec and
638 * iovec are identical, yielding the same in-core layout and alignment
639 */
89bddce5 640 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
641 result = sock_recvmsg(sock, msg, size, flags);
642 set_fs(oldfs);
643 return result;
644}
645
646static void sock_aio_dtor(struct kiocb *iocb)
647{
648 kfree(iocb->private);
649}
650
ce1d4d3e
CH
651static ssize_t sock_sendpage(struct file *file, struct page *page,
652 int offset, size_t size, loff_t *ppos, int more)
1da177e4 653{
1da177e4
LT
654 struct socket *sock;
655 int flags;
656
ce1d4d3e
CH
657 sock = file->private_data;
658
659 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
660 if (more)
661 flags |= MSG_MORE;
662
663 return sock->ops->sendpage(sock, page, offset, size, flags);
664}
1da177e4 665
ce1d4d3e 666static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5
SH
667 char __user *ubuf, size_t size,
668 struct sock_iocb *siocb)
ce1d4d3e
CH
669{
670 if (!is_sync_kiocb(iocb)) {
671 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
672 if (!siocb)
673 return NULL;
1da177e4
LT
674 iocb->ki_dtor = sock_aio_dtor;
675 }
1da177e4 676
ce1d4d3e
CH
677 siocb->kiocb = iocb;
678 siocb->async_iov.iov_base = ubuf;
679 siocb->async_iov.iov_len = size;
1da177e4 680
ce1d4d3e
CH
681 iocb->private = siocb;
682 return siocb;
1da177e4
LT
683}
684
ce1d4d3e 685static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
89bddce5
SH
686 struct file *file, struct iovec *iov,
687 unsigned long nr_segs)
ce1d4d3e
CH
688{
689 struct socket *sock = file->private_data;
690 size_t size = 0;
691 int i;
1da177e4 692
89bddce5
SH
693 for (i = 0; i < nr_segs; i++)
694 size += iov[i].iov_len;
1da177e4 695
ce1d4d3e
CH
696 msg->msg_name = NULL;
697 msg->msg_namelen = 0;
698 msg->msg_control = NULL;
699 msg->msg_controllen = 0;
89bddce5 700 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
701 msg->msg_iovlen = nr_segs;
702 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
703
704 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
705}
706
707static ssize_t sock_readv(struct file *file, const struct iovec *iov,
708 unsigned long nr_segs, loff_t *ppos)
1da177e4 709{
ce1d4d3e
CH
710 struct kiocb iocb;
711 struct sock_iocb siocb;
712 struct msghdr msg;
713 int ret;
714
89bddce5 715 init_sync_kiocb(&iocb, NULL);
ce1d4d3e
CH
716 iocb.private = &siocb;
717
718 ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
719 if (-EIOCBQUEUED == ret)
720 ret = wait_on_sync_kiocb(&iocb);
721 return ret;
722}
723
724static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
89bddce5 725 size_t count, loff_t pos)
ce1d4d3e
CH
726{
727 struct sock_iocb siocb, *x;
728
1da177e4
LT
729 if (pos != 0)
730 return -ESPIPE;
ce1d4d3e 731 if (count == 0) /* Match SYS5 behaviour */
1da177e4
LT
732 return 0;
733
ce1d4d3e
CH
734 x = alloc_sock_iocb(iocb, ubuf, count, &siocb);
735 if (!x)
736 return -ENOMEM;
737 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
89bddce5 738 &x->async_iov, 1);
1da177e4
LT
739}
740
ce1d4d3e 741static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
89bddce5
SH
742 struct file *file, struct iovec *iov,
743 unsigned long nr_segs)
1da177e4 744{
ce1d4d3e
CH
745 struct socket *sock = file->private_data;
746 size_t size = 0;
747 int i;
1da177e4 748
89bddce5
SH
749 for (i = 0; i < nr_segs; i++)
750 size += iov[i].iov_len;
1da177e4 751
ce1d4d3e
CH
752 msg->msg_name = NULL;
753 msg->msg_namelen = 0;
754 msg->msg_control = NULL;
755 msg->msg_controllen = 0;
89bddce5 756 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
757 msg->msg_iovlen = nr_segs;
758 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
759 if (sock->type == SOCK_SEQPACKET)
760 msg->msg_flags |= MSG_EOR;
1da177e4 761
ce1d4d3e 762 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
763}
764
ce1d4d3e
CH
765static ssize_t sock_writev(struct file *file, const struct iovec *iov,
766 unsigned long nr_segs, loff_t *ppos)
1da177e4
LT
767{
768 struct msghdr msg;
ce1d4d3e
CH
769 struct kiocb iocb;
770 struct sock_iocb siocb;
771 int ret;
1da177e4 772
ce1d4d3e
CH
773 init_sync_kiocb(&iocb, NULL);
774 iocb.private = &siocb;
1da177e4 775
ce1d4d3e
CH
776 ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
777 if (-EIOCBQUEUED == ret)
778 ret = wait_on_sync_kiocb(&iocb);
779 return ret;
780}
1da177e4 781
ce1d4d3e 782static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
89bddce5 783 size_t count, loff_t pos)
ce1d4d3e
CH
784{
785 struct sock_iocb siocb, *x;
1da177e4 786
ce1d4d3e
CH
787 if (pos != 0)
788 return -ESPIPE;
789 if (count == 0) /* Match SYS5 behaviour */
790 return 0;
1da177e4 791
ce1d4d3e
CH
792 x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb);
793 if (!x)
794 return -ENOMEM;
1da177e4 795
ce1d4d3e 796 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
89bddce5 797 &x->async_iov, 1);
1da177e4
LT
798}
799
1da177e4
LT
800/*
801 * Atomic setting of ioctl hooks to avoid race
802 * with module unload.
803 */
804
4a3e2f71 805static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 806static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 807
89bddce5 808void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 809{
4a3e2f71 810 mutex_lock(&br_ioctl_mutex);
1da177e4 811 br_ioctl_hook = hook;
4a3e2f71 812 mutex_unlock(&br_ioctl_mutex);
1da177e4 813}
89bddce5 814
1da177e4
LT
815EXPORT_SYMBOL(brioctl_set);
816
4a3e2f71 817static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 818static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 819
89bddce5 820void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 821{
4a3e2f71 822 mutex_lock(&vlan_ioctl_mutex);
1da177e4 823 vlan_ioctl_hook = hook;
4a3e2f71 824 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 825}
89bddce5 826
1da177e4
LT
827EXPORT_SYMBOL(vlan_ioctl_set);
828
4a3e2f71 829static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 830static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 831
89bddce5 832void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 833{
4a3e2f71 834 mutex_lock(&dlci_ioctl_mutex);
1da177e4 835 dlci_ioctl_hook = hook;
4a3e2f71 836 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 837}
89bddce5 838
1da177e4
LT
839EXPORT_SYMBOL(dlci_ioctl_set);
840
841/*
842 * With an ioctl, arg may well be a user mode pointer, but we don't know
843 * what to do with it - that's up to the protocol still.
844 */
845
846static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
847{
848 struct socket *sock;
849 void __user *argp = (void __user *)arg;
850 int pid, err;
851
b69aee04 852 sock = file->private_data;
1da177e4
LT
853 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
854 err = dev_ioctl(cmd, argp);
855 } else
d86b5e0e 856#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
857 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
858 err = dev_ioctl(cmd, argp);
859 } else
89bddce5
SH
860#endif /* CONFIG_WIRELESS_EXT */
861 switch (cmd) {
1da177e4
LT
862 case FIOSETOWN:
863 case SIOCSPGRP:
864 err = -EFAULT;
865 if (get_user(pid, (int __user *)argp))
866 break;
867 err = f_setown(sock->file, pid, 1);
868 break;
869 case FIOGETOWN:
870 case SIOCGPGRP:
89bddce5
SH
871 err = put_user(sock->file->f_owner.pid,
872 (int __user *)argp);
1da177e4
LT
873 break;
874 case SIOCGIFBR:
875 case SIOCSIFBR:
876 case SIOCBRADDBR:
877 case SIOCBRDELBR:
878 err = -ENOPKG;
879 if (!br_ioctl_hook)
880 request_module("bridge");
881
4a3e2f71 882 mutex_lock(&br_ioctl_mutex);
89bddce5 883 if (br_ioctl_hook)
1da177e4 884 err = br_ioctl_hook(cmd, argp);
4a3e2f71 885 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
886 break;
887 case SIOCGIFVLAN:
888 case SIOCSIFVLAN:
889 err = -ENOPKG;
890 if (!vlan_ioctl_hook)
891 request_module("8021q");
892
4a3e2f71 893 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
894 if (vlan_ioctl_hook)
895 err = vlan_ioctl_hook(argp);
4a3e2f71 896 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
897 break;
898 case SIOCGIFDIVERT:
899 case SIOCSIFDIVERT:
89bddce5 900 /* Convert this to call through a hook */
1da177e4
LT
901 err = divert_ioctl(cmd, argp);
902 break;
903 case SIOCADDDLCI:
904 case SIOCDELDLCI:
905 err = -ENOPKG;
906 if (!dlci_ioctl_hook)
907 request_module("dlci");
908
909 if (dlci_ioctl_hook) {
4a3e2f71 910 mutex_lock(&dlci_ioctl_mutex);
1da177e4 911 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 912 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
913 }
914 break;
915 default:
916 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
917
918 /*
919 * If this ioctl is unknown try to hand it down
920 * to the NIC driver.
921 */
922 if (err == -ENOIOCTLCMD)
923 err = dev_ioctl(cmd, argp);
1da177e4 924 break;
89bddce5 925 }
1da177e4
LT
926 return err;
927}
928
929int sock_create_lite(int family, int type, int protocol, struct socket **res)
930{
931 int err;
932 struct socket *sock = NULL;
89bddce5 933
1da177e4
LT
934 err = security_socket_create(family, type, protocol, 1);
935 if (err)
936 goto out;
937
938 sock = sock_alloc();
939 if (!sock) {
940 err = -ENOMEM;
941 goto out;
942 }
943
1da177e4 944 sock->type = type;
7420ed23
VY
945 err = security_socket_post_create(sock, family, type, protocol, 1);
946 if (err)
947 goto out_release;
948
1da177e4
LT
949out:
950 *res = sock;
951 return err;
7420ed23
VY
952out_release:
953 sock_release(sock);
954 sock = NULL;
955 goto out;
1da177e4
LT
956}
957
958/* No kernel lock held - perfect */
89bddce5 959static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
960{
961 struct socket *sock;
962
963 /*
89bddce5 964 * We can't return errors to poll, so it's either yes or no.
1da177e4 965 */
b69aee04 966 sock = file->private_data;
1da177e4
LT
967 return sock->ops->poll(file, sock, wait);
968}
969
89bddce5 970static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 971{
b69aee04 972 struct socket *sock = file->private_data;
1da177e4
LT
973
974 return sock->ops->mmap(file, sock, vma);
975}
976
20380731 977static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
978{
979 /*
89bddce5
SH
980 * It was possible the inode is NULL we were
981 * closing an unfinished socket.
1da177e4
LT
982 */
983
89bddce5 984 if (!inode) {
1da177e4
LT
985 printk(KERN_DEBUG "sock_close: NULL inode\n");
986 return 0;
987 }
988 sock_fasync(-1, filp, 0);
989 sock_release(SOCKET_I(inode));
990 return 0;
991}
992
993/*
994 * Update the socket async list
995 *
996 * Fasync_list locking strategy.
997 *
998 * 1. fasync_list is modified only under process context socket lock
999 * i.e. under semaphore.
1000 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1001 * or under socket lock.
1002 * 3. fasync_list can be used from softirq context, so that
1003 * modification under socket lock have to be enhanced with
1004 * write_lock_bh(&sk->sk_callback_lock).
1005 * --ANK (990710)
1006 */
1007
1008static int sock_fasync(int fd, struct file *filp, int on)
1009{
89bddce5 1010 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1011 struct socket *sock;
1012 struct sock *sk;
1013
89bddce5 1014 if (on) {
8b3a7005 1015 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1016 if (fna == NULL)
1da177e4
LT
1017 return -ENOMEM;
1018 }
1019
b69aee04 1020 sock = filp->private_data;
1da177e4 1021
89bddce5
SH
1022 sk = sock->sk;
1023 if (sk == NULL) {
1da177e4
LT
1024 kfree(fna);
1025 return -EINVAL;
1026 }
1027
1028 lock_sock(sk);
1029
89bddce5 1030 prev = &(sock->fasync_list);
1da177e4 1031
89bddce5
SH
1032 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1033 if (fa->fa_file == filp)
1da177e4
LT
1034 break;
1035
89bddce5
SH
1036 if (on) {
1037 if (fa != NULL) {
1da177e4 1038 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1039 fa->fa_fd = fd;
1da177e4
LT
1040 write_unlock_bh(&sk->sk_callback_lock);
1041
1042 kfree(fna);
1043 goto out;
1044 }
89bddce5
SH
1045 fna->fa_file = filp;
1046 fna->fa_fd = fd;
1047 fna->magic = FASYNC_MAGIC;
1048 fna->fa_next = sock->fasync_list;
1da177e4 1049 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1050 sock->fasync_list = fna;
1da177e4 1051 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1052 } else {
1053 if (fa != NULL) {
1da177e4 1054 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1055 *prev = fa->fa_next;
1da177e4
LT
1056 write_unlock_bh(&sk->sk_callback_lock);
1057 kfree(fa);
1058 }
1059 }
1060
1061out:
1062 release_sock(sock->sk);
1063 return 0;
1064}
1065
1066/* This function may be called only under socket lock or callback_lock */
1067
1068int sock_wake_async(struct socket *sock, int how, int band)
1069{
1070 if (!sock || !sock->fasync_list)
1071 return -1;
89bddce5 1072 switch (how) {
1da177e4 1073 case 1:
89bddce5 1074
1da177e4
LT
1075 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1076 break;
1077 goto call_kill;
1078 case 2:
1079 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1080 break;
1081 /* fall through */
1082 case 0:
89bddce5 1083call_kill:
1da177e4
LT
1084 __kill_fasync(sock->fasync_list, SIGIO, band);
1085 break;
1086 case 3:
1087 __kill_fasync(sock->fasync_list, SIGURG, band);
1088 }
1089 return 0;
1090}
1091
89bddce5
SH
1092static int __sock_create(int family, int type, int protocol,
1093 struct socket **res, int kern)
1da177e4
LT
1094{
1095 int err;
1096 struct socket *sock;
55737fda 1097 const struct net_proto_family *pf;
1da177e4
LT
1098
1099 /*
89bddce5 1100 * Check protocol is in range
1da177e4
LT
1101 */
1102 if (family < 0 || family >= NPROTO)
1103 return -EAFNOSUPPORT;
1104 if (type < 0 || type >= SOCK_MAX)
1105 return -EINVAL;
1106
1107 /* Compatibility.
1108
1109 This uglymoron is moved from INET layer to here to avoid
1110 deadlock in module load.
1111 */
1112 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1113 static int warned;
1da177e4
LT
1114 if (!warned) {
1115 warned = 1;
89bddce5
SH
1116 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1117 current->comm);
1da177e4
LT
1118 }
1119 family = PF_PACKET;
1120 }
1121
1122 err = security_socket_create(family, type, protocol, kern);
1123 if (err)
1124 return err;
89bddce5 1125
55737fda
SH
1126 /*
1127 * Allocate the socket and allow the family to set things up. if
1128 * the protocol is 0, the family is instructed to select an appropriate
1129 * default.
1130 */
1131 sock = sock_alloc();
1132 if (!sock) {
1133 if (net_ratelimit())
1134 printk(KERN_WARNING "socket: no more sockets\n");
1135 return -ENFILE; /* Not exactly a match, but its the
1136 closest posix thing */
1137 }
1138
1139 sock->type = type;
1140
1da177e4 1141#if defined(CONFIG_KMOD)
89bddce5
SH
1142 /* Attempt to load a protocol module if the find failed.
1143 *
1144 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1145 * requested real, full-featured networking support upon configuration.
1146 * Otherwise module support will break!
1147 */
55737fda 1148 if (net_families[family] == NULL)
89bddce5 1149 request_module("net-pf-%d", family);
1da177e4
LT
1150#endif
1151
55737fda
SH
1152 rcu_read_lock();
1153 pf = rcu_dereference(net_families[family]);
1154 err = -EAFNOSUPPORT;
1155 if (!pf)
1156 goto out_release;
1da177e4
LT
1157
1158 /*
1159 * We will call the ->create function, that possibly is in a loadable
1160 * module, so we have to bump that loadable module refcnt first.
1161 */
55737fda 1162 if (!try_module_get(pf->owner))
1da177e4
LT
1163 goto out_release;
1164
55737fda
SH
1165 /* Now protected by module ref count */
1166 rcu_read_unlock();
1167
1168 err = pf->create(sock, protocol);
1169 if (err < 0)
1da177e4 1170 goto out_module_put;
a79af59e 1171
1da177e4
LT
1172 /*
1173 * Now to bump the refcnt of the [loadable] module that owns this
1174 * socket at sock_release time we decrement its refcnt.
1175 */
55737fda
SH
1176 if (!try_module_get(sock->ops->owner))
1177 goto out_module_busy;
1178
1da177e4
LT
1179 /*
1180 * Now that we're done with the ->create function, the [loadable]
1181 * module can have its refcnt decremented
1182 */
55737fda 1183 module_put(pf->owner);
7420ed23
VY
1184 err = security_socket_post_create(sock, family, type, protocol, kern);
1185 if (err)
1186 goto out_release;
55737fda 1187 *res = sock;
1da177e4 1188
55737fda
SH
1189 return 0;
1190
1191out_module_busy:
1192 err = -EAFNOSUPPORT;
1da177e4 1193out_module_put:
55737fda
SH
1194 sock->ops = NULL;
1195 module_put(pf->owner);
1196out_sock_release:
1da177e4 1197 sock_release(sock);
55737fda
SH
1198 return err;
1199
1200out_release:
1201 rcu_read_unlock();
1202 goto out_sock_release;
1da177e4
LT
1203}
1204
/* Create a socket via __sock_create() with the 'kern' argument set to 0. */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1209
/* Create a socket via __sock_create() with the 'kern' argument set to 1. */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1214
1215asmlinkage long sys_socket(int family, int type, int protocol)
1216{
1217 int retval;
1218 struct socket *sock;
1219
1220 retval = sock_create(family, type, protocol, &sock);
1221 if (retval < 0)
1222 goto out;
1223
1224 retval = sock_map_fd(sock);
1225 if (retval < 0)
1226 goto out_release;
1227
1228out:
1229 /* It may be already another descriptor 8) Not kernel problem. */
1230 return retval;
1231
1232out_release:
1233 sock_release(sock);
1234 return retval;
1235}
1236
1237/*
1238 * Create a pair of connected sockets.
1239 */
1240
89bddce5
SH
1241asmlinkage long sys_socketpair(int family, int type, int protocol,
1242 int __user *usockvec)
1da177e4
LT
1243{
1244 struct socket *sock1, *sock2;
1245 int fd1, fd2, err;
1246
1247 /*
1248 * Obtain the first socket and check if the underlying protocol
1249 * supports the socketpair call.
1250 */
1251
1252 err = sock_create(family, type, protocol, &sock1);
1253 if (err < 0)
1254 goto out;
1255
1256 err = sock_create(family, type, protocol, &sock2);
1257 if (err < 0)
1258 goto out_release_1;
1259
1260 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1261 if (err < 0)
1da177e4
LT
1262 goto out_release_both;
1263
1264 fd1 = fd2 = -1;
1265
1266 err = sock_map_fd(sock1);
1267 if (err < 0)
1268 goto out_release_both;
1269 fd1 = err;
1270
1271 err = sock_map_fd(sock2);
1272 if (err < 0)
1273 goto out_close_1;
1274 fd2 = err;
1275
1276 /* fd1 and fd2 may be already another descriptors.
1277 * Not kernel problem.
1278 */
1279
89bddce5 1280 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1281 if (!err)
1282 err = put_user(fd2, &usockvec[1]);
1283 if (!err)
1284 return 0;
1285
1286 sys_close(fd2);
1287 sys_close(fd1);
1288 return err;
1289
1290out_close_1:
89bddce5 1291 sock_release(sock2);
1da177e4
LT
1292 sys_close(fd1);
1293 return err;
1294
1295out_release_both:
89bddce5 1296 sock_release(sock2);
1da177e4 1297out_release_1:
89bddce5 1298 sock_release(sock1);
1da177e4
LT
1299out:
1300 return err;
1301}
1302
1da177e4
LT
1303/*
1304 * Bind a name to a socket. Nothing much to do here since it's
1305 * the protocol's responsibility to handle the local address.
1306 *
1307 * We move the socket address to kernel space before we call
1308 * the protocol layer (having also checked the address is ok).
1309 */
1310
1311asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1312{
1313 struct socket *sock;
1314 char address[MAX_SOCK_ADDR];
6cb153ca 1315 int err, fput_needed;
1da177e4 1316
89bddce5
SH
1317 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1318 if(sock) {
1319 err = move_addr_to_kernel(umyaddr, addrlen, address);
1320 if (err >= 0) {
1321 err = security_socket_bind(sock,
1322 (struct sockaddr *)address,
1323 addrlen);
6cb153ca
BL
1324 if (!err)
1325 err = sock->ops->bind(sock,
89bddce5
SH
1326 (struct sockaddr *)
1327 address, addrlen);
1da177e4 1328 }
6cb153ca 1329 fput_light(sock->file, fput_needed);
89bddce5 1330 }
1da177e4
LT
1331 return err;
1332}
1333
1da177e4
LT
1334/*
1335 * Perform a listen. Basically, we allow the protocol to do anything
1336 * necessary for a listen, and if that works, we mark the socket as
1337 * ready for listening.
1338 */
1339
7a42c217 1340int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1341
1342asmlinkage long sys_listen(int fd, int backlog)
1343{
1344 struct socket *sock;
6cb153ca 1345 int err, fput_needed;
89bddce5
SH
1346
1347 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1348 if (sock) {
1349 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1350 backlog = sysctl_somaxconn;
1351
1352 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1353 if (!err)
1354 err = sock->ops->listen(sock, backlog);
1da177e4 1355
6cb153ca 1356 fput_light(sock->file, fput_needed);
1da177e4
LT
1357 }
1358 return err;
1359}
1360
1da177e4
LT
1361/*
1362 * For accept, we attempt to create a new socket, set up the link
1363 * with the client, wake up the client, then return the new
1364 * connected fd. We collect the address of the connector in kernel
1365 * space and move it to user at the very end. This is unclean because
1366 * we open the socket then return an error.
1367 *
1368 * 1003.1g adds the ability to recvmsg() to query connection pending
1369 * status to recvmsg. We need to add that support in a way that's
1370 * clean when we restructure accept also.
1371 */
1372
89bddce5
SH
1373asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1374 int __user *upeer_addrlen)
1da177e4
LT
1375{
1376 struct socket *sock, *newsock;
39d8c1b6 1377 struct file *newfile;
6cb153ca 1378 int err, len, newfd, fput_needed;
1da177e4
LT
1379 char address[MAX_SOCK_ADDR];
1380
6cb153ca 1381 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1382 if (!sock)
1383 goto out;
1384
1385 err = -ENFILE;
89bddce5 1386 if (!(newsock = sock_alloc()))
1da177e4
LT
1387 goto out_put;
1388
1389 newsock->type = sock->type;
1390 newsock->ops = sock->ops;
1391
1da177e4
LT
1392 /*
1393 * We don't need try_module_get here, as the listening socket (sock)
1394 * has the protocol module (sock->ops->owner) held.
1395 */
1396 __module_get(newsock->ops->owner);
1397
39d8c1b6
DM
1398 newfd = sock_alloc_fd(&newfile);
1399 if (unlikely(newfd < 0)) {
1400 err = newfd;
9a1875e6
DM
1401 sock_release(newsock);
1402 goto out_put;
39d8c1b6
DM
1403 }
1404
1405 err = sock_attach_fd(newsock, newfile);
1406 if (err < 0)
1407 goto out_fd;
1408
a79af59e
FF
1409 err = security_socket_accept(sock, newsock);
1410 if (err)
39d8c1b6 1411 goto out_fd;
a79af59e 1412
1da177e4
LT
1413 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1414 if (err < 0)
39d8c1b6 1415 goto out_fd;
1da177e4
LT
1416
1417 if (upeer_sockaddr) {
89bddce5
SH
1418 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1419 &len, 2) < 0) {
1da177e4 1420 err = -ECONNABORTED;
39d8c1b6 1421 goto out_fd;
1da177e4 1422 }
89bddce5
SH
1423 err = move_addr_to_user(address, len, upeer_sockaddr,
1424 upeer_addrlen);
1da177e4 1425 if (err < 0)
39d8c1b6 1426 goto out_fd;
1da177e4
LT
1427 }
1428
1429 /* File flags are not inherited via accept() unlike another OSes. */
1430
39d8c1b6
DM
1431 fd_install(newfd, newfile);
1432 err = newfd;
1da177e4
LT
1433
1434 security_socket_post_accept(sock, newsock);
1435
1436out_put:
6cb153ca 1437 fput_light(sock->file, fput_needed);
1da177e4
LT
1438out:
1439 return err;
39d8c1b6 1440out_fd:
9606a216 1441 fput(newfile);
39d8c1b6 1442 put_unused_fd(newfd);
1da177e4
LT
1443 goto out_put;
1444}
1445
1da177e4
LT
1446/*
1447 * Attempt to connect to a socket with the server address. The address
1448 * is in user space so we verify it is OK and move it to kernel space.
1449 *
1450 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1451 * break bindings
1452 *
1453 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1454 * other SEQPACKET protocols that take time to connect() as it doesn't
1455 * include the -EINPROGRESS status for such sockets.
1456 */
1457
89bddce5
SH
1458asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1459 int addrlen)
1da177e4
LT
1460{
1461 struct socket *sock;
1462 char address[MAX_SOCK_ADDR];
6cb153ca 1463 int err, fput_needed;
1da177e4 1464
6cb153ca 1465 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1466 if (!sock)
1467 goto out;
1468 err = move_addr_to_kernel(uservaddr, addrlen, address);
1469 if (err < 0)
1470 goto out_put;
1471
89bddce5
SH
1472 err =
1473 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1474 if (err)
1475 goto out_put;
1476
89bddce5 1477 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1478 sock->file->f_flags);
1479out_put:
6cb153ca 1480 fput_light(sock->file, fput_needed);
1da177e4
LT
1481out:
1482 return err;
1483}
1484
1485/*
1486 * Get the local address ('name') of a socket object. Move the obtained
1487 * name to user space.
1488 */
1489
89bddce5
SH
1490asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1491 int __user *usockaddr_len)
1da177e4
LT
1492{
1493 struct socket *sock;
1494 char address[MAX_SOCK_ADDR];
6cb153ca 1495 int len, err, fput_needed;
89bddce5 1496
6cb153ca 1497 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1498 if (!sock)
1499 goto out;
1500
1501 err = security_socket_getsockname(sock);
1502 if (err)
1503 goto out_put;
1504
1505 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1506 if (err)
1507 goto out_put;
1508 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1509
1510out_put:
6cb153ca 1511 fput_light(sock->file, fput_needed);
1da177e4
LT
1512out:
1513 return err;
1514}
1515
1516/*
1517 * Get the remote address ('name') of a socket object. Move the obtained
1518 * name to user space.
1519 */
1520
89bddce5
SH
1521asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1522 int __user *usockaddr_len)
1da177e4
LT
1523{
1524 struct socket *sock;
1525 char address[MAX_SOCK_ADDR];
6cb153ca 1526 int len, err, fput_needed;
1da177e4 1527
89bddce5
SH
1528 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1529 if (sock != NULL) {
1da177e4
LT
1530 err = security_socket_getpeername(sock);
1531 if (err) {
6cb153ca 1532 fput_light(sock->file, fput_needed);
1da177e4
LT
1533 return err;
1534 }
1535
89bddce5
SH
1536 err =
1537 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1538 1);
1da177e4 1539 if (!err)
89bddce5
SH
1540 err = move_addr_to_user(address, len, usockaddr,
1541 usockaddr_len);
6cb153ca 1542 fput_light(sock->file, fput_needed);
1da177e4
LT
1543 }
1544 return err;
1545}
1546
1547/*
1548 * Send a datagram to a given address. We move the address into kernel
1549 * space and check the user space data area is readable before invoking
1550 * the protocol.
1551 */
1552
89bddce5
SH
1553asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1554 unsigned flags, struct sockaddr __user *addr,
1555 int addr_len)
1da177e4
LT
1556{
1557 struct socket *sock;
1558 char address[MAX_SOCK_ADDR];
1559 int err;
1560 struct msghdr msg;
1561 struct iovec iov;
6cb153ca
BL
1562 int fput_needed;
1563 struct file *sock_file;
1564
1565 sock_file = fget_light(fd, &fput_needed);
1566 if (!sock_file)
1567 return -EBADF;
1568
1569 sock = sock_from_file(sock_file, &err);
1da177e4 1570 if (!sock)
6cb153ca 1571 goto out_put;
89bddce5
SH
1572 iov.iov_base = buff;
1573 iov.iov_len = len;
1574 msg.msg_name = NULL;
1575 msg.msg_iov = &iov;
1576 msg.msg_iovlen = 1;
1577 msg.msg_control = NULL;
1578 msg.msg_controllen = 0;
1579 msg.msg_namelen = 0;
6cb153ca 1580 if (addr) {
1da177e4
LT
1581 err = move_addr_to_kernel(addr, addr_len, address);
1582 if (err < 0)
1583 goto out_put;
89bddce5
SH
1584 msg.msg_name = address;
1585 msg.msg_namelen = addr_len;
1da177e4
LT
1586 }
1587 if (sock->file->f_flags & O_NONBLOCK)
1588 flags |= MSG_DONTWAIT;
1589 msg.msg_flags = flags;
1590 err = sock_sendmsg(sock, &msg, len);
1591
89bddce5 1592out_put:
6cb153ca 1593 fput_light(sock_file, fput_needed);
1da177e4
LT
1594 return err;
1595}
1596
1597/*
89bddce5 1598 * Send a datagram down a socket.
1da177e4
LT
1599 */
1600
89bddce5 1601asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1602{
1603 return sys_sendto(fd, buff, len, flags, NULL, 0);
1604}
1605
1606/*
89bddce5 1607 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1608 * sender. We verify the buffers are writable and if needed move the
1609 * sender address from kernel to user space.
1610 */
1611
89bddce5
SH
1612asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1613 unsigned flags, struct sockaddr __user *addr,
1614 int __user *addr_len)
1da177e4
LT
1615{
1616 struct socket *sock;
1617 struct iovec iov;
1618 struct msghdr msg;
1619 char address[MAX_SOCK_ADDR];
89bddce5 1620 int err, err2;
6cb153ca
BL
1621 struct file *sock_file;
1622 int fput_needed;
1623
1624 sock_file = fget_light(fd, &fput_needed);
1625 if (!sock_file)
1626 return -EBADF;
1da177e4 1627
6cb153ca 1628 sock = sock_from_file(sock_file, &err);
1da177e4
LT
1629 if (!sock)
1630 goto out;
1631
89bddce5
SH
1632 msg.msg_control = NULL;
1633 msg.msg_controllen = 0;
1634 msg.msg_iovlen = 1;
1635 msg.msg_iov = &iov;
1636 iov.iov_len = size;
1637 iov.iov_base = ubuf;
1638 msg.msg_name = address;
1639 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1640 if (sock->file->f_flags & O_NONBLOCK)
1641 flags |= MSG_DONTWAIT;
89bddce5 1642 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1643
89bddce5
SH
1644 if (err >= 0 && addr != NULL) {
1645 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1646 if (err2 < 0)
1647 err = err2;
1da177e4 1648 }
1da177e4 1649out:
6cb153ca 1650 fput_light(sock_file, fput_needed);
1da177e4
LT
1651 return err;
1652}
1653
1654/*
89bddce5 1655 * Receive a datagram from a socket.
1da177e4
LT
1656 */
1657
89bddce5
SH
1658asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1659 unsigned flags)
1da177e4
LT
1660{
1661 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1662}
1663
1664/*
1665 * Set a socket option. Because we don't know the option lengths we have
1666 * to pass the user mode parameter for the protocols to sort out.
1667 */
1668
89bddce5
SH
1669asmlinkage long sys_setsockopt(int fd, int level, int optname,
1670 char __user *optval, int optlen)
1da177e4 1671{
6cb153ca 1672 int err, fput_needed;
1da177e4
LT
1673 struct socket *sock;
1674
1675 if (optlen < 0)
1676 return -EINVAL;
89bddce5
SH
1677
1678 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1679 if (sock != NULL) {
1680 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1681 if (err)
1682 goto out_put;
1da177e4
LT
1683
1684 if (level == SOL_SOCKET)
89bddce5
SH
1685 err =
1686 sock_setsockopt(sock, level, optname, optval,
1687 optlen);
1da177e4 1688 else
89bddce5
SH
1689 err =
1690 sock->ops->setsockopt(sock, level, optname, optval,
1691 optlen);
6cb153ca
BL
1692out_put:
1693 fput_light(sock->file, fput_needed);
1da177e4
LT
1694 }
1695 return err;
1696}
1697
1698/*
1699 * Get a socket option. Because we don't know the option lengths we have
1700 * to pass a user mode parameter for the protocols to sort out.
1701 */
1702
89bddce5
SH
1703asmlinkage long sys_getsockopt(int fd, int level, int optname,
1704 char __user *optval, int __user *optlen)
1da177e4 1705{
6cb153ca 1706 int err, fput_needed;
1da177e4
LT
1707 struct socket *sock;
1708
89bddce5
SH
1709 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1710 if (sock != NULL) {
6cb153ca
BL
1711 err = security_socket_getsockopt(sock, level, optname);
1712 if (err)
1713 goto out_put;
1da177e4
LT
1714
1715 if (level == SOL_SOCKET)
89bddce5
SH
1716 err =
1717 sock_getsockopt(sock, level, optname, optval,
1718 optlen);
1da177e4 1719 else
89bddce5
SH
1720 err =
1721 sock->ops->getsockopt(sock, level, optname, optval,
1722 optlen);
6cb153ca
BL
1723out_put:
1724 fput_light(sock->file, fput_needed);
1da177e4
LT
1725 }
1726 return err;
1727}
1728
1da177e4
LT
1729/*
1730 * Shutdown a socket.
1731 */
1732
1733asmlinkage long sys_shutdown(int fd, int how)
1734{
6cb153ca 1735 int err, fput_needed;
1da177e4
LT
1736 struct socket *sock;
1737
89bddce5
SH
1738 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1739 if (sock != NULL) {
1da177e4 1740 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1741 if (!err)
1742 err = sock->ops->shutdown(sock, how);
1743 fput_light(sock->file, fput_needed);
1da177e4
LT
1744 }
1745 return err;
1746}
1747
89bddce5 1748/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1749 * fields which are the same type (int / unsigned) on our platforms.
1750 */
1751#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
1752#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1753#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1754
1da177e4
LT
1755/*
1756 * BSD sendmsg interface
1757 */
1758
1759asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1760{
89bddce5
SH
1761 struct compat_msghdr __user *msg_compat =
1762 (struct compat_msghdr __user *)msg;
1da177e4
LT
1763 struct socket *sock;
1764 char address[MAX_SOCK_ADDR];
1765 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1766 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1767 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1768 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1769 unsigned char *ctl_buf = ctl;
1770 struct msghdr msg_sys;
1771 int err, ctl_len, iov_size, total_len;
6cb153ca 1772 int fput_needed;
89bddce5 1773
1da177e4
LT
1774 err = -EFAULT;
1775 if (MSG_CMSG_COMPAT & flags) {
1776 if (get_compat_msghdr(&msg_sys, msg_compat))
1777 return -EFAULT;
89bddce5
SH
1778 }
1779 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1780 return -EFAULT;
1781
6cb153ca 1782 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1783 if (!sock)
1da177e4
LT
1784 goto out;
1785
1786 /* do not move before msg_sys is valid */
1787 err = -EMSGSIZE;
1788 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1789 goto out_put;
1790
89bddce5 1791 /* Check whether to allocate the iovec area */
1da177e4
LT
1792 err = -ENOMEM;
1793 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1794 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1795 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1796 if (!iov)
1797 goto out_put;
1798 }
1799
1800 /* This will also move the address data into kernel space */
1801 if (MSG_CMSG_COMPAT & flags) {
1802 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1803 } else
1804 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1805 if (err < 0)
1da177e4
LT
1806 goto out_freeiov;
1807 total_len = err;
1808
1809 err = -ENOBUFS;
1810
1811 if (msg_sys.msg_controllen > INT_MAX)
1812 goto out_freeiov;
89bddce5 1813 ctl_len = msg_sys.msg_controllen;
1da177e4 1814 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1815 err =
1816 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1817 sizeof(ctl));
1da177e4
LT
1818 if (err)
1819 goto out_freeiov;
1820 ctl_buf = msg_sys.msg_control;
8920e8f9 1821 ctl_len = msg_sys.msg_controllen;
1da177e4 1822 } else if (ctl_len) {
89bddce5 1823 if (ctl_len > sizeof(ctl)) {
1da177e4 1824 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1825 if (ctl_buf == NULL)
1da177e4
LT
1826 goto out_freeiov;
1827 }
1828 err = -EFAULT;
1829 /*
1830 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1831 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1832 * checking falls down on this.
1833 */
89bddce5
SH
1834 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1835 ctl_len))
1da177e4
LT
1836 goto out_freectl;
1837 msg_sys.msg_control = ctl_buf;
1838 }
1839 msg_sys.msg_flags = flags;
1840
1841 if (sock->file->f_flags & O_NONBLOCK)
1842 msg_sys.msg_flags |= MSG_DONTWAIT;
1843 err = sock_sendmsg(sock, &msg_sys, total_len);
1844
1845out_freectl:
89bddce5 1846 if (ctl_buf != ctl)
1da177e4
LT
1847 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1848out_freeiov:
1849 if (iov != iovstack)
1850 sock_kfree_s(sock->sk, iov, iov_size);
1851out_put:
6cb153ca 1852 fput_light(sock->file, fput_needed);
89bddce5 1853out:
1da177e4
LT
1854 return err;
1855}
1856
1857/*
1858 * BSD recvmsg interface
1859 */
1860
89bddce5
SH
1861asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1862 unsigned int flags)
1da177e4 1863{
89bddce5
SH
1864 struct compat_msghdr __user *msg_compat =
1865 (struct compat_msghdr __user *)msg;
1da177e4
LT
1866 struct socket *sock;
1867 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1868 struct iovec *iov = iovstack;
1da177e4
LT
1869 struct msghdr msg_sys;
1870 unsigned long cmsg_ptr;
1871 int err, iov_size, total_len, len;
6cb153ca 1872 int fput_needed;
1da177e4
LT
1873
1874 /* kernel mode address */
1875 char addr[MAX_SOCK_ADDR];
1876
1877 /* user mode address pointers */
1878 struct sockaddr __user *uaddr;
1879 int __user *uaddr_len;
89bddce5 1880
1da177e4
LT
1881 if (MSG_CMSG_COMPAT & flags) {
1882 if (get_compat_msghdr(&msg_sys, msg_compat))
1883 return -EFAULT;
89bddce5
SH
1884 }
1885 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1886 return -EFAULT;
1da177e4 1887
6cb153ca 1888 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1889 if (!sock)
1890 goto out;
1891
1892 err = -EMSGSIZE;
1893 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1894 goto out_put;
89bddce5
SH
1895
1896 /* Check whether to allocate the iovec area */
1da177e4
LT
1897 err = -ENOMEM;
1898 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1899 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1900 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1901 if (!iov)
1902 goto out_put;
1903 }
1904
1905 /*
89bddce5
SH
1906 * Save the user-mode address (verify_iovec will change the
1907 * kernel msghdr to use the kernel address space)
1da177e4 1908 */
89bddce5
SH
1909
1910 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1911 uaddr_len = COMPAT_NAMELEN(msg);
1912 if (MSG_CMSG_COMPAT & flags) {
1913 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1914 } else
1915 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1916 if (err < 0)
1917 goto out_freeiov;
89bddce5 1918 total_len = err;
1da177e4
LT
1919
1920 cmsg_ptr = (unsigned long)msg_sys.msg_control;
1921 msg_sys.msg_flags = 0;
1922 if (MSG_CMSG_COMPAT & flags)
1923 msg_sys.msg_flags = MSG_CMSG_COMPAT;
89bddce5 1924
1da177e4
LT
1925 if (sock->file->f_flags & O_NONBLOCK)
1926 flags |= MSG_DONTWAIT;
1927 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1928 if (err < 0)
1929 goto out_freeiov;
1930 len = err;
1931
1932 if (uaddr != NULL) {
89bddce5
SH
1933 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1934 uaddr_len);
1da177e4
LT
1935 if (err < 0)
1936 goto out_freeiov;
1937 }
37f7f421
DM
1938 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1939 COMPAT_FLAGS(msg));
1da177e4
LT
1940 if (err)
1941 goto out_freeiov;
1942 if (MSG_CMSG_COMPAT & flags)
89bddce5 1943 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1944 &msg_compat->msg_controllen);
1945 else
89bddce5 1946 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1947 &msg->msg_controllen);
1948 if (err)
1949 goto out_freeiov;
1950 err = len;
1951
1952out_freeiov:
1953 if (iov != iovstack)
1954 sock_kfree_s(sock->sk, iov, iov_size);
1955out_put:
6cb153ca 1956 fput_light(sock->file, fput_needed);
1da177e4
LT
1957out:
1958 return err;
1959}
1960
1961#ifdef __ARCH_WANT_SYS_SOCKETCALL
1962
1963/* Argument list sizes for sys_socketcall */
1964#define AL(x) ((x) * sizeof(unsigned long))
89bddce5
SH
1965static const unsigned char nargs[18]={
1966 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
1967 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
1968 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
1969};
1970
1da177e4
LT
1971#undef AL
1972
1973/*
89bddce5 1974 * System call vectors.
1da177e4
LT
1975 *
1976 * Argument checking cleaned up. Saved 20% in size.
1977 * This function doesn't need to set the kernel lock because
89bddce5 1978 * it is set by the callees.
1da177e4
LT
1979 */
1980
1981asmlinkage long sys_socketcall(int call, unsigned long __user *args)
1982{
1983 unsigned long a[6];
89bddce5 1984 unsigned long a0, a1;
1da177e4
LT
1985 int err;
1986
89bddce5 1987 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
1988 return -EINVAL;
1989
1990 /* copy_from_user should be SMP safe. */
1991 if (copy_from_user(a, args, nargs[call]))
1992 return -EFAULT;
3ec3b2fb 1993
89bddce5 1994 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
1995 if (err)
1996 return err;
1997
89bddce5
SH
1998 a0 = a[0];
1999 a1 = a[1];
2000
2001 switch (call) {
2002 case SYS_SOCKET:
2003 err = sys_socket(a0, a1, a[2]);
2004 break;
2005 case SYS_BIND:
2006 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2007 break;
2008 case SYS_CONNECT:
2009 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2010 break;
2011 case SYS_LISTEN:
2012 err = sys_listen(a0, a1);
2013 break;
2014 case SYS_ACCEPT:
2015 err =
2016 sys_accept(a0, (struct sockaddr __user *)a1,
2017 (int __user *)a[2]);
2018 break;
2019 case SYS_GETSOCKNAME:
2020 err =
2021 sys_getsockname(a0, (struct sockaddr __user *)a1,
2022 (int __user *)a[2]);
2023 break;
2024 case SYS_GETPEERNAME:
2025 err =
2026 sys_getpeername(a0, (struct sockaddr __user *)a1,
2027 (int __user *)a[2]);
2028 break;
2029 case SYS_SOCKETPAIR:
2030 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2031 break;
2032 case SYS_SEND:
2033 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2034 break;
2035 case SYS_SENDTO:
2036 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2037 (struct sockaddr __user *)a[4], a[5]);
2038 break;
2039 case SYS_RECV:
2040 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2041 break;
2042 case SYS_RECVFROM:
2043 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2044 (struct sockaddr __user *)a[4],
2045 (int __user *)a[5]);
2046 break;
2047 case SYS_SHUTDOWN:
2048 err = sys_shutdown(a0, a1);
2049 break;
2050 case SYS_SETSOCKOPT:
2051 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2052 break;
2053 case SYS_GETSOCKOPT:
2054 err =
2055 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2056 (int __user *)a[4]);
2057 break;
2058 case SYS_SENDMSG:
2059 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2060 break;
2061 case SYS_RECVMSG:
2062 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2063 break;
2064 default:
2065 err = -EINVAL;
2066 break;
1da177e4
LT
2067 }
2068 return err;
2069}
2070
89bddce5 2071#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2072
55737fda
SH
2073/**
2074 * sock_register - add a socket protocol handler
2075 * @ops: description of protocol
2076 *
1da177e4
LT
2077 * This function is called by a protocol handler that wants to
2078 * advertise its address family, and have it linked into the
55737fda
SH
2079 * socket interface. The value ops->family corresponds to the
2080 * socket system call protocol family.
1da177e4 2081 */
f0fd27d4 2082int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2083{
2084 int err;
2085
2086 if (ops->family >= NPROTO) {
89bddce5
SH
2087 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2088 NPROTO);
1da177e4
LT
2089 return -ENOBUFS;
2090 }
55737fda
SH
2091
2092 spin_lock(&net_family_lock);
2093 if (net_families[ops->family])
2094 err = -EEXIST;
2095 else {
89bddce5 2096 net_families[ops->family] = ops;
1da177e4
LT
2097 err = 0;
2098 }
55737fda
SH
2099 spin_unlock(&net_family_lock);
2100
89bddce5 2101 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2102 return err;
2103}
2104
55737fda
SH
2105/**
2106 * sock_unregister - remove a protocol handler
2107 * @family: protocol family to remove
2108 *
1da177e4
LT
2109 * This function is called by a protocol handler that wants to
2110 * remove its address family, and have it unlinked from the
55737fda
SH
2111 * new socket creation.
2112 *
2113 * If protocol handler is a module, then it can use module reference
2114 * counts to protect against new references. If protocol handler is not
2115 * a module then it needs to provide its own protection in
2116 * the ops->create routine.
1da177e4 2117 */
f0fd27d4 2118void sock_unregister(int family)
1da177e4 2119{
f0fd27d4 2120 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2121
55737fda 2122 spin_lock(&net_family_lock);
89bddce5 2123 net_families[family] = NULL;
55737fda
SH
2124 spin_unlock(&net_family_lock);
2125
2126 synchronize_rcu();
2127
89bddce5 2128 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2129}
2130
77d76ea3 2131static int __init sock_init(void)
1da177e4
LT
2132{
2133 /*
89bddce5 2134 * Initialize sock SLAB cache.
1da177e4 2135 */
89bddce5 2136
1da177e4
LT
2137 sk_init();
2138
1da177e4 2139 /*
89bddce5 2140 * Initialize skbuff SLAB cache
1da177e4
LT
2141 */
2142 skb_init();
1da177e4
LT
2143
2144 /*
89bddce5 2145 * Initialize the protocols module.
1da177e4
LT
2146 */
2147
2148 init_inodecache();
2149 register_filesystem(&sock_fs_type);
2150 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2151
2152 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2153 */
2154
2155#ifdef CONFIG_NETFILTER
2156 netfilter_init();
2157#endif
cbeb321a
DM
2158
2159 return 0;
1da177e4
LT
2160}
2161
77d76ea3
AK
2162core_initcall(sock_init); /* early initcall */
2163
1da177e4
LT
2164#ifdef CONFIG_PROC_FS
2165void socket_seq_show(struct seq_file *seq)
2166{
2167 int cpu;
2168 int counter = 0;
2169
6f912042 2170 for_each_possible_cpu(cpu)
89bddce5 2171 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2172
2173 /* It can be negative, by the way. 8) */
2174 if (counter < 0)
2175 counter = 0;
2176
2177 seq_printf(seq, "sockets: used %d\n", counter);
2178}
89bddce5 2179#endif /* CONFIG_PROC_FS */
1da177e4 2180
89bbfc95
SP
2181#ifdef CONFIG_COMPAT
2182static long compat_sock_ioctl(struct file *file, unsigned cmd,
89bddce5 2183 unsigned long arg)
89bbfc95
SP
2184{
2185 struct socket *sock = file->private_data;
2186 int ret = -ENOIOCTLCMD;
2187
2188 if (sock->ops->compat_ioctl)
2189 ret = sock->ops->compat_ioctl(sock, cmd, arg);
2190
2191 return ret;
2192}
2193#endif
2194
ac5a488e
SS
2195int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2196{
2197 return sock->ops->bind(sock, addr, addrlen);
2198}
2199
2200int kernel_listen(struct socket *sock, int backlog)
2201{
2202 return sock->ops->listen(sock, backlog);
2203}
2204
2205int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2206{
2207 struct sock *sk = sock->sk;
2208 int err;
2209
2210 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2211 newsock);
2212 if (err < 0)
2213 goto done;
2214
2215 err = sock->ops->accept(sock, *newsock, flags);
2216 if (err < 0) {
2217 sock_release(*newsock);
2218 goto done;
2219 }
2220
2221 (*newsock)->ops = sock->ops;
2222
2223done:
2224 return err;
2225}
2226
2227int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
2228 int flags)
2229{
2230 return sock->ops->connect(sock, addr, addrlen, flags);
2231}
2232
2233int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2234 int *addrlen)
2235{
2236 return sock->ops->getname(sock, addr, addrlen, 0);
2237}
2238
2239int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2240 int *addrlen)
2241{
2242 return sock->ops->getname(sock, addr, addrlen, 1);
2243}
2244
2245int kernel_getsockopt(struct socket *sock, int level, int optname,
2246 char *optval, int *optlen)
2247{
2248 mm_segment_t oldfs = get_fs();
2249 int err;
2250
2251 set_fs(KERNEL_DS);
2252 if (level == SOL_SOCKET)
2253 err = sock_getsockopt(sock, level, optname, optval, optlen);
2254 else
2255 err = sock->ops->getsockopt(sock, level, optname, optval,
2256 optlen);
2257 set_fs(oldfs);
2258 return err;
2259}
2260
2261int kernel_setsockopt(struct socket *sock, int level, int optname,
2262 char *optval, int optlen)
2263{
2264 mm_segment_t oldfs = get_fs();
2265 int err;
2266
2267 set_fs(KERNEL_DS);
2268 if (level == SOL_SOCKET)
2269 err = sock_setsockopt(sock, level, optname, optval, optlen);
2270 else
2271 err = sock->ops->setsockopt(sock, level, optname, optval,
2272 optlen);
2273 set_fs(oldfs);
2274 return err;
2275}
2276
2277int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2278 size_t size, int flags)
2279{
2280 if (sock->ops->sendpage)
2281 return sock->ops->sendpage(sock, page, offset, size, flags);
2282
2283 return sock_no_sendpage(sock, page, offset, size, flags);
2284}
2285
2286int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2287{
2288 mm_segment_t oldfs = get_fs();
2289 int err;
2290
2291 set_fs(KERNEL_DS);
2292 err = sock->ops->ioctl(sock, cmd, arg);
2293 set_fs(oldfs);
2294
2295 return err;
2296}
2297
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket creation, registration and message helpers */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() socket wrappers */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);