[NET]: change layout of ehash table
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1da177e4
LT
87
88#include <asm/uaccess.h>
89#include <asm/unistd.h>
90
91#include <net/compat.h>
92
93#include <net/sock.h>
94#include <linux/netfilter.h>
95
/*
 * Forward declarations of the handlers that socket_file_ops (defined just
 * below) points at.
 */
96static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
97static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
98 unsigned long nr_segs, loff_t pos);
99static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
100 unsigned long nr_segs, loff_t pos);
89bddce5 101static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
102
103static int sock_close(struct inode *inode, struct file *file);
104static unsigned int sock_poll(struct file *file,
105 struct poll_table_struct *wait);
89bddce5 106static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
/* The compat ioctl entry point only exists when CONFIG_COMPAT is set. */
107#ifdef CONFIG_COMPAT
108static long compat_sock_ioctl(struct file *file,
89bddce5 109 unsigned int cmd, unsigned long arg);
89bbfc95 110#endif
1da177e4 111static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
112static ssize_t sock_sendpage(struct file *file, struct page *page,
113 int offset, size_t size, loff_t *ppos, int more);
114
1da177e4
LT
115/*
116 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
117 * in the operation structures but are done directly via the socketcall() multiplexor.
118 */
119
120static struct file_operations socket_file_ops = {
121 .owner = THIS_MODULE,
122 .llseek = no_llseek,
123 .aio_read = sock_aio_read,
124 .aio_write = sock_aio_write,
125 .poll = sock_poll,
126 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
127#ifdef CONFIG_COMPAT
128 .compat_ioctl = compat_sock_ioctl,
129#endif
1da177e4
LT
130 .mmap = sock_mmap,
131 .open = sock_no_open, /* special open code to disallow open via /proc */
132 .release = sock_close,
133 .fasync = sock_fasync,
5274f052
JA
134 .sendpage = sock_sendpage,
135 .splice_write = generic_splice_sendpage,
1da177e4
LT
136};
137
138/*
139 * The protocol list. Each protocol is registered in here.
140 */
141
/*
 * NOTE(review): net_family_lock presumably serialises writers of
 * net_families[]; its users are outside this excerpt - confirm.
 */
1da177e4 142static DEFINE_SPINLOCK(net_family_lock);
/*
 * Indexed by protocol family number (0 .. NPROTO-1). __sock_create() reads
 * entries inside rcu_read_lock() through rcu_dereference().
 */
f0fd27d4 143static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 144
1da177e4
LT
145/*
146 * Statistics counters of the socket lists
147 */
148
/* Per-CPU counter: incremented by sock_alloc(), decremented by sock_release(). */
149static DEFINE_PER_CPU(int, sockets_in_use) = 0;
150
151/*
89bddce5
SH
152 * Support routines.
153 * Move socket addresses back and forth across the kernel/user
154 * divide and look after the messy bits.
1da177e4
LT
155 */
156
/*
 * Largest socket address, in bytes, that is moved across the user/kernel
 * boundary by the helpers below:
 *	108 for Unix domain, 16 for IP, 16 for IPX, 24 for IPv6,
 *	about 80 for AX.25.
 * Must be at least one bigger than the AF_UNIX size
 * (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 165
1da177e4
LT
166/**
167 * move_addr_to_kernel - copy a socket address into kernel space
168 * @uaddr: Address in user space
169 * @kaddr: Address in kernel space
170 * @ulen: Length in user space
171 *
172 * The address is copied into kernel space. If the provided address is
173 * too long an error code of -EINVAL is returned. If the copy gives
174 * invalid addresses -EFAULT is returned. On a success 0 is returned.
175 */
176
177int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
178{
89bddce5 179 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 180 return -EINVAL;
89bddce5 181 if (ulen == 0)
1da177e4 182 return 0;
89bddce5 183 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 184 return -EFAULT;
3ec3b2fb 185 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
186}
187
188/**
189 * move_addr_to_user - copy an address to user space
190 * @kaddr: kernel space address
191 * @klen: length of address in kernel
192 * @uaddr: user space address
193 * @ulen: pointer to user length field
194 *
195 * The value pointed to by ulen on entry is the buffer length available.
196 * This is overwritten with the buffer space used. -EINVAL is returned
197 * if an overlong buffer is specified or a negative buffer size. -EFAULT
198 * is returned if either the buffer or the length field are not
199 * accessible.
200 * After copying the data up to the limit the user specifies, the true
201 * length of the data is written over the length limit the user
202 * specified. Zero is returned for a success.
203 */
89bddce5
SH
204
205int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
206 int __user *ulen)
1da177e4
LT
207{
208 int err;
209 int len;
210
89bddce5
SH
211 err = get_user(len, ulen);
212 if (err)
1da177e4 213 return err;
89bddce5
SH
214 if (len > klen)
215 len = klen;
216 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 217 return -EINVAL;
89bddce5 218 if (len) {
d6fe3945
SG
219 if (audit_sockaddr(klen, kaddr))
220 return -ENOMEM;
89bddce5 221 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
222 return -EFAULT;
223 }
224 /*
89bddce5
SH
225 * "fromlen shall refer to the value before truncation.."
226 * 1003.1g
1da177e4
LT
227 */
228 return __put_user(klen, ulen);
229}
230
/* Magic number handed to get_sb_pseudo() for sockfs; the bytes spell "SOCK" in ASCII. */
231#define SOCKFS_MAGIC 0x534F434B
232
/* Slab cache of struct socket_alloc, created by init_inodecache(). */
e18b890b 233static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
234
235static struct inode *sock_alloc_inode(struct super_block *sb)
236{
237 struct socket_alloc *ei;
89bddce5 238
e94b1766 239 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
240 if (!ei)
241 return NULL;
242 init_waitqueue_head(&ei->socket.wait);
89bddce5 243
1da177e4
LT
244 ei->socket.fasync_list = NULL;
245 ei->socket.state = SS_UNCONNECTED;
246 ei->socket.flags = 0;
247 ei->socket.ops = NULL;
248 ei->socket.sk = NULL;
249 ei->socket.file = NULL;
1da177e4
LT
250
251 return &ei->vfs_inode;
252}
253
254static void sock_destroy_inode(struct inode *inode)
255{
256 kmem_cache_free(sock_inode_cachep,
257 container_of(inode, struct socket_alloc, vfs_inode));
258}
259
e18b890b 260static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
1da177e4 261{
89bddce5 262 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 263
89bddce5
SH
264 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
265 == SLAB_CTOR_CONSTRUCTOR)
1da177e4
LT
266 inode_init_once(&ei->vfs_inode);
267}
89bddce5 268
1da177e4
LT
269static int init_inodecache(void)
270{
271 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
272 sizeof(struct socket_alloc),
273 0,
274 (SLAB_HWCACHE_ALIGN |
275 SLAB_RECLAIM_ACCOUNT |
276 SLAB_MEM_SPREAD),
277 init_once,
278 NULL);
1da177e4
LT
279 if (sock_inode_cachep == NULL)
280 return -ENOMEM;
281 return 0;
282}
283
284static struct super_operations sockfs_ops = {
285 .alloc_inode = sock_alloc_inode,
286 .destroy_inode =sock_destroy_inode,
287 .statfs = simple_statfs,
288};
289
454e2398 290static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
291 int flags, const char *dev_name, void *data,
292 struct vfsmount *mnt)
1da177e4 293{
454e2398
DH
294 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
295 mnt);
1da177e4
LT
296}
297
/*
 * Mount of sockfs. Its superblock supplies new socket inodes (sock_alloc())
 * and its root dentry parents the dentries made in sock_attach_fd().
 */
ba89966c 298static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
299
300static struct file_system_type sock_fs_type = {
301 .name = "sockfs",
302 .get_sb = sockfs_get_sb,
303 .kill_sb = kill_anon_super,
304};
89bddce5 305
1da177e4
LT
306static int sockfs_delete_dentry(struct dentry *dentry)
307{
304e61e6
ED
308 /*
309 * At creation time, we pretended this dentry was hashed
310 * (by clearing DCACHE_UNHASHED bit in d_flags)
311 * At delete time, we restore the truth : not hashed.
312 * (so that dput() can proceed correctly)
313 */
314 dentry->d_flags |= DCACHE_UNHASHED;
315 return 0;
1da177e4
LT
316}
317static struct dentry_operations sockfs_dentry_operations = {
89bddce5 318 .d_delete = sockfs_delete_dentry,
1da177e4
LT
319};
320
321/*
322 * Obtains the first available file descriptor and sets it up for use.
323 *
39d8c1b6
DM
324 * These functions create file structures and map them to fd space
325 * of the current process. On success it returns file descriptor
1da177e4
LT
326 * and file struct implicitly stored in sock->file.
327 * Note that another thread may close file descriptor before we return
328 * from this function. We use the fact that now we do not refer
329 * to socket after mapping. If one day we will need it, this
330 * function will increment ref. count on file by 1.
331 *
332 * In any case returned fd MAY BE not valid!
333 * This race condition is unavoidable
334 * with shared fd spaces, we cannot solve it inside kernel,
335 * but we take care of internal coherence yet.
336 */
337
39d8c1b6 338static int sock_alloc_fd(struct file **filep)
1da177e4
LT
339{
340 int fd;
1da177e4
LT
341
342 fd = get_unused_fd();
39d8c1b6 343 if (likely(fd >= 0)) {
1da177e4
LT
344 struct file *file = get_empty_filp();
345
39d8c1b6
DM
346 *filep = file;
347 if (unlikely(!file)) {
1da177e4 348 put_unused_fd(fd);
39d8c1b6 349 return -ENFILE;
1da177e4 350 }
39d8c1b6
DM
351 } else
352 *filep = NULL;
353 return fd;
354}
1da177e4 355
39d8c1b6
DM
356static int sock_attach_fd(struct socket *sock, struct file *file)
357{
358 struct qstr this;
359 char name[32];
360
361 this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
362 this.name = name;
304e61e6 363 this.hash = 0;
39d8c1b6 364
3126a42c
JS
365 file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
366 if (unlikely(!file->f_path.dentry))
39d8c1b6
DM
367 return -ENOMEM;
368
3126a42c 369 file->f_path.dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
370 /*
371 * We dont want to push this dentry into global dentry hash table.
372 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
373 * This permits a working /proc/$pid/fd/XXX on sockets
374 */
3126a42c
JS
375 file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
376 d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
377 file->f_path.mnt = mntget(sock_mnt);
378 file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
39d8c1b6
DM
379
380 sock->file = file;
381 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
382 file->f_mode = FMODE_READ | FMODE_WRITE;
383 file->f_flags = O_RDWR;
384 file->f_pos = 0;
385 file->private_data = sock;
1da177e4 386
39d8c1b6
DM
387 return 0;
388}
389
/*
 * Give @sock a file descriptor in the current process.
 *
 * Returns the installed descriptor, or a negative errno from
 * sock_alloc_fd() or sock_attach_fd(). When attaching fails, the file and
 * the descriptor reserved for it are released again.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd = sock_alloc_fd(&newfile);

	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
407
6cb153ca
BL
408static struct socket *sock_from_file(struct file *file, int *err)
409{
410 struct inode *inode;
411 struct socket *sock;
412
413 if (file->f_op == &socket_file_ops)
414 return file->private_data; /* set in sock_map_fd */
415
3126a42c 416 inode = file->f_path.dentry->d_inode;
6cb153ca
BL
417 if (!S_ISSOCK(inode->i_mode)) {
418 *err = -ENOTSOCK;
419 return NULL;
420 }
421
422 sock = SOCKET_I(inode);
423 if (sock->file != file) {
424 printk(KERN_ERR "socki_lookup: socket file changed!\n");
425 sock->file = file;
426 }
427 return sock;
428}
429
1da177e4
LT
430/**
431 * sockfd_lookup - Go from a file number to its socket slot
432 * @fd: file handle
433 * @err: pointer to an error code return
434 *
435 * The file handle passed in is locked and the socket it is bound
436 * too is returned. If an error occurs the err pointer is overwritten
437 * with a negative errno code and NULL is returned. The function checks
438 * for both invalid handles and passing a handle which is not a socket.
439 *
440 * On a success the socket object pointer is returned.
441 */
442
443struct socket *sockfd_lookup(int fd, int *err)
444{
445 struct file *file;
1da177e4
LT
446 struct socket *sock;
447
89bddce5
SH
448 file = fget(fd);
449 if (!file) {
1da177e4
LT
450 *err = -EBADF;
451 return NULL;
452 }
89bddce5 453
6cb153ca
BL
454 sock = sock_from_file(file, err);
455 if (!sock)
1da177e4 456 fput(file);
6cb153ca
BL
457 return sock;
458}
1da177e4 459
6cb153ca
BL
460static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
461{
462 struct file *file;
463 struct socket *sock;
464
3672558c 465 *err = -EBADF;
6cb153ca
BL
466 file = fget_light(fd, fput_needed);
467 if (file) {
468 sock = sock_from_file(file, err);
469 if (sock)
470 return sock;
471 fput_light(file, *fput_needed);
1da177e4 472 }
6cb153ca 473 return NULL;
1da177e4
LT
474}
475
476/**
477 * sock_alloc - allocate a socket
89bddce5 478 *
1da177e4
LT
479 * Allocate a new inode and socket object. The two are bound together
480 * and initialised. The socket is then returned. If we are out of inodes
481 * NULL is returned.
482 */
483
484static struct socket *sock_alloc(void)
485{
89bddce5
SH
486 struct inode *inode;
487 struct socket *sock;
1da177e4
LT
488
489 inode = new_inode(sock_mnt->mnt_sb);
490 if (!inode)
491 return NULL;
492
493 sock = SOCKET_I(inode);
494
89bddce5 495 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
496 inode->i_uid = current->fsuid;
497 inode->i_gid = current->fsgid;
498
499 get_cpu_var(sockets_in_use)++;
500 put_cpu_var(sockets_in_use);
501 return sock;
502}
503
504/*
505 * In theory you can't get an open on this inode, but /proc provides
506 * a back door. Remember to keep it shut otherwise you'll let the
507 * creepy crawlies in.
508 */
89bddce5 509
1da177e4
LT
510static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
511{
512 return -ENXIO;
513}
514
4b6f5d20 515const struct file_operations bad_sock_fops = {
1da177e4
LT
516 .owner = THIS_MODULE,
517 .open = sock_no_open,
518};
519
520/**
521 * sock_release - close a socket
522 * @sock: socket to close
523 *
524 * The socket is released from the protocol stack if it has a release
525 * callback, and the inode is then released if the socket is bound to
89bddce5 526 * an inode not a file.
1da177e4 527 */
89bddce5 528
1da177e4
LT
529void sock_release(struct socket *sock)
530{
531 if (sock->ops) {
532 struct module *owner = sock->ops->owner;
533
534 sock->ops->release(sock);
535 sock->ops = NULL;
536 module_put(owner);
537 }
538
539 if (sock->fasync_list)
540 printk(KERN_ERR "sock_release: fasync list not empty!\n");
541
542 get_cpu_var(sockets_in_use)--;
543 put_cpu_var(sockets_in_use);
544 if (!sock->file) {
545 iput(SOCK_INODE(sock));
546 return;
547 }
89bddce5 548 sock->file = NULL;
1da177e4
LT
549}
550
89bddce5 551static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
552 struct msghdr *msg, size_t size)
553{
554 struct sock_iocb *si = kiocb_to_siocb(iocb);
555 int err;
556
557 si->sock = sock;
558 si->scm = NULL;
559 si->msg = msg;
560 si->size = size;
561
562 err = security_socket_sendmsg(sock, msg, size);
563 if (err)
564 return err;
565
566 return sock->ops->sendmsg(iocb, sock, msg, size);
567}
568
569int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
570{
571 struct kiocb iocb;
572 struct sock_iocb siocb;
573 int ret;
574
575 init_sync_kiocb(&iocb, NULL);
576 iocb.private = &siocb;
577 ret = __sock_sendmsg(&iocb, sock, msg, size);
578 if (-EIOCBQUEUED == ret)
579 ret = wait_on_sync_kiocb(&iocb);
580 return ret;
581}
582
583int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
584 struct kvec *vec, size_t num, size_t size)
585{
586 mm_segment_t oldfs = get_fs();
587 int result;
588
589 set_fs(KERNEL_DS);
590 /*
591 * the following is safe, since for compiler definitions of kvec and
592 * iovec are identical, yielding the same in-core layout and alignment
593 */
89bddce5 594 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
595 msg->msg_iovlen = num;
596 result = sock_sendmsg(sock, msg, size);
597 set_fs(oldfs);
598 return result;
599}
600
89bddce5 601static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
602 struct msghdr *msg, size_t size, int flags)
603{
604 int err;
605 struct sock_iocb *si = kiocb_to_siocb(iocb);
606
607 si->sock = sock;
608 si->scm = NULL;
609 si->msg = msg;
610 si->size = size;
611 si->flags = flags;
612
613 err = security_socket_recvmsg(sock, msg, size, flags);
614 if (err)
615 return err;
616
617 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
618}
619
89bddce5 620int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
621 size_t size, int flags)
622{
623 struct kiocb iocb;
624 struct sock_iocb siocb;
625 int ret;
626
89bddce5 627 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
628 iocb.private = &siocb;
629 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
630 if (-EIOCBQUEUED == ret)
631 ret = wait_on_sync_kiocb(&iocb);
632 return ret;
633}
634
89bddce5
SH
635int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
636 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
637{
638 mm_segment_t oldfs = get_fs();
639 int result;
640
641 set_fs(KERNEL_DS);
642 /*
643 * the following is safe, since for compiler definitions of kvec and
644 * iovec are identical, yielding the same in-core layout and alignment
645 */
89bddce5 646 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
647 result = sock_recvmsg(sock, msg, size, flags);
648 set_fs(oldfs);
649 return result;
650}
651
652static void sock_aio_dtor(struct kiocb *iocb)
653{
654 kfree(iocb->private);
655}
656
ce1d4d3e
CH
657static ssize_t sock_sendpage(struct file *file, struct page *page,
658 int offset, size_t size, loff_t *ppos, int more)
1da177e4 659{
1da177e4
LT
660 struct socket *sock;
661 int flags;
662
ce1d4d3e
CH
663 sock = file->private_data;
664
665 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
666 if (more)
667 flags |= MSG_MORE;
668
669 return sock->ops->sendpage(sock, page, offset, size, flags);
670}
1da177e4 671
ce1d4d3e 672static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 673 struct sock_iocb *siocb)
ce1d4d3e
CH
674{
675 if (!is_sync_kiocb(iocb)) {
676 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
677 if (!siocb)
678 return NULL;
1da177e4
LT
679 iocb->ki_dtor = sock_aio_dtor;
680 }
1da177e4 681
ce1d4d3e 682 siocb->kiocb = iocb;
ce1d4d3e
CH
683 iocb->private = siocb;
684 return siocb;
1da177e4
LT
685}
686
ce1d4d3e 687static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
688 struct file *file, const struct iovec *iov,
689 unsigned long nr_segs)
ce1d4d3e
CH
690{
691 struct socket *sock = file->private_data;
692 size_t size = 0;
693 int i;
1da177e4 694
89bddce5
SH
695 for (i = 0; i < nr_segs; i++)
696 size += iov[i].iov_len;
1da177e4 697
ce1d4d3e
CH
698 msg->msg_name = NULL;
699 msg->msg_namelen = 0;
700 msg->msg_control = NULL;
701 msg->msg_controllen = 0;
89bddce5 702 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
703 msg->msg_iovlen = nr_segs;
704 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
705
706 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
707}
708
027445c3
BP
709static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
710 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
711{
712 struct sock_iocb siocb, *x;
713
1da177e4
LT
714 if (pos != 0)
715 return -ESPIPE;
027445c3
BP
716
717 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
718 return 0;
719
027445c3
BP
720
721 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
722 if (!x)
723 return -ENOMEM;
027445c3 724 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
725}
726
ce1d4d3e 727static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
728 struct file *file, const struct iovec *iov,
729 unsigned long nr_segs)
1da177e4 730{
ce1d4d3e
CH
731 struct socket *sock = file->private_data;
732 size_t size = 0;
733 int i;
1da177e4 734
89bddce5
SH
735 for (i = 0; i < nr_segs; i++)
736 size += iov[i].iov_len;
1da177e4 737
ce1d4d3e
CH
738 msg->msg_name = NULL;
739 msg->msg_namelen = 0;
740 msg->msg_control = NULL;
741 msg->msg_controllen = 0;
89bddce5 742 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
743 msg->msg_iovlen = nr_segs;
744 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
745 if (sock->type == SOCK_SEQPACKET)
746 msg->msg_flags |= MSG_EOR;
1da177e4 747
ce1d4d3e 748 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
749}
750
027445c3
BP
751static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
752 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
753{
754 struct sock_iocb siocb, *x;
1da177e4 755
ce1d4d3e
CH
756 if (pos != 0)
757 return -ESPIPE;
027445c3
BP
758
759 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
ce1d4d3e 760 return 0;
1da177e4 761
027445c3 762 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
763 if (!x)
764 return -ENOMEM;
1da177e4 765
027445c3 766 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
767}
768
1da177e4
LT
769/*
770 * Atomic setting of ioctl hooks to avoid race
771 * with module unload.
772 */
773
4a3e2f71 774static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 775static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 776
89bddce5 777void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 778{
4a3e2f71 779 mutex_lock(&br_ioctl_mutex);
1da177e4 780 br_ioctl_hook = hook;
4a3e2f71 781 mutex_unlock(&br_ioctl_mutex);
1da177e4 782}
89bddce5 783
1da177e4
LT
784EXPORT_SYMBOL(brioctl_set);
785
4a3e2f71 786static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 787static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 788
89bddce5 789void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 790{
4a3e2f71 791 mutex_lock(&vlan_ioctl_mutex);
1da177e4 792 vlan_ioctl_hook = hook;
4a3e2f71 793 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 794}
89bddce5 795
1da177e4
LT
796EXPORT_SYMBOL(vlan_ioctl_set);
797
4a3e2f71 798static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 799static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 800
89bddce5 801void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 802{
4a3e2f71 803 mutex_lock(&dlci_ioctl_mutex);
1da177e4 804 dlci_ioctl_hook = hook;
4a3e2f71 805 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 806}
89bddce5 807
1da177e4
LT
808EXPORT_SYMBOL(dlci_ioctl_set);
809
810/*
811 * With an ioctl, arg may well be a user mode pointer, but we don't know
812 * what to do with it - that's up to the protocol still.
813 */
814
815static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
816{
817 struct socket *sock;
818 void __user *argp = (void __user *)arg;
819 int pid, err;
820
b69aee04 821 sock = file->private_data;
1da177e4
LT
822 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
823 err = dev_ioctl(cmd, argp);
824 } else
d86b5e0e 825#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
826 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
827 err = dev_ioctl(cmd, argp);
828 } else
89bddce5
SH
829#endif /* CONFIG_WIRELESS_EXT */
830 switch (cmd) {
1da177e4
LT
831 case FIOSETOWN:
832 case SIOCSPGRP:
833 err = -EFAULT;
834 if (get_user(pid, (int __user *)argp))
835 break;
836 err = f_setown(sock->file, pid, 1);
837 break;
838 case FIOGETOWN:
839 case SIOCGPGRP:
609d7fa9 840 err = put_user(f_getown(sock->file),
89bddce5 841 (int __user *)argp);
1da177e4
LT
842 break;
843 case SIOCGIFBR:
844 case SIOCSIFBR:
845 case SIOCBRADDBR:
846 case SIOCBRDELBR:
847 err = -ENOPKG;
848 if (!br_ioctl_hook)
849 request_module("bridge");
850
4a3e2f71 851 mutex_lock(&br_ioctl_mutex);
89bddce5 852 if (br_ioctl_hook)
1da177e4 853 err = br_ioctl_hook(cmd, argp);
4a3e2f71 854 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
855 break;
856 case SIOCGIFVLAN:
857 case SIOCSIFVLAN:
858 err = -ENOPKG;
859 if (!vlan_ioctl_hook)
860 request_module("8021q");
861
4a3e2f71 862 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
863 if (vlan_ioctl_hook)
864 err = vlan_ioctl_hook(argp);
4a3e2f71 865 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 866 break;
1da177e4
LT
867 case SIOCADDDLCI:
868 case SIOCDELDLCI:
869 err = -ENOPKG;
870 if (!dlci_ioctl_hook)
871 request_module("dlci");
872
873 if (dlci_ioctl_hook) {
4a3e2f71 874 mutex_lock(&dlci_ioctl_mutex);
1da177e4 875 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 876 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
877 }
878 break;
879 default:
880 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
881
882 /*
883 * If this ioctl is unknown try to hand it down
884 * to the NIC driver.
885 */
886 if (err == -ENOIOCTLCMD)
887 err = dev_ioctl(cmd, argp);
1da177e4 888 break;
89bddce5 889 }
1da177e4
LT
890 return err;
891}
892
893int sock_create_lite(int family, int type, int protocol, struct socket **res)
894{
895 int err;
896 struct socket *sock = NULL;
89bddce5 897
1da177e4
LT
898 err = security_socket_create(family, type, protocol, 1);
899 if (err)
900 goto out;
901
902 sock = sock_alloc();
903 if (!sock) {
904 err = -ENOMEM;
905 goto out;
906 }
907
1da177e4 908 sock->type = type;
7420ed23
VY
909 err = security_socket_post_create(sock, family, type, protocol, 1);
910 if (err)
911 goto out_release;
912
1da177e4
LT
913out:
914 *res = sock;
915 return err;
7420ed23
VY
916out_release:
917 sock_release(sock);
918 sock = NULL;
919 goto out;
1da177e4
LT
920}
921
922/* No kernel lock held - perfect */
89bddce5 923static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
924{
925 struct socket *sock;
926
927 /*
89bddce5 928 * We can't return errors to poll, so it's either yes or no.
1da177e4 929 */
b69aee04 930 sock = file->private_data;
1da177e4
LT
931 return sock->ops->poll(file, sock, wait);
932}
933
89bddce5 934static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 935{
b69aee04 936 struct socket *sock = file->private_data;
1da177e4
LT
937
938 return sock->ops->mmap(file, sock, vma);
939}
940
20380731 941static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
942{
943 /*
89bddce5
SH
944 * It was possible the inode is NULL we were
945 * closing an unfinished socket.
1da177e4
LT
946 */
947
89bddce5 948 if (!inode) {
1da177e4
LT
949 printk(KERN_DEBUG "sock_close: NULL inode\n");
950 return 0;
951 }
952 sock_fasync(-1, filp, 0);
953 sock_release(SOCKET_I(inode));
954 return 0;
955}
956
957/*
958 * Update the socket async list
959 *
960 * Fasync_list locking strategy.
961 *
962 * 1. fasync_list is modified only under process context socket lock
963 * i.e. under semaphore.
964 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
965 * or under socket lock.
966 * 3. fasync_list can be used from softirq context, so that
967 * modification under socket lock have to be enhanced with
968 * write_lock_bh(&sk->sk_callback_lock).
969 * --ANK (990710)
970 */
971
972static int sock_fasync(int fd, struct file *filp, int on)
973{
89bddce5 974 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
975 struct socket *sock;
976 struct sock *sk;
977
89bddce5 978 if (on) {
8b3a7005 979 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 980 if (fna == NULL)
1da177e4
LT
981 return -ENOMEM;
982 }
983
b69aee04 984 sock = filp->private_data;
1da177e4 985
89bddce5
SH
986 sk = sock->sk;
987 if (sk == NULL) {
1da177e4
LT
988 kfree(fna);
989 return -EINVAL;
990 }
991
992 lock_sock(sk);
993
89bddce5 994 prev = &(sock->fasync_list);
1da177e4 995
89bddce5
SH
996 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
997 if (fa->fa_file == filp)
1da177e4
LT
998 break;
999
89bddce5
SH
1000 if (on) {
1001 if (fa != NULL) {
1da177e4 1002 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1003 fa->fa_fd = fd;
1da177e4
LT
1004 write_unlock_bh(&sk->sk_callback_lock);
1005
1006 kfree(fna);
1007 goto out;
1008 }
89bddce5
SH
1009 fna->fa_file = filp;
1010 fna->fa_fd = fd;
1011 fna->magic = FASYNC_MAGIC;
1012 fna->fa_next = sock->fasync_list;
1da177e4 1013 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1014 sock->fasync_list = fna;
1da177e4 1015 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1016 } else {
1017 if (fa != NULL) {
1da177e4 1018 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1019 *prev = fa->fa_next;
1da177e4
LT
1020 write_unlock_bh(&sk->sk_callback_lock);
1021 kfree(fa);
1022 }
1023 }
1024
1025out:
1026 release_sock(sock->sk);
1027 return 0;
1028}
1029
1030/* This function may be called only under socket lock or callback_lock */
1031
1032int sock_wake_async(struct socket *sock, int how, int band)
1033{
1034 if (!sock || !sock->fasync_list)
1035 return -1;
89bddce5 1036 switch (how) {
1da177e4 1037 case 1:
89bddce5 1038
1da177e4
LT
1039 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1040 break;
1041 goto call_kill;
1042 case 2:
1043 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1044 break;
1045 /* fall through */
1046 case 0:
89bddce5 1047call_kill:
1da177e4
LT
1048 __kill_fasync(sock->fasync_list, SIGIO, band);
1049 break;
1050 case 3:
1051 __kill_fasync(sock->fasync_list, SIGURG, band);
1052 }
1053 return 0;
1054}
1055
89bddce5
SH
1056static int __sock_create(int family, int type, int protocol,
1057 struct socket **res, int kern)
1da177e4
LT
1058{
1059 int err;
1060 struct socket *sock;
55737fda 1061 const struct net_proto_family *pf;
1da177e4
LT
1062
1063 /*
89bddce5 1064 * Check protocol is in range
1da177e4
LT
1065 */
1066 if (family < 0 || family >= NPROTO)
1067 return -EAFNOSUPPORT;
1068 if (type < 0 || type >= SOCK_MAX)
1069 return -EINVAL;
1070
1071 /* Compatibility.
1072
1073 This uglymoron is moved from INET layer to here to avoid
1074 deadlock in module load.
1075 */
1076 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1077 static int warned;
1da177e4
LT
1078 if (!warned) {
1079 warned = 1;
89bddce5
SH
1080 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1081 current->comm);
1da177e4
LT
1082 }
1083 family = PF_PACKET;
1084 }
1085
1086 err = security_socket_create(family, type, protocol, kern);
1087 if (err)
1088 return err;
89bddce5 1089
55737fda
SH
1090 /*
1091 * Allocate the socket and allow the family to set things up. if
1092 * the protocol is 0, the family is instructed to select an appropriate
1093 * default.
1094 */
1095 sock = sock_alloc();
1096 if (!sock) {
1097 if (net_ratelimit())
1098 printk(KERN_WARNING "socket: no more sockets\n");
1099 return -ENFILE; /* Not exactly a match, but its the
1100 closest posix thing */
1101 }
1102
1103 sock->type = type;
1104
1da177e4 1105#if defined(CONFIG_KMOD)
89bddce5
SH
1106 /* Attempt to load a protocol module if the find failed.
1107 *
1108 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1109 * requested real, full-featured networking support upon configuration.
1110 * Otherwise module support will break!
1111 */
55737fda 1112 if (net_families[family] == NULL)
89bddce5 1113 request_module("net-pf-%d", family);
1da177e4
LT
1114#endif
1115
55737fda
SH
1116 rcu_read_lock();
1117 pf = rcu_dereference(net_families[family]);
1118 err = -EAFNOSUPPORT;
1119 if (!pf)
1120 goto out_release;
1da177e4
LT
1121
1122 /*
1123 * We will call the ->create function, that possibly is in a loadable
1124 * module, so we have to bump that loadable module refcnt first.
1125 */
55737fda 1126 if (!try_module_get(pf->owner))
1da177e4
LT
1127 goto out_release;
1128
55737fda
SH
1129 /* Now protected by module ref count */
1130 rcu_read_unlock();
1131
1132 err = pf->create(sock, protocol);
1133 if (err < 0)
1da177e4 1134 goto out_module_put;
a79af59e 1135
1da177e4
LT
1136 /*
1137 * Now to bump the refcnt of the [loadable] module that owns this
1138 * socket at sock_release time we decrement its refcnt.
1139 */
55737fda
SH
1140 if (!try_module_get(sock->ops->owner))
1141 goto out_module_busy;
1142
1da177e4
LT
1143 /*
1144 * Now that we're done with the ->create function, the [loadable]
1145 * module can have its refcnt decremented
1146 */
55737fda 1147 module_put(pf->owner);
7420ed23
VY
1148 err = security_socket_post_create(sock, family, type, protocol, kern);
1149 if (err)
1150 goto out_release;
55737fda 1151 *res = sock;
1da177e4 1152
55737fda
SH
1153 return 0;
1154
1155out_module_busy:
1156 err = -EAFNOSUPPORT;
1da177e4 1157out_module_put:
55737fda
SH
1158 sock->ops = NULL;
1159 module_put(pf->owner);
1160out_sock_release:
1da177e4 1161 sock_release(sock);
55737fda
SH
1162 return err;
1163
1164out_release:
1165 rcu_read_unlock();
1166 goto out_sock_release;
1da177e4
LT
1167}
1168
/* Create a socket via __sock_create() with the 'kern' argument set to 0. */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	int rc = __sock_create(family, type, protocol, res, 0);

	return rc;
}
1173
/* Create a socket via __sock_create() with the 'kern' argument set to 1. */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	int rc = __sock_create(family, type, protocol, res, 1);

	return rc;
}
1178
1179asmlinkage long sys_socket(int family, int type, int protocol)
1180{
1181 int retval;
1182 struct socket *sock;
1183
1184 retval = sock_create(family, type, protocol, &sock);
1185 if (retval < 0)
1186 goto out;
1187
1188 retval = sock_map_fd(sock);
1189 if (retval < 0)
1190 goto out_release;
1191
1192out:
1193 /* It may be already another descriptor 8) Not kernel problem. */
1194 return retval;
1195
1196out_release:
1197 sock_release(sock);
1198 return retval;
1199}
1200
1201/*
1202 * Create a pair of connected sockets.
1203 */
1204
89bddce5
SH
1205asmlinkage long sys_socketpair(int family, int type, int protocol,
1206 int __user *usockvec)
1da177e4
LT
1207{
1208 struct socket *sock1, *sock2;
1209 int fd1, fd2, err;
1210
1211 /*
1212 * Obtain the first socket and check if the underlying protocol
1213 * supports the socketpair call.
1214 */
1215
1216 err = sock_create(family, type, protocol, &sock1);
1217 if (err < 0)
1218 goto out;
1219
1220 err = sock_create(family, type, protocol, &sock2);
1221 if (err < 0)
1222 goto out_release_1;
1223
1224 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1225 if (err < 0)
1da177e4
LT
1226 goto out_release_both;
1227
1228 fd1 = fd2 = -1;
1229
1230 err = sock_map_fd(sock1);
1231 if (err < 0)
1232 goto out_release_both;
1233 fd1 = err;
1234
1235 err = sock_map_fd(sock2);
1236 if (err < 0)
1237 goto out_close_1;
1238 fd2 = err;
1239
1240 /* fd1 and fd2 may be already another descriptors.
1241 * Not kernel problem.
1242 */
1243
89bddce5 1244 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1245 if (!err)
1246 err = put_user(fd2, &usockvec[1]);
1247 if (!err)
1248 return 0;
1249
1250 sys_close(fd2);
1251 sys_close(fd1);
1252 return err;
1253
1254out_close_1:
89bddce5 1255 sock_release(sock2);
1da177e4
LT
1256 sys_close(fd1);
1257 return err;
1258
1259out_release_both:
89bddce5 1260 sock_release(sock2);
1da177e4 1261out_release_1:
89bddce5 1262 sock_release(sock1);
1da177e4
LT
1263out:
1264 return err;
1265}
1266
1da177e4
LT
1267/*
1268 * Bind a name to a socket. Nothing much to do here since it's
1269 * the protocol's responsibility to handle the local address.
1270 *
1271 * We move the socket address to kernel space before we call
1272 * the protocol layer (having also checked the address is ok).
1273 */
1274
1275asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1276{
1277 struct socket *sock;
1278 char address[MAX_SOCK_ADDR];
6cb153ca 1279 int err, fput_needed;
1da177e4 1280
89bddce5
SH
1281 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1282 if(sock) {
1283 err = move_addr_to_kernel(umyaddr, addrlen, address);
1284 if (err >= 0) {
1285 err = security_socket_bind(sock,
1286 (struct sockaddr *)address,
1287 addrlen);
6cb153ca
BL
1288 if (!err)
1289 err = sock->ops->bind(sock,
89bddce5
SH
1290 (struct sockaddr *)
1291 address, addrlen);
1da177e4 1292 }
6cb153ca 1293 fput_light(sock->file, fput_needed);
89bddce5 1294 }
1da177e4
LT
1295 return err;
1296}
1297
1da177e4
LT
1298/*
1299 * Perform a listen. Basically, we allow the protocol to do anything
1300 * necessary for a listen, and if that works, we mark the socket as
1301 * ready for listening.
1302 */
1303
7a42c217 1304int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1305
1306asmlinkage long sys_listen(int fd, int backlog)
1307{
1308 struct socket *sock;
6cb153ca 1309 int err, fput_needed;
89bddce5
SH
1310
1311 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1312 if (sock) {
1313 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1314 backlog = sysctl_somaxconn;
1315
1316 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1317 if (!err)
1318 err = sock->ops->listen(sock, backlog);
1da177e4 1319
6cb153ca 1320 fput_light(sock->file, fput_needed);
1da177e4
LT
1321 }
1322 return err;
1323}
1324
1da177e4
LT
1325/*
1326 * For accept, we attempt to create a new socket, set up the link
1327 * with the client, wake up the client, then return the new
1328 * connected fd. We collect the address of the connector in kernel
1329 * space and move it to user at the very end. This is unclean because
1330 * we open the socket then return an error.
1331 *
1332 * 1003.1g adds the ability to recvmsg() to query connection pending
1333 * status to recvmsg. We need to add that support in a way thats
1334 * clean when we restucture accept also.
1335 */
1336
89bddce5
SH
1337asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1338 int __user *upeer_addrlen)
1da177e4
LT
1339{
1340 struct socket *sock, *newsock;
39d8c1b6 1341 struct file *newfile;
6cb153ca 1342 int err, len, newfd, fput_needed;
1da177e4
LT
1343 char address[MAX_SOCK_ADDR];
1344
6cb153ca 1345 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1346 if (!sock)
1347 goto out;
1348
1349 err = -ENFILE;
89bddce5 1350 if (!(newsock = sock_alloc()))
1da177e4
LT
1351 goto out_put;
1352
1353 newsock->type = sock->type;
1354 newsock->ops = sock->ops;
1355
1da177e4
LT
1356 /*
1357 * We don't need try_module_get here, as the listening socket (sock)
1358 * has the protocol module (sock->ops->owner) held.
1359 */
1360 __module_get(newsock->ops->owner);
1361
39d8c1b6
DM
1362 newfd = sock_alloc_fd(&newfile);
1363 if (unlikely(newfd < 0)) {
1364 err = newfd;
9a1875e6
DM
1365 sock_release(newsock);
1366 goto out_put;
39d8c1b6
DM
1367 }
1368
1369 err = sock_attach_fd(newsock, newfile);
1370 if (err < 0)
1371 goto out_fd;
1372
a79af59e
FF
1373 err = security_socket_accept(sock, newsock);
1374 if (err)
39d8c1b6 1375 goto out_fd;
a79af59e 1376
1da177e4
LT
1377 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1378 if (err < 0)
39d8c1b6 1379 goto out_fd;
1da177e4
LT
1380
1381 if (upeer_sockaddr) {
89bddce5
SH
1382 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1383 &len, 2) < 0) {
1da177e4 1384 err = -ECONNABORTED;
39d8c1b6 1385 goto out_fd;
1da177e4 1386 }
89bddce5
SH
1387 err = move_addr_to_user(address, len, upeer_sockaddr,
1388 upeer_addrlen);
1da177e4 1389 if (err < 0)
39d8c1b6 1390 goto out_fd;
1da177e4
LT
1391 }
1392
1393 /* File flags are not inherited via accept() unlike another OSes. */
1394
39d8c1b6
DM
1395 fd_install(newfd, newfile);
1396 err = newfd;
1da177e4
LT
1397
1398 security_socket_post_accept(sock, newsock);
1399
1400out_put:
6cb153ca 1401 fput_light(sock->file, fput_needed);
1da177e4
LT
1402out:
1403 return err;
39d8c1b6 1404out_fd:
9606a216 1405 fput(newfile);
39d8c1b6 1406 put_unused_fd(newfd);
1da177e4
LT
1407 goto out_put;
1408}
1409
1da177e4
LT
1410/*
1411 * Attempt to connect to a socket with the server address. The address
1412 * is in user space so we verify it is OK and move it to kernel space.
1413 *
1414 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1415 * break bindings
1416 *
1417 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1418 * other SEQPACKET protocols that take time to connect() as it doesn't
1419 * include the -EINPROGRESS status for such sockets.
1420 */
1421
89bddce5
SH
1422asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1423 int addrlen)
1da177e4
LT
1424{
1425 struct socket *sock;
1426 char address[MAX_SOCK_ADDR];
6cb153ca 1427 int err, fput_needed;
1da177e4 1428
6cb153ca 1429 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1430 if (!sock)
1431 goto out;
1432 err = move_addr_to_kernel(uservaddr, addrlen, address);
1433 if (err < 0)
1434 goto out_put;
1435
89bddce5
SH
1436 err =
1437 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1438 if (err)
1439 goto out_put;
1440
89bddce5 1441 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1442 sock->file->f_flags);
1443out_put:
6cb153ca 1444 fput_light(sock->file, fput_needed);
1da177e4
LT
1445out:
1446 return err;
1447}
1448
1449/*
1450 * Get the local address ('name') of a socket object. Move the obtained
1451 * name to user space.
1452 */
1453
89bddce5
SH
1454asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1455 int __user *usockaddr_len)
1da177e4
LT
1456{
1457 struct socket *sock;
1458 char address[MAX_SOCK_ADDR];
6cb153ca 1459 int len, err, fput_needed;
89bddce5 1460
6cb153ca 1461 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1462 if (!sock)
1463 goto out;
1464
1465 err = security_socket_getsockname(sock);
1466 if (err)
1467 goto out_put;
1468
1469 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1470 if (err)
1471 goto out_put;
1472 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1473
1474out_put:
6cb153ca 1475 fput_light(sock->file, fput_needed);
1da177e4
LT
1476out:
1477 return err;
1478}
1479
1480/*
1481 * Get the remote address ('name') of a socket object. Move the obtained
1482 * name to user space.
1483 */
1484
89bddce5
SH
1485asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1486 int __user *usockaddr_len)
1da177e4
LT
1487{
1488 struct socket *sock;
1489 char address[MAX_SOCK_ADDR];
6cb153ca 1490 int len, err, fput_needed;
1da177e4 1491
89bddce5
SH
1492 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1493 if (sock != NULL) {
1da177e4
LT
1494 err = security_socket_getpeername(sock);
1495 if (err) {
6cb153ca 1496 fput_light(sock->file, fput_needed);
1da177e4
LT
1497 return err;
1498 }
1499
89bddce5
SH
1500 err =
1501 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1502 1);
1da177e4 1503 if (!err)
89bddce5
SH
1504 err = move_addr_to_user(address, len, usockaddr,
1505 usockaddr_len);
6cb153ca 1506 fput_light(sock->file, fput_needed);
1da177e4
LT
1507 }
1508 return err;
1509}
1510
1511/*
1512 * Send a datagram to a given address. We move the address into kernel
1513 * space and check the user space data area is readable before invoking
1514 * the protocol.
1515 */
1516
89bddce5
SH
1517asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1518 unsigned flags, struct sockaddr __user *addr,
1519 int addr_len)
1da177e4
LT
1520{
1521 struct socket *sock;
1522 char address[MAX_SOCK_ADDR];
1523 int err;
1524 struct msghdr msg;
1525 struct iovec iov;
6cb153ca
BL
1526 int fput_needed;
1527 struct file *sock_file;
1528
1529 sock_file = fget_light(fd, &fput_needed);
1530 if (!sock_file)
1531 return -EBADF;
1532
1533 sock = sock_from_file(sock_file, &err);
1da177e4 1534 if (!sock)
6cb153ca 1535 goto out_put;
89bddce5
SH
1536 iov.iov_base = buff;
1537 iov.iov_len = len;
1538 msg.msg_name = NULL;
1539 msg.msg_iov = &iov;
1540 msg.msg_iovlen = 1;
1541 msg.msg_control = NULL;
1542 msg.msg_controllen = 0;
1543 msg.msg_namelen = 0;
6cb153ca 1544 if (addr) {
1da177e4
LT
1545 err = move_addr_to_kernel(addr, addr_len, address);
1546 if (err < 0)
1547 goto out_put;
89bddce5
SH
1548 msg.msg_name = address;
1549 msg.msg_namelen = addr_len;
1da177e4
LT
1550 }
1551 if (sock->file->f_flags & O_NONBLOCK)
1552 flags |= MSG_DONTWAIT;
1553 msg.msg_flags = flags;
1554 err = sock_sendmsg(sock, &msg, len);
1555
89bddce5 1556out_put:
6cb153ca 1557 fput_light(sock_file, fput_needed);
1da177e4
LT
1558 return err;
1559}
1560
1561/*
89bddce5 1562 * Send a datagram down a socket.
1da177e4
LT
1563 */
1564
89bddce5 1565asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1566{
1567 return sys_sendto(fd, buff, len, flags, NULL, 0);
1568}
1569
1570/*
89bddce5 1571 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1572 * sender. We verify the buffers are writable and if needed move the
1573 * sender address from kernel to user space.
1574 */
1575
89bddce5
SH
1576asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1577 unsigned flags, struct sockaddr __user *addr,
1578 int __user *addr_len)
1da177e4
LT
1579{
1580 struct socket *sock;
1581 struct iovec iov;
1582 struct msghdr msg;
1583 char address[MAX_SOCK_ADDR];
89bddce5 1584 int err, err2;
6cb153ca
BL
1585 struct file *sock_file;
1586 int fput_needed;
1587
1588 sock_file = fget_light(fd, &fput_needed);
1589 if (!sock_file)
1590 return -EBADF;
1da177e4 1591
6cb153ca 1592 sock = sock_from_file(sock_file, &err);
1da177e4
LT
1593 if (!sock)
1594 goto out;
1595
89bddce5
SH
1596 msg.msg_control = NULL;
1597 msg.msg_controllen = 0;
1598 msg.msg_iovlen = 1;
1599 msg.msg_iov = &iov;
1600 iov.iov_len = size;
1601 iov.iov_base = ubuf;
1602 msg.msg_name = address;
1603 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1604 if (sock->file->f_flags & O_NONBLOCK)
1605 flags |= MSG_DONTWAIT;
89bddce5 1606 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1607
89bddce5
SH
1608 if (err >= 0 && addr != NULL) {
1609 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1610 if (err2 < 0)
1611 err = err2;
1da177e4 1612 }
1da177e4 1613out:
6cb153ca 1614 fput_light(sock_file, fput_needed);
1da177e4
LT
1615 return err;
1616}
1617
1618/*
89bddce5 1619 * Receive a datagram from a socket.
1da177e4
LT
1620 */
1621
89bddce5
SH
1622asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1623 unsigned flags)
1da177e4
LT
1624{
1625 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1626}
1627
1628/*
1629 * Set a socket option. Because we don't know the option lengths we have
1630 * to pass the user mode parameter for the protocols to sort out.
1631 */
1632
89bddce5
SH
1633asmlinkage long sys_setsockopt(int fd, int level, int optname,
1634 char __user *optval, int optlen)
1da177e4 1635{
6cb153ca 1636 int err, fput_needed;
1da177e4
LT
1637 struct socket *sock;
1638
1639 if (optlen < 0)
1640 return -EINVAL;
89bddce5
SH
1641
1642 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1643 if (sock != NULL) {
1644 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1645 if (err)
1646 goto out_put;
1da177e4
LT
1647
1648 if (level == SOL_SOCKET)
89bddce5
SH
1649 err =
1650 sock_setsockopt(sock, level, optname, optval,
1651 optlen);
1da177e4 1652 else
89bddce5
SH
1653 err =
1654 sock->ops->setsockopt(sock, level, optname, optval,
1655 optlen);
6cb153ca
BL
1656out_put:
1657 fput_light(sock->file, fput_needed);
1da177e4
LT
1658 }
1659 return err;
1660}
1661
1662/*
1663 * Get a socket option. Because we don't know the option lengths we have
1664 * to pass a user mode parameter for the protocols to sort out.
1665 */
1666
89bddce5
SH
1667asmlinkage long sys_getsockopt(int fd, int level, int optname,
1668 char __user *optval, int __user *optlen)
1da177e4 1669{
6cb153ca 1670 int err, fput_needed;
1da177e4
LT
1671 struct socket *sock;
1672
89bddce5
SH
1673 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1674 if (sock != NULL) {
6cb153ca
BL
1675 err = security_socket_getsockopt(sock, level, optname);
1676 if (err)
1677 goto out_put;
1da177e4
LT
1678
1679 if (level == SOL_SOCKET)
89bddce5
SH
1680 err =
1681 sock_getsockopt(sock, level, optname, optval,
1682 optlen);
1da177e4 1683 else
89bddce5
SH
1684 err =
1685 sock->ops->getsockopt(sock, level, optname, optval,
1686 optlen);
6cb153ca
BL
1687out_put:
1688 fput_light(sock->file, fput_needed);
1da177e4
LT
1689 }
1690 return err;
1691}
1692
1da177e4
LT
1693/*
1694 * Shutdown a socket.
1695 */
1696
1697asmlinkage long sys_shutdown(int fd, int how)
1698{
6cb153ca 1699 int err, fput_needed;
1da177e4
LT
1700 struct socket *sock;
1701
89bddce5
SH
1702 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1703 if (sock != NULL) {
1da177e4 1704 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1705 if (!err)
1706 err = sock->ops->shutdown(sock, how);
1707 fput_light(sock->file, fput_needed);
1da177e4
LT
1708 }
1709 return err;
1710}
1711
89bddce5 1712/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1713 * fields which are the same type (int / unsigned) on our platforms.
1714 */
1715#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
1716#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1717#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1718
1da177e4
LT
1719/*
1720 * BSD sendmsg interface
1721 */
1722
1723asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1724{
89bddce5
SH
1725 struct compat_msghdr __user *msg_compat =
1726 (struct compat_msghdr __user *)msg;
1da177e4
LT
1727 struct socket *sock;
1728 char address[MAX_SOCK_ADDR];
1729 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1730 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1731 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1732 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1733 unsigned char *ctl_buf = ctl;
1734 struct msghdr msg_sys;
1735 int err, ctl_len, iov_size, total_len;
6cb153ca 1736 int fput_needed;
89bddce5 1737
1da177e4
LT
1738 err = -EFAULT;
1739 if (MSG_CMSG_COMPAT & flags) {
1740 if (get_compat_msghdr(&msg_sys, msg_compat))
1741 return -EFAULT;
89bddce5
SH
1742 }
1743 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1744 return -EFAULT;
1745
6cb153ca 1746 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1747 if (!sock)
1da177e4
LT
1748 goto out;
1749
1750 /* do not move before msg_sys is valid */
1751 err = -EMSGSIZE;
1752 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1753 goto out_put;
1754
89bddce5 1755 /* Check whether to allocate the iovec area */
1da177e4
LT
1756 err = -ENOMEM;
1757 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1758 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1759 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1760 if (!iov)
1761 goto out_put;
1762 }
1763
1764 /* This will also move the address data into kernel space */
1765 if (MSG_CMSG_COMPAT & flags) {
1766 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1767 } else
1768 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1769 if (err < 0)
1da177e4
LT
1770 goto out_freeiov;
1771 total_len = err;
1772
1773 err = -ENOBUFS;
1774
1775 if (msg_sys.msg_controllen > INT_MAX)
1776 goto out_freeiov;
89bddce5 1777 ctl_len = msg_sys.msg_controllen;
1da177e4 1778 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1779 err =
1780 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1781 sizeof(ctl));
1da177e4
LT
1782 if (err)
1783 goto out_freeiov;
1784 ctl_buf = msg_sys.msg_control;
8920e8f9 1785 ctl_len = msg_sys.msg_controllen;
1da177e4 1786 } else if (ctl_len) {
89bddce5 1787 if (ctl_len > sizeof(ctl)) {
1da177e4 1788 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1789 if (ctl_buf == NULL)
1da177e4
LT
1790 goto out_freeiov;
1791 }
1792 err = -EFAULT;
1793 /*
1794 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1795 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1796 * checking falls down on this.
1797 */
89bddce5
SH
1798 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1799 ctl_len))
1da177e4
LT
1800 goto out_freectl;
1801 msg_sys.msg_control = ctl_buf;
1802 }
1803 msg_sys.msg_flags = flags;
1804
1805 if (sock->file->f_flags & O_NONBLOCK)
1806 msg_sys.msg_flags |= MSG_DONTWAIT;
1807 err = sock_sendmsg(sock, &msg_sys, total_len);
1808
1809out_freectl:
89bddce5 1810 if (ctl_buf != ctl)
1da177e4
LT
1811 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1812out_freeiov:
1813 if (iov != iovstack)
1814 sock_kfree_s(sock->sk, iov, iov_size);
1815out_put:
6cb153ca 1816 fput_light(sock->file, fput_needed);
89bddce5 1817out:
1da177e4
LT
1818 return err;
1819}
1820
1821/*
1822 * BSD recvmsg interface
1823 */
1824
89bddce5
SH
1825asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1826 unsigned int flags)
1da177e4 1827{
89bddce5
SH
1828 struct compat_msghdr __user *msg_compat =
1829 (struct compat_msghdr __user *)msg;
1da177e4
LT
1830 struct socket *sock;
1831 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1832 struct iovec *iov = iovstack;
1da177e4
LT
1833 struct msghdr msg_sys;
1834 unsigned long cmsg_ptr;
1835 int err, iov_size, total_len, len;
6cb153ca 1836 int fput_needed;
1da177e4
LT
1837
1838 /* kernel mode address */
1839 char addr[MAX_SOCK_ADDR];
1840
1841 /* user mode address pointers */
1842 struct sockaddr __user *uaddr;
1843 int __user *uaddr_len;
89bddce5 1844
1da177e4
LT
1845 if (MSG_CMSG_COMPAT & flags) {
1846 if (get_compat_msghdr(&msg_sys, msg_compat))
1847 return -EFAULT;
89bddce5
SH
1848 }
1849 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1850 return -EFAULT;
1da177e4 1851
6cb153ca 1852 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1853 if (!sock)
1854 goto out;
1855
1856 err = -EMSGSIZE;
1857 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1858 goto out_put;
89bddce5
SH
1859
1860 /* Check whether to allocate the iovec area */
1da177e4
LT
1861 err = -ENOMEM;
1862 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1863 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1864 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1865 if (!iov)
1866 goto out_put;
1867 }
1868
1869 /*
89bddce5
SH
1870 * Save the user-mode address (verify_iovec will change the
1871 * kernel msghdr to use the kernel address space)
1da177e4 1872 */
89bddce5
SH
1873
1874 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1875 uaddr_len = COMPAT_NAMELEN(msg);
1876 if (MSG_CMSG_COMPAT & flags) {
1877 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1878 } else
1879 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1880 if (err < 0)
1881 goto out_freeiov;
89bddce5 1882 total_len = err;
1da177e4
LT
1883
1884 cmsg_ptr = (unsigned long)msg_sys.msg_control;
1885 msg_sys.msg_flags = 0;
1886 if (MSG_CMSG_COMPAT & flags)
1887 msg_sys.msg_flags = MSG_CMSG_COMPAT;
89bddce5 1888
1da177e4
LT
1889 if (sock->file->f_flags & O_NONBLOCK)
1890 flags |= MSG_DONTWAIT;
1891 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1892 if (err < 0)
1893 goto out_freeiov;
1894 len = err;
1895
1896 if (uaddr != NULL) {
89bddce5
SH
1897 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1898 uaddr_len);
1da177e4
LT
1899 if (err < 0)
1900 goto out_freeiov;
1901 }
37f7f421
DM
1902 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1903 COMPAT_FLAGS(msg));
1da177e4
LT
1904 if (err)
1905 goto out_freeiov;
1906 if (MSG_CMSG_COMPAT & flags)
89bddce5 1907 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1908 &msg_compat->msg_controllen);
1909 else
89bddce5 1910 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1911 &msg->msg_controllen);
1912 if (err)
1913 goto out_freeiov;
1914 err = len;
1915
1916out_freeiov:
1917 if (iov != iovstack)
1918 sock_kfree_s(sock->sk, iov, iov_size);
1919out_put:
6cb153ca 1920 fput_light(sock->file, fput_needed);
1da177e4
LT
1921out:
1922 return err;
1923}
1924
1925#ifdef __ARCH_WANT_SYS_SOCKETCALL
1926
/*
 * Argument list sizes for sys_socketcall, in bytes.  The table is
 * indexed by the 'call' number; AL(n) is the size of n unsigned longs.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0),	/*  0 */
	AL(3),	/*  1 */
	AL(3),	/*  2 */
	AL(3),	/*  3 */
	AL(2),	/*  4 */
	AL(3),	/*  5 */
	AL(3),	/*  6 */
	AL(3),	/*  7 */
	AL(4),	/*  8 */
	AL(4),	/*  9 */
	AL(4),	/* 10 */
	AL(6),	/* 11 */
	AL(6),	/* 12 */
	AL(2),	/* 13 */
	AL(5),	/* 14 */
	AL(5),	/* 15 */
	AL(3),	/* 16 */
	AL(3),	/* 17 */
};

#undef AL
1936
1937/*
89bddce5 1938 * System call vectors.
1da177e4
LT
1939 *
1940 * Argument checking cleaned up. Saved 20% in size.
1941 * This function doesn't need to set the kernel lock because
89bddce5 1942 * it is set by the callees.
1da177e4
LT
1943 */
1944
1945asmlinkage long sys_socketcall(int call, unsigned long __user *args)
1946{
1947 unsigned long a[6];
89bddce5 1948 unsigned long a0, a1;
1da177e4
LT
1949 int err;
1950
89bddce5 1951 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
1952 return -EINVAL;
1953
1954 /* copy_from_user should be SMP safe. */
1955 if (copy_from_user(a, args, nargs[call]))
1956 return -EFAULT;
3ec3b2fb 1957
89bddce5 1958 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
1959 if (err)
1960 return err;
1961
89bddce5
SH
1962 a0 = a[0];
1963 a1 = a[1];
1964
1965 switch (call) {
1966 case SYS_SOCKET:
1967 err = sys_socket(a0, a1, a[2]);
1968 break;
1969 case SYS_BIND:
1970 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
1971 break;
1972 case SYS_CONNECT:
1973 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
1974 break;
1975 case SYS_LISTEN:
1976 err = sys_listen(a0, a1);
1977 break;
1978 case SYS_ACCEPT:
1979 err =
1980 sys_accept(a0, (struct sockaddr __user *)a1,
1981 (int __user *)a[2]);
1982 break;
1983 case SYS_GETSOCKNAME:
1984 err =
1985 sys_getsockname(a0, (struct sockaddr __user *)a1,
1986 (int __user *)a[2]);
1987 break;
1988 case SYS_GETPEERNAME:
1989 err =
1990 sys_getpeername(a0, (struct sockaddr __user *)a1,
1991 (int __user *)a[2]);
1992 break;
1993 case SYS_SOCKETPAIR:
1994 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
1995 break;
1996 case SYS_SEND:
1997 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
1998 break;
1999 case SYS_SENDTO:
2000 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2001 (struct sockaddr __user *)a[4], a[5]);
2002 break;
2003 case SYS_RECV:
2004 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2005 break;
2006 case SYS_RECVFROM:
2007 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2008 (struct sockaddr __user *)a[4],
2009 (int __user *)a[5]);
2010 break;
2011 case SYS_SHUTDOWN:
2012 err = sys_shutdown(a0, a1);
2013 break;
2014 case SYS_SETSOCKOPT:
2015 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2016 break;
2017 case SYS_GETSOCKOPT:
2018 err =
2019 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2020 (int __user *)a[4]);
2021 break;
2022 case SYS_SENDMSG:
2023 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2024 break;
2025 case SYS_RECVMSG:
2026 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2027 break;
2028 default:
2029 err = -EINVAL;
2030 break;
1da177e4
LT
2031 }
2032 return err;
2033}
2034
89bddce5 2035#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2036
55737fda
SH
2037/**
2038 * sock_register - add a socket protocol handler
2039 * @ops: description of protocol
2040 *
1da177e4
LT
2041 * This function is called by a protocol handler that wants to
2042 * advertise its address family, and have it linked into the
55737fda
SH
2043 * socket interface. The value ops->family coresponds to the
2044 * socket system call protocol family.
1da177e4 2045 */
f0fd27d4 2046int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2047{
2048 int err;
2049
2050 if (ops->family >= NPROTO) {
89bddce5
SH
2051 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2052 NPROTO);
1da177e4
LT
2053 return -ENOBUFS;
2054 }
55737fda
SH
2055
2056 spin_lock(&net_family_lock);
2057 if (net_families[ops->family])
2058 err = -EEXIST;
2059 else {
89bddce5 2060 net_families[ops->family] = ops;
1da177e4
LT
2061 err = 0;
2062 }
55737fda
SH
2063 spin_unlock(&net_family_lock);
2064
89bddce5 2065 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2066 return err;
2067}
2068
55737fda
SH
2069/**
2070 * sock_unregister - remove a protocol handler
2071 * @family: protocol family to remove
2072 *
1da177e4
LT
2073 * This function is called by a protocol handler that wants to
2074 * remove its address family, and have it unlinked from the
55737fda
SH
2075 * new socket creation.
2076 *
2077 * If protocol handler is a module, then it can use module reference
2078 * counts to protect against new references. If protocol handler is not
2079 * a module then it needs to provide its own protection in
2080 * the ops->create routine.
1da177e4 2081 */
f0fd27d4 2082void sock_unregister(int family)
1da177e4 2083{
f0fd27d4 2084 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2085
55737fda 2086 spin_lock(&net_family_lock);
89bddce5 2087 net_families[family] = NULL;
55737fda
SH
2088 spin_unlock(&net_family_lock);
2089
2090 synchronize_rcu();
2091
89bddce5 2092 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2093}
2094
77d76ea3 2095static int __init sock_init(void)
1da177e4
LT
2096{
2097 /*
89bddce5 2098 * Initialize sock SLAB cache.
1da177e4 2099 */
89bddce5 2100
1da177e4
LT
2101 sk_init();
2102
1da177e4 2103 /*
89bddce5 2104 * Initialize skbuff SLAB cache
1da177e4
LT
2105 */
2106 skb_init();
1da177e4
LT
2107
2108 /*
89bddce5 2109 * Initialize the protocols module.
1da177e4
LT
2110 */
2111
2112 init_inodecache();
2113 register_filesystem(&sock_fs_type);
2114 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2115
2116 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2117 */
2118
2119#ifdef CONFIG_NETFILTER
2120 netfilter_init();
2121#endif
cbeb321a
DM
2122
2123 return 0;
1da177e4
LT
2124}
2125
77d76ea3
AK
2126core_initcall(sock_init); /* early initcall */
2127
1da177e4
LT
2128#ifdef CONFIG_PROC_FS
2129void socket_seq_show(struct seq_file *seq)
2130{
2131 int cpu;
2132 int counter = 0;
2133
6f912042 2134 for_each_possible_cpu(cpu)
89bddce5 2135 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2136
2137 /* It can be negative, by the way. 8) */
2138 if (counter < 0)
2139 counter = 0;
2140
2141 seq_printf(seq, "sockets: used %d\n", counter);
2142}
89bddce5 2143#endif /* CONFIG_PROC_FS */
1da177e4 2144
89bbfc95
SP
2145#ifdef CONFIG_COMPAT
2146static long compat_sock_ioctl(struct file *file, unsigned cmd,
89bddce5 2147 unsigned long arg)
89bbfc95
SP
2148{
2149 struct socket *sock = file->private_data;
2150 int ret = -ENOIOCTLCMD;
2151
2152 if (sock->ops->compat_ioctl)
2153 ret = sock->ops->compat_ioctl(sock, cmd, arg);
2154
2155 return ret;
2156}
2157#endif
2158
ac5a488e
SS
2159int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2160{
2161 return sock->ops->bind(sock, addr, addrlen);
2162}
2163
2164int kernel_listen(struct socket *sock, int backlog)
2165{
2166 return sock->ops->listen(sock, backlog);
2167}
2168
2169int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2170{
2171 struct sock *sk = sock->sk;
2172 int err;
2173
2174 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2175 newsock);
2176 if (err < 0)
2177 goto done;
2178
2179 err = sock->ops->accept(sock, *newsock, flags);
2180 if (err < 0) {
2181 sock_release(*newsock);
2182 goto done;
2183 }
2184
2185 (*newsock)->ops = sock->ops;
2186
2187done:
2188 return err;
2189}
2190
2191int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
2192 int flags)
2193{
2194 return sock->ops->connect(sock, addr, addrlen, flags);
2195}
2196
2197int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2198 int *addrlen)
2199{
2200 return sock->ops->getname(sock, addr, addrlen, 0);
2201}
2202
2203int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2204 int *addrlen)
2205{
2206 return sock->ops->getname(sock, addr, addrlen, 1);
2207}
2208
2209int kernel_getsockopt(struct socket *sock, int level, int optname,
2210 char *optval, int *optlen)
2211{
2212 mm_segment_t oldfs = get_fs();
2213 int err;
2214
2215 set_fs(KERNEL_DS);
2216 if (level == SOL_SOCKET)
2217 err = sock_getsockopt(sock, level, optname, optval, optlen);
2218 else
2219 err = sock->ops->getsockopt(sock, level, optname, optval,
2220 optlen);
2221 set_fs(oldfs);
2222 return err;
2223}
2224
2225int kernel_setsockopt(struct socket *sock, int level, int optname,
2226 char *optval, int optlen)
2227{
2228 mm_segment_t oldfs = get_fs();
2229 int err;
2230
2231 set_fs(KERNEL_DS);
2232 if (level == SOL_SOCKET)
2233 err = sock_setsockopt(sock, level, optname, optval, optlen);
2234 else
2235 err = sock->ops->setsockopt(sock, level, optname, optval,
2236 optlen);
2237 set_fs(oldfs);
2238 return err;
2239}
2240
2241int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2242 size_t size, int flags)
2243{
2244 if (sock->ops->sendpage)
2245 return sock->ops->sendpage(sock, page, offset, size, flags);
2246
2247 return sock_no_sendpage(sock, page, offset, size, flags);
2248}
2249
2250int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2251{
2252 mm_segment_t oldfs = get_fs();
2253 int err;
2254
2255 set_fs(KERNEL_DS);
2256 err = sock->ops->ioctl(sock, cmd, arg);
2257 set_fs(oldfs);
2258
2259 return err;
2260}
2261
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Core socket interface */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() helpers */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
2287EXPORT_SYMBOL(kernel_sock_ioctl);