[NET]: cleanup sock_from_file()
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1da177e4
LT
87
88#include <asm/uaccess.h>
89#include <asm/unistd.h>
90
91#include <net/compat.h>
92
93#include <net/sock.h>
94#include <linux/netfilter.h>
95
96static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
97static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
98 unsigned long nr_segs, loff_t pos);
99static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
100 unsigned long nr_segs, loff_t pos);
89bddce5 101static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
102
103static int sock_close(struct inode *inode, struct file *file);
104static unsigned int sock_poll(struct file *file,
105 struct poll_table_struct *wait);
89bddce5 106static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
107#ifdef CONFIG_COMPAT
108static long compat_sock_ioctl(struct file *file,
89bddce5 109 unsigned int cmd, unsigned long arg);
89bbfc95 110#endif
1da177e4 111static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
112static ssize_t sock_sendpage(struct file *file, struct page *page,
113 int offset, size_t size, loff_t *ppos, int more);
114
1da177e4
LT
115/*
116 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
117 * in the operation structures but are done directly via the socketcall() multiplexor.
118 */
119
120static struct file_operations socket_file_ops = {
121 .owner = THIS_MODULE,
122 .llseek = no_llseek,
123 .aio_read = sock_aio_read,
124 .aio_write = sock_aio_write,
125 .poll = sock_poll,
126 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
127#ifdef CONFIG_COMPAT
128 .compat_ioctl = compat_sock_ioctl,
129#endif
1da177e4
LT
130 .mmap = sock_mmap,
131 .open = sock_no_open, /* special open code to disallow open via /proc */
132 .release = sock_close,
133 .fasync = sock_fasync,
5274f052
JA
134 .sendpage = sock_sendpage,
135 .splice_write = generic_splice_sendpage,
1da177e4
LT
136};
137
138/*
139 * The protocol list. Each protocol is registered in here.
140 */
141
1da177e4 142static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 143static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 144
1da177e4
LT
145/*
146 * Statistics counters of the socket lists
147 */
148
149static DEFINE_PER_CPU(int, sockets_in_use) = 0;
150
151/*
89bddce5
SH
152 * Support routines.
153 * Move socket addresses back and forth across the kernel/user
154 * divide and look after the messy bits.
1da177e4
LT
155 */
156
/*
 * Largest socket address that may be moved across the user/kernel
 * boundary:
 *	108 for Unix domain,
 *	16 for IP, 16 for IPX,
 *	24 for IPv6,
 *	about 80 for AX.25.
 * It must be at least one bigger than the AF_UNIX size (see
 * net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 165
1da177e4
LT
166/**
167 * move_addr_to_kernel - copy a socket address into kernel space
168 * @uaddr: Address in user space
169 * @kaddr: Address in kernel space
170 * @ulen: Length in user space
171 *
172 * The address is copied into kernel space. If the provided address is
173 * too long an error code of -EINVAL is returned. If the copy gives
174 * invalid addresses -EFAULT is returned. On a success 0 is returned.
175 */
176
177int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
178{
89bddce5 179 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 180 return -EINVAL;
89bddce5 181 if (ulen == 0)
1da177e4 182 return 0;
89bddce5 183 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 184 return -EFAULT;
3ec3b2fb 185 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
186}
187
188/**
189 * move_addr_to_user - copy an address to user space
190 * @kaddr: kernel space address
191 * @klen: length of address in kernel
192 * @uaddr: user space address
193 * @ulen: pointer to user length field
194 *
195 * The value pointed to by ulen on entry is the buffer length available.
196 * This is overwritten with the buffer space used. -EINVAL is returned
197 * if an overlong buffer is specified or a negative buffer size. -EFAULT
198 * is returned if either the buffer or the length field are not
199 * accessible.
200 * After copying the data up to the limit the user specifies, the true
201 * length of the data is written over the length limit the user
202 * specified. Zero is returned for a success.
203 */
89bddce5
SH
204
205int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
206 int __user *ulen)
1da177e4
LT
207{
208 int err;
209 int len;
210
89bddce5
SH
211 err = get_user(len, ulen);
212 if (err)
1da177e4 213 return err;
89bddce5
SH
214 if (len > klen)
215 len = klen;
216 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 217 return -EINVAL;
89bddce5 218 if (len) {
d6fe3945
SG
219 if (audit_sockaddr(klen, kaddr))
220 return -ENOMEM;
89bddce5 221 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
222 return -EFAULT;
223 }
224 /*
89bddce5
SH
225 * "fromlen shall refer to the value before truncation.."
226 * 1003.1g
1da177e4
LT
227 */
228 return __put_user(klen, ulen);
229}
230
231#define SOCKFS_MAGIC 0x534F434B
232
e18b890b 233static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
234
235static struct inode *sock_alloc_inode(struct super_block *sb)
236{
237 struct socket_alloc *ei;
89bddce5 238
e94b1766 239 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
240 if (!ei)
241 return NULL;
242 init_waitqueue_head(&ei->socket.wait);
89bddce5 243
1da177e4
LT
244 ei->socket.fasync_list = NULL;
245 ei->socket.state = SS_UNCONNECTED;
246 ei->socket.flags = 0;
247 ei->socket.ops = NULL;
248 ei->socket.sk = NULL;
249 ei->socket.file = NULL;
1da177e4
LT
250
251 return &ei->vfs_inode;
252}
253
254static void sock_destroy_inode(struct inode *inode)
255{
256 kmem_cache_free(sock_inode_cachep,
257 container_of(inode, struct socket_alloc, vfs_inode));
258}
259
e18b890b 260static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
1da177e4 261{
89bddce5 262 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 263
89bddce5
SH
264 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
265 == SLAB_CTOR_CONSTRUCTOR)
1da177e4
LT
266 inode_init_once(&ei->vfs_inode);
267}
89bddce5 268
1da177e4
LT
269static int init_inodecache(void)
270{
271 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
272 sizeof(struct socket_alloc),
273 0,
274 (SLAB_HWCACHE_ALIGN |
275 SLAB_RECLAIM_ACCOUNT |
276 SLAB_MEM_SPREAD),
277 init_once,
278 NULL);
1da177e4
LT
279 if (sock_inode_cachep == NULL)
280 return -ENOMEM;
281 return 0;
282}
283
284static struct super_operations sockfs_ops = {
285 .alloc_inode = sock_alloc_inode,
286 .destroy_inode =sock_destroy_inode,
287 .statfs = simple_statfs,
288};
289
454e2398 290static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
291 int flags, const char *dev_name, void *data,
292 struct vfsmount *mnt)
1da177e4 293{
454e2398
DH
294 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
295 mnt);
1da177e4
LT
296}
297
ba89966c 298static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
299
300static struct file_system_type sock_fs_type = {
301 .name = "sockfs",
302 .get_sb = sockfs_get_sb,
303 .kill_sb = kill_anon_super,
304};
89bddce5 305
1da177e4
LT
306static int sockfs_delete_dentry(struct dentry *dentry)
307{
304e61e6
ED
308 /*
309 * At creation time, we pretended this dentry was hashed
310 * (by clearing DCACHE_UNHASHED bit in d_flags)
311 * At delete time, we restore the truth : not hashed.
312 * (so that dput() can proceed correctly)
313 */
314 dentry->d_flags |= DCACHE_UNHASHED;
315 return 0;
1da177e4
LT
316}
317static struct dentry_operations sockfs_dentry_operations = {
89bddce5 318 .d_delete = sockfs_delete_dentry,
1da177e4
LT
319};
320
321/*
322 * Obtains the first available file descriptor and sets it up for use.
323 *
39d8c1b6
DM
324 * These functions create file structures and map them to fd space
325 * of the current process. On success it returns file descriptor
1da177e4
LT
326 * and file struct implicitly stored in sock->file.
327 * Note that another thread may close file descriptor before we return
328 * from this function. We use the fact that now we do not refer
329 * to socket after mapping. If one day we will need it, this
330 * function will increment ref. count on file by 1.
331 *
332 * In any case returned fd MAY BE not valid!
333 * This race condition is unavoidable
334 * with shared fd spaces, we cannot solve it inside kernel,
335 * but we take care of internal coherence yet.
336 */
337
39d8c1b6 338static int sock_alloc_fd(struct file **filep)
1da177e4
LT
339{
340 int fd;
1da177e4
LT
341
342 fd = get_unused_fd();
39d8c1b6 343 if (likely(fd >= 0)) {
1da177e4
LT
344 struct file *file = get_empty_filp();
345
39d8c1b6
DM
346 *filep = file;
347 if (unlikely(!file)) {
1da177e4 348 put_unused_fd(fd);
39d8c1b6 349 return -ENFILE;
1da177e4 350 }
39d8c1b6
DM
351 } else
352 *filep = NULL;
353 return fd;
354}
1da177e4 355
39d8c1b6
DM
356static int sock_attach_fd(struct socket *sock, struct file *file)
357{
358 struct qstr this;
359 char name[32];
360
361 this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
362 this.name = name;
304e61e6 363 this.hash = 0;
39d8c1b6 364
3126a42c
JS
365 file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
366 if (unlikely(!file->f_path.dentry))
39d8c1b6
DM
367 return -ENOMEM;
368
3126a42c 369 file->f_path.dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
370 /*
371 * We dont want to push this dentry into global dentry hash table.
372 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
373 * This permits a working /proc/$pid/fd/XXX on sockets
374 */
3126a42c
JS
375 file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
376 d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
377 file->f_path.mnt = mntget(sock_mnt);
378 file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
39d8c1b6
DM
379
380 sock->file = file;
381 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
382 file->f_mode = FMODE_READ | FMODE_WRITE;
383 file->f_flags = O_RDWR;
384 file->f_pos = 0;
385 file->private_data = sock;
1da177e4 386
39d8c1b6
DM
387 return 0;
388}
389
/*
 * Allocate a descriptor and file for @sock, attach them and publish the
 * descriptor.  Returns the descriptor, or a negative errno if allocation
 * or attachment fails; nothing is installed on failure.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (likely(err >= 0)) {
		fd_install(fd, newfile);
		return fd;
	}

	/* Attachment failed: drop the file, then the reserved descriptor. */
	put_filp(newfile);
	put_unused_fd(fd);
	return err;
}
407
6cb153ca
BL
408static struct socket *sock_from_file(struct file *file, int *err)
409{
6cb153ca
BL
410 if (file->f_op == &socket_file_ops)
411 return file->private_data; /* set in sock_map_fd */
412
23bb80d2
ED
413 *err = -ENOTSOCK;
414 return NULL;
6cb153ca
BL
415}
416
1da177e4
LT
417/**
418 * sockfd_lookup - Go from a file number to its socket slot
419 * @fd: file handle
420 * @err: pointer to an error code return
421 *
422 * The file handle passed in is locked and the socket it is bound
423 * to is returned. If an error occurs the err pointer is overwritten
424 * with a negative errno code and NULL is returned. The function checks
425 * for both invalid handles and passing a handle which is not a socket.
426 *
427 * On a success the socket object pointer is returned.
428 */
429
430struct socket *sockfd_lookup(int fd, int *err)
431{
432 struct file *file;
1da177e4
LT
433 struct socket *sock;
434
89bddce5
SH
435 file = fget(fd);
436 if (!file) {
1da177e4
LT
437 *err = -EBADF;
438 return NULL;
439 }
89bddce5 440
6cb153ca
BL
441 sock = sock_from_file(file, err);
442 if (!sock)
1da177e4 443 fput(file);
6cb153ca
BL
444 return sock;
445}
1da177e4 446
6cb153ca
BL
447static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
448{
449 struct file *file;
450 struct socket *sock;
451
3672558c 452 *err = -EBADF;
6cb153ca
BL
453 file = fget_light(fd, fput_needed);
454 if (file) {
455 sock = sock_from_file(file, err);
456 if (sock)
457 return sock;
458 fput_light(file, *fput_needed);
1da177e4 459 }
6cb153ca 460 return NULL;
1da177e4
LT
461}
462
463/**
464 * sock_alloc - allocate a socket
89bddce5 465 *
1da177e4
LT
466 * Allocate a new inode and socket object. The two are bound together
467 * and initialised. The socket is then returned. If we are out of inodes
468 * NULL is returned.
469 */
470
471static struct socket *sock_alloc(void)
472{
89bddce5
SH
473 struct inode *inode;
474 struct socket *sock;
1da177e4
LT
475
476 inode = new_inode(sock_mnt->mnt_sb);
477 if (!inode)
478 return NULL;
479
480 sock = SOCKET_I(inode);
481
89bddce5 482 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
483 inode->i_uid = current->fsuid;
484 inode->i_gid = current->fsgid;
485
486 get_cpu_var(sockets_in_use)++;
487 put_cpu_var(sockets_in_use);
488 return sock;
489}
490
491/*
492 * In theory you can't get an open on this inode, but /proc provides
493 * a back door. Remember to keep it shut otherwise you'll let the
494 * creepy crawlies in.
495 */
89bddce5 496
1da177e4
LT
497static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
498{
499 return -ENXIO;
500}
501
4b6f5d20 502const struct file_operations bad_sock_fops = {
1da177e4
LT
503 .owner = THIS_MODULE,
504 .open = sock_no_open,
505};
506
507/**
508 * sock_release - close a socket
509 * @sock: socket to close
510 *
511 * The socket is released from the protocol stack if it has a release
512 * callback, and the inode is then released if the socket is bound to
89bddce5 513 * an inode not a file.
1da177e4 514 */
89bddce5 515
1da177e4
LT
516void sock_release(struct socket *sock)
517{
518 if (sock->ops) {
519 struct module *owner = sock->ops->owner;
520
521 sock->ops->release(sock);
522 sock->ops = NULL;
523 module_put(owner);
524 }
525
526 if (sock->fasync_list)
527 printk(KERN_ERR "sock_release: fasync list not empty!\n");
528
529 get_cpu_var(sockets_in_use)--;
530 put_cpu_var(sockets_in_use);
531 if (!sock->file) {
532 iput(SOCK_INODE(sock));
533 return;
534 }
89bddce5 535 sock->file = NULL;
1da177e4
LT
536}
537
89bddce5 538static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
539 struct msghdr *msg, size_t size)
540{
541 struct sock_iocb *si = kiocb_to_siocb(iocb);
542 int err;
543
544 si->sock = sock;
545 si->scm = NULL;
546 si->msg = msg;
547 si->size = size;
548
549 err = security_socket_sendmsg(sock, msg, size);
550 if (err)
551 return err;
552
553 return sock->ops->sendmsg(iocb, sock, msg, size);
554}
555
556int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
557{
558 struct kiocb iocb;
559 struct sock_iocb siocb;
560 int ret;
561
562 init_sync_kiocb(&iocb, NULL);
563 iocb.private = &siocb;
564 ret = __sock_sendmsg(&iocb, sock, msg, size);
565 if (-EIOCBQUEUED == ret)
566 ret = wait_on_sync_kiocb(&iocb);
567 return ret;
568}
569
570int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
571 struct kvec *vec, size_t num, size_t size)
572{
573 mm_segment_t oldfs = get_fs();
574 int result;
575
576 set_fs(KERNEL_DS);
577 /*
578 * the following is safe, since for compiler definitions of kvec and
579 * iovec are identical, yielding the same in-core layout and alignment
580 */
89bddce5 581 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
582 msg->msg_iovlen = num;
583 result = sock_sendmsg(sock, msg, size);
584 set_fs(oldfs);
585 return result;
586}
587
89bddce5 588static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
589 struct msghdr *msg, size_t size, int flags)
590{
591 int err;
592 struct sock_iocb *si = kiocb_to_siocb(iocb);
593
594 si->sock = sock;
595 si->scm = NULL;
596 si->msg = msg;
597 si->size = size;
598 si->flags = flags;
599
600 err = security_socket_recvmsg(sock, msg, size, flags);
601 if (err)
602 return err;
603
604 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
605}
606
89bddce5 607int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
608 size_t size, int flags)
609{
610 struct kiocb iocb;
611 struct sock_iocb siocb;
612 int ret;
613
89bddce5 614 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
615 iocb.private = &siocb;
616 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
617 if (-EIOCBQUEUED == ret)
618 ret = wait_on_sync_kiocb(&iocb);
619 return ret;
620}
621
89bddce5
SH
622int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
623 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
624{
625 mm_segment_t oldfs = get_fs();
626 int result;
627
628 set_fs(KERNEL_DS);
629 /*
630 * the following is safe, since for compiler definitions of kvec and
631 * iovec are identical, yielding the same in-core layout and alignment
632 */
89bddce5 633 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
634 result = sock_recvmsg(sock, msg, size, flags);
635 set_fs(oldfs);
636 return result;
637}
638
639static void sock_aio_dtor(struct kiocb *iocb)
640{
641 kfree(iocb->private);
642}
643
ce1d4d3e
CH
644static ssize_t sock_sendpage(struct file *file, struct page *page,
645 int offset, size_t size, loff_t *ppos, int more)
1da177e4 646{
1da177e4
LT
647 struct socket *sock;
648 int flags;
649
ce1d4d3e
CH
650 sock = file->private_data;
651
652 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
653 if (more)
654 flags |= MSG_MORE;
655
656 return sock->ops->sendpage(sock, page, offset, size, flags);
657}
1da177e4 658
ce1d4d3e 659static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 660 struct sock_iocb *siocb)
ce1d4d3e
CH
661{
662 if (!is_sync_kiocb(iocb)) {
663 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
664 if (!siocb)
665 return NULL;
1da177e4
LT
666 iocb->ki_dtor = sock_aio_dtor;
667 }
1da177e4 668
ce1d4d3e 669 siocb->kiocb = iocb;
ce1d4d3e
CH
670 iocb->private = siocb;
671 return siocb;
1da177e4
LT
672}
673
ce1d4d3e 674static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
675 struct file *file, const struct iovec *iov,
676 unsigned long nr_segs)
ce1d4d3e
CH
677{
678 struct socket *sock = file->private_data;
679 size_t size = 0;
680 int i;
1da177e4 681
89bddce5
SH
682 for (i = 0; i < nr_segs; i++)
683 size += iov[i].iov_len;
1da177e4 684
ce1d4d3e
CH
685 msg->msg_name = NULL;
686 msg->msg_namelen = 0;
687 msg->msg_control = NULL;
688 msg->msg_controllen = 0;
89bddce5 689 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
690 msg->msg_iovlen = nr_segs;
691 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
692
693 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
694}
695
027445c3
BP
696static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
697 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
698{
699 struct sock_iocb siocb, *x;
700
1da177e4
LT
701 if (pos != 0)
702 return -ESPIPE;
027445c3
BP
703
704 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
705 return 0;
706
027445c3
BP
707
708 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
709 if (!x)
710 return -ENOMEM;
027445c3 711 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
712}
713
ce1d4d3e 714static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
715 struct file *file, const struct iovec *iov,
716 unsigned long nr_segs)
1da177e4 717{
ce1d4d3e
CH
718 struct socket *sock = file->private_data;
719 size_t size = 0;
720 int i;
1da177e4 721
89bddce5
SH
722 for (i = 0; i < nr_segs; i++)
723 size += iov[i].iov_len;
1da177e4 724
ce1d4d3e
CH
725 msg->msg_name = NULL;
726 msg->msg_namelen = 0;
727 msg->msg_control = NULL;
728 msg->msg_controllen = 0;
89bddce5 729 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
730 msg->msg_iovlen = nr_segs;
731 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
732 if (sock->type == SOCK_SEQPACKET)
733 msg->msg_flags |= MSG_EOR;
1da177e4 734
ce1d4d3e 735 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
736}
737
027445c3
BP
738static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
739 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
740{
741 struct sock_iocb siocb, *x;
1da177e4 742
ce1d4d3e
CH
743 if (pos != 0)
744 return -ESPIPE;
027445c3
BP
745
746 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
ce1d4d3e 747 return 0;
1da177e4 748
027445c3 749 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
750 if (!x)
751 return -ENOMEM;
1da177e4 752
027445c3 753 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
754}
755
1da177e4
LT
756/*
757 * Atomic setting of ioctl hooks to avoid race
758 * with module unload.
759 */
760
4a3e2f71 761static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 762static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 763
89bddce5 764void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 765{
4a3e2f71 766 mutex_lock(&br_ioctl_mutex);
1da177e4 767 br_ioctl_hook = hook;
4a3e2f71 768 mutex_unlock(&br_ioctl_mutex);
1da177e4 769}
89bddce5 770
1da177e4
LT
771EXPORT_SYMBOL(brioctl_set);
772
4a3e2f71 773static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 774static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 775
89bddce5 776void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 777{
4a3e2f71 778 mutex_lock(&vlan_ioctl_mutex);
1da177e4 779 vlan_ioctl_hook = hook;
4a3e2f71 780 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 781}
89bddce5 782
1da177e4
LT
783EXPORT_SYMBOL(vlan_ioctl_set);
784
4a3e2f71 785static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 786static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 787
89bddce5 788void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 789{
4a3e2f71 790 mutex_lock(&dlci_ioctl_mutex);
1da177e4 791 dlci_ioctl_hook = hook;
4a3e2f71 792 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 793}
89bddce5 794
1da177e4
LT
795EXPORT_SYMBOL(dlci_ioctl_set);
796
797/*
798 * With an ioctl, arg may well be a user mode pointer, but we don't know
799 * what to do with it - that's up to the protocol still.
800 */
801
802static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
803{
804 struct socket *sock;
805 void __user *argp = (void __user *)arg;
806 int pid, err;
807
b69aee04 808 sock = file->private_data;
1da177e4
LT
809 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
810 err = dev_ioctl(cmd, argp);
811 } else
d86b5e0e 812#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
813 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
814 err = dev_ioctl(cmd, argp);
815 } else
89bddce5
SH
816#endif /* CONFIG_WIRELESS_EXT */
817 switch (cmd) {
1da177e4
LT
818 case FIOSETOWN:
819 case SIOCSPGRP:
820 err = -EFAULT;
821 if (get_user(pid, (int __user *)argp))
822 break;
823 err = f_setown(sock->file, pid, 1);
824 break;
825 case FIOGETOWN:
826 case SIOCGPGRP:
609d7fa9 827 err = put_user(f_getown(sock->file),
89bddce5 828 (int __user *)argp);
1da177e4
LT
829 break;
830 case SIOCGIFBR:
831 case SIOCSIFBR:
832 case SIOCBRADDBR:
833 case SIOCBRDELBR:
834 err = -ENOPKG;
835 if (!br_ioctl_hook)
836 request_module("bridge");
837
4a3e2f71 838 mutex_lock(&br_ioctl_mutex);
89bddce5 839 if (br_ioctl_hook)
1da177e4 840 err = br_ioctl_hook(cmd, argp);
4a3e2f71 841 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
842 break;
843 case SIOCGIFVLAN:
844 case SIOCSIFVLAN:
845 err = -ENOPKG;
846 if (!vlan_ioctl_hook)
847 request_module("8021q");
848
4a3e2f71 849 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
850 if (vlan_ioctl_hook)
851 err = vlan_ioctl_hook(argp);
4a3e2f71 852 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 853 break;
1da177e4
LT
854 case SIOCADDDLCI:
855 case SIOCDELDLCI:
856 err = -ENOPKG;
857 if (!dlci_ioctl_hook)
858 request_module("dlci");
859
860 if (dlci_ioctl_hook) {
4a3e2f71 861 mutex_lock(&dlci_ioctl_mutex);
1da177e4 862 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 863 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
864 }
865 break;
866 default:
867 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
868
869 /*
870 * If this ioctl is unknown try to hand it down
871 * to the NIC driver.
872 */
873 if (err == -ENOIOCTLCMD)
874 err = dev_ioctl(cmd, argp);
1da177e4 875 break;
89bddce5 876 }
1da177e4
LT
877 return err;
878}
879
880int sock_create_lite(int family, int type, int protocol, struct socket **res)
881{
882 int err;
883 struct socket *sock = NULL;
89bddce5 884
1da177e4
LT
885 err = security_socket_create(family, type, protocol, 1);
886 if (err)
887 goto out;
888
889 sock = sock_alloc();
890 if (!sock) {
891 err = -ENOMEM;
892 goto out;
893 }
894
1da177e4 895 sock->type = type;
7420ed23
VY
896 err = security_socket_post_create(sock, family, type, protocol, 1);
897 if (err)
898 goto out_release;
899
1da177e4
LT
900out:
901 *res = sock;
902 return err;
7420ed23
VY
903out_release:
904 sock_release(sock);
905 sock = NULL;
906 goto out;
1da177e4
LT
907}
908
909/* No kernel lock held - perfect */
89bddce5 910static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
911{
912 struct socket *sock;
913
914 /*
89bddce5 915 * We can't return errors to poll, so it's either yes or no.
1da177e4 916 */
b69aee04 917 sock = file->private_data;
1da177e4
LT
918 return sock->ops->poll(file, sock, wait);
919}
920
89bddce5 921static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 922{
b69aee04 923 struct socket *sock = file->private_data;
1da177e4
LT
924
925 return sock->ops->mmap(file, sock, vma);
926}
927
20380731 928static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
929{
930 /*
89bddce5
SH
931 * It was possible the inode is NULL we were
932 * closing an unfinished socket.
1da177e4
LT
933 */
934
89bddce5 935 if (!inode) {
1da177e4
LT
936 printk(KERN_DEBUG "sock_close: NULL inode\n");
937 return 0;
938 }
939 sock_fasync(-1, filp, 0);
940 sock_release(SOCKET_I(inode));
941 return 0;
942}
943
944/*
945 * Update the socket async list
946 *
947 * Fasync_list locking strategy.
948 *
949 * 1. fasync_list is modified only under process context socket lock
950 * i.e. under semaphore.
951 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
952 * or under socket lock.
953 * 3. fasync_list can be used from softirq context, so that
954 * modification under socket lock have to be enhanced with
955 * write_lock_bh(&sk->sk_callback_lock).
956 * --ANK (990710)
957 */
958
959static int sock_fasync(int fd, struct file *filp, int on)
960{
89bddce5 961 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
962 struct socket *sock;
963 struct sock *sk;
964
89bddce5 965 if (on) {
8b3a7005 966 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 967 if (fna == NULL)
1da177e4
LT
968 return -ENOMEM;
969 }
970
b69aee04 971 sock = filp->private_data;
1da177e4 972
89bddce5
SH
973 sk = sock->sk;
974 if (sk == NULL) {
1da177e4
LT
975 kfree(fna);
976 return -EINVAL;
977 }
978
979 lock_sock(sk);
980
89bddce5 981 prev = &(sock->fasync_list);
1da177e4 982
89bddce5
SH
983 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
984 if (fa->fa_file == filp)
1da177e4
LT
985 break;
986
89bddce5
SH
987 if (on) {
988 if (fa != NULL) {
1da177e4 989 write_lock_bh(&sk->sk_callback_lock);
89bddce5 990 fa->fa_fd = fd;
1da177e4
LT
991 write_unlock_bh(&sk->sk_callback_lock);
992
993 kfree(fna);
994 goto out;
995 }
89bddce5
SH
996 fna->fa_file = filp;
997 fna->fa_fd = fd;
998 fna->magic = FASYNC_MAGIC;
999 fna->fa_next = sock->fasync_list;
1da177e4 1000 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1001 sock->fasync_list = fna;
1da177e4 1002 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1003 } else {
1004 if (fa != NULL) {
1da177e4 1005 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1006 *prev = fa->fa_next;
1da177e4
LT
1007 write_unlock_bh(&sk->sk_callback_lock);
1008 kfree(fa);
1009 }
1010 }
1011
1012out:
1013 release_sock(sock->sk);
1014 return 0;
1015}
1016
1017/* This function may be called only under socket lock or callback_lock */
1018
1019int sock_wake_async(struct socket *sock, int how, int band)
1020{
1021 if (!sock || !sock->fasync_list)
1022 return -1;
89bddce5 1023 switch (how) {
1da177e4 1024 case 1:
89bddce5 1025
1da177e4
LT
1026 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1027 break;
1028 goto call_kill;
1029 case 2:
1030 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1031 break;
1032 /* fall through */
1033 case 0:
89bddce5 1034call_kill:
1da177e4
LT
1035 __kill_fasync(sock->fasync_list, SIGIO, band);
1036 break;
1037 case 3:
1038 __kill_fasync(sock->fasync_list, SIGURG, band);
1039 }
1040 return 0;
1041}
1042
89bddce5
SH
1043static int __sock_create(int family, int type, int protocol,
1044 struct socket **res, int kern)
1da177e4
LT
1045{
1046 int err;
1047 struct socket *sock;
55737fda 1048 const struct net_proto_family *pf;
1da177e4
LT
1049
1050 /*
89bddce5 1051 * Check protocol is in range
1da177e4
LT
1052 */
1053 if (family < 0 || family >= NPROTO)
1054 return -EAFNOSUPPORT;
1055 if (type < 0 || type >= SOCK_MAX)
1056 return -EINVAL;
1057
1058 /* Compatibility.
1059
1060 This uglymoron is moved from INET layer to here to avoid
1061 deadlock in module load.
1062 */
1063 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1064 static int warned;
1da177e4
LT
1065 if (!warned) {
1066 warned = 1;
89bddce5
SH
1067 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1068 current->comm);
1da177e4
LT
1069 }
1070 family = PF_PACKET;
1071 }
1072
1073 err = security_socket_create(family, type, protocol, kern);
1074 if (err)
1075 return err;
89bddce5 1076
55737fda
SH
1077 /*
1078 * Allocate the socket and allow the family to set things up. if
1079 * the protocol is 0, the family is instructed to select an appropriate
1080 * default.
1081 */
1082 sock = sock_alloc();
1083 if (!sock) {
1084 if (net_ratelimit())
1085 printk(KERN_WARNING "socket: no more sockets\n");
1086 return -ENFILE; /* Not exactly a match, but its the
1087 closest posix thing */
1088 }
1089
1090 sock->type = type;
1091
1da177e4 1092#if defined(CONFIG_KMOD)
89bddce5
SH
1093 /* Attempt to load a protocol module if the find failed.
1094 *
1095 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1096 * requested real, full-featured networking support upon configuration.
1097 * Otherwise module support will break!
1098 */
55737fda 1099 if (net_families[family] == NULL)
89bddce5 1100 request_module("net-pf-%d", family);
1da177e4
LT
1101#endif
1102
55737fda
SH
1103 rcu_read_lock();
1104 pf = rcu_dereference(net_families[family]);
1105 err = -EAFNOSUPPORT;
1106 if (!pf)
1107 goto out_release;
1da177e4
LT
1108
1109 /*
1110 * We will call the ->create function, that possibly is in a loadable
1111 * module, so we have to bump that loadable module refcnt first.
1112 */
55737fda 1113 if (!try_module_get(pf->owner))
1da177e4
LT
1114 goto out_release;
1115
55737fda
SH
1116 /* Now protected by module ref count */
1117 rcu_read_unlock();
1118
1119 err = pf->create(sock, protocol);
1120 if (err < 0)
1da177e4 1121 goto out_module_put;
a79af59e 1122
1da177e4
LT
1123 /*
1124 * Now to bump the refcnt of the [loadable] module that owns this
1125 * socket at sock_release time we decrement its refcnt.
1126 */
55737fda
SH
1127 if (!try_module_get(sock->ops->owner))
1128 goto out_module_busy;
1129
1da177e4
LT
1130 /*
1131 * Now that we're done with the ->create function, the [loadable]
1132 * module can have its refcnt decremented
1133 */
55737fda 1134 module_put(pf->owner);
7420ed23
VY
1135 err = security_socket_post_create(sock, family, type, protocol, kern);
1136 if (err)
1137 goto out_release;
55737fda 1138 *res = sock;
1da177e4 1139
55737fda
SH
1140 return 0;
1141
1142out_module_busy:
1143 err = -EAFNOSUPPORT;
1da177e4 1144out_module_put:
55737fda
SH
1145 sock->ops = NULL;
1146 module_put(pf->owner);
1147out_sock_release:
1da177e4 1148 sock_release(sock);
55737fda
SH
1149 return err;
1150
1151out_release:
1152 rcu_read_unlock();
1153 goto out_sock_release;
1da177e4
LT
1154}
1155
/* Create a socket; passes kern = 0 to __sock_create(). */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1160
/* Create a socket; passes kern = 1 to __sock_create(). */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1165
1166asmlinkage long sys_socket(int family, int type, int protocol)
1167{
1168 int retval;
1169 struct socket *sock;
1170
1171 retval = sock_create(family, type, protocol, &sock);
1172 if (retval < 0)
1173 goto out;
1174
1175 retval = sock_map_fd(sock);
1176 if (retval < 0)
1177 goto out_release;
1178
1179out:
1180 /* It may be already another descriptor 8) Not kernel problem. */
1181 return retval;
1182
1183out_release:
1184 sock_release(sock);
1185 return retval;
1186}
1187
1188/*
1189 * Create a pair of connected sockets.
1190 */
1191
89bddce5
SH
1192asmlinkage long sys_socketpair(int family, int type, int protocol,
1193 int __user *usockvec)
1da177e4
LT
1194{
1195 struct socket *sock1, *sock2;
1196 int fd1, fd2, err;
1197
1198 /*
1199 * Obtain the first socket and check if the underlying protocol
1200 * supports the socketpair call.
1201 */
1202
1203 err = sock_create(family, type, protocol, &sock1);
1204 if (err < 0)
1205 goto out;
1206
1207 err = sock_create(family, type, protocol, &sock2);
1208 if (err < 0)
1209 goto out_release_1;
1210
1211 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1212 if (err < 0)
1da177e4
LT
1213 goto out_release_both;
1214
1215 fd1 = fd2 = -1;
1216
1217 err = sock_map_fd(sock1);
1218 if (err < 0)
1219 goto out_release_both;
1220 fd1 = err;
1221
1222 err = sock_map_fd(sock2);
1223 if (err < 0)
1224 goto out_close_1;
1225 fd2 = err;
1226
1227 /* fd1 and fd2 may be already another descriptors.
1228 * Not kernel problem.
1229 */
1230
89bddce5 1231 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1232 if (!err)
1233 err = put_user(fd2, &usockvec[1]);
1234 if (!err)
1235 return 0;
1236
1237 sys_close(fd2);
1238 sys_close(fd1);
1239 return err;
1240
1241out_close_1:
89bddce5 1242 sock_release(sock2);
1da177e4
LT
1243 sys_close(fd1);
1244 return err;
1245
1246out_release_both:
89bddce5 1247 sock_release(sock2);
1da177e4 1248out_release_1:
89bddce5 1249 sock_release(sock1);
1da177e4
LT
1250out:
1251 return err;
1252}
1253
1da177e4
LT
1254/*
1255 * Bind a name to a socket. Nothing much to do here since it's
1256 * the protocol's responsibility to handle the local address.
1257 *
1258 * We move the socket address to kernel space before we call
1259 * the protocol layer (having also checked the address is ok).
1260 */
1261
1262asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1263{
1264 struct socket *sock;
1265 char address[MAX_SOCK_ADDR];
6cb153ca 1266 int err, fput_needed;
1da177e4 1267
89bddce5
SH
1268 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1269 if(sock) {
1270 err = move_addr_to_kernel(umyaddr, addrlen, address);
1271 if (err >= 0) {
1272 err = security_socket_bind(sock,
1273 (struct sockaddr *)address,
1274 addrlen);
6cb153ca
BL
1275 if (!err)
1276 err = sock->ops->bind(sock,
89bddce5
SH
1277 (struct sockaddr *)
1278 address, addrlen);
1da177e4 1279 }
6cb153ca 1280 fput_light(sock->file, fput_needed);
89bddce5 1281 }
1da177e4
LT
1282 return err;
1283}
1284
1da177e4
LT
1285/*
1286 * Perform a listen. Basically, we allow the protocol to do anything
1287 * necessary for a listen, and if that works, we mark the socket as
1288 * ready for listening.
1289 */
1290
7a42c217 1291int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1292
1293asmlinkage long sys_listen(int fd, int backlog)
1294{
1295 struct socket *sock;
6cb153ca 1296 int err, fput_needed;
89bddce5
SH
1297
1298 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1299 if (sock) {
1300 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1301 backlog = sysctl_somaxconn;
1302
1303 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1304 if (!err)
1305 err = sock->ops->listen(sock, backlog);
1da177e4 1306
6cb153ca 1307 fput_light(sock->file, fput_needed);
1da177e4
LT
1308 }
1309 return err;
1310}
1311
1da177e4
LT
1312/*
1313 * For accept, we attempt to create a new socket, set up the link
1314 * with the client, wake up the client, then return the new
1315 * connected fd. We collect the address of the connector in kernel
1316 * space and move it to user at the very end. This is unclean because
1317 * we open the socket then return an error.
1318 *
1319 * 1003.1g adds the ability to recvmsg() to query connection pending
1320 * status to recvmsg. We need to add that support in a way thats
1321 * clean when we restucture accept also.
1322 */
1323
89bddce5
SH
1324asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1325 int __user *upeer_addrlen)
1da177e4
LT
1326{
1327 struct socket *sock, *newsock;
39d8c1b6 1328 struct file *newfile;
6cb153ca 1329 int err, len, newfd, fput_needed;
1da177e4
LT
1330 char address[MAX_SOCK_ADDR];
1331
6cb153ca 1332 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1333 if (!sock)
1334 goto out;
1335
1336 err = -ENFILE;
89bddce5 1337 if (!(newsock = sock_alloc()))
1da177e4
LT
1338 goto out_put;
1339
1340 newsock->type = sock->type;
1341 newsock->ops = sock->ops;
1342
1da177e4
LT
1343 /*
1344 * We don't need try_module_get here, as the listening socket (sock)
1345 * has the protocol module (sock->ops->owner) held.
1346 */
1347 __module_get(newsock->ops->owner);
1348
39d8c1b6
DM
1349 newfd = sock_alloc_fd(&newfile);
1350 if (unlikely(newfd < 0)) {
1351 err = newfd;
9a1875e6
DM
1352 sock_release(newsock);
1353 goto out_put;
39d8c1b6
DM
1354 }
1355
1356 err = sock_attach_fd(newsock, newfile);
1357 if (err < 0)
1358 goto out_fd;
1359
a79af59e
FF
1360 err = security_socket_accept(sock, newsock);
1361 if (err)
39d8c1b6 1362 goto out_fd;
a79af59e 1363
1da177e4
LT
1364 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1365 if (err < 0)
39d8c1b6 1366 goto out_fd;
1da177e4
LT
1367
1368 if (upeer_sockaddr) {
89bddce5
SH
1369 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1370 &len, 2) < 0) {
1da177e4 1371 err = -ECONNABORTED;
39d8c1b6 1372 goto out_fd;
1da177e4 1373 }
89bddce5
SH
1374 err = move_addr_to_user(address, len, upeer_sockaddr,
1375 upeer_addrlen);
1da177e4 1376 if (err < 0)
39d8c1b6 1377 goto out_fd;
1da177e4
LT
1378 }
1379
1380 /* File flags are not inherited via accept() unlike another OSes. */
1381
39d8c1b6
DM
1382 fd_install(newfd, newfile);
1383 err = newfd;
1da177e4
LT
1384
1385 security_socket_post_accept(sock, newsock);
1386
1387out_put:
6cb153ca 1388 fput_light(sock->file, fput_needed);
1da177e4
LT
1389out:
1390 return err;
39d8c1b6 1391out_fd:
9606a216 1392 fput(newfile);
39d8c1b6 1393 put_unused_fd(newfd);
1da177e4
LT
1394 goto out_put;
1395}
1396
1da177e4
LT
1397/*
1398 * Attempt to connect to a socket with the server address. The address
1399 * is in user space so we verify it is OK and move it to kernel space.
1400 *
1401 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1402 * break bindings
1403 *
1404 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1405 * other SEQPACKET protocols that take time to connect() as it doesn't
1406 * include the -EINPROGRESS status for such sockets.
1407 */
1408
89bddce5
SH
1409asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1410 int addrlen)
1da177e4
LT
1411{
1412 struct socket *sock;
1413 char address[MAX_SOCK_ADDR];
6cb153ca 1414 int err, fput_needed;
1da177e4 1415
6cb153ca 1416 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1417 if (!sock)
1418 goto out;
1419 err = move_addr_to_kernel(uservaddr, addrlen, address);
1420 if (err < 0)
1421 goto out_put;
1422
89bddce5
SH
1423 err =
1424 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1425 if (err)
1426 goto out_put;
1427
89bddce5 1428 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1429 sock->file->f_flags);
1430out_put:
6cb153ca 1431 fput_light(sock->file, fput_needed);
1da177e4
LT
1432out:
1433 return err;
1434}
1435
1436/*
1437 * Get the local address ('name') of a socket object. Move the obtained
1438 * name to user space.
1439 */
1440
89bddce5
SH
1441asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1442 int __user *usockaddr_len)
1da177e4
LT
1443{
1444 struct socket *sock;
1445 char address[MAX_SOCK_ADDR];
6cb153ca 1446 int len, err, fput_needed;
89bddce5 1447
6cb153ca 1448 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1449 if (!sock)
1450 goto out;
1451
1452 err = security_socket_getsockname(sock);
1453 if (err)
1454 goto out_put;
1455
1456 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1457 if (err)
1458 goto out_put;
1459 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1460
1461out_put:
6cb153ca 1462 fput_light(sock->file, fput_needed);
1da177e4
LT
1463out:
1464 return err;
1465}
1466
1467/*
1468 * Get the remote address ('name') of a socket object. Move the obtained
1469 * name to user space.
1470 */
1471
89bddce5
SH
1472asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1473 int __user *usockaddr_len)
1da177e4
LT
1474{
1475 struct socket *sock;
1476 char address[MAX_SOCK_ADDR];
6cb153ca 1477 int len, err, fput_needed;
1da177e4 1478
89bddce5
SH
1479 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1480 if (sock != NULL) {
1da177e4
LT
1481 err = security_socket_getpeername(sock);
1482 if (err) {
6cb153ca 1483 fput_light(sock->file, fput_needed);
1da177e4
LT
1484 return err;
1485 }
1486
89bddce5
SH
1487 err =
1488 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1489 1);
1da177e4 1490 if (!err)
89bddce5
SH
1491 err = move_addr_to_user(address, len, usockaddr,
1492 usockaddr_len);
6cb153ca 1493 fput_light(sock->file, fput_needed);
1da177e4
LT
1494 }
1495 return err;
1496}
1497
1498/*
1499 * Send a datagram to a given address. We move the address into kernel
1500 * space and check the user space data area is readable before invoking
1501 * the protocol.
1502 */
1503
89bddce5
SH
1504asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1505 unsigned flags, struct sockaddr __user *addr,
1506 int addr_len)
1da177e4
LT
1507{
1508 struct socket *sock;
1509 char address[MAX_SOCK_ADDR];
1510 int err;
1511 struct msghdr msg;
1512 struct iovec iov;
6cb153ca
BL
1513 int fput_needed;
1514 struct file *sock_file;
1515
1516 sock_file = fget_light(fd, &fput_needed);
1517 if (!sock_file)
1518 return -EBADF;
1519
1520 sock = sock_from_file(sock_file, &err);
1da177e4 1521 if (!sock)
6cb153ca 1522 goto out_put;
89bddce5
SH
1523 iov.iov_base = buff;
1524 iov.iov_len = len;
1525 msg.msg_name = NULL;
1526 msg.msg_iov = &iov;
1527 msg.msg_iovlen = 1;
1528 msg.msg_control = NULL;
1529 msg.msg_controllen = 0;
1530 msg.msg_namelen = 0;
6cb153ca 1531 if (addr) {
1da177e4
LT
1532 err = move_addr_to_kernel(addr, addr_len, address);
1533 if (err < 0)
1534 goto out_put;
89bddce5
SH
1535 msg.msg_name = address;
1536 msg.msg_namelen = addr_len;
1da177e4
LT
1537 }
1538 if (sock->file->f_flags & O_NONBLOCK)
1539 flags |= MSG_DONTWAIT;
1540 msg.msg_flags = flags;
1541 err = sock_sendmsg(sock, &msg, len);
1542
89bddce5 1543out_put:
6cb153ca 1544 fput_light(sock_file, fput_needed);
1da177e4
LT
1545 return err;
1546}
1547
1548/*
89bddce5 1549 * Send a datagram down a socket.
1da177e4
LT
1550 */
1551
89bddce5 1552asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1553{
1554 return sys_sendto(fd, buff, len, flags, NULL, 0);
1555}
1556
1557/*
89bddce5 1558 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1559 * sender. We verify the buffers are writable and if needed move the
1560 * sender address from kernel to user space.
1561 */
1562
89bddce5
SH
1563asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1564 unsigned flags, struct sockaddr __user *addr,
1565 int __user *addr_len)
1da177e4
LT
1566{
1567 struct socket *sock;
1568 struct iovec iov;
1569 struct msghdr msg;
1570 char address[MAX_SOCK_ADDR];
89bddce5 1571 int err, err2;
6cb153ca
BL
1572 struct file *sock_file;
1573 int fput_needed;
1574
1575 sock_file = fget_light(fd, &fput_needed);
1576 if (!sock_file)
1577 return -EBADF;
1da177e4 1578
6cb153ca 1579 sock = sock_from_file(sock_file, &err);
1da177e4
LT
1580 if (!sock)
1581 goto out;
1582
89bddce5
SH
1583 msg.msg_control = NULL;
1584 msg.msg_controllen = 0;
1585 msg.msg_iovlen = 1;
1586 msg.msg_iov = &iov;
1587 iov.iov_len = size;
1588 iov.iov_base = ubuf;
1589 msg.msg_name = address;
1590 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1591 if (sock->file->f_flags & O_NONBLOCK)
1592 flags |= MSG_DONTWAIT;
89bddce5 1593 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1594
89bddce5
SH
1595 if (err >= 0 && addr != NULL) {
1596 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1597 if (err2 < 0)
1598 err = err2;
1da177e4 1599 }
1da177e4 1600out:
6cb153ca 1601 fput_light(sock_file, fput_needed);
1da177e4
LT
1602 return err;
1603}
1604
1605/*
89bddce5 1606 * Receive a datagram from a socket.
1da177e4
LT
1607 */
1608
89bddce5
SH
1609asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1610 unsigned flags)
1da177e4
LT
1611{
1612 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1613}
1614
1615/*
1616 * Set a socket option. Because we don't know the option lengths we have
1617 * to pass the user mode parameter for the protocols to sort out.
1618 */
1619
89bddce5
SH
1620asmlinkage long sys_setsockopt(int fd, int level, int optname,
1621 char __user *optval, int optlen)
1da177e4 1622{
6cb153ca 1623 int err, fput_needed;
1da177e4
LT
1624 struct socket *sock;
1625
1626 if (optlen < 0)
1627 return -EINVAL;
89bddce5
SH
1628
1629 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1630 if (sock != NULL) {
1631 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1632 if (err)
1633 goto out_put;
1da177e4
LT
1634
1635 if (level == SOL_SOCKET)
89bddce5
SH
1636 err =
1637 sock_setsockopt(sock, level, optname, optval,
1638 optlen);
1da177e4 1639 else
89bddce5
SH
1640 err =
1641 sock->ops->setsockopt(sock, level, optname, optval,
1642 optlen);
6cb153ca
BL
1643out_put:
1644 fput_light(sock->file, fput_needed);
1da177e4
LT
1645 }
1646 return err;
1647}
1648
1649/*
1650 * Get a socket option. Because we don't know the option lengths we have
1651 * to pass a user mode parameter for the protocols to sort out.
1652 */
1653
89bddce5
SH
1654asmlinkage long sys_getsockopt(int fd, int level, int optname,
1655 char __user *optval, int __user *optlen)
1da177e4 1656{
6cb153ca 1657 int err, fput_needed;
1da177e4
LT
1658 struct socket *sock;
1659
89bddce5
SH
1660 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1661 if (sock != NULL) {
6cb153ca
BL
1662 err = security_socket_getsockopt(sock, level, optname);
1663 if (err)
1664 goto out_put;
1da177e4
LT
1665
1666 if (level == SOL_SOCKET)
89bddce5
SH
1667 err =
1668 sock_getsockopt(sock, level, optname, optval,
1669 optlen);
1da177e4 1670 else
89bddce5
SH
1671 err =
1672 sock->ops->getsockopt(sock, level, optname, optval,
1673 optlen);
6cb153ca
BL
1674out_put:
1675 fput_light(sock->file, fput_needed);
1da177e4
LT
1676 }
1677 return err;
1678}
1679
1da177e4
LT
1680/*
1681 * Shutdown a socket.
1682 */
1683
1684asmlinkage long sys_shutdown(int fd, int how)
1685{
6cb153ca 1686 int err, fput_needed;
1da177e4
LT
1687 struct socket *sock;
1688
89bddce5
SH
1689 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1690 if (sock != NULL) {
1da177e4 1691 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1692 if (!err)
1693 err = sock->ops->shutdown(sock, how);
1694 fput_light(sock->file, fput_needed);
1da177e4
LT
1695 }
1696 return err;
1697}
1698
89bddce5 1699/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1700 * fields which are the same type (int / unsigned) on our platforms.
1701 */
1702#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
1703#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1704#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1705
1da177e4
LT
1706/*
1707 * BSD sendmsg interface
1708 */
1709
1710asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1711{
89bddce5
SH
1712 struct compat_msghdr __user *msg_compat =
1713 (struct compat_msghdr __user *)msg;
1da177e4
LT
1714 struct socket *sock;
1715 char address[MAX_SOCK_ADDR];
1716 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1717 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1718 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1719 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1720 unsigned char *ctl_buf = ctl;
1721 struct msghdr msg_sys;
1722 int err, ctl_len, iov_size, total_len;
6cb153ca 1723 int fput_needed;
89bddce5 1724
1da177e4
LT
1725 err = -EFAULT;
1726 if (MSG_CMSG_COMPAT & flags) {
1727 if (get_compat_msghdr(&msg_sys, msg_compat))
1728 return -EFAULT;
89bddce5
SH
1729 }
1730 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1731 return -EFAULT;
1732
6cb153ca 1733 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1734 if (!sock)
1da177e4
LT
1735 goto out;
1736
1737 /* do not move before msg_sys is valid */
1738 err = -EMSGSIZE;
1739 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1740 goto out_put;
1741
89bddce5 1742 /* Check whether to allocate the iovec area */
1da177e4
LT
1743 err = -ENOMEM;
1744 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1745 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1746 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1747 if (!iov)
1748 goto out_put;
1749 }
1750
1751 /* This will also move the address data into kernel space */
1752 if (MSG_CMSG_COMPAT & flags) {
1753 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1754 } else
1755 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1756 if (err < 0)
1da177e4
LT
1757 goto out_freeiov;
1758 total_len = err;
1759
1760 err = -ENOBUFS;
1761
1762 if (msg_sys.msg_controllen > INT_MAX)
1763 goto out_freeiov;
89bddce5 1764 ctl_len = msg_sys.msg_controllen;
1da177e4 1765 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1766 err =
1767 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1768 sizeof(ctl));
1da177e4
LT
1769 if (err)
1770 goto out_freeiov;
1771 ctl_buf = msg_sys.msg_control;
8920e8f9 1772 ctl_len = msg_sys.msg_controllen;
1da177e4 1773 } else if (ctl_len) {
89bddce5 1774 if (ctl_len > sizeof(ctl)) {
1da177e4 1775 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1776 if (ctl_buf == NULL)
1da177e4
LT
1777 goto out_freeiov;
1778 }
1779 err = -EFAULT;
1780 /*
1781 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1782 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1783 * checking falls down on this.
1784 */
89bddce5
SH
1785 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1786 ctl_len))
1da177e4
LT
1787 goto out_freectl;
1788 msg_sys.msg_control = ctl_buf;
1789 }
1790 msg_sys.msg_flags = flags;
1791
1792 if (sock->file->f_flags & O_NONBLOCK)
1793 msg_sys.msg_flags |= MSG_DONTWAIT;
1794 err = sock_sendmsg(sock, &msg_sys, total_len);
1795
1796out_freectl:
89bddce5 1797 if (ctl_buf != ctl)
1da177e4
LT
1798 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1799out_freeiov:
1800 if (iov != iovstack)
1801 sock_kfree_s(sock->sk, iov, iov_size);
1802out_put:
6cb153ca 1803 fput_light(sock->file, fput_needed);
89bddce5 1804out:
1da177e4
LT
1805 return err;
1806}
1807
1808/*
1809 * BSD recvmsg interface
1810 */
1811
89bddce5
SH
1812asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1813 unsigned int flags)
1da177e4 1814{
89bddce5
SH
1815 struct compat_msghdr __user *msg_compat =
1816 (struct compat_msghdr __user *)msg;
1da177e4
LT
1817 struct socket *sock;
1818 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1819 struct iovec *iov = iovstack;
1da177e4
LT
1820 struct msghdr msg_sys;
1821 unsigned long cmsg_ptr;
1822 int err, iov_size, total_len, len;
6cb153ca 1823 int fput_needed;
1da177e4
LT
1824
1825 /* kernel mode address */
1826 char addr[MAX_SOCK_ADDR];
1827
1828 /* user mode address pointers */
1829 struct sockaddr __user *uaddr;
1830 int __user *uaddr_len;
89bddce5 1831
1da177e4
LT
1832 if (MSG_CMSG_COMPAT & flags) {
1833 if (get_compat_msghdr(&msg_sys, msg_compat))
1834 return -EFAULT;
89bddce5
SH
1835 }
1836 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1837 return -EFAULT;
1da177e4 1838
6cb153ca 1839 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1840 if (!sock)
1841 goto out;
1842
1843 err = -EMSGSIZE;
1844 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1845 goto out_put;
89bddce5
SH
1846
1847 /* Check whether to allocate the iovec area */
1da177e4
LT
1848 err = -ENOMEM;
1849 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1850 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1851 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1852 if (!iov)
1853 goto out_put;
1854 }
1855
1856 /*
89bddce5
SH
1857 * Save the user-mode address (verify_iovec will change the
1858 * kernel msghdr to use the kernel address space)
1da177e4 1859 */
89bddce5
SH
1860
1861 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1862 uaddr_len = COMPAT_NAMELEN(msg);
1863 if (MSG_CMSG_COMPAT & flags) {
1864 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1865 } else
1866 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1867 if (err < 0)
1868 goto out_freeiov;
89bddce5 1869 total_len = err;
1da177e4
LT
1870
1871 cmsg_ptr = (unsigned long)msg_sys.msg_control;
1872 msg_sys.msg_flags = 0;
1873 if (MSG_CMSG_COMPAT & flags)
1874 msg_sys.msg_flags = MSG_CMSG_COMPAT;
89bddce5 1875
1da177e4
LT
1876 if (sock->file->f_flags & O_NONBLOCK)
1877 flags |= MSG_DONTWAIT;
1878 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1879 if (err < 0)
1880 goto out_freeiov;
1881 len = err;
1882
1883 if (uaddr != NULL) {
89bddce5
SH
1884 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1885 uaddr_len);
1da177e4
LT
1886 if (err < 0)
1887 goto out_freeiov;
1888 }
37f7f421
DM
1889 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1890 COMPAT_FLAGS(msg));
1da177e4
LT
1891 if (err)
1892 goto out_freeiov;
1893 if (MSG_CMSG_COMPAT & flags)
89bddce5 1894 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1895 &msg_compat->msg_controllen);
1896 else
89bddce5 1897 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1898 &msg->msg_controllen);
1899 if (err)
1900 goto out_freeiov;
1901 err = len;
1902
1903out_freeiov:
1904 if (iov != iovstack)
1905 sock_kfree_s(sock->sk, iov, iov_size);
1906out_put:
6cb153ca 1907 fput_light(sock->file, fput_needed);
1da177e4
LT
1908out:
1909 return err;
1910}
1911
1912#ifdef __ARCH_WANT_SYS_SOCKETCALL
1913
1914/* Argument list sizes for sys_socketcall */
1915#define AL(x) ((x) * sizeof(unsigned long))
89bddce5
SH
1916static const unsigned char nargs[18]={
1917 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
1918 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
1919 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
1920};
1921
1da177e4
LT
1922#undef AL
1923
1924/*
89bddce5 1925 * System call vectors.
1da177e4
LT
1926 *
1927 * Argument checking cleaned up. Saved 20% in size.
1928 * This function doesn't need to set the kernel lock because
89bddce5 1929 * it is set by the callees.
1da177e4
LT
1930 */
1931
1932asmlinkage long sys_socketcall(int call, unsigned long __user *args)
1933{
1934 unsigned long a[6];
89bddce5 1935 unsigned long a0, a1;
1da177e4
LT
1936 int err;
1937
89bddce5 1938 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
1939 return -EINVAL;
1940
1941 /* copy_from_user should be SMP safe. */
1942 if (copy_from_user(a, args, nargs[call]))
1943 return -EFAULT;
3ec3b2fb 1944
89bddce5 1945 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
1946 if (err)
1947 return err;
1948
89bddce5
SH
1949 a0 = a[0];
1950 a1 = a[1];
1951
1952 switch (call) {
1953 case SYS_SOCKET:
1954 err = sys_socket(a0, a1, a[2]);
1955 break;
1956 case SYS_BIND:
1957 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
1958 break;
1959 case SYS_CONNECT:
1960 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
1961 break;
1962 case SYS_LISTEN:
1963 err = sys_listen(a0, a1);
1964 break;
1965 case SYS_ACCEPT:
1966 err =
1967 sys_accept(a0, (struct sockaddr __user *)a1,
1968 (int __user *)a[2]);
1969 break;
1970 case SYS_GETSOCKNAME:
1971 err =
1972 sys_getsockname(a0, (struct sockaddr __user *)a1,
1973 (int __user *)a[2]);
1974 break;
1975 case SYS_GETPEERNAME:
1976 err =
1977 sys_getpeername(a0, (struct sockaddr __user *)a1,
1978 (int __user *)a[2]);
1979 break;
1980 case SYS_SOCKETPAIR:
1981 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
1982 break;
1983 case SYS_SEND:
1984 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
1985 break;
1986 case SYS_SENDTO:
1987 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
1988 (struct sockaddr __user *)a[4], a[5]);
1989 break;
1990 case SYS_RECV:
1991 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
1992 break;
1993 case SYS_RECVFROM:
1994 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
1995 (struct sockaddr __user *)a[4],
1996 (int __user *)a[5]);
1997 break;
1998 case SYS_SHUTDOWN:
1999 err = sys_shutdown(a0, a1);
2000 break;
2001 case SYS_SETSOCKOPT:
2002 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2003 break;
2004 case SYS_GETSOCKOPT:
2005 err =
2006 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2007 (int __user *)a[4]);
2008 break;
2009 case SYS_SENDMSG:
2010 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2011 break;
2012 case SYS_RECVMSG:
2013 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2014 break;
2015 default:
2016 err = -EINVAL;
2017 break;
1da177e4
LT
2018 }
2019 return err;
2020}
2021
89bddce5 2022#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2023
55737fda
SH
2024/**
2025 * sock_register - add a socket protocol handler
2026 * @ops: description of protocol
2027 *
1da177e4
LT
2028 * This function is called by a protocol handler that wants to
2029 * advertise its address family, and have it linked into the
55737fda
SH
2030 * socket interface. The value ops->family coresponds to the
2031 * socket system call protocol family.
1da177e4 2032 */
f0fd27d4 2033int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2034{
2035 int err;
2036
2037 if (ops->family >= NPROTO) {
89bddce5
SH
2038 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2039 NPROTO);
1da177e4
LT
2040 return -ENOBUFS;
2041 }
55737fda
SH
2042
2043 spin_lock(&net_family_lock);
2044 if (net_families[ops->family])
2045 err = -EEXIST;
2046 else {
89bddce5 2047 net_families[ops->family] = ops;
1da177e4
LT
2048 err = 0;
2049 }
55737fda
SH
2050 spin_unlock(&net_family_lock);
2051
89bddce5 2052 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2053 return err;
2054}
2055
55737fda
SH
2056/**
2057 * sock_unregister - remove a protocol handler
2058 * @family: protocol family to remove
2059 *
1da177e4
LT
2060 * This function is called by a protocol handler that wants to
2061 * remove its address family, and have it unlinked from the
55737fda
SH
2062 * new socket creation.
2063 *
2064 * If protocol handler is a module, then it can use module reference
2065 * counts to protect against new references. If protocol handler is not
2066 * a module then it needs to provide its own protection in
2067 * the ops->create routine.
1da177e4 2068 */
f0fd27d4 2069void sock_unregister(int family)
1da177e4 2070{
f0fd27d4 2071 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2072
55737fda 2073 spin_lock(&net_family_lock);
89bddce5 2074 net_families[family] = NULL;
55737fda
SH
2075 spin_unlock(&net_family_lock);
2076
2077 synchronize_rcu();
2078
89bddce5 2079 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2080}
2081
77d76ea3 2082static int __init sock_init(void)
1da177e4
LT
2083{
2084 /*
89bddce5 2085 * Initialize sock SLAB cache.
1da177e4 2086 */
89bddce5 2087
1da177e4
LT
2088 sk_init();
2089
1da177e4 2090 /*
89bddce5 2091 * Initialize skbuff SLAB cache
1da177e4
LT
2092 */
2093 skb_init();
1da177e4
LT
2094
2095 /*
89bddce5 2096 * Initialize the protocols module.
1da177e4
LT
2097 */
2098
2099 init_inodecache();
2100 register_filesystem(&sock_fs_type);
2101 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2102
2103 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2104 */
2105
2106#ifdef CONFIG_NETFILTER
2107 netfilter_init();
2108#endif
cbeb321a
DM
2109
2110 return 0;
1da177e4
LT
2111}
2112
77d76ea3
AK
2113core_initcall(sock_init); /* early initcall */
2114
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * socket_seq_show - print the "sockets: used N" line into a seq_file.
 * @seq: seq_file to write to
 *
 * Adds up the per-CPU sockets_in_use counters over every possible CPU
 * and prints the total, clamped at zero.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif /* CONFIG_PROC_FS */
1da177e4 2131
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * compat_sock_ioctl - forward a compat ioctl to the socket's protocol.
 * @file: file whose private_data holds the struct socket
 * @cmd:  ioctl command
 * @arg:  ioctl argument
 *
 * Returns the result of the protocol's ->compat_ioctl() handler, or
 * -ENOIOCTLCMD when the protocol does not provide one.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	int ret;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	ret = sock->ops->compat_ioctl(sock, cmd, arg);
	return ret;
}
#endif
2145
ac5a488e
SS
2146int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2147{
2148 return sock->ops->bind(sock, addr, addrlen);
2149}
2150
2151int kernel_listen(struct socket *sock, int backlog)
2152{
2153 return sock->ops->listen(sock, backlog);
2154}
2155
2156int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2157{
2158 struct sock *sk = sock->sk;
2159 int err;
2160
2161 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2162 newsock);
2163 if (err < 0)
2164 goto done;
2165
2166 err = sock->ops->accept(sock, *newsock, flags);
2167 if (err < 0) {
2168 sock_release(*newsock);
2169 goto done;
2170 }
2171
2172 (*newsock)->ops = sock->ops;
2173
2174done:
2175 return err;
2176}
2177
2178int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
2179 int flags)
2180{
2181 return sock->ops->connect(sock, addr, addrlen, flags);
2182}
2183
2184int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2185 int *addrlen)
2186{
2187 return sock->ops->getname(sock, addr, addrlen, 0);
2188}
2189
2190int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2191 int *addrlen)
2192{
2193 return sock->ops->getname(sock, addr, addrlen, 1);
2194}
2195
2196int kernel_getsockopt(struct socket *sock, int level, int optname,
2197 char *optval, int *optlen)
2198{
2199 mm_segment_t oldfs = get_fs();
2200 int err;
2201
2202 set_fs(KERNEL_DS);
2203 if (level == SOL_SOCKET)
2204 err = sock_getsockopt(sock, level, optname, optval, optlen);
2205 else
2206 err = sock->ops->getsockopt(sock, level, optname, optval,
2207 optlen);
2208 set_fs(oldfs);
2209 return err;
2210}
2211
2212int kernel_setsockopt(struct socket *sock, int level, int optname,
2213 char *optval, int optlen)
2214{
2215 mm_segment_t oldfs = get_fs();
2216 int err;
2217
2218 set_fs(KERNEL_DS);
2219 if (level == SOL_SOCKET)
2220 err = sock_setsockopt(sock, level, optname, optval, optlen);
2221 else
2222 err = sock->ops->setsockopt(sock, level, optname, optval,
2223 optlen);
2224 set_fs(oldfs);
2225 return err;
2226}
2227
2228int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2229 size_t size, int flags)
2230{
2231 if (sock->ops->sendpage)
2232 return sock->ops->sendpage(sock, page, offset, size, flags);
2233
2234 return sock_no_sendpage(sock, page, offset, size, flags);
2235}
2236
2237int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2238{
2239 mm_segment_t oldfs = get_fs();
2240 int err;
2241
2242 set_fs(KERNEL_DS);
2243 err = sock->ops->ioctl(sock, cmd, arg);
2244 set_fs(oldfs);
2245
2246 return err;
2247}
2248
1da177e4
LT
/* ABI emulation layers need these two (move_addr_to_kernel/_user) */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket creation, registration and lookup */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() wrappers */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
2273EXPORT_SYMBOL(kernel_sendpage);
2274EXPORT_SYMBOL(kernel_sock_ioctl);