ipv6 netns: Make several "global" sysctl variables namespace aware.
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
87de87d5 93#include <net/wext.h>
1da177e4
LT
94
95#include <net/sock.h>
96#include <linux/netfilter.h>
97
98static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
99static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
100 unsigned long nr_segs, loff_t pos);
101static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
102 unsigned long nr_segs, loff_t pos);
89bddce5 103static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
104
105static int sock_close(struct inode *inode, struct file *file);
106static unsigned int sock_poll(struct file *file,
107 struct poll_table_struct *wait);
89bddce5 108static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
109#ifdef CONFIG_COMPAT
110static long compat_sock_ioctl(struct file *file,
89bddce5 111 unsigned int cmd, unsigned long arg);
89bbfc95 112#endif
1da177e4 113static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
114static ssize_t sock_sendpage(struct file *file, struct page *page,
115 int offset, size_t size, loff_t *ppos, int more);
9c55e01c
JA
116static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
117 struct pipe_inode_info *pipe, size_t len,
118 unsigned int flags);
1da177e4 119
1da177e4
LT
120/*
121 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
122 * in the operation structures but are done directly via the socketcall() multiplexor.
123 */
124
da7071d7 125static const struct file_operations socket_file_ops = {
1da177e4
LT
126 .owner = THIS_MODULE,
127 .llseek = no_llseek,
128 .aio_read = sock_aio_read,
129 .aio_write = sock_aio_write,
130 .poll = sock_poll,
131 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
132#ifdef CONFIG_COMPAT
133 .compat_ioctl = compat_sock_ioctl,
134#endif
1da177e4
LT
135 .mmap = sock_mmap,
136 .open = sock_no_open, /* special open code to disallow open via /proc */
137 .release = sock_close,
138 .fasync = sock_fasync,
5274f052
JA
139 .sendpage = sock_sendpage,
140 .splice_write = generic_splice_sendpage,
9c55e01c 141 .splice_read = sock_splice_read,
1da177e4
LT
142};
143
144/*
145 * The protocol list. Each protocol is registered in here.
146 */
147
1da177e4 148static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 149static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 150
1da177e4
LT
151/*
152 * Statistics counters of the socket lists
153 */
154
155static DEFINE_PER_CPU(int, sockets_in_use) = 0;
156
157/*
89bddce5
SH
158 * Support routines.
159 * Move socket addresses back and forth across the kernel/user
160 * divide and look after the messy bits.
1da177e4
LT
161 */
162
89bddce5 163#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
164 16 for IP, 16 for IPX,
165 24 for IPv6,
89bddce5 166 about 80 for AX.25
1da177e4
LT
167 must be at least one bigger than
168 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 169 :unix_mkname()).
1da177e4 170 */
89bddce5 171
1da177e4
LT
172/**
173 * move_addr_to_kernel - copy a socket address into kernel space
174 * @uaddr: Address in user space
175 * @kaddr: Address in kernel space
176 * @ulen: Length in user space
177 *
178 * The address is copied into kernel space. If the provided address is
179 * too long an error code of -EINVAL is returned. If the copy gives
180 * invalid addresses -EFAULT is returned. On a success 0 is returned.
181 */
182
183int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
184{
89bddce5 185 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 186 return -EINVAL;
89bddce5 187 if (ulen == 0)
1da177e4 188 return 0;
89bddce5 189 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 190 return -EFAULT;
3ec3b2fb 191 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
192}
193
194/**
195 * move_addr_to_user - copy an address to user space
196 * @kaddr: kernel space address
197 * @klen: length of address in kernel
198 * @uaddr: user space address
199 * @ulen: pointer to user length field
200 *
201 * The value pointed to by ulen on entry is the buffer length available.
202 * This is overwritten with the buffer space used. -EINVAL is returned
203 * if an overlong buffer is specified or a negative buffer size. -EFAULT
204 * is returned if either the buffer or the length field are not
205 * accessible.
206 * After copying the data up to the limit the user specifies, the true
207 * length of the data is written over the length limit the user
208 * specified. Zero is returned for a success.
209 */
89bddce5
SH
210
211int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
212 int __user *ulen)
1da177e4
LT
213{
214 int err;
215 int len;
216
89bddce5
SH
217 err = get_user(len, ulen);
218 if (err)
1da177e4 219 return err;
89bddce5
SH
220 if (len > klen)
221 len = klen;
222 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 223 return -EINVAL;
89bddce5 224 if (len) {
d6fe3945
SG
225 if (audit_sockaddr(klen, kaddr))
226 return -ENOMEM;
89bddce5 227 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
228 return -EFAULT;
229 }
230 /*
89bddce5
SH
231 * "fromlen shall refer to the value before truncation.."
232 * 1003.1g
1da177e4
LT
233 */
234 return __put_user(klen, ulen);
235}
236
237#define SOCKFS_MAGIC 0x534F434B
238
e18b890b 239static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
240
241static struct inode *sock_alloc_inode(struct super_block *sb)
242{
243 struct socket_alloc *ei;
89bddce5 244
e94b1766 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
246 if (!ei)
247 return NULL;
248 init_waitqueue_head(&ei->socket.wait);
89bddce5 249
1da177e4
LT
250 ei->socket.fasync_list = NULL;
251 ei->socket.state = SS_UNCONNECTED;
252 ei->socket.flags = 0;
253 ei->socket.ops = NULL;
254 ei->socket.sk = NULL;
255 ei->socket.file = NULL;
1da177e4
LT
256
257 return &ei->vfs_inode;
258}
259
260static void sock_destroy_inode(struct inode *inode)
261{
262 kmem_cache_free(sock_inode_cachep,
263 container_of(inode, struct socket_alloc, vfs_inode));
264}
265
4ba9b9d0 266static void init_once(struct kmem_cache *cachep, void *foo)
1da177e4 267{
89bddce5 268 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 269
a35afb83 270 inode_init_once(&ei->vfs_inode);
1da177e4 271}
89bddce5 272
1da177e4
LT
273static int init_inodecache(void)
274{
275 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
276 sizeof(struct socket_alloc),
277 0,
278 (SLAB_HWCACHE_ALIGN |
279 SLAB_RECLAIM_ACCOUNT |
280 SLAB_MEM_SPREAD),
20c2df83 281 init_once);
1da177e4
LT
282 if (sock_inode_cachep == NULL)
283 return -ENOMEM;
284 return 0;
285}
286
287static struct super_operations sockfs_ops = {
288 .alloc_inode = sock_alloc_inode,
289 .destroy_inode =sock_destroy_inode,
290 .statfs = simple_statfs,
291};
292
454e2398 293static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
294 int flags, const char *dev_name, void *data,
295 struct vfsmount *mnt)
1da177e4 296{
454e2398
DH
297 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
298 mnt);
1da177e4
LT
299}
300
ba89966c 301static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
302
303static struct file_system_type sock_fs_type = {
304 .name = "sockfs",
305 .get_sb = sockfs_get_sb,
306 .kill_sb = kill_anon_super,
307};
89bddce5 308
1da177e4
LT
309static int sockfs_delete_dentry(struct dentry *dentry)
310{
304e61e6
ED
311 /*
312 * At creation time, we pretended this dentry was hashed
313 * (by clearing DCACHE_UNHASHED bit in d_flags)
314 * At delete time, we restore the truth : not hashed.
315 * (so that dput() can proceed correctly)
316 */
317 dentry->d_flags |= DCACHE_UNHASHED;
318 return 0;
1da177e4 319}
c23fbb6b
ED
320
321/*
322 * sockfs_dname() is called from d_path().
323 */
324static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
325{
326 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
327 dentry->d_inode->i_ino);
328}
329
1da177e4 330static struct dentry_operations sockfs_dentry_operations = {
89bddce5 331 .d_delete = sockfs_delete_dentry,
c23fbb6b 332 .d_dname = sockfs_dname,
1da177e4
LT
333};
334
335/*
336 * Obtains the first available file descriptor and sets it up for use.
337 *
39d8c1b6
DM
338 * These functions create file structures and map them to fd space
339 * of the current process. On success it returns file descriptor
1da177e4
LT
340 * and file struct implicitly stored in sock->file.
341 * Note that another thread may close file descriptor before we return
342 * from this function. We use the fact that now we do not refer
343 * to socket after mapping. If one day we will need it, this
344 * function will increment ref. count on file by 1.
345 *
346 * In any case returned fd MAY BE not valid!
347 * This race condition is unavoidable
348 * with shared fd spaces, we cannot solve it inside kernel,
349 * but we take care of internal coherence yet.
350 */
351
39d8c1b6 352static int sock_alloc_fd(struct file **filep)
1da177e4
LT
353{
354 int fd;
1da177e4
LT
355
356 fd = get_unused_fd();
39d8c1b6 357 if (likely(fd >= 0)) {
1da177e4
LT
358 struct file *file = get_empty_filp();
359
39d8c1b6
DM
360 *filep = file;
361 if (unlikely(!file)) {
1da177e4 362 put_unused_fd(fd);
39d8c1b6 363 return -ENFILE;
1da177e4 364 }
39d8c1b6
DM
365 } else
366 *filep = NULL;
367 return fd;
368}
1da177e4 369
39d8c1b6
DM
370static int sock_attach_fd(struct socket *sock, struct file *file)
371{
ce8d2cdf 372 struct dentry *dentry;
c23fbb6b 373 struct qstr name = { .name = "" };
39d8c1b6 374
ce8d2cdf
DH
375 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
376 if (unlikely(!dentry))
39d8c1b6
DM
377 return -ENOMEM;
378
ce8d2cdf 379 dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
380 /*
381 * We dont want to push this dentry into global dentry hash table.
382 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
383 * This permits a working /proc/$pid/fd/XXX on sockets
384 */
ce8d2cdf
DH
385 dentry->d_flags &= ~DCACHE_UNHASHED;
386 d_instantiate(dentry, SOCK_INODE(sock));
39d8c1b6
DM
387
388 sock->file = file;
ce8d2cdf
DH
389 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
390 &socket_file_ops);
391 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6
DM
392 file->f_flags = O_RDWR;
393 file->f_pos = 0;
394 file->private_data = sock;
1da177e4 395
39d8c1b6
DM
396 return 0;
397}
398
/*
 * Allocate a descriptor and a file for @sock, attach them and install the
 * file in the descriptor table. Returns the new descriptor, or a negative
 * errno from sock_alloc_fd() or sock_attach_fd().
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int fd;
	int err;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		/* Undo both halves of sock_alloc_fd(). */
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
416
6cb153ca
BL
417static struct socket *sock_from_file(struct file *file, int *err)
418{
6cb153ca
BL
419 if (file->f_op == &socket_file_ops)
420 return file->private_data; /* set in sock_map_fd */
421
23bb80d2
ED
422 *err = -ENOTSOCK;
423 return NULL;
6cb153ca
BL
424}
425
1da177e4
LT
426/**
427 * sockfd_lookup - Go from a file number to its socket slot
428 * @fd: file handle
429 * @err: pointer to an error code return
430 *
431 * The file handle passed in is locked and the socket it is bound
432 * too is returned. If an error occurs the err pointer is overwritten
433 * with a negative errno code and NULL is returned. The function checks
434 * for both invalid handles and passing a handle which is not a socket.
435 *
436 * On a success the socket object pointer is returned.
437 */
438
439struct socket *sockfd_lookup(int fd, int *err)
440{
441 struct file *file;
1da177e4
LT
442 struct socket *sock;
443
89bddce5
SH
444 file = fget(fd);
445 if (!file) {
1da177e4
LT
446 *err = -EBADF;
447 return NULL;
448 }
89bddce5 449
6cb153ca
BL
450 sock = sock_from_file(file, err);
451 if (!sock)
1da177e4 452 fput(file);
6cb153ca
BL
453 return sock;
454}
1da177e4 455
6cb153ca
BL
456static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
457{
458 struct file *file;
459 struct socket *sock;
460
3672558c 461 *err = -EBADF;
6cb153ca
BL
462 file = fget_light(fd, fput_needed);
463 if (file) {
464 sock = sock_from_file(file, err);
465 if (sock)
466 return sock;
467 fput_light(file, *fput_needed);
1da177e4 468 }
6cb153ca 469 return NULL;
1da177e4
LT
470}
471
472/**
473 * sock_alloc - allocate a socket
89bddce5 474 *
1da177e4
LT
475 * Allocate a new inode and socket object. The two are bound together
476 * and initialised. The socket is then returned. If we are out of inodes
477 * NULL is returned.
478 */
479
480static struct socket *sock_alloc(void)
481{
89bddce5
SH
482 struct inode *inode;
483 struct socket *sock;
1da177e4
LT
484
485 inode = new_inode(sock_mnt->mnt_sb);
486 if (!inode)
487 return NULL;
488
489 sock = SOCKET_I(inode);
490
89bddce5 491 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
492 inode->i_uid = current->fsuid;
493 inode->i_gid = current->fsgid;
494
495 get_cpu_var(sockets_in_use)++;
496 put_cpu_var(sockets_in_use);
497 return sock;
498}
499
500/*
501 * In theory you can't get an open on this inode, but /proc provides
502 * a back door. Remember to keep it shut otherwise you'll let the
503 * creepy crawlies in.
504 */
89bddce5 505
1da177e4
LT
506static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
507{
508 return -ENXIO;
509}
510
4b6f5d20 511const struct file_operations bad_sock_fops = {
1da177e4
LT
512 .owner = THIS_MODULE,
513 .open = sock_no_open,
514};
515
516/**
517 * sock_release - close a socket
518 * @sock: socket to close
519 *
520 * The socket is released from the protocol stack if it has a release
521 * callback, and the inode is then released if the socket is bound to
89bddce5 522 * an inode not a file.
1da177e4 523 */
89bddce5 524
1da177e4
LT
525void sock_release(struct socket *sock)
526{
527 if (sock->ops) {
528 struct module *owner = sock->ops->owner;
529
530 sock->ops->release(sock);
531 sock->ops = NULL;
532 module_put(owner);
533 }
534
535 if (sock->fasync_list)
536 printk(KERN_ERR "sock_release: fasync list not empty!\n");
537
538 get_cpu_var(sockets_in_use)--;
539 put_cpu_var(sockets_in_use);
540 if (!sock->file) {
541 iput(SOCK_INODE(sock));
542 return;
543 }
89bddce5 544 sock->file = NULL;
1da177e4
LT
545}
546
89bddce5 547static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
548 struct msghdr *msg, size_t size)
549{
550 struct sock_iocb *si = kiocb_to_siocb(iocb);
551 int err;
552
553 si->sock = sock;
554 si->scm = NULL;
555 si->msg = msg;
556 si->size = size;
557
558 err = security_socket_sendmsg(sock, msg, size);
559 if (err)
560 return err;
561
562 return sock->ops->sendmsg(iocb, sock, msg, size);
563}
564
565int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
566{
567 struct kiocb iocb;
568 struct sock_iocb siocb;
569 int ret;
570
571 init_sync_kiocb(&iocb, NULL);
572 iocb.private = &siocb;
573 ret = __sock_sendmsg(&iocb, sock, msg, size);
574 if (-EIOCBQUEUED == ret)
575 ret = wait_on_sync_kiocb(&iocb);
576 return ret;
577}
578
579int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
580 struct kvec *vec, size_t num, size_t size)
581{
582 mm_segment_t oldfs = get_fs();
583 int result;
584
585 set_fs(KERNEL_DS);
586 /*
587 * the following is safe, since for compiler definitions of kvec and
588 * iovec are identical, yielding the same in-core layout and alignment
589 */
89bddce5 590 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
591 msg->msg_iovlen = num;
592 result = sock_sendmsg(sock, msg, size);
593 set_fs(oldfs);
594 return result;
595}
596
92f37fd2
ED
597/*
598 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
599 */
600void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
601 struct sk_buff *skb)
602{
603 ktime_t kt = skb->tstamp;
604
605 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
606 struct timeval tv;
607 /* Race occurred between timestamp enabling and packet
608 receiving. Fill in the current time for now. */
609 if (kt.tv64 == 0)
610 kt = ktime_get_real();
611 skb->tstamp = kt;
612 tv = ktime_to_timeval(kt);
613 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
614 } else {
615 struct timespec ts;
616 /* Race occurred between timestamp enabling and packet
617 receiving. Fill in the current time for now. */
618 if (kt.tv64 == 0)
619 kt = ktime_get_real();
620 skb->tstamp = kt;
621 ts = ktime_to_timespec(kt);
622 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
623 }
624}
625
7c81fd8b
ACM
626EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
627
89bddce5 628static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
629 struct msghdr *msg, size_t size, int flags)
630{
631 int err;
632 struct sock_iocb *si = kiocb_to_siocb(iocb);
633
634 si->sock = sock;
635 si->scm = NULL;
636 si->msg = msg;
637 si->size = size;
638 si->flags = flags;
639
640 err = security_socket_recvmsg(sock, msg, size, flags);
641 if (err)
642 return err;
643
644 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
645}
646
89bddce5 647int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
648 size_t size, int flags)
649{
650 struct kiocb iocb;
651 struct sock_iocb siocb;
652 int ret;
653
89bddce5 654 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
655 iocb.private = &siocb;
656 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
657 if (-EIOCBQUEUED == ret)
658 ret = wait_on_sync_kiocb(&iocb);
659 return ret;
660}
661
89bddce5
SH
662int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
663 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
664{
665 mm_segment_t oldfs = get_fs();
666 int result;
667
668 set_fs(KERNEL_DS);
669 /*
670 * the following is safe, since for compiler definitions of kvec and
671 * iovec are identical, yielding the same in-core layout and alignment
672 */
89bddce5 673 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
674 result = sock_recvmsg(sock, msg, size, flags);
675 set_fs(oldfs);
676 return result;
677}
678
679static void sock_aio_dtor(struct kiocb *iocb)
680{
681 kfree(iocb->private);
682}
683
ce1d4d3e
CH
684static ssize_t sock_sendpage(struct file *file, struct page *page,
685 int offset, size_t size, loff_t *ppos, int more)
1da177e4 686{
1da177e4
LT
687 struct socket *sock;
688 int flags;
689
ce1d4d3e
CH
690 sock = file->private_data;
691
692 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
693 if (more)
694 flags |= MSG_MORE;
695
696 return sock->ops->sendpage(sock, page, offset, size, flags);
697}
1da177e4 698
9c55e01c
JA
699static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
700 struct pipe_inode_info *pipe, size_t len,
701 unsigned int flags)
702{
703 struct socket *sock = file->private_data;
704
997b37da
RDC
705 if (unlikely(!sock->ops->splice_read))
706 return -EINVAL;
707
9c55e01c
JA
708 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
709}
710
ce1d4d3e 711static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 712 struct sock_iocb *siocb)
ce1d4d3e
CH
713{
714 if (!is_sync_kiocb(iocb)) {
715 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
716 if (!siocb)
717 return NULL;
1da177e4
LT
718 iocb->ki_dtor = sock_aio_dtor;
719 }
1da177e4 720
ce1d4d3e 721 siocb->kiocb = iocb;
ce1d4d3e
CH
722 iocb->private = siocb;
723 return siocb;
1da177e4
LT
724}
725
ce1d4d3e 726static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
727 struct file *file, const struct iovec *iov,
728 unsigned long nr_segs)
ce1d4d3e
CH
729{
730 struct socket *sock = file->private_data;
731 size_t size = 0;
732 int i;
1da177e4 733
89bddce5
SH
734 for (i = 0; i < nr_segs; i++)
735 size += iov[i].iov_len;
1da177e4 736
ce1d4d3e
CH
737 msg->msg_name = NULL;
738 msg->msg_namelen = 0;
739 msg->msg_control = NULL;
740 msg->msg_controllen = 0;
89bddce5 741 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
742 msg->msg_iovlen = nr_segs;
743 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
744
745 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
746}
747
027445c3
BP
748static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
749 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
750{
751 struct sock_iocb siocb, *x;
752
1da177e4
LT
753 if (pos != 0)
754 return -ESPIPE;
027445c3
BP
755
756 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
757 return 0;
758
027445c3
BP
759
760 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
761 if (!x)
762 return -ENOMEM;
027445c3 763 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
764}
765
ce1d4d3e 766static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
767 struct file *file, const struct iovec *iov,
768 unsigned long nr_segs)
1da177e4 769{
ce1d4d3e
CH
770 struct socket *sock = file->private_data;
771 size_t size = 0;
772 int i;
1da177e4 773
89bddce5
SH
774 for (i = 0; i < nr_segs; i++)
775 size += iov[i].iov_len;
1da177e4 776
ce1d4d3e
CH
777 msg->msg_name = NULL;
778 msg->msg_namelen = 0;
779 msg->msg_control = NULL;
780 msg->msg_controllen = 0;
89bddce5 781 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
782 msg->msg_iovlen = nr_segs;
783 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
784 if (sock->type == SOCK_SEQPACKET)
785 msg->msg_flags |= MSG_EOR;
1da177e4 786
ce1d4d3e 787 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
788}
789
027445c3
BP
790static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
791 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
792{
793 struct sock_iocb siocb, *x;
1da177e4 794
ce1d4d3e
CH
795 if (pos != 0)
796 return -ESPIPE;
027445c3 797
027445c3 798 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
799 if (!x)
800 return -ENOMEM;
1da177e4 801
027445c3 802 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
803}
804
1da177e4
LT
805/*
806 * Atomic setting of ioctl hooks to avoid race
807 * with module unload.
808 */
809
4a3e2f71 810static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 811static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 812
881d966b 813void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 814{
4a3e2f71 815 mutex_lock(&br_ioctl_mutex);
1da177e4 816 br_ioctl_hook = hook;
4a3e2f71 817 mutex_unlock(&br_ioctl_mutex);
1da177e4 818}
89bddce5 819
1da177e4
LT
820EXPORT_SYMBOL(brioctl_set);
821
4a3e2f71 822static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 823static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 824
881d966b 825void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 826{
4a3e2f71 827 mutex_lock(&vlan_ioctl_mutex);
1da177e4 828 vlan_ioctl_hook = hook;
4a3e2f71 829 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 830}
89bddce5 831
1da177e4
LT
832EXPORT_SYMBOL(vlan_ioctl_set);
833
4a3e2f71 834static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 835static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 836
89bddce5 837void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 838{
4a3e2f71 839 mutex_lock(&dlci_ioctl_mutex);
1da177e4 840 dlci_ioctl_hook = hook;
4a3e2f71 841 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 842}
89bddce5 843
1da177e4
LT
844EXPORT_SYMBOL(dlci_ioctl_set);
845
846/*
847 * With an ioctl, arg may well be a user mode pointer, but we don't know
848 * what to do with it - that's up to the protocol still.
849 */
850
851static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
852{
853 struct socket *sock;
881d966b 854 struct sock *sk;
1da177e4
LT
855 void __user *argp = (void __user *)arg;
856 int pid, err;
881d966b 857 struct net *net;
1da177e4 858
b69aee04 859 sock = file->private_data;
881d966b 860 sk = sock->sk;
3b1e0a65 861 net = sock_net(sk);
1da177e4 862 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 863 err = dev_ioctl(net, cmd, argp);
1da177e4 864 } else
d86b5e0e 865#ifdef CONFIG_WIRELESS_EXT
1da177e4 866 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 867 err = dev_ioctl(net, cmd, argp);
1da177e4 868 } else
89bddce5
SH
869#endif /* CONFIG_WIRELESS_EXT */
870 switch (cmd) {
1da177e4
LT
871 case FIOSETOWN:
872 case SIOCSPGRP:
873 err = -EFAULT;
874 if (get_user(pid, (int __user *)argp))
875 break;
876 err = f_setown(sock->file, pid, 1);
877 break;
878 case FIOGETOWN:
879 case SIOCGPGRP:
609d7fa9 880 err = put_user(f_getown(sock->file),
89bddce5 881 (int __user *)argp);
1da177e4
LT
882 break;
883 case SIOCGIFBR:
884 case SIOCSIFBR:
885 case SIOCBRADDBR:
886 case SIOCBRDELBR:
887 err = -ENOPKG;
888 if (!br_ioctl_hook)
889 request_module("bridge");
890
4a3e2f71 891 mutex_lock(&br_ioctl_mutex);
89bddce5 892 if (br_ioctl_hook)
881d966b 893 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 894 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
895 break;
896 case SIOCGIFVLAN:
897 case SIOCSIFVLAN:
898 err = -ENOPKG;
899 if (!vlan_ioctl_hook)
900 request_module("8021q");
901
4a3e2f71 902 mutex_lock(&vlan_ioctl_mutex);
1da177e4 903 if (vlan_ioctl_hook)
881d966b 904 err = vlan_ioctl_hook(net, argp);
4a3e2f71 905 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 906 break;
1da177e4
LT
907 case SIOCADDDLCI:
908 case SIOCDELDLCI:
909 err = -ENOPKG;
910 if (!dlci_ioctl_hook)
911 request_module("dlci");
912
7512cbf6
PE
913 mutex_lock(&dlci_ioctl_mutex);
914 if (dlci_ioctl_hook)
1da177e4 915 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 916 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
917 break;
918 default:
919 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
920
921 /*
922 * If this ioctl is unknown try to hand it down
923 * to the NIC driver.
924 */
925 if (err == -ENOIOCTLCMD)
881d966b 926 err = dev_ioctl(net, cmd, argp);
1da177e4 927 break;
89bddce5 928 }
1da177e4
LT
929 return err;
930}
931
932int sock_create_lite(int family, int type, int protocol, struct socket **res)
933{
934 int err;
935 struct socket *sock = NULL;
89bddce5 936
1da177e4
LT
937 err = security_socket_create(family, type, protocol, 1);
938 if (err)
939 goto out;
940
941 sock = sock_alloc();
942 if (!sock) {
943 err = -ENOMEM;
944 goto out;
945 }
946
1da177e4 947 sock->type = type;
7420ed23
VY
948 err = security_socket_post_create(sock, family, type, protocol, 1);
949 if (err)
950 goto out_release;
951
1da177e4
LT
952out:
953 *res = sock;
954 return err;
7420ed23
VY
955out_release:
956 sock_release(sock);
957 sock = NULL;
958 goto out;
1da177e4
LT
959}
960
961/* No kernel lock held - perfect */
89bddce5 962static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
963{
964 struct socket *sock;
965
966 /*
89bddce5 967 * We can't return errors to poll, so it's either yes or no.
1da177e4 968 */
b69aee04 969 sock = file->private_data;
1da177e4
LT
970 return sock->ops->poll(file, sock, wait);
971}
972
89bddce5 973static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 974{
b69aee04 975 struct socket *sock = file->private_data;
1da177e4
LT
976
977 return sock->ops->mmap(file, sock, vma);
978}
979
20380731 980static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
981{
982 /*
89bddce5
SH
983 * It was possible the inode is NULL we were
984 * closing an unfinished socket.
1da177e4
LT
985 */
986
89bddce5 987 if (!inode) {
1da177e4
LT
988 printk(KERN_DEBUG "sock_close: NULL inode\n");
989 return 0;
990 }
991 sock_fasync(-1, filp, 0);
992 sock_release(SOCKET_I(inode));
993 return 0;
994}
995
996/*
997 * Update the socket async list
998 *
999 * Fasync_list locking strategy.
1000 *
1001 * 1. fasync_list is modified only under process context socket lock
1002 * i.e. under semaphore.
1003 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1004 * or under socket lock.
1005 * 3. fasync_list can be used from softirq context, so that
1006 * modification under socket lock have to be enhanced with
1007 * write_lock_bh(&sk->sk_callback_lock).
1008 * --ANK (990710)
1009 */
1010
1011static int sock_fasync(int fd, struct file *filp, int on)
1012{
89bddce5 1013 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1014 struct socket *sock;
1015 struct sock *sk;
1016
89bddce5 1017 if (on) {
8b3a7005 1018 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1019 if (fna == NULL)
1da177e4
LT
1020 return -ENOMEM;
1021 }
1022
b69aee04 1023 sock = filp->private_data;
1da177e4 1024
89bddce5
SH
1025 sk = sock->sk;
1026 if (sk == NULL) {
1da177e4
LT
1027 kfree(fna);
1028 return -EINVAL;
1029 }
1030
1031 lock_sock(sk);
1032
89bddce5 1033 prev = &(sock->fasync_list);
1da177e4 1034
89bddce5
SH
1035 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1036 if (fa->fa_file == filp)
1da177e4
LT
1037 break;
1038
89bddce5
SH
1039 if (on) {
1040 if (fa != NULL) {
1da177e4 1041 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1042 fa->fa_fd = fd;
1da177e4
LT
1043 write_unlock_bh(&sk->sk_callback_lock);
1044
1045 kfree(fna);
1046 goto out;
1047 }
89bddce5
SH
1048 fna->fa_file = filp;
1049 fna->fa_fd = fd;
1050 fna->magic = FASYNC_MAGIC;
1051 fna->fa_next = sock->fasync_list;
1da177e4 1052 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1053 sock->fasync_list = fna;
1da177e4 1054 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1055 } else {
1056 if (fa != NULL) {
1da177e4 1057 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1058 *prev = fa->fa_next;
1da177e4
LT
1059 write_unlock_bh(&sk->sk_callback_lock);
1060 kfree(fa);
1061 }
1062 }
1063
1064out:
1065 release_sock(sock->sk);
1066 return 0;
1067}
1068
1069/* This function may be called only under socket lock or callback_lock */
1070
1071int sock_wake_async(struct socket *sock, int how, int band)
1072{
1073 if (!sock || !sock->fasync_list)
1074 return -1;
89bddce5 1075 switch (how) {
8d8ad9d7 1076 case SOCK_WAKE_WAITD:
1da177e4
LT
1077 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1078 break;
1079 goto call_kill;
8d8ad9d7 1080 case SOCK_WAKE_SPACE:
1da177e4
LT
1081 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1082 break;
1083 /* fall through */
8d8ad9d7 1084 case SOCK_WAKE_IO:
89bddce5 1085call_kill:
1da177e4
LT
1086 __kill_fasync(sock->fasync_list, SIGIO, band);
1087 break;
8d8ad9d7 1088 case SOCK_WAKE_URG:
1da177e4
LT
1089 __kill_fasync(sock->fasync_list, SIGURG, band);
1090 }
1091 return 0;
1092}
1093
1b8d7ae4 1094static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1095 struct socket **res, int kern)
1da177e4
LT
1096{
1097 int err;
1098 struct socket *sock;
55737fda 1099 const struct net_proto_family *pf;
1da177e4
LT
1100
1101 /*
89bddce5 1102 * Check protocol is in range
1da177e4
LT
1103 */
1104 if (family < 0 || family >= NPROTO)
1105 return -EAFNOSUPPORT;
1106 if (type < 0 || type >= SOCK_MAX)
1107 return -EINVAL;
1108
1109 /* Compatibility.
1110
1111 This uglymoron is moved from INET layer to here to avoid
1112 deadlock in module load.
1113 */
1114 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1115 static int warned;
1da177e4
LT
1116 if (!warned) {
1117 warned = 1;
89bddce5
SH
1118 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1119 current->comm);
1da177e4
LT
1120 }
1121 family = PF_PACKET;
1122 }
1123
1124 err = security_socket_create(family, type, protocol, kern);
1125 if (err)
1126 return err;
89bddce5 1127
55737fda
SH
1128 /*
1129 * Allocate the socket and allow the family to set things up. if
1130 * the protocol is 0, the family is instructed to select an appropriate
1131 * default.
1132 */
1133 sock = sock_alloc();
1134 if (!sock) {
1135 if (net_ratelimit())
1136 printk(KERN_WARNING "socket: no more sockets\n");
1137 return -ENFILE; /* Not exactly a match, but its the
1138 closest posix thing */
1139 }
1140
1141 sock->type = type;
1142
1da177e4 1143#if defined(CONFIG_KMOD)
89bddce5
SH
1144 /* Attempt to load a protocol module if the find failed.
1145 *
1146 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1147 * requested real, full-featured networking support upon configuration.
1148 * Otherwise module support will break!
1149 */
55737fda 1150 if (net_families[family] == NULL)
89bddce5 1151 request_module("net-pf-%d", family);
1da177e4
LT
1152#endif
1153
55737fda
SH
1154 rcu_read_lock();
1155 pf = rcu_dereference(net_families[family]);
1156 err = -EAFNOSUPPORT;
1157 if (!pf)
1158 goto out_release;
1da177e4
LT
1159
1160 /*
1161 * We will call the ->create function, that possibly is in a loadable
1162 * module, so we have to bump that loadable module refcnt first.
1163 */
55737fda 1164 if (!try_module_get(pf->owner))
1da177e4
LT
1165 goto out_release;
1166
55737fda
SH
1167 /* Now protected by module ref count */
1168 rcu_read_unlock();
1169
1b8d7ae4 1170 err = pf->create(net, sock, protocol);
55737fda 1171 if (err < 0)
1da177e4 1172 goto out_module_put;
a79af59e 1173
1da177e4
LT
1174 /*
1175 * Now to bump the refcnt of the [loadable] module that owns this
1176 * socket at sock_release time we decrement its refcnt.
1177 */
55737fda
SH
1178 if (!try_module_get(sock->ops->owner))
1179 goto out_module_busy;
1180
1da177e4
LT
1181 /*
1182 * Now that we're done with the ->create function, the [loadable]
1183 * module can have its refcnt decremented
1184 */
55737fda 1185 module_put(pf->owner);
7420ed23
VY
1186 err = security_socket_post_create(sock, family, type, protocol, kern);
1187 if (err)
3b185525 1188 goto out_sock_release;
55737fda 1189 *res = sock;
1da177e4 1190
55737fda
SH
1191 return 0;
1192
1193out_module_busy:
1194 err = -EAFNOSUPPORT;
1da177e4 1195out_module_put:
55737fda
SH
1196 sock->ops = NULL;
1197 module_put(pf->owner);
1198out_sock_release:
1da177e4 1199 sock_release(sock);
55737fda
SH
1200 return err;
1201
1202out_release:
1203 rcu_read_unlock();
1204 goto out_sock_release;
1da177e4
LT
1205}
1206
1207int sock_create(int family, int type, int protocol, struct socket **res)
1208{
1b8d7ae4 1209 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1210}
1211
1212int sock_create_kern(int family, int type, int protocol, struct socket **res)
1213{
1b8d7ae4 1214 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1215}
1216
1217asmlinkage long sys_socket(int family, int type, int protocol)
1218{
1219 int retval;
1220 struct socket *sock;
1221
1222 retval = sock_create(family, type, protocol, &sock);
1223 if (retval < 0)
1224 goto out;
1225
1226 retval = sock_map_fd(sock);
1227 if (retval < 0)
1228 goto out_release;
1229
1230out:
1231 /* It may be already another descriptor 8) Not kernel problem. */
1232 return retval;
1233
1234out_release:
1235 sock_release(sock);
1236 return retval;
1237}
1238
1239/*
1240 * Create a pair of connected sockets.
1241 */
1242
89bddce5
SH
1243asmlinkage long sys_socketpair(int family, int type, int protocol,
1244 int __user *usockvec)
1da177e4
LT
1245{
1246 struct socket *sock1, *sock2;
1247 int fd1, fd2, err;
db349509 1248 struct file *newfile1, *newfile2;
1da177e4
LT
1249
1250 /*
1251 * Obtain the first socket and check if the underlying protocol
1252 * supports the socketpair call.
1253 */
1254
1255 err = sock_create(family, type, protocol, &sock1);
1256 if (err < 0)
1257 goto out;
1258
1259 err = sock_create(family, type, protocol, &sock2);
1260 if (err < 0)
1261 goto out_release_1;
1262
1263 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1264 if (err < 0)
1da177e4
LT
1265 goto out_release_both;
1266
db349509 1267 fd1 = sock_alloc_fd(&newfile1);
bf3c23d1
DM
1268 if (unlikely(fd1 < 0)) {
1269 err = fd1;
db349509 1270 goto out_release_both;
bf3c23d1 1271 }
1da177e4 1272
db349509
AV
1273 fd2 = sock_alloc_fd(&newfile2);
1274 if (unlikely(fd2 < 0)) {
bf3c23d1 1275 err = fd2;
db349509
AV
1276 put_filp(newfile1);
1277 put_unused_fd(fd1);
1da177e4 1278 goto out_release_both;
db349509 1279 }
1da177e4 1280
db349509
AV
1281 err = sock_attach_fd(sock1, newfile1);
1282 if (unlikely(err < 0)) {
1283 goto out_fd2;
1284 }
1285
1286 err = sock_attach_fd(sock2, newfile2);
1287 if (unlikely(err < 0)) {
1288 fput(newfile1);
1289 goto out_fd1;
1290 }
1291
1292 err = audit_fd_pair(fd1, fd2);
1293 if (err < 0) {
1294 fput(newfile1);
1295 fput(newfile2);
1296 goto out_fd;
1297 }
1da177e4 1298
db349509
AV
1299 fd_install(fd1, newfile1);
1300 fd_install(fd2, newfile2);
1da177e4
LT
1301 /* fd1 and fd2 may be already another descriptors.
1302 * Not kernel problem.
1303 */
1304
89bddce5 1305 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1306 if (!err)
1307 err = put_user(fd2, &usockvec[1]);
1308 if (!err)
1309 return 0;
1310
1311 sys_close(fd2);
1312 sys_close(fd1);
1313 return err;
1314
1da177e4 1315out_release_both:
89bddce5 1316 sock_release(sock2);
1da177e4 1317out_release_1:
89bddce5 1318 sock_release(sock1);
1da177e4
LT
1319out:
1320 return err;
db349509
AV
1321
1322out_fd2:
1323 put_filp(newfile1);
1324 sock_release(sock1);
1325out_fd1:
1326 put_filp(newfile2);
1327 sock_release(sock2);
1328out_fd:
1329 put_unused_fd(fd1);
1330 put_unused_fd(fd2);
1331 goto out;
1da177e4
LT
1332}
1333
1da177e4
LT
1334/*
1335 * Bind a name to a socket. Nothing much to do here since it's
1336 * the protocol's responsibility to handle the local address.
1337 *
1338 * We move the socket address to kernel space before we call
1339 * the protocol layer (having also checked the address is ok).
1340 */
1341
1342asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1343{
1344 struct socket *sock;
1345 char address[MAX_SOCK_ADDR];
6cb153ca 1346 int err, fput_needed;
1da177e4 1347
89bddce5 1348 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1349 if (sock) {
89bddce5
SH
1350 err = move_addr_to_kernel(umyaddr, addrlen, address);
1351 if (err >= 0) {
1352 err = security_socket_bind(sock,
1353 (struct sockaddr *)address,
1354 addrlen);
6cb153ca
BL
1355 if (!err)
1356 err = sock->ops->bind(sock,
89bddce5
SH
1357 (struct sockaddr *)
1358 address, addrlen);
1da177e4 1359 }
6cb153ca 1360 fput_light(sock->file, fput_needed);
89bddce5 1361 }
1da177e4
LT
1362 return err;
1363}
1364
1da177e4
LT
1365/*
1366 * Perform a listen. Basically, we allow the protocol to do anything
1367 * necessary for a listen, and if that works, we mark the socket as
1368 * ready for listening.
1369 */
1370
1da177e4
LT
1371asmlinkage long sys_listen(int fd, int backlog)
1372{
1373 struct socket *sock;
6cb153ca 1374 int err, fput_needed;
b8e1f9b5 1375 int somaxconn;
89bddce5
SH
1376
1377 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1378 if (sock) {
8efa6e93 1379 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
b8e1f9b5
PE
1380 if ((unsigned)backlog > somaxconn)
1381 backlog = somaxconn;
1da177e4
LT
1382
1383 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1384 if (!err)
1385 err = sock->ops->listen(sock, backlog);
1da177e4 1386
6cb153ca 1387 fput_light(sock->file, fput_needed);
1da177e4
LT
1388 }
1389 return err;
1390}
1391
1da177e4
LT
1392/*
1393 * For accept, we attempt to create a new socket, set up the link
1394 * with the client, wake up the client, then return the new
1395 * connected fd. We collect the address of the connector in kernel
1396 * space and move it to user at the very end. This is unclean because
1397 * we open the socket then return an error.
1398 *
1399 * 1003.1g adds the ability to recvmsg() to query connection pending
1400 * status to recvmsg. We need to add that support in a way thats
1401 * clean when we restucture accept also.
1402 */
1403
89bddce5
SH
1404asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1405 int __user *upeer_addrlen)
1da177e4
LT
1406{
1407 struct socket *sock, *newsock;
39d8c1b6 1408 struct file *newfile;
6cb153ca 1409 int err, len, newfd, fput_needed;
1da177e4
LT
1410 char address[MAX_SOCK_ADDR];
1411
6cb153ca 1412 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1413 if (!sock)
1414 goto out;
1415
1416 err = -ENFILE;
89bddce5 1417 if (!(newsock = sock_alloc()))
1da177e4
LT
1418 goto out_put;
1419
1420 newsock->type = sock->type;
1421 newsock->ops = sock->ops;
1422
1da177e4
LT
1423 /*
1424 * We don't need try_module_get here, as the listening socket (sock)
1425 * has the protocol module (sock->ops->owner) held.
1426 */
1427 __module_get(newsock->ops->owner);
1428
39d8c1b6
DM
1429 newfd = sock_alloc_fd(&newfile);
1430 if (unlikely(newfd < 0)) {
1431 err = newfd;
9a1875e6
DM
1432 sock_release(newsock);
1433 goto out_put;
39d8c1b6
DM
1434 }
1435
1436 err = sock_attach_fd(newsock, newfile);
1437 if (err < 0)
79f4f642 1438 goto out_fd_simple;
39d8c1b6 1439
a79af59e
FF
1440 err = security_socket_accept(sock, newsock);
1441 if (err)
39d8c1b6 1442 goto out_fd;
a79af59e 1443
1da177e4
LT
1444 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1445 if (err < 0)
39d8c1b6 1446 goto out_fd;
1da177e4
LT
1447
1448 if (upeer_sockaddr) {
89bddce5
SH
1449 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1450 &len, 2) < 0) {
1da177e4 1451 err = -ECONNABORTED;
39d8c1b6 1452 goto out_fd;
1da177e4 1453 }
89bddce5
SH
1454 err = move_addr_to_user(address, len, upeer_sockaddr,
1455 upeer_addrlen);
1da177e4 1456 if (err < 0)
39d8c1b6 1457 goto out_fd;
1da177e4
LT
1458 }
1459
1460 /* File flags are not inherited via accept() unlike another OSes. */
1461
39d8c1b6
DM
1462 fd_install(newfd, newfile);
1463 err = newfd;
1da177e4
LT
1464
1465 security_socket_post_accept(sock, newsock);
1466
1467out_put:
6cb153ca 1468 fput_light(sock->file, fput_needed);
1da177e4
LT
1469out:
1470 return err;
79f4f642
AD
1471out_fd_simple:
1472 sock_release(newsock);
1473 put_filp(newfile);
1474 put_unused_fd(newfd);
1475 goto out_put;
39d8c1b6 1476out_fd:
9606a216 1477 fput(newfile);
39d8c1b6 1478 put_unused_fd(newfd);
1da177e4
LT
1479 goto out_put;
1480}
1481
1da177e4
LT
1482/*
1483 * Attempt to connect to a socket with the server address. The address
1484 * is in user space so we verify it is OK and move it to kernel space.
1485 *
1486 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1487 * break bindings
1488 *
1489 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1490 * other SEQPACKET protocols that take time to connect() as it doesn't
1491 * include the -EINPROGRESS status for such sockets.
1492 */
1493
89bddce5
SH
1494asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1495 int addrlen)
1da177e4
LT
1496{
1497 struct socket *sock;
1498 char address[MAX_SOCK_ADDR];
6cb153ca 1499 int err, fput_needed;
1da177e4 1500
6cb153ca 1501 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1502 if (!sock)
1503 goto out;
1504 err = move_addr_to_kernel(uservaddr, addrlen, address);
1505 if (err < 0)
1506 goto out_put;
1507
89bddce5
SH
1508 err =
1509 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1510 if (err)
1511 goto out_put;
1512
89bddce5 1513 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1514 sock->file->f_flags);
1515out_put:
6cb153ca 1516 fput_light(sock->file, fput_needed);
1da177e4
LT
1517out:
1518 return err;
1519}
1520
1521/*
1522 * Get the local address ('name') of a socket object. Move the obtained
1523 * name to user space.
1524 */
1525
89bddce5
SH
1526asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1527 int __user *usockaddr_len)
1da177e4
LT
1528{
1529 struct socket *sock;
1530 char address[MAX_SOCK_ADDR];
6cb153ca 1531 int len, err, fput_needed;
89bddce5 1532
6cb153ca 1533 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1534 if (!sock)
1535 goto out;
1536
1537 err = security_socket_getsockname(sock);
1538 if (err)
1539 goto out_put;
1540
1541 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1542 if (err)
1543 goto out_put;
1544 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1545
1546out_put:
6cb153ca 1547 fput_light(sock->file, fput_needed);
1da177e4
LT
1548out:
1549 return err;
1550}
1551
1552/*
1553 * Get the remote address ('name') of a socket object. Move the obtained
1554 * name to user space.
1555 */
1556
89bddce5
SH
1557asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1558 int __user *usockaddr_len)
1da177e4
LT
1559{
1560 struct socket *sock;
1561 char address[MAX_SOCK_ADDR];
6cb153ca 1562 int len, err, fput_needed;
1da177e4 1563
89bddce5
SH
1564 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1565 if (sock != NULL) {
1da177e4
LT
1566 err = security_socket_getpeername(sock);
1567 if (err) {
6cb153ca 1568 fput_light(sock->file, fput_needed);
1da177e4
LT
1569 return err;
1570 }
1571
89bddce5
SH
1572 err =
1573 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1574 1);
1da177e4 1575 if (!err)
89bddce5
SH
1576 err = move_addr_to_user(address, len, usockaddr,
1577 usockaddr_len);
6cb153ca 1578 fput_light(sock->file, fput_needed);
1da177e4
LT
1579 }
1580 return err;
1581}
1582
1583/*
1584 * Send a datagram to a given address. We move the address into kernel
1585 * space and check the user space data area is readable before invoking
1586 * the protocol.
1587 */
1588
89bddce5
SH
1589asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1590 unsigned flags, struct sockaddr __user *addr,
1591 int addr_len)
1da177e4
LT
1592{
1593 struct socket *sock;
1594 char address[MAX_SOCK_ADDR];
1595 int err;
1596 struct msghdr msg;
1597 struct iovec iov;
6cb153ca 1598 int fput_needed;
6cb153ca 1599
de0fa95c
PE
1600 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1601 if (!sock)
4387ff75 1602 goto out;
6cb153ca 1603
89bddce5
SH
1604 iov.iov_base = buff;
1605 iov.iov_len = len;
1606 msg.msg_name = NULL;
1607 msg.msg_iov = &iov;
1608 msg.msg_iovlen = 1;
1609 msg.msg_control = NULL;
1610 msg.msg_controllen = 0;
1611 msg.msg_namelen = 0;
6cb153ca 1612 if (addr) {
1da177e4
LT
1613 err = move_addr_to_kernel(addr, addr_len, address);
1614 if (err < 0)
1615 goto out_put;
89bddce5
SH
1616 msg.msg_name = address;
1617 msg.msg_namelen = addr_len;
1da177e4
LT
1618 }
1619 if (sock->file->f_flags & O_NONBLOCK)
1620 flags |= MSG_DONTWAIT;
1621 msg.msg_flags = flags;
1622 err = sock_sendmsg(sock, &msg, len);
1623
89bddce5 1624out_put:
de0fa95c 1625 fput_light(sock->file, fput_needed);
4387ff75 1626out:
1da177e4
LT
1627 return err;
1628}
1629
1630/*
89bddce5 1631 * Send a datagram down a socket.
1da177e4
LT
1632 */
1633
89bddce5 1634asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1635{
1636 return sys_sendto(fd, buff, len, flags, NULL, 0);
1637}
1638
1639/*
89bddce5 1640 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1641 * sender. We verify the buffers are writable and if needed move the
1642 * sender address from kernel to user space.
1643 */
1644
89bddce5
SH
1645asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1646 unsigned flags, struct sockaddr __user *addr,
1647 int __user *addr_len)
1da177e4
LT
1648{
1649 struct socket *sock;
1650 struct iovec iov;
1651 struct msghdr msg;
1652 char address[MAX_SOCK_ADDR];
89bddce5 1653 int err, err2;
6cb153ca
BL
1654 int fput_needed;
1655
de0fa95c 1656 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1657 if (!sock)
de0fa95c 1658 goto out;
1da177e4 1659
89bddce5
SH
1660 msg.msg_control = NULL;
1661 msg.msg_controllen = 0;
1662 msg.msg_iovlen = 1;
1663 msg.msg_iov = &iov;
1664 iov.iov_len = size;
1665 iov.iov_base = ubuf;
1666 msg.msg_name = address;
1667 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1668 if (sock->file->f_flags & O_NONBLOCK)
1669 flags |= MSG_DONTWAIT;
89bddce5 1670 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1671
89bddce5
SH
1672 if (err >= 0 && addr != NULL) {
1673 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1674 if (err2 < 0)
1675 err = err2;
1da177e4 1676 }
de0fa95c
PE
1677
1678 fput_light(sock->file, fput_needed);
4387ff75 1679out:
1da177e4
LT
1680 return err;
1681}
1682
1683/*
89bddce5 1684 * Receive a datagram from a socket.
1da177e4
LT
1685 */
1686
89bddce5
SH
1687asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1688 unsigned flags)
1da177e4
LT
1689{
1690 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1691}
1692
1693/*
1694 * Set a socket option. Because we don't know the option lengths we have
1695 * to pass the user mode parameter for the protocols to sort out.
1696 */
1697
89bddce5
SH
1698asmlinkage long sys_setsockopt(int fd, int level, int optname,
1699 char __user *optval, int optlen)
1da177e4 1700{
6cb153ca 1701 int err, fput_needed;
1da177e4
LT
1702 struct socket *sock;
1703
1704 if (optlen < 0)
1705 return -EINVAL;
89bddce5
SH
1706
1707 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1708 if (sock != NULL) {
1709 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1710 if (err)
1711 goto out_put;
1da177e4
LT
1712
1713 if (level == SOL_SOCKET)
89bddce5
SH
1714 err =
1715 sock_setsockopt(sock, level, optname, optval,
1716 optlen);
1da177e4 1717 else
89bddce5
SH
1718 err =
1719 sock->ops->setsockopt(sock, level, optname, optval,
1720 optlen);
6cb153ca
BL
1721out_put:
1722 fput_light(sock->file, fput_needed);
1da177e4
LT
1723 }
1724 return err;
1725}
1726
1727/*
1728 * Get a socket option. Because we don't know the option lengths we have
1729 * to pass a user mode parameter for the protocols to sort out.
1730 */
1731
89bddce5
SH
1732asmlinkage long sys_getsockopt(int fd, int level, int optname,
1733 char __user *optval, int __user *optlen)
1da177e4 1734{
6cb153ca 1735 int err, fput_needed;
1da177e4
LT
1736 struct socket *sock;
1737
89bddce5
SH
1738 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1739 if (sock != NULL) {
6cb153ca
BL
1740 err = security_socket_getsockopt(sock, level, optname);
1741 if (err)
1742 goto out_put;
1da177e4
LT
1743
1744 if (level == SOL_SOCKET)
89bddce5
SH
1745 err =
1746 sock_getsockopt(sock, level, optname, optval,
1747 optlen);
1da177e4 1748 else
89bddce5
SH
1749 err =
1750 sock->ops->getsockopt(sock, level, optname, optval,
1751 optlen);
6cb153ca
BL
1752out_put:
1753 fput_light(sock->file, fput_needed);
1da177e4
LT
1754 }
1755 return err;
1756}
1757
1da177e4
LT
1758/*
1759 * Shutdown a socket.
1760 */
1761
1762asmlinkage long sys_shutdown(int fd, int how)
1763{
6cb153ca 1764 int err, fput_needed;
1da177e4
LT
1765 struct socket *sock;
1766
89bddce5
SH
1767 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1768 if (sock != NULL) {
1da177e4 1769 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1770 if (!err)
1771 err = sock->ops->shutdown(sock, how);
1772 fput_light(sock->file, fput_needed);
1da177e4
LT
1773 }
1774 return err;
1775}
1776
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * They must be expanded where a local 'flags' and a '<msg>_compat'
 * pointer are in scope: MSG_CMSG_COMPAT in 'flags' selects the member of
 * the compat structure, otherwise the member of the native one is used.
 */
#define COMPAT_MSG(msg, member)	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1783
1da177e4
LT
1784/*
1785 * BSD sendmsg interface
1786 */
1787
1788asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1789{
89bddce5
SH
1790 struct compat_msghdr __user *msg_compat =
1791 (struct compat_msghdr __user *)msg;
1da177e4
LT
1792 struct socket *sock;
1793 char address[MAX_SOCK_ADDR];
1794 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1795 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1796 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1797 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1798 unsigned char *ctl_buf = ctl;
1799 struct msghdr msg_sys;
1800 int err, ctl_len, iov_size, total_len;
6cb153ca 1801 int fput_needed;
89bddce5 1802
1da177e4
LT
1803 err = -EFAULT;
1804 if (MSG_CMSG_COMPAT & flags) {
1805 if (get_compat_msghdr(&msg_sys, msg_compat))
1806 return -EFAULT;
89bddce5
SH
1807 }
1808 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1809 return -EFAULT;
1810
6cb153ca 1811 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1812 if (!sock)
1da177e4
LT
1813 goto out;
1814
1815 /* do not move before msg_sys is valid */
1816 err = -EMSGSIZE;
1817 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1818 goto out_put;
1819
89bddce5 1820 /* Check whether to allocate the iovec area */
1da177e4
LT
1821 err = -ENOMEM;
1822 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1823 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1824 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1825 if (!iov)
1826 goto out_put;
1827 }
1828
1829 /* This will also move the address data into kernel space */
1830 if (MSG_CMSG_COMPAT & flags) {
1831 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1832 } else
1833 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1834 if (err < 0)
1da177e4
LT
1835 goto out_freeiov;
1836 total_len = err;
1837
1838 err = -ENOBUFS;
1839
1840 if (msg_sys.msg_controllen > INT_MAX)
1841 goto out_freeiov;
89bddce5 1842 ctl_len = msg_sys.msg_controllen;
1da177e4 1843 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1844 err =
1845 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1846 sizeof(ctl));
1da177e4
LT
1847 if (err)
1848 goto out_freeiov;
1849 ctl_buf = msg_sys.msg_control;
8920e8f9 1850 ctl_len = msg_sys.msg_controllen;
1da177e4 1851 } else if (ctl_len) {
89bddce5 1852 if (ctl_len > sizeof(ctl)) {
1da177e4 1853 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1854 if (ctl_buf == NULL)
1da177e4
LT
1855 goto out_freeiov;
1856 }
1857 err = -EFAULT;
1858 /*
1859 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1860 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1861 * checking falls down on this.
1862 */
89bddce5
SH
1863 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1864 ctl_len))
1da177e4
LT
1865 goto out_freectl;
1866 msg_sys.msg_control = ctl_buf;
1867 }
1868 msg_sys.msg_flags = flags;
1869
1870 if (sock->file->f_flags & O_NONBLOCK)
1871 msg_sys.msg_flags |= MSG_DONTWAIT;
1872 err = sock_sendmsg(sock, &msg_sys, total_len);
1873
1874out_freectl:
89bddce5 1875 if (ctl_buf != ctl)
1da177e4
LT
1876 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1877out_freeiov:
1878 if (iov != iovstack)
1879 sock_kfree_s(sock->sk, iov, iov_size);
1880out_put:
6cb153ca 1881 fput_light(sock->file, fput_needed);
89bddce5 1882out:
1da177e4
LT
1883 return err;
1884}
1885
1886/*
1887 * BSD recvmsg interface
1888 */
1889
89bddce5
SH
1890asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1891 unsigned int flags)
1da177e4 1892{
89bddce5
SH
1893 struct compat_msghdr __user *msg_compat =
1894 (struct compat_msghdr __user *)msg;
1da177e4
LT
1895 struct socket *sock;
1896 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1897 struct iovec *iov = iovstack;
1da177e4
LT
1898 struct msghdr msg_sys;
1899 unsigned long cmsg_ptr;
1900 int err, iov_size, total_len, len;
6cb153ca 1901 int fput_needed;
1da177e4
LT
1902
1903 /* kernel mode address */
1904 char addr[MAX_SOCK_ADDR];
1905
1906 /* user mode address pointers */
1907 struct sockaddr __user *uaddr;
1908 int __user *uaddr_len;
89bddce5 1909
1da177e4
LT
1910 if (MSG_CMSG_COMPAT & flags) {
1911 if (get_compat_msghdr(&msg_sys, msg_compat))
1912 return -EFAULT;
89bddce5
SH
1913 }
1914 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1915 return -EFAULT;
1da177e4 1916
6cb153ca 1917 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1918 if (!sock)
1919 goto out;
1920
1921 err = -EMSGSIZE;
1922 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1923 goto out_put;
89bddce5
SH
1924
1925 /* Check whether to allocate the iovec area */
1da177e4
LT
1926 err = -ENOMEM;
1927 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1928 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1929 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1930 if (!iov)
1931 goto out_put;
1932 }
1933
1934 /*
89bddce5
SH
1935 * Save the user-mode address (verify_iovec will change the
1936 * kernel msghdr to use the kernel address space)
1da177e4 1937 */
89bddce5 1938
cfcabdcc 1939 uaddr = (__force void __user *)msg_sys.msg_name;
1da177e4
LT
1940 uaddr_len = COMPAT_NAMELEN(msg);
1941 if (MSG_CMSG_COMPAT & flags) {
1942 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1943 } else
1944 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1945 if (err < 0)
1946 goto out_freeiov;
89bddce5 1947 total_len = err;
1da177e4
LT
1948
1949 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 1950 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 1951
1da177e4
LT
1952 if (sock->file->f_flags & O_NONBLOCK)
1953 flags |= MSG_DONTWAIT;
1954 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1955 if (err < 0)
1956 goto out_freeiov;
1957 len = err;
1958
1959 if (uaddr != NULL) {
89bddce5
SH
1960 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1961 uaddr_len);
1da177e4
LT
1962 if (err < 0)
1963 goto out_freeiov;
1964 }
37f7f421
DM
1965 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1966 COMPAT_FLAGS(msg));
1da177e4
LT
1967 if (err)
1968 goto out_freeiov;
1969 if (MSG_CMSG_COMPAT & flags)
89bddce5 1970 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1971 &msg_compat->msg_controllen);
1972 else
89bddce5 1973 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1974 &msg->msg_controllen);
1975 if (err)
1976 goto out_freeiov;
1977 err = len;
1978
1979out_freeiov:
1980 if (iov != iovstack)
1981 sock_kfree_s(sock->sk, iov, iov_size);
1982out_put:
6cb153ca 1983 fput_light(sock->file, fput_needed);
1da177e4
LT
1984out:
1985 return err;
1986}
1987
1988#ifdef __ARCH_WANT_SYS_SOCKETCALL
1989
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call
 * number.  Entry 0 is unused (sys_socketcall rejects call < 1).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0),	/*  0 */
	AL(3),	/*  1 */
	AL(3),	/*  2 */
	AL(3),	/*  3 */
	AL(2),	/*  4 */
	AL(3),	/*  5 */
	AL(3),	/*  6 */
	AL(3),	/*  7 */
	AL(4),	/*  8 */
	AL(4),	/*  9 */
	AL(4),	/* 10 */
	AL(6),	/* 11 */
	AL(6),	/* 12 */
	AL(2),	/* 13 */
	AL(5),	/* 14 */
	AL(5),	/* 15 */
	AL(3),	/* 16 */
	AL(3),	/* 17 */
};

#undef AL
1999
2000/*
89bddce5 2001 * System call vectors.
1da177e4
LT
2002 *
2003 * Argument checking cleaned up. Saved 20% in size.
2004 * This function doesn't need to set the kernel lock because
89bddce5 2005 * it is set by the callees.
1da177e4
LT
2006 */
2007
2008asmlinkage long sys_socketcall(int call, unsigned long __user *args)
2009{
2010 unsigned long a[6];
89bddce5 2011 unsigned long a0, a1;
1da177e4
LT
2012 int err;
2013
89bddce5 2014 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
2015 return -EINVAL;
2016
2017 /* copy_from_user should be SMP safe. */
2018 if (copy_from_user(a, args, nargs[call]))
2019 return -EFAULT;
3ec3b2fb 2020
89bddce5 2021 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
2022 if (err)
2023 return err;
2024
89bddce5
SH
2025 a0 = a[0];
2026 a1 = a[1];
2027
2028 switch (call) {
2029 case SYS_SOCKET:
2030 err = sys_socket(a0, a1, a[2]);
2031 break;
2032 case SYS_BIND:
2033 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2034 break;
2035 case SYS_CONNECT:
2036 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2037 break;
2038 case SYS_LISTEN:
2039 err = sys_listen(a0, a1);
2040 break;
2041 case SYS_ACCEPT:
2042 err =
2043 sys_accept(a0, (struct sockaddr __user *)a1,
2044 (int __user *)a[2]);
2045 break;
2046 case SYS_GETSOCKNAME:
2047 err =
2048 sys_getsockname(a0, (struct sockaddr __user *)a1,
2049 (int __user *)a[2]);
2050 break;
2051 case SYS_GETPEERNAME:
2052 err =
2053 sys_getpeername(a0, (struct sockaddr __user *)a1,
2054 (int __user *)a[2]);
2055 break;
2056 case SYS_SOCKETPAIR:
2057 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2058 break;
2059 case SYS_SEND:
2060 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2061 break;
2062 case SYS_SENDTO:
2063 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2064 (struct sockaddr __user *)a[4], a[5]);
2065 break;
2066 case SYS_RECV:
2067 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2068 break;
2069 case SYS_RECVFROM:
2070 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2071 (struct sockaddr __user *)a[4],
2072 (int __user *)a[5]);
2073 break;
2074 case SYS_SHUTDOWN:
2075 err = sys_shutdown(a0, a1);
2076 break;
2077 case SYS_SETSOCKOPT:
2078 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2079 break;
2080 case SYS_GETSOCKOPT:
2081 err =
2082 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2083 (int __user *)a[4]);
2084 break;
2085 case SYS_SENDMSG:
2086 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2087 break;
2088 case SYS_RECVMSG:
2089 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2090 break;
2091 default:
2092 err = -EINVAL;
2093 break;
1da177e4
LT
2094 }
2095 return err;
2096}
2097
89bddce5 2098#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2099
55737fda
SH
2100/**
2101 * sock_register - add a socket protocol handler
2102 * @ops: description of protocol
2103 *
1da177e4
LT
2104 * This function is called by a protocol handler that wants to
2105 * advertise its address family, and have it linked into the
55737fda
SH
2106 * socket interface. The value ops->family coresponds to the
2107 * socket system call protocol family.
1da177e4 2108 */
f0fd27d4 2109int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2110{
2111 int err;
2112
2113 if (ops->family >= NPROTO) {
89bddce5
SH
2114 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2115 NPROTO);
1da177e4
LT
2116 return -ENOBUFS;
2117 }
55737fda
SH
2118
2119 spin_lock(&net_family_lock);
2120 if (net_families[ops->family])
2121 err = -EEXIST;
2122 else {
89bddce5 2123 net_families[ops->family] = ops;
1da177e4
LT
2124 err = 0;
2125 }
55737fda
SH
2126 spin_unlock(&net_family_lock);
2127
89bddce5 2128 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2129 return err;
2130}
2131
55737fda
SH
2132/**
2133 * sock_unregister - remove a protocol handler
2134 * @family: protocol family to remove
2135 *
1da177e4
LT
2136 * This function is called by a protocol handler that wants to
2137 * remove its address family, and have it unlinked from the
55737fda
SH
2138 * new socket creation.
2139 *
2140 * If protocol handler is a module, then it can use module reference
2141 * counts to protect against new references. If protocol handler is not
2142 * a module then it needs to provide its own protection in
2143 * the ops->create routine.
1da177e4 2144 */
f0fd27d4 2145void sock_unregister(int family)
1da177e4 2146{
f0fd27d4 2147 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2148
55737fda 2149 spin_lock(&net_family_lock);
89bddce5 2150 net_families[family] = NULL;
55737fda
SH
2151 spin_unlock(&net_family_lock);
2152
2153 synchronize_rcu();
2154
89bddce5 2155 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2156}
2157
77d76ea3 2158static int __init sock_init(void)
1da177e4
LT
2159{
2160 /*
89bddce5 2161 * Initialize sock SLAB cache.
1da177e4 2162 */
89bddce5 2163
1da177e4
LT
2164 sk_init();
2165
1da177e4 2166 /*
89bddce5 2167 * Initialize skbuff SLAB cache
1da177e4
LT
2168 */
2169 skb_init();
1da177e4
LT
2170
2171 /*
89bddce5 2172 * Initialize the protocols module.
1da177e4
LT
2173 */
2174
2175 init_inodecache();
2176 register_filesystem(&sock_fs_type);
2177 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2178
2179 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2180 */
2181
2182#ifdef CONFIG_NETFILTER
2183 netfilter_init();
2184#endif
cbeb321a
DM
2185
2186 return 0;
1da177e4
LT
2187}
2188
77d76ea3
AK
2189core_initcall(sock_init); /* early initcall */
2190
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * socket_seq_show - emit the "sockets: used N" line into a seq_file.
 * @seq: seq_file to print into
 *
 * N is the sum of the per-CPU sockets_in_use counters over every
 * possible CPU, clamped at zero.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif /* CONFIG_PROC_FS */
1da177e4 2207
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * compat_sock_ioctl - compat ioctl entry point for socket files.
 * @file: file whose private_data is the struct socket
 * @cmd:  ioctl command
 * @arg:  ioctl argument
 *
 * Tries the socket's own ->compat_ioctl handler first, if it has one.
 * When that leaves the command unhandled (-ENOIOCTLCMD) and @cmd lies in
 * the SIOCIWFIRST..SIOCIWLAST range, the request is passed to
 * compat_wext_handle_ioctl() for the socket's network namespace.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	struct net *net = sock_net(sock->sk);
	int ret;

	if (sock->ops->compat_ioctl)
		ret = sock->ops->compat_ioctl(sock, cmd, arg);
	else
		ret = -ENOIOCTLCMD;

	/* The protocol handled it (or failed for a real reason). */
	if (ret != -ENOIOCTLCMD)
		return ret;

	/* Not a wireless-extensions command: leave it unhandled. */
	if (cmd < SIOCIWFIRST || cmd > SIOCIWLAST)
		return ret;

	ret = compat_wext_handle_ioctl(net, cmd, arg);
	return ret;
}
#endif
2230
ac5a488e
SS
2231int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2232{
2233 return sock->ops->bind(sock, addr, addrlen);
2234}
2235
2236int kernel_listen(struct socket *sock, int backlog)
2237{
2238 return sock->ops->listen(sock, backlog);
2239}
2240
2241int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2242{
2243 struct sock *sk = sock->sk;
2244 int err;
2245
2246 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2247 newsock);
2248 if (err < 0)
2249 goto done;
2250
2251 err = sock->ops->accept(sock, *newsock, flags);
2252 if (err < 0) {
2253 sock_release(*newsock);
fa8705b0 2254 *newsock = NULL;
ac5a488e
SS
2255 goto done;
2256 }
2257
2258 (*newsock)->ops = sock->ops;
2259
2260done:
2261 return err;
2262}
2263
2264int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2265 int flags)
ac5a488e
SS
2266{
2267 return sock->ops->connect(sock, addr, addrlen, flags);
2268}
2269
2270int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2271 int *addrlen)
2272{
2273 return sock->ops->getname(sock, addr, addrlen, 0);
2274}
2275
2276int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2277 int *addrlen)
2278{
2279 return sock->ops->getname(sock, addr, addrlen, 1);
2280}
2281
2282int kernel_getsockopt(struct socket *sock, int level, int optname,
2283 char *optval, int *optlen)
2284{
2285 mm_segment_t oldfs = get_fs();
2286 int err;
2287
2288 set_fs(KERNEL_DS);
2289 if (level == SOL_SOCKET)
2290 err = sock_getsockopt(sock, level, optname, optval, optlen);
2291 else
2292 err = sock->ops->getsockopt(sock, level, optname, optval,
2293 optlen);
2294 set_fs(oldfs);
2295 return err;
2296}
2297
2298int kernel_setsockopt(struct socket *sock, int level, int optname,
2299 char *optval, int optlen)
2300{
2301 mm_segment_t oldfs = get_fs();
2302 int err;
2303
2304 set_fs(KERNEL_DS);
2305 if (level == SOL_SOCKET)
2306 err = sock_setsockopt(sock, level, optname, optval, optlen);
2307 else
2308 err = sock->ops->setsockopt(sock, level, optname, optval,
2309 optlen);
2310 set_fs(oldfs);
2311 return err;
2312}
2313
2314int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2315 size_t size, int flags)
2316{
2317 if (sock->ops->sendpage)
2318 return sock->ops->sendpage(sock, page, offset, size, flags);
2319
2320 return sock_no_sendpage(sock, page, offset, size, flags);
2321}
2322
2323int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2324{
2325 mm_segment_t oldfs = get_fs();
2326 int err;
2327
2328 set_fs(KERNEL_DS);
2329 err = sock->ops->ioctl(sock, cmd, arg);
2330 set_fs(oldfs);
2331
2332 return err;
2333}
2334
91cf45f0
TM
2335int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
2336{
2337 return sock->ops->shutdown(sock, how);
2338}
2339
1da177e4
LT
/* Socket creation, registration and generic I/O entry points. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() helpers. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);