binfmt_misc: use simple_read_from_buffer()
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
87de87d5 93#include <net/wext.h>
1da177e4
LT
94
95#include <net/sock.h>
96#include <linux/netfilter.h>
97
98static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
99static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
100 unsigned long nr_segs, loff_t pos);
101static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
102 unsigned long nr_segs, loff_t pos);
89bddce5 103static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
104
105static int sock_close(struct inode *inode, struct file *file);
106static unsigned int sock_poll(struct file *file,
107 struct poll_table_struct *wait);
89bddce5 108static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
109#ifdef CONFIG_COMPAT
110static long compat_sock_ioctl(struct file *file,
89bddce5 111 unsigned int cmd, unsigned long arg);
89bbfc95 112#endif
1da177e4 113static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
114static ssize_t sock_sendpage(struct file *file, struct page *page,
115 int offset, size_t size, loff_t *ppos, int more);
9c55e01c
JA
116static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
117 struct pipe_inode_info *pipe, size_t len,
118 unsigned int flags);
1da177e4 119
1da177e4
LT
120/*
121 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
122 * in the operation structures but are done directly via the socketcall() multiplexor.
123 */
124
da7071d7 125static const struct file_operations socket_file_ops = {
1da177e4
LT
126 .owner = THIS_MODULE,
127 .llseek = no_llseek,
128 .aio_read = sock_aio_read,
129 .aio_write = sock_aio_write,
130 .poll = sock_poll,
131 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
132#ifdef CONFIG_COMPAT
133 .compat_ioctl = compat_sock_ioctl,
134#endif
1da177e4
LT
135 .mmap = sock_mmap,
136 .open = sock_no_open, /* special open code to disallow open via /proc */
137 .release = sock_close,
138 .fasync = sock_fasync,
5274f052
JA
139 .sendpage = sock_sendpage,
140 .splice_write = generic_splice_sendpage,
9c55e01c 141 .splice_read = sock_splice_read,
1da177e4
LT
142};
143
144/*
145 * The protocol list. Each protocol is registered in here.
146 */
147
1da177e4 148static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 149static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 150
1da177e4
LT
151/*
152 * Statistics counters of the socket lists
153 */
154
155static DEFINE_PER_CPU(int, sockets_in_use) = 0;
156
/*
 * Support routines.
 * Move socket addresses back and forth across the kernel/user
 * divide and look after the messy bits.
 */
162
/*
 * Size budget for a socket address: 108 for Unix domain, 16 for IP,
 * 16 for IPX, 24 for IPv6, about 80 for AX.25.
 * Must be at least one bigger than the AF_UNIX size
 * (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 171
1da177e4
LT
172/**
173 * move_addr_to_kernel - copy a socket address into kernel space
174 * @uaddr: Address in user space
175 * @kaddr: Address in kernel space
176 * @ulen: Length in user space
177 *
178 * The address is copied into kernel space. If the provided address is
179 * too long an error code of -EINVAL is returned. If the copy gives
180 * invalid addresses -EFAULT is returned. On a success 0 is returned.
181 */
182
230b1839 183int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
1da177e4 184{
230b1839 185 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 186 return -EINVAL;
89bddce5 187 if (ulen == 0)
1da177e4 188 return 0;
89bddce5 189 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 190 return -EFAULT;
3ec3b2fb 191 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
192}
193
194/**
195 * move_addr_to_user - copy an address to user space
196 * @kaddr: kernel space address
197 * @klen: length of address in kernel
198 * @uaddr: user space address
199 * @ulen: pointer to user length field
200 *
201 * The value pointed to by ulen on entry is the buffer length available.
202 * This is overwritten with the buffer space used. -EINVAL is returned
203 * if an overlong buffer is specified or a negative buffer size. -EFAULT
204 * is returned if either the buffer or the length field are not
205 * accessible.
206 * After copying the data up to the limit the user specifies, the true
207 * length of the data is written over the length limit the user
208 * specified. Zero is returned for a success.
209 */
89bddce5 210
230b1839 211int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr,
89bddce5 212 int __user *ulen)
1da177e4
LT
213{
214 int err;
215 int len;
216
89bddce5
SH
217 err = get_user(len, ulen);
218 if (err)
1da177e4 219 return err;
89bddce5
SH
220 if (len > klen)
221 len = klen;
230b1839 222 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 223 return -EINVAL;
89bddce5 224 if (len) {
d6fe3945
SG
225 if (audit_sockaddr(klen, kaddr))
226 return -ENOMEM;
89bddce5 227 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
228 return -EFAULT;
229 }
230 /*
89bddce5
SH
231 * "fromlen shall refer to the value before truncation.."
232 * 1003.1g
1da177e4
LT
233 */
234 return __put_user(klen, ulen);
235}
236
237#define SOCKFS_MAGIC 0x534F434B
238
e18b890b 239static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
240
241static struct inode *sock_alloc_inode(struct super_block *sb)
242{
243 struct socket_alloc *ei;
89bddce5 244
e94b1766 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
246 if (!ei)
247 return NULL;
248 init_waitqueue_head(&ei->socket.wait);
89bddce5 249
1da177e4
LT
250 ei->socket.fasync_list = NULL;
251 ei->socket.state = SS_UNCONNECTED;
252 ei->socket.flags = 0;
253 ei->socket.ops = NULL;
254 ei->socket.sk = NULL;
255 ei->socket.file = NULL;
1da177e4
LT
256
257 return &ei->vfs_inode;
258}
259
260static void sock_destroy_inode(struct inode *inode)
261{
262 kmem_cache_free(sock_inode_cachep,
263 container_of(inode, struct socket_alloc, vfs_inode));
264}
265
4ba9b9d0 266static void init_once(struct kmem_cache *cachep, void *foo)
1da177e4 267{
89bddce5 268 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 269
a35afb83 270 inode_init_once(&ei->vfs_inode);
1da177e4 271}
89bddce5 272
1da177e4
LT
273static int init_inodecache(void)
274{
275 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
276 sizeof(struct socket_alloc),
277 0,
278 (SLAB_HWCACHE_ALIGN |
279 SLAB_RECLAIM_ACCOUNT |
280 SLAB_MEM_SPREAD),
20c2df83 281 init_once);
1da177e4
LT
282 if (sock_inode_cachep == NULL)
283 return -ENOMEM;
284 return 0;
285}
286
287static struct super_operations sockfs_ops = {
288 .alloc_inode = sock_alloc_inode,
289 .destroy_inode =sock_destroy_inode,
290 .statfs = simple_statfs,
291};
292
454e2398 293static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
294 int flags, const char *dev_name, void *data,
295 struct vfsmount *mnt)
1da177e4 296{
454e2398
DH
297 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
298 mnt);
1da177e4
LT
299}
300
ba89966c 301static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
302
303static struct file_system_type sock_fs_type = {
304 .name = "sockfs",
305 .get_sb = sockfs_get_sb,
306 .kill_sb = kill_anon_super,
307};
89bddce5 308
1da177e4
LT
309static int sockfs_delete_dentry(struct dentry *dentry)
310{
304e61e6
ED
311 /*
312 * At creation time, we pretended this dentry was hashed
313 * (by clearing DCACHE_UNHASHED bit in d_flags)
314 * At delete time, we restore the truth : not hashed.
315 * (so that dput() can proceed correctly)
316 */
317 dentry->d_flags |= DCACHE_UNHASHED;
318 return 0;
1da177e4 319}
c23fbb6b
ED
320
321/*
322 * sockfs_dname() is called from d_path().
323 */
324static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
325{
326 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
327 dentry->d_inode->i_ino);
328}
329
1da177e4 330static struct dentry_operations sockfs_dentry_operations = {
89bddce5 331 .d_delete = sockfs_delete_dentry,
c23fbb6b 332 .d_dname = sockfs_dname,
1da177e4
LT
333};
334
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success they return the file descriptor,
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we do not refer
 *	to the socket after mapping. If one day we need to, this function
 *	will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable with shared fd spaces; we cannot
 *	solve it inside the kernel, but we still take care of internal
 *	coherence.
 */
351
39d8c1b6 352static int sock_alloc_fd(struct file **filep)
1da177e4
LT
353{
354 int fd;
1da177e4
LT
355
356 fd = get_unused_fd();
39d8c1b6 357 if (likely(fd >= 0)) {
1da177e4
LT
358 struct file *file = get_empty_filp();
359
39d8c1b6
DM
360 *filep = file;
361 if (unlikely(!file)) {
1da177e4 362 put_unused_fd(fd);
39d8c1b6 363 return -ENFILE;
1da177e4 364 }
39d8c1b6
DM
365 } else
366 *filep = NULL;
367 return fd;
368}
1da177e4 369
39d8c1b6
DM
370static int sock_attach_fd(struct socket *sock, struct file *file)
371{
ce8d2cdf 372 struct dentry *dentry;
c23fbb6b 373 struct qstr name = { .name = "" };
39d8c1b6 374
ce8d2cdf
DH
375 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
376 if (unlikely(!dentry))
39d8c1b6
DM
377 return -ENOMEM;
378
ce8d2cdf 379 dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
380 /*
381 * We dont want to push this dentry into global dentry hash table.
382 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
383 * This permits a working /proc/$pid/fd/XXX on sockets
384 */
ce8d2cdf
DH
385 dentry->d_flags &= ~DCACHE_UNHASHED;
386 d_instantiate(dentry, SOCK_INODE(sock));
39d8c1b6
DM
387
388 sock->file = file;
ce8d2cdf
DH
389 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
390 &socket_file_ops);
391 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6
DM
392 file->f_flags = O_RDWR;
393 file->f_pos = 0;
394 file->private_data = sock;
1da177e4 395
39d8c1b6
DM
396 return 0;
397}
398
/*
 * Allocate an fd + file for @sock, attach them and install the file in
 * the fd table. Returns the fd, or a negative errno; on an attach failure
 * both the file and the fd are released again.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
416
6cb153ca
BL
417static struct socket *sock_from_file(struct file *file, int *err)
418{
6cb153ca
BL
419 if (file->f_op == &socket_file_ops)
420 return file->private_data; /* set in sock_map_fd */
421
23bb80d2
ED
422 *err = -ENOTSOCK;
423 return NULL;
6cb153ca
BL
424}
425
1da177e4
LT
426/**
427 * sockfd_lookup - Go from a file number to its socket slot
428 * @fd: file handle
429 * @err: pointer to an error code return
430 *
431 * The file handle passed in is locked and the socket it is bound
432 * too is returned. If an error occurs the err pointer is overwritten
433 * with a negative errno code and NULL is returned. The function checks
434 * for both invalid handles and passing a handle which is not a socket.
435 *
436 * On a success the socket object pointer is returned.
437 */
438
439struct socket *sockfd_lookup(int fd, int *err)
440{
441 struct file *file;
1da177e4
LT
442 struct socket *sock;
443
89bddce5
SH
444 file = fget(fd);
445 if (!file) {
1da177e4
LT
446 *err = -EBADF;
447 return NULL;
448 }
89bddce5 449
6cb153ca
BL
450 sock = sock_from_file(file, err);
451 if (!sock)
1da177e4 452 fput(file);
6cb153ca
BL
453 return sock;
454}
1da177e4 455
6cb153ca
BL
456static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
457{
458 struct file *file;
459 struct socket *sock;
460
3672558c 461 *err = -EBADF;
6cb153ca
BL
462 file = fget_light(fd, fput_needed);
463 if (file) {
464 sock = sock_from_file(file, err);
465 if (sock)
466 return sock;
467 fput_light(file, *fput_needed);
1da177e4 468 }
6cb153ca 469 return NULL;
1da177e4
LT
470}
471
472/**
473 * sock_alloc - allocate a socket
89bddce5 474 *
1da177e4
LT
475 * Allocate a new inode and socket object. The two are bound together
476 * and initialised. The socket is then returned. If we are out of inodes
477 * NULL is returned.
478 */
479
480static struct socket *sock_alloc(void)
481{
89bddce5
SH
482 struct inode *inode;
483 struct socket *sock;
1da177e4
LT
484
485 inode = new_inode(sock_mnt->mnt_sb);
486 if (!inode)
487 return NULL;
488
489 sock = SOCKET_I(inode);
490
89bddce5 491 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
492 inode->i_uid = current->fsuid;
493 inode->i_gid = current->fsgid;
494
495 get_cpu_var(sockets_in_use)++;
496 put_cpu_var(sockets_in_use);
497 return sock;
498}
499
500/*
501 * In theory you can't get an open on this inode, but /proc provides
502 * a back door. Remember to keep it shut otherwise you'll let the
503 * creepy crawlies in.
504 */
89bddce5 505
1da177e4
LT
506static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
507{
508 return -ENXIO;
509}
510
4b6f5d20 511const struct file_operations bad_sock_fops = {
1da177e4
LT
512 .owner = THIS_MODULE,
513 .open = sock_no_open,
514};
515
516/**
517 * sock_release - close a socket
518 * @sock: socket to close
519 *
520 * The socket is released from the protocol stack if it has a release
521 * callback, and the inode is then released if the socket is bound to
89bddce5 522 * an inode not a file.
1da177e4 523 */
89bddce5 524
1da177e4
LT
525void sock_release(struct socket *sock)
526{
527 if (sock->ops) {
528 struct module *owner = sock->ops->owner;
529
530 sock->ops->release(sock);
531 sock->ops = NULL;
532 module_put(owner);
533 }
534
535 if (sock->fasync_list)
536 printk(KERN_ERR "sock_release: fasync list not empty!\n");
537
538 get_cpu_var(sockets_in_use)--;
539 put_cpu_var(sockets_in_use);
540 if (!sock->file) {
541 iput(SOCK_INODE(sock));
542 return;
543 }
89bddce5 544 sock->file = NULL;
1da177e4
LT
545}
546
89bddce5 547static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
548 struct msghdr *msg, size_t size)
549{
550 struct sock_iocb *si = kiocb_to_siocb(iocb);
551 int err;
552
553 si->sock = sock;
554 si->scm = NULL;
555 si->msg = msg;
556 si->size = size;
557
558 err = security_socket_sendmsg(sock, msg, size);
559 if (err)
560 return err;
561
562 return sock->ops->sendmsg(iocb, sock, msg, size);
563}
564
565int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
566{
567 struct kiocb iocb;
568 struct sock_iocb siocb;
569 int ret;
570
571 init_sync_kiocb(&iocb, NULL);
572 iocb.private = &siocb;
573 ret = __sock_sendmsg(&iocb, sock, msg, size);
574 if (-EIOCBQUEUED == ret)
575 ret = wait_on_sync_kiocb(&iocb);
576 return ret;
577}
578
579int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
580 struct kvec *vec, size_t num, size_t size)
581{
582 mm_segment_t oldfs = get_fs();
583 int result;
584
585 set_fs(KERNEL_DS);
586 /*
587 * the following is safe, since for compiler definitions of kvec and
588 * iovec are identical, yielding the same in-core layout and alignment
589 */
89bddce5 590 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
591 msg->msg_iovlen = num;
592 result = sock_sendmsg(sock, msg, size);
593 set_fs(oldfs);
594 return result;
595}
596
92f37fd2
ED
597/*
598 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
599 */
600void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
601 struct sk_buff *skb)
602{
603 ktime_t kt = skb->tstamp;
604
605 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
606 struct timeval tv;
607 /* Race occurred between timestamp enabling and packet
608 receiving. Fill in the current time for now. */
609 if (kt.tv64 == 0)
610 kt = ktime_get_real();
611 skb->tstamp = kt;
612 tv = ktime_to_timeval(kt);
613 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
614 } else {
615 struct timespec ts;
616 /* Race occurred between timestamp enabling and packet
617 receiving. Fill in the current time for now. */
618 if (kt.tv64 == 0)
619 kt = ktime_get_real();
620 skb->tstamp = kt;
621 ts = ktime_to_timespec(kt);
622 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
623 }
624}
625
7c81fd8b
ACM
626EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
627
89bddce5 628static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
629 struct msghdr *msg, size_t size, int flags)
630{
631 int err;
632 struct sock_iocb *si = kiocb_to_siocb(iocb);
633
634 si->sock = sock;
635 si->scm = NULL;
636 si->msg = msg;
637 si->size = size;
638 si->flags = flags;
639
640 err = security_socket_recvmsg(sock, msg, size, flags);
641 if (err)
642 return err;
643
644 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
645}
646
89bddce5 647int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
648 size_t size, int flags)
649{
650 struct kiocb iocb;
651 struct sock_iocb siocb;
652 int ret;
653
89bddce5 654 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
655 iocb.private = &siocb;
656 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
657 if (-EIOCBQUEUED == ret)
658 ret = wait_on_sync_kiocb(&iocb);
659 return ret;
660}
661
89bddce5
SH
662int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
663 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
664{
665 mm_segment_t oldfs = get_fs();
666 int result;
667
668 set_fs(KERNEL_DS);
669 /*
670 * the following is safe, since for compiler definitions of kvec and
671 * iovec are identical, yielding the same in-core layout and alignment
672 */
89bddce5 673 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
674 result = sock_recvmsg(sock, msg, size, flags);
675 set_fs(oldfs);
676 return result;
677}
678
679static void sock_aio_dtor(struct kiocb *iocb)
680{
681 kfree(iocb->private);
682}
683
ce1d4d3e
CH
684static ssize_t sock_sendpage(struct file *file, struct page *page,
685 int offset, size_t size, loff_t *ppos, int more)
1da177e4 686{
1da177e4
LT
687 struct socket *sock;
688 int flags;
689
ce1d4d3e
CH
690 sock = file->private_data;
691
692 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
693 if (more)
694 flags |= MSG_MORE;
695
696 return sock->ops->sendpage(sock, page, offset, size, flags);
697}
1da177e4 698
9c55e01c
JA
699static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
700 struct pipe_inode_info *pipe, size_t len,
701 unsigned int flags)
702{
703 struct socket *sock = file->private_data;
704
997b37da
RDC
705 if (unlikely(!sock->ops->splice_read))
706 return -EINVAL;
707
9c55e01c
JA
708 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
709}
710
ce1d4d3e 711static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 712 struct sock_iocb *siocb)
ce1d4d3e
CH
713{
714 if (!is_sync_kiocb(iocb)) {
715 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
716 if (!siocb)
717 return NULL;
1da177e4
LT
718 iocb->ki_dtor = sock_aio_dtor;
719 }
1da177e4 720
ce1d4d3e 721 siocb->kiocb = iocb;
ce1d4d3e
CH
722 iocb->private = siocb;
723 return siocb;
1da177e4
LT
724}
725
ce1d4d3e 726static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
727 struct file *file, const struct iovec *iov,
728 unsigned long nr_segs)
ce1d4d3e
CH
729{
730 struct socket *sock = file->private_data;
731 size_t size = 0;
732 int i;
1da177e4 733
89bddce5
SH
734 for (i = 0; i < nr_segs; i++)
735 size += iov[i].iov_len;
1da177e4 736
ce1d4d3e
CH
737 msg->msg_name = NULL;
738 msg->msg_namelen = 0;
739 msg->msg_control = NULL;
740 msg->msg_controllen = 0;
89bddce5 741 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
742 msg->msg_iovlen = nr_segs;
743 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
744
745 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
746}
747
027445c3
BP
748static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
749 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
750{
751 struct sock_iocb siocb, *x;
752
1da177e4
LT
753 if (pos != 0)
754 return -ESPIPE;
027445c3
BP
755
756 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
757 return 0;
758
027445c3
BP
759
760 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
761 if (!x)
762 return -ENOMEM;
027445c3 763 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
764}
765
ce1d4d3e 766static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
767 struct file *file, const struct iovec *iov,
768 unsigned long nr_segs)
1da177e4 769{
ce1d4d3e
CH
770 struct socket *sock = file->private_data;
771 size_t size = 0;
772 int i;
1da177e4 773
89bddce5
SH
774 for (i = 0; i < nr_segs; i++)
775 size += iov[i].iov_len;
1da177e4 776
ce1d4d3e
CH
777 msg->msg_name = NULL;
778 msg->msg_namelen = 0;
779 msg->msg_control = NULL;
780 msg->msg_controllen = 0;
89bddce5 781 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
782 msg->msg_iovlen = nr_segs;
783 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
784 if (sock->type == SOCK_SEQPACKET)
785 msg->msg_flags |= MSG_EOR;
1da177e4 786
ce1d4d3e 787 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
788}
789
027445c3
BP
790static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
791 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
792{
793 struct sock_iocb siocb, *x;
1da177e4 794
ce1d4d3e
CH
795 if (pos != 0)
796 return -ESPIPE;
027445c3 797
027445c3 798 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
799 if (!x)
800 return -ENOMEM;
1da177e4 801
027445c3 802 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
803}
804
1da177e4
LT
805/*
806 * Atomic setting of ioctl hooks to avoid race
807 * with module unload.
808 */
809
4a3e2f71 810static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 811static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 812
881d966b 813void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 814{
4a3e2f71 815 mutex_lock(&br_ioctl_mutex);
1da177e4 816 br_ioctl_hook = hook;
4a3e2f71 817 mutex_unlock(&br_ioctl_mutex);
1da177e4 818}
89bddce5 819
1da177e4
LT
820EXPORT_SYMBOL(brioctl_set);
821
4a3e2f71 822static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 823static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 824
881d966b 825void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 826{
4a3e2f71 827 mutex_lock(&vlan_ioctl_mutex);
1da177e4 828 vlan_ioctl_hook = hook;
4a3e2f71 829 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 830}
89bddce5 831
1da177e4
LT
832EXPORT_SYMBOL(vlan_ioctl_set);
833
4a3e2f71 834static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 835static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 836
89bddce5 837void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 838{
4a3e2f71 839 mutex_lock(&dlci_ioctl_mutex);
1da177e4 840 dlci_ioctl_hook = hook;
4a3e2f71 841 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 842}
89bddce5 843
1da177e4
LT
844EXPORT_SYMBOL(dlci_ioctl_set);
845
846/*
847 * With an ioctl, arg may well be a user mode pointer, but we don't know
848 * what to do with it - that's up to the protocol still.
849 */
850
851static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
852{
853 struct socket *sock;
881d966b 854 struct sock *sk;
1da177e4
LT
855 void __user *argp = (void __user *)arg;
856 int pid, err;
881d966b 857 struct net *net;
1da177e4 858
b69aee04 859 sock = file->private_data;
881d966b 860 sk = sock->sk;
3b1e0a65 861 net = sock_net(sk);
1da177e4 862 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 863 err = dev_ioctl(net, cmd, argp);
1da177e4 864 } else
d86b5e0e 865#ifdef CONFIG_WIRELESS_EXT
1da177e4 866 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 867 err = dev_ioctl(net, cmd, argp);
1da177e4 868 } else
89bddce5
SH
869#endif /* CONFIG_WIRELESS_EXT */
870 switch (cmd) {
1da177e4
LT
871 case FIOSETOWN:
872 case SIOCSPGRP:
873 err = -EFAULT;
874 if (get_user(pid, (int __user *)argp))
875 break;
876 err = f_setown(sock->file, pid, 1);
877 break;
878 case FIOGETOWN:
879 case SIOCGPGRP:
609d7fa9 880 err = put_user(f_getown(sock->file),
89bddce5 881 (int __user *)argp);
1da177e4
LT
882 break;
883 case SIOCGIFBR:
884 case SIOCSIFBR:
885 case SIOCBRADDBR:
886 case SIOCBRDELBR:
887 err = -ENOPKG;
888 if (!br_ioctl_hook)
889 request_module("bridge");
890
4a3e2f71 891 mutex_lock(&br_ioctl_mutex);
89bddce5 892 if (br_ioctl_hook)
881d966b 893 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 894 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
895 break;
896 case SIOCGIFVLAN:
897 case SIOCSIFVLAN:
898 err = -ENOPKG;
899 if (!vlan_ioctl_hook)
900 request_module("8021q");
901
4a3e2f71 902 mutex_lock(&vlan_ioctl_mutex);
1da177e4 903 if (vlan_ioctl_hook)
881d966b 904 err = vlan_ioctl_hook(net, argp);
4a3e2f71 905 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 906 break;
1da177e4
LT
907 case SIOCADDDLCI:
908 case SIOCDELDLCI:
909 err = -ENOPKG;
910 if (!dlci_ioctl_hook)
911 request_module("dlci");
912
7512cbf6
PE
913 mutex_lock(&dlci_ioctl_mutex);
914 if (dlci_ioctl_hook)
1da177e4 915 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 916 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
917 break;
918 default:
919 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
920
921 /*
922 * If this ioctl is unknown try to hand it down
923 * to the NIC driver.
924 */
925 if (err == -ENOIOCTLCMD)
881d966b 926 err = dev_ioctl(net, cmd, argp);
1da177e4 927 break;
89bddce5 928 }
1da177e4
LT
929 return err;
930}
931
932int sock_create_lite(int family, int type, int protocol, struct socket **res)
933{
934 int err;
935 struct socket *sock = NULL;
89bddce5 936
1da177e4
LT
937 err = security_socket_create(family, type, protocol, 1);
938 if (err)
939 goto out;
940
941 sock = sock_alloc();
942 if (!sock) {
943 err = -ENOMEM;
944 goto out;
945 }
946
1da177e4 947 sock->type = type;
7420ed23
VY
948 err = security_socket_post_create(sock, family, type, protocol, 1);
949 if (err)
950 goto out_release;
951
1da177e4
LT
952out:
953 *res = sock;
954 return err;
7420ed23
VY
955out_release:
956 sock_release(sock);
957 sock = NULL;
958 goto out;
1da177e4
LT
959}
960
961/* No kernel lock held - perfect */
89bddce5 962static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
963{
964 struct socket *sock;
965
966 /*
89bddce5 967 * We can't return errors to poll, so it's either yes or no.
1da177e4 968 */
b69aee04 969 sock = file->private_data;
1da177e4
LT
970 return sock->ops->poll(file, sock, wait);
971}
972
89bddce5 973static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 974{
b69aee04 975 struct socket *sock = file->private_data;
1da177e4
LT
976
977 return sock->ops->mmap(file, sock, vma);
978}
979
20380731 980static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
981{
982 /*
89bddce5
SH
983 * It was possible the inode is NULL we were
984 * closing an unfinished socket.
1da177e4
LT
985 */
986
89bddce5 987 if (!inode) {
1da177e4
LT
988 printk(KERN_DEBUG "sock_close: NULL inode\n");
989 return 0;
990 }
991 sock_fasync(-1, filp, 0);
992 sock_release(SOCKET_I(inode));
993 return 0;
994}
995
996/*
997 * Update the socket async list
998 *
999 * Fasync_list locking strategy.
1000 *
1001 * 1. fasync_list is modified only under process context socket lock
1002 * i.e. under semaphore.
1003 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1004 * or under socket lock.
1005 * 3. fasync_list can be used from softirq context, so that
1006 * modification under socket lock have to be enhanced with
1007 * write_lock_bh(&sk->sk_callback_lock).
1008 * --ANK (990710)
1009 */
1010
1011static int sock_fasync(int fd, struct file *filp, int on)
1012{
89bddce5 1013 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1014 struct socket *sock;
1015 struct sock *sk;
1016
89bddce5 1017 if (on) {
8b3a7005 1018 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1019 if (fna == NULL)
1da177e4
LT
1020 return -ENOMEM;
1021 }
1022
b69aee04 1023 sock = filp->private_data;
1da177e4 1024
89bddce5
SH
1025 sk = sock->sk;
1026 if (sk == NULL) {
1da177e4
LT
1027 kfree(fna);
1028 return -EINVAL;
1029 }
1030
1031 lock_sock(sk);
1032
89bddce5 1033 prev = &(sock->fasync_list);
1da177e4 1034
89bddce5
SH
1035 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1036 if (fa->fa_file == filp)
1da177e4
LT
1037 break;
1038
89bddce5
SH
1039 if (on) {
1040 if (fa != NULL) {
1da177e4 1041 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1042 fa->fa_fd = fd;
1da177e4
LT
1043 write_unlock_bh(&sk->sk_callback_lock);
1044
1045 kfree(fna);
1046 goto out;
1047 }
89bddce5
SH
1048 fna->fa_file = filp;
1049 fna->fa_fd = fd;
1050 fna->magic = FASYNC_MAGIC;
1051 fna->fa_next = sock->fasync_list;
1da177e4 1052 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1053 sock->fasync_list = fna;
1da177e4 1054 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1055 } else {
1056 if (fa != NULL) {
1da177e4 1057 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1058 *prev = fa->fa_next;
1da177e4
LT
1059 write_unlock_bh(&sk->sk_callback_lock);
1060 kfree(fa);
1061 }
1062 }
1063
1064out:
1065 release_sock(sock->sk);
1066 return 0;
1067}
1068
1069/* This function may be called only under socket lock or callback_lock */
1070
1071int sock_wake_async(struct socket *sock, int how, int band)
1072{
1073 if (!sock || !sock->fasync_list)
1074 return -1;
89bddce5 1075 switch (how) {
8d8ad9d7 1076 case SOCK_WAKE_WAITD:
1da177e4
LT
1077 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1078 break;
1079 goto call_kill;
8d8ad9d7 1080 case SOCK_WAKE_SPACE:
1da177e4
LT
1081 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1082 break;
1083 /* fall through */
8d8ad9d7 1084 case SOCK_WAKE_IO:
89bddce5 1085call_kill:
1da177e4
LT
1086 __kill_fasync(sock->fasync_list, SIGIO, band);
1087 break;
8d8ad9d7 1088 case SOCK_WAKE_URG:
1da177e4
LT
1089 __kill_fasync(sock->fasync_list, SIGURG, band);
1090 }
1091 return 0;
1092}
1093
1b8d7ae4 1094static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1095 struct socket **res, int kern)
1da177e4
LT
1096{
1097 int err;
1098 struct socket *sock;
55737fda 1099 const struct net_proto_family *pf;
1da177e4
LT
1100
1101 /*
89bddce5 1102 * Check protocol is in range
1da177e4
LT
1103 */
1104 if (family < 0 || family >= NPROTO)
1105 return -EAFNOSUPPORT;
1106 if (type < 0 || type >= SOCK_MAX)
1107 return -EINVAL;
1108
1109 /* Compatibility.
1110
1111 This uglymoron is moved from INET layer to here to avoid
1112 deadlock in module load.
1113 */
1114 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1115 static int warned;
1da177e4
LT
1116 if (!warned) {
1117 warned = 1;
89bddce5
SH
1118 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1119 current->comm);
1da177e4
LT
1120 }
1121 family = PF_PACKET;
1122 }
1123
1124 err = security_socket_create(family, type, protocol, kern);
1125 if (err)
1126 return err;
89bddce5 1127
55737fda
SH
1128 /*
1129 * Allocate the socket and allow the family to set things up. if
1130 * the protocol is 0, the family is instructed to select an appropriate
1131 * default.
1132 */
1133 sock = sock_alloc();
1134 if (!sock) {
1135 if (net_ratelimit())
1136 printk(KERN_WARNING "socket: no more sockets\n");
1137 return -ENFILE; /* Not exactly a match, but its the
1138 closest posix thing */
1139 }
1140
1141 sock->type = type;
1142
1da177e4 1143#if defined(CONFIG_KMOD)
89bddce5
SH
1144 /* Attempt to load a protocol module if the find failed.
1145 *
1146 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1147 * requested real, full-featured networking support upon configuration.
1148 * Otherwise module support will break!
1149 */
55737fda 1150 if (net_families[family] == NULL)
89bddce5 1151 request_module("net-pf-%d", family);
1da177e4
LT
1152#endif
1153
55737fda
SH
1154 rcu_read_lock();
1155 pf = rcu_dereference(net_families[family]);
1156 err = -EAFNOSUPPORT;
1157 if (!pf)
1158 goto out_release;
1da177e4
LT
1159
1160 /*
1161 * We will call the ->create function, that possibly is in a loadable
1162 * module, so we have to bump that loadable module refcnt first.
1163 */
55737fda 1164 if (!try_module_get(pf->owner))
1da177e4
LT
1165 goto out_release;
1166
55737fda
SH
1167 /* Now protected by module ref count */
1168 rcu_read_unlock();
1169
1b8d7ae4 1170 err = pf->create(net, sock, protocol);
55737fda 1171 if (err < 0)
1da177e4 1172 goto out_module_put;
a79af59e 1173
1da177e4
LT
1174 /*
1175 * Now to bump the refcnt of the [loadable] module that owns this
1176 * socket at sock_release time we decrement its refcnt.
1177 */
55737fda
SH
1178 if (!try_module_get(sock->ops->owner))
1179 goto out_module_busy;
1180
1da177e4
LT
1181 /*
1182 * Now that we're done with the ->create function, the [loadable]
1183 * module can have its refcnt decremented
1184 */
55737fda 1185 module_put(pf->owner);
7420ed23
VY
1186 err = security_socket_post_create(sock, family, type, protocol, kern);
1187 if (err)
3b185525 1188 goto out_sock_release;
55737fda 1189 *res = sock;
1da177e4 1190
55737fda
SH
1191 return 0;
1192
1193out_module_busy:
1194 err = -EAFNOSUPPORT;
1da177e4 1195out_module_put:
55737fda
SH
1196 sock->ops = NULL;
1197 module_put(pf->owner);
1198out_sock_release:
1da177e4 1199 sock_release(sock);
55737fda
SH
1200 return err;
1201
1202out_release:
1203 rcu_read_unlock();
1204 goto out_sock_release;
1da177e4
LT
1205}
1206
1207int sock_create(int family, int type, int protocol, struct socket **res)
1208{
1b8d7ae4 1209 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1210}
1211
1212int sock_create_kern(int family, int type, int protocol, struct socket **res)
1213{
1b8d7ae4 1214 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1215}
1216
1217asmlinkage long sys_socket(int family, int type, int protocol)
1218{
1219 int retval;
1220 struct socket *sock;
1221
1222 retval = sock_create(family, type, protocol, &sock);
1223 if (retval < 0)
1224 goto out;
1225
1226 retval = sock_map_fd(sock);
1227 if (retval < 0)
1228 goto out_release;
1229
1230out:
1231 /* It may be already another descriptor 8) Not kernel problem. */
1232 return retval;
1233
1234out_release:
1235 sock_release(sock);
1236 return retval;
1237}
1238
1239/*
1240 * Create a pair of connected sockets.
1241 */
1242
89bddce5
SH
1243asmlinkage long sys_socketpair(int family, int type, int protocol,
1244 int __user *usockvec)
1da177e4
LT
1245{
1246 struct socket *sock1, *sock2;
1247 int fd1, fd2, err;
db349509 1248 struct file *newfile1, *newfile2;
1da177e4
LT
1249
1250 /*
1251 * Obtain the first socket and check if the underlying protocol
1252 * supports the socketpair call.
1253 */
1254
1255 err = sock_create(family, type, protocol, &sock1);
1256 if (err < 0)
1257 goto out;
1258
1259 err = sock_create(family, type, protocol, &sock2);
1260 if (err < 0)
1261 goto out_release_1;
1262
1263 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1264 if (err < 0)
1da177e4
LT
1265 goto out_release_both;
1266
db349509 1267 fd1 = sock_alloc_fd(&newfile1);
bf3c23d1
DM
1268 if (unlikely(fd1 < 0)) {
1269 err = fd1;
db349509 1270 goto out_release_both;
bf3c23d1 1271 }
1da177e4 1272
db349509
AV
1273 fd2 = sock_alloc_fd(&newfile2);
1274 if (unlikely(fd2 < 0)) {
bf3c23d1 1275 err = fd2;
db349509
AV
1276 put_filp(newfile1);
1277 put_unused_fd(fd1);
1da177e4 1278 goto out_release_both;
db349509 1279 }
1da177e4 1280
db349509
AV
1281 err = sock_attach_fd(sock1, newfile1);
1282 if (unlikely(err < 0)) {
1283 goto out_fd2;
1284 }
1285
1286 err = sock_attach_fd(sock2, newfile2);
1287 if (unlikely(err < 0)) {
1288 fput(newfile1);
1289 goto out_fd1;
1290 }
1291
1292 err = audit_fd_pair(fd1, fd2);
1293 if (err < 0) {
1294 fput(newfile1);
1295 fput(newfile2);
1296 goto out_fd;
1297 }
1da177e4 1298
db349509
AV
1299 fd_install(fd1, newfile1);
1300 fd_install(fd2, newfile2);
1da177e4
LT
1301 /* fd1 and fd2 may be already another descriptors.
1302 * Not kernel problem.
1303 */
1304
89bddce5 1305 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1306 if (!err)
1307 err = put_user(fd2, &usockvec[1]);
1308 if (!err)
1309 return 0;
1310
1311 sys_close(fd2);
1312 sys_close(fd1);
1313 return err;
1314
1da177e4 1315out_release_both:
89bddce5 1316 sock_release(sock2);
1da177e4 1317out_release_1:
89bddce5 1318 sock_release(sock1);
1da177e4
LT
1319out:
1320 return err;
db349509
AV
1321
1322out_fd2:
1323 put_filp(newfile1);
1324 sock_release(sock1);
1325out_fd1:
1326 put_filp(newfile2);
1327 sock_release(sock2);
1328out_fd:
1329 put_unused_fd(fd1);
1330 put_unused_fd(fd2);
1331 goto out;
1da177e4
LT
1332}
1333
1da177e4
LT
1334/*
1335 * Bind a name to a socket. Nothing much to do here since it's
1336 * the protocol's responsibility to handle the local address.
1337 *
1338 * We move the socket address to kernel space before we call
1339 * the protocol layer (having also checked the address is ok).
1340 */
1341
1342asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1343{
1344 struct socket *sock;
230b1839 1345 struct sockaddr_storage address;
6cb153ca 1346 int err, fput_needed;
1da177e4 1347
89bddce5 1348 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1349 if (sock) {
230b1839 1350 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
89bddce5
SH
1351 if (err >= 0) {
1352 err = security_socket_bind(sock,
230b1839 1353 (struct sockaddr *)&address,
89bddce5 1354 addrlen);
6cb153ca
BL
1355 if (!err)
1356 err = sock->ops->bind(sock,
89bddce5 1357 (struct sockaddr *)
230b1839 1358 &address, addrlen);
1da177e4 1359 }
6cb153ca 1360 fput_light(sock->file, fput_needed);
89bddce5 1361 }
1da177e4
LT
1362 return err;
1363}
1364
1da177e4
LT
1365/*
1366 * Perform a listen. Basically, we allow the protocol to do anything
1367 * necessary for a listen, and if that works, we mark the socket as
1368 * ready for listening.
1369 */
1370
1da177e4
LT
1371asmlinkage long sys_listen(int fd, int backlog)
1372{
1373 struct socket *sock;
6cb153ca 1374 int err, fput_needed;
b8e1f9b5 1375 int somaxconn;
89bddce5
SH
1376
1377 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1378 if (sock) {
8efa6e93 1379 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
b8e1f9b5
PE
1380 if ((unsigned)backlog > somaxconn)
1381 backlog = somaxconn;
1da177e4
LT
1382
1383 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1384 if (!err)
1385 err = sock->ops->listen(sock, backlog);
1da177e4 1386
6cb153ca 1387 fput_light(sock->file, fput_needed);
1da177e4
LT
1388 }
1389 return err;
1390}
1391
1da177e4
LT
1392/*
1393 * For accept, we attempt to create a new socket, set up the link
1394 * with the client, wake up the client, then return the new
1395 * connected fd. We collect the address of the connector in kernel
1396 * space and move it to user at the very end. This is unclean because
1397 * we open the socket then return an error.
1398 *
1399 * 1003.1g adds the ability to recvmsg() to query connection pending
1400 * status to recvmsg. We need to add that support in a way thats
1401 * clean when we restucture accept also.
1402 */
1403
89bddce5
SH
1404asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1405 int __user *upeer_addrlen)
1da177e4
LT
1406{
1407 struct socket *sock, *newsock;
39d8c1b6 1408 struct file *newfile;
6cb153ca 1409 int err, len, newfd, fput_needed;
230b1839 1410 struct sockaddr_storage address;
1da177e4 1411
6cb153ca 1412 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1413 if (!sock)
1414 goto out;
1415
1416 err = -ENFILE;
89bddce5 1417 if (!(newsock = sock_alloc()))
1da177e4
LT
1418 goto out_put;
1419
1420 newsock->type = sock->type;
1421 newsock->ops = sock->ops;
1422
1da177e4
LT
1423 /*
1424 * We don't need try_module_get here, as the listening socket (sock)
1425 * has the protocol module (sock->ops->owner) held.
1426 */
1427 __module_get(newsock->ops->owner);
1428
39d8c1b6
DM
1429 newfd = sock_alloc_fd(&newfile);
1430 if (unlikely(newfd < 0)) {
1431 err = newfd;
9a1875e6
DM
1432 sock_release(newsock);
1433 goto out_put;
39d8c1b6
DM
1434 }
1435
1436 err = sock_attach_fd(newsock, newfile);
1437 if (err < 0)
79f4f642 1438 goto out_fd_simple;
39d8c1b6 1439
a79af59e
FF
1440 err = security_socket_accept(sock, newsock);
1441 if (err)
39d8c1b6 1442 goto out_fd;
a79af59e 1443
1da177e4
LT
1444 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1445 if (err < 0)
39d8c1b6 1446 goto out_fd;
1da177e4
LT
1447
1448 if (upeer_sockaddr) {
230b1839 1449 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1450 &len, 2) < 0) {
1da177e4 1451 err = -ECONNABORTED;
39d8c1b6 1452 goto out_fd;
1da177e4 1453 }
230b1839
YH
1454 err = move_addr_to_user((struct sockaddr *)&address,
1455 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1456 if (err < 0)
39d8c1b6 1457 goto out_fd;
1da177e4
LT
1458 }
1459
1460 /* File flags are not inherited via accept() unlike another OSes. */
1461
39d8c1b6
DM
1462 fd_install(newfd, newfile);
1463 err = newfd;
1da177e4
LT
1464
1465 security_socket_post_accept(sock, newsock);
1466
1467out_put:
6cb153ca 1468 fput_light(sock->file, fput_needed);
1da177e4
LT
1469out:
1470 return err;
79f4f642
AD
1471out_fd_simple:
1472 sock_release(newsock);
1473 put_filp(newfile);
1474 put_unused_fd(newfd);
1475 goto out_put;
39d8c1b6 1476out_fd:
9606a216 1477 fput(newfile);
39d8c1b6 1478 put_unused_fd(newfd);
1da177e4
LT
1479 goto out_put;
1480}
1481
1da177e4
LT
1482/*
1483 * Attempt to connect to a socket with the server address. The address
1484 * is in user space so we verify it is OK and move it to kernel space.
1485 *
1486 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1487 * break bindings
1488 *
1489 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1490 * other SEQPACKET protocols that take time to connect() as it doesn't
1491 * include the -EINPROGRESS status for such sockets.
1492 */
1493
89bddce5
SH
1494asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1495 int addrlen)
1da177e4
LT
1496{
1497 struct socket *sock;
230b1839 1498 struct sockaddr_storage address;
6cb153ca 1499 int err, fput_needed;
1da177e4 1500
6cb153ca 1501 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1502 if (!sock)
1503 goto out;
230b1839 1504 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
1da177e4
LT
1505 if (err < 0)
1506 goto out_put;
1507
89bddce5 1508 err =
230b1839 1509 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1510 if (err)
1511 goto out_put;
1512
230b1839 1513 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1514 sock->file->f_flags);
1515out_put:
6cb153ca 1516 fput_light(sock->file, fput_needed);
1da177e4
LT
1517out:
1518 return err;
1519}
1520
1521/*
1522 * Get the local address ('name') of a socket object. Move the obtained
1523 * name to user space.
1524 */
1525
89bddce5
SH
1526asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1527 int __user *usockaddr_len)
1da177e4
LT
1528{
1529 struct socket *sock;
230b1839 1530 struct sockaddr_storage address;
6cb153ca 1531 int len, err, fput_needed;
89bddce5 1532
6cb153ca 1533 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1534 if (!sock)
1535 goto out;
1536
1537 err = security_socket_getsockname(sock);
1538 if (err)
1539 goto out_put;
1540
230b1839 1541 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1542 if (err)
1543 goto out_put;
230b1839 1544 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1545
1546out_put:
6cb153ca 1547 fput_light(sock->file, fput_needed);
1da177e4
LT
1548out:
1549 return err;
1550}
1551
1552/*
1553 * Get the remote address ('name') of a socket object. Move the obtained
1554 * name to user space.
1555 */
1556
89bddce5
SH
1557asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1558 int __user *usockaddr_len)
1da177e4
LT
1559{
1560 struct socket *sock;
230b1839 1561 struct sockaddr_storage address;
6cb153ca 1562 int len, err, fput_needed;
1da177e4 1563
89bddce5
SH
1564 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1565 if (sock != NULL) {
1da177e4
LT
1566 err = security_socket_getpeername(sock);
1567 if (err) {
6cb153ca 1568 fput_light(sock->file, fput_needed);
1da177e4
LT
1569 return err;
1570 }
1571
89bddce5 1572 err =
230b1839 1573 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1574 1);
1da177e4 1575 if (!err)
230b1839 1576 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr,
89bddce5 1577 usockaddr_len);
6cb153ca 1578 fput_light(sock->file, fput_needed);
1da177e4
LT
1579 }
1580 return err;
1581}
1582
1583/*
1584 * Send a datagram to a given address. We move the address into kernel
1585 * space and check the user space data area is readable before invoking
1586 * the protocol.
1587 */
1588
89bddce5
SH
1589asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1590 unsigned flags, struct sockaddr __user *addr,
1591 int addr_len)
1da177e4
LT
1592{
1593 struct socket *sock;
230b1839 1594 struct sockaddr_storage address;
1da177e4
LT
1595 int err;
1596 struct msghdr msg;
1597 struct iovec iov;
6cb153ca 1598 int fput_needed;
6cb153ca 1599
de0fa95c
PE
1600 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1601 if (!sock)
4387ff75 1602 goto out;
6cb153ca 1603
89bddce5
SH
1604 iov.iov_base = buff;
1605 iov.iov_len = len;
1606 msg.msg_name = NULL;
1607 msg.msg_iov = &iov;
1608 msg.msg_iovlen = 1;
1609 msg.msg_control = NULL;
1610 msg.msg_controllen = 0;
1611 msg.msg_namelen = 0;
6cb153ca 1612 if (addr) {
230b1839 1613 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address);
1da177e4
LT
1614 if (err < 0)
1615 goto out_put;
230b1839 1616 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1617 msg.msg_namelen = addr_len;
1da177e4
LT
1618 }
1619 if (sock->file->f_flags & O_NONBLOCK)
1620 flags |= MSG_DONTWAIT;
1621 msg.msg_flags = flags;
1622 err = sock_sendmsg(sock, &msg, len);
1623
89bddce5 1624out_put:
de0fa95c 1625 fput_light(sock->file, fput_needed);
4387ff75 1626out:
1da177e4
LT
1627 return err;
1628}
1629
1630/*
89bddce5 1631 * Send a datagram down a socket.
1da177e4
LT
1632 */
1633
89bddce5 1634asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1635{
1636 return sys_sendto(fd, buff, len, flags, NULL, 0);
1637}
1638
1639/*
89bddce5 1640 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1641 * sender. We verify the buffers are writable and if needed move the
1642 * sender address from kernel to user space.
1643 */
1644
89bddce5
SH
1645asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1646 unsigned flags, struct sockaddr __user *addr,
1647 int __user *addr_len)
1da177e4
LT
1648{
1649 struct socket *sock;
1650 struct iovec iov;
1651 struct msghdr msg;
230b1839 1652 struct sockaddr_storage address;
89bddce5 1653 int err, err2;
6cb153ca
BL
1654 int fput_needed;
1655
de0fa95c 1656 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1657 if (!sock)
de0fa95c 1658 goto out;
1da177e4 1659
89bddce5
SH
1660 msg.msg_control = NULL;
1661 msg.msg_controllen = 0;
1662 msg.msg_iovlen = 1;
1663 msg.msg_iov = &iov;
1664 iov.iov_len = size;
1665 iov.iov_base = ubuf;
230b1839
YH
1666 msg.msg_name = (struct sockaddr *)&address;
1667 msg.msg_namelen = sizeof(address);
1da177e4
LT
1668 if (sock->file->f_flags & O_NONBLOCK)
1669 flags |= MSG_DONTWAIT;
89bddce5 1670 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1671
89bddce5 1672 if (err >= 0 && addr != NULL) {
230b1839
YH
1673 err2 = move_addr_to_user((struct sockaddr *)&address,
1674 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1675 if (err2 < 0)
1676 err = err2;
1da177e4 1677 }
de0fa95c
PE
1678
1679 fput_light(sock->file, fput_needed);
4387ff75 1680out:
1da177e4
LT
1681 return err;
1682}
1683
1684/*
89bddce5 1685 * Receive a datagram from a socket.
1da177e4
LT
1686 */
1687
89bddce5
SH
1688asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1689 unsigned flags)
1da177e4
LT
1690{
1691 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1692}
1693
1694/*
1695 * Set a socket option. Because we don't know the option lengths we have
1696 * to pass the user mode parameter for the protocols to sort out.
1697 */
1698
89bddce5
SH
1699asmlinkage long sys_setsockopt(int fd, int level, int optname,
1700 char __user *optval, int optlen)
1da177e4 1701{
6cb153ca 1702 int err, fput_needed;
1da177e4
LT
1703 struct socket *sock;
1704
1705 if (optlen < 0)
1706 return -EINVAL;
89bddce5
SH
1707
1708 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1709 if (sock != NULL) {
1710 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1711 if (err)
1712 goto out_put;
1da177e4
LT
1713
1714 if (level == SOL_SOCKET)
89bddce5
SH
1715 err =
1716 sock_setsockopt(sock, level, optname, optval,
1717 optlen);
1da177e4 1718 else
89bddce5
SH
1719 err =
1720 sock->ops->setsockopt(sock, level, optname, optval,
1721 optlen);
6cb153ca
BL
1722out_put:
1723 fput_light(sock->file, fput_needed);
1da177e4
LT
1724 }
1725 return err;
1726}
1727
1728/*
1729 * Get a socket option. Because we don't know the option lengths we have
1730 * to pass a user mode parameter for the protocols to sort out.
1731 */
1732
89bddce5
SH
1733asmlinkage long sys_getsockopt(int fd, int level, int optname,
1734 char __user *optval, int __user *optlen)
1da177e4 1735{
6cb153ca 1736 int err, fput_needed;
1da177e4
LT
1737 struct socket *sock;
1738
89bddce5
SH
1739 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1740 if (sock != NULL) {
6cb153ca
BL
1741 err = security_socket_getsockopt(sock, level, optname);
1742 if (err)
1743 goto out_put;
1da177e4
LT
1744
1745 if (level == SOL_SOCKET)
89bddce5
SH
1746 err =
1747 sock_getsockopt(sock, level, optname, optval,
1748 optlen);
1da177e4 1749 else
89bddce5
SH
1750 err =
1751 sock->ops->getsockopt(sock, level, optname, optval,
1752 optlen);
6cb153ca
BL
1753out_put:
1754 fput_light(sock->file, fput_needed);
1da177e4
LT
1755 }
1756 return err;
1757}
1758
1da177e4
LT
1759/*
1760 * Shutdown a socket.
1761 */
1762
1763asmlinkage long sys_shutdown(int fd, int how)
1764{
6cb153ca 1765 int err, fput_needed;
1da177e4
LT
1766 struct socket *sock;
1767
89bddce5
SH
1768 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1769 if (sock != NULL) {
1da177e4 1770 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1771 if (!err)
1772 err = sock->ops->shutdown(sock, how);
1773 fput_light(sock->file, fput_needed);
1da177e4
LT
1774 }
1775 return err;
1776}
1777
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * They expand to &<msg>_compat->member when MSG_CMSG_COMPAT is set in
 * the caller's local 'flags', and to &<msg>->member otherwise, so the
 * caller must have both 'flags' and '<msg>_compat' in scope.
 */
#define COMPAT_MSG(msg, member) \
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1784
1da177e4
LT
1785/*
1786 * BSD sendmsg interface
1787 */
1788
1789asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1790{
89bddce5
SH
1791 struct compat_msghdr __user *msg_compat =
1792 (struct compat_msghdr __user *)msg;
1da177e4 1793 struct socket *sock;
230b1839 1794 struct sockaddr_storage address;
1da177e4 1795 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1796 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1797 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1798 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1799 unsigned char *ctl_buf = ctl;
1800 struct msghdr msg_sys;
1801 int err, ctl_len, iov_size, total_len;
6cb153ca 1802 int fput_needed;
89bddce5 1803
1da177e4
LT
1804 err = -EFAULT;
1805 if (MSG_CMSG_COMPAT & flags) {
1806 if (get_compat_msghdr(&msg_sys, msg_compat))
1807 return -EFAULT;
89bddce5
SH
1808 }
1809 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1810 return -EFAULT;
1811
6cb153ca 1812 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1813 if (!sock)
1da177e4
LT
1814 goto out;
1815
1816 /* do not move before msg_sys is valid */
1817 err = -EMSGSIZE;
1818 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1819 goto out_put;
1820
89bddce5 1821 /* Check whether to allocate the iovec area */
1da177e4
LT
1822 err = -ENOMEM;
1823 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1824 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1825 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1826 if (!iov)
1827 goto out_put;
1828 }
1829
1830 /* This will also move the address data into kernel space */
1831 if (MSG_CMSG_COMPAT & flags) {
230b1839
YH
1832 err = verify_compat_iovec(&msg_sys, iov,
1833 (struct sockaddr *)&address,
1834 VERIFY_READ);
1da177e4 1835 } else
230b1839
YH
1836 err = verify_iovec(&msg_sys, iov,
1837 (struct sockaddr *)&address,
1838 VERIFY_READ);
89bddce5 1839 if (err < 0)
1da177e4
LT
1840 goto out_freeiov;
1841 total_len = err;
1842
1843 err = -ENOBUFS;
1844
1845 if (msg_sys.msg_controllen > INT_MAX)
1846 goto out_freeiov;
89bddce5 1847 ctl_len = msg_sys.msg_controllen;
1da177e4 1848 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1849 err =
1850 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1851 sizeof(ctl));
1da177e4
LT
1852 if (err)
1853 goto out_freeiov;
1854 ctl_buf = msg_sys.msg_control;
8920e8f9 1855 ctl_len = msg_sys.msg_controllen;
1da177e4 1856 } else if (ctl_len) {
89bddce5 1857 if (ctl_len > sizeof(ctl)) {
1da177e4 1858 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1859 if (ctl_buf == NULL)
1da177e4
LT
1860 goto out_freeiov;
1861 }
1862 err = -EFAULT;
1863 /*
1864 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1865 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1866 * checking falls down on this.
1867 */
89bddce5
SH
1868 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1869 ctl_len))
1da177e4
LT
1870 goto out_freectl;
1871 msg_sys.msg_control = ctl_buf;
1872 }
1873 msg_sys.msg_flags = flags;
1874
1875 if (sock->file->f_flags & O_NONBLOCK)
1876 msg_sys.msg_flags |= MSG_DONTWAIT;
1877 err = sock_sendmsg(sock, &msg_sys, total_len);
1878
1879out_freectl:
89bddce5 1880 if (ctl_buf != ctl)
1da177e4
LT
1881 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1882out_freeiov:
1883 if (iov != iovstack)
1884 sock_kfree_s(sock->sk, iov, iov_size);
1885out_put:
6cb153ca 1886 fput_light(sock->file, fput_needed);
89bddce5 1887out:
1da177e4
LT
1888 return err;
1889}
1890
1891/*
1892 * BSD recvmsg interface
1893 */
1894
89bddce5
SH
1895asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1896 unsigned int flags)
1da177e4 1897{
89bddce5
SH
1898 struct compat_msghdr __user *msg_compat =
1899 (struct compat_msghdr __user *)msg;
1da177e4
LT
1900 struct socket *sock;
1901 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1902 struct iovec *iov = iovstack;
1da177e4
LT
1903 struct msghdr msg_sys;
1904 unsigned long cmsg_ptr;
1905 int err, iov_size, total_len, len;
6cb153ca 1906 int fput_needed;
1da177e4
LT
1907
1908 /* kernel mode address */
230b1839 1909 struct sockaddr_storage addr;
1da177e4
LT
1910
1911 /* user mode address pointers */
1912 struct sockaddr __user *uaddr;
1913 int __user *uaddr_len;
89bddce5 1914
1da177e4
LT
1915 if (MSG_CMSG_COMPAT & flags) {
1916 if (get_compat_msghdr(&msg_sys, msg_compat))
1917 return -EFAULT;
89bddce5
SH
1918 }
1919 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1920 return -EFAULT;
1da177e4 1921
6cb153ca 1922 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1923 if (!sock)
1924 goto out;
1925
1926 err = -EMSGSIZE;
1927 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1928 goto out_put;
89bddce5
SH
1929
1930 /* Check whether to allocate the iovec area */
1da177e4
LT
1931 err = -ENOMEM;
1932 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1933 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1934 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1935 if (!iov)
1936 goto out_put;
1937 }
1938
1939 /*
89bddce5
SH
1940 * Save the user-mode address (verify_iovec will change the
1941 * kernel msghdr to use the kernel address space)
1da177e4 1942 */
89bddce5 1943
cfcabdcc 1944 uaddr = (__force void __user *)msg_sys.msg_name;
1da177e4
LT
1945 uaddr_len = COMPAT_NAMELEN(msg);
1946 if (MSG_CMSG_COMPAT & flags) {
230b1839
YH
1947 err = verify_compat_iovec(&msg_sys, iov,
1948 (struct sockaddr *)&addr,
1949 VERIFY_WRITE);
1da177e4 1950 } else
230b1839
YH
1951 err = verify_iovec(&msg_sys, iov,
1952 (struct sockaddr *)&addr,
1953 VERIFY_WRITE);
1da177e4
LT
1954 if (err < 0)
1955 goto out_freeiov;
89bddce5 1956 total_len = err;
1da177e4
LT
1957
1958 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 1959 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 1960
1da177e4
LT
1961 if (sock->file->f_flags & O_NONBLOCK)
1962 flags |= MSG_DONTWAIT;
1963 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1964 if (err < 0)
1965 goto out_freeiov;
1966 len = err;
1967
1968 if (uaddr != NULL) {
230b1839
YH
1969 err = move_addr_to_user((struct sockaddr *)&addr,
1970 msg_sys.msg_namelen, uaddr,
89bddce5 1971 uaddr_len);
1da177e4
LT
1972 if (err < 0)
1973 goto out_freeiov;
1974 }
37f7f421
DM
1975 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1976 COMPAT_FLAGS(msg));
1da177e4
LT
1977 if (err)
1978 goto out_freeiov;
1979 if (MSG_CMSG_COMPAT & flags)
89bddce5 1980 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1981 &msg_compat->msg_controllen);
1982 else
89bddce5 1983 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1984 &msg->msg_controllen);
1985 if (err)
1986 goto out_freeiov;
1987 err = len;
1988
1989out_freeiov:
1990 if (iov != iovstack)
1991 sock_kfree_s(sock->sk, iov, iov_size);
1992out_put:
6cb153ca 1993 fput_light(sock->file, fput_needed);
1da177e4
LT
1994out:
1995 return err;
1996}
1997
1998#ifdef __ARCH_WANT_SYS_SOCKETCALL
1999
/*
 * Argument list sizes for sys_socketcall.
 *
 * nargs[call] is the size in bytes of the unsigned long argument block
 * that sys_socketcall() copies from user space for call number 'call'.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3)
};

#undef AL
2009
2010/*
89bddce5 2011 * System call vectors.
1da177e4
LT
2012 *
2013 * Argument checking cleaned up. Saved 20% in size.
2014 * This function doesn't need to set the kernel lock because
89bddce5 2015 * it is set by the callees.
1da177e4
LT
2016 */
2017
2018asmlinkage long sys_socketcall(int call, unsigned long __user *args)
2019{
2020 unsigned long a[6];
89bddce5 2021 unsigned long a0, a1;
1da177e4
LT
2022 int err;
2023
89bddce5 2024 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
2025 return -EINVAL;
2026
2027 /* copy_from_user should be SMP safe. */
2028 if (copy_from_user(a, args, nargs[call]))
2029 return -EFAULT;
3ec3b2fb 2030
89bddce5 2031 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
2032 if (err)
2033 return err;
2034
89bddce5
SH
2035 a0 = a[0];
2036 a1 = a[1];
2037
2038 switch (call) {
2039 case SYS_SOCKET:
2040 err = sys_socket(a0, a1, a[2]);
2041 break;
2042 case SYS_BIND:
2043 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2044 break;
2045 case SYS_CONNECT:
2046 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2047 break;
2048 case SYS_LISTEN:
2049 err = sys_listen(a0, a1);
2050 break;
2051 case SYS_ACCEPT:
2052 err =
2053 sys_accept(a0, (struct sockaddr __user *)a1,
2054 (int __user *)a[2]);
2055 break;
2056 case SYS_GETSOCKNAME:
2057 err =
2058 sys_getsockname(a0, (struct sockaddr __user *)a1,
2059 (int __user *)a[2]);
2060 break;
2061 case SYS_GETPEERNAME:
2062 err =
2063 sys_getpeername(a0, (struct sockaddr __user *)a1,
2064 (int __user *)a[2]);
2065 break;
2066 case SYS_SOCKETPAIR:
2067 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2068 break;
2069 case SYS_SEND:
2070 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2071 break;
2072 case SYS_SENDTO:
2073 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2074 (struct sockaddr __user *)a[4], a[5]);
2075 break;
2076 case SYS_RECV:
2077 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2078 break;
2079 case SYS_RECVFROM:
2080 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2081 (struct sockaddr __user *)a[4],
2082 (int __user *)a[5]);
2083 break;
2084 case SYS_SHUTDOWN:
2085 err = sys_shutdown(a0, a1);
2086 break;
2087 case SYS_SETSOCKOPT:
2088 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2089 break;
2090 case SYS_GETSOCKOPT:
2091 err =
2092 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2093 (int __user *)a[4]);
2094 break;
2095 case SYS_SENDMSG:
2096 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2097 break;
2098 case SYS_RECVMSG:
2099 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2100 break;
2101 default:
2102 err = -EINVAL;
2103 break;
1da177e4
LT
2104 }
2105 return err;
2106}
2107
89bddce5 2108#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2109
55737fda
SH
2110/**
2111 * sock_register - add a socket protocol handler
2112 * @ops: description of protocol
2113 *
1da177e4
LT
2114 * This function is called by a protocol handler that wants to
2115 * advertise its address family, and have it linked into the
55737fda
SH
2116 * socket interface. The value ops->family coresponds to the
2117 * socket system call protocol family.
1da177e4 2118 */
f0fd27d4 2119int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2120{
2121 int err;
2122
2123 if (ops->family >= NPROTO) {
89bddce5
SH
2124 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2125 NPROTO);
1da177e4
LT
2126 return -ENOBUFS;
2127 }
55737fda
SH
2128
2129 spin_lock(&net_family_lock);
2130 if (net_families[ops->family])
2131 err = -EEXIST;
2132 else {
89bddce5 2133 net_families[ops->family] = ops;
1da177e4
LT
2134 err = 0;
2135 }
55737fda
SH
2136 spin_unlock(&net_family_lock);
2137
89bddce5 2138 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2139 return err;
2140}
2141
55737fda
SH
2142/**
2143 * sock_unregister - remove a protocol handler
2144 * @family: protocol family to remove
2145 *
1da177e4
LT
2146 * This function is called by a protocol handler that wants to
2147 * remove its address family, and have it unlinked from the
55737fda
SH
2148 * new socket creation.
2149 *
2150 * If protocol handler is a module, then it can use module reference
2151 * counts to protect against new references. If protocol handler is not
2152 * a module then it needs to provide its own protection in
2153 * the ops->create routine.
1da177e4 2154 */
f0fd27d4 2155void sock_unregister(int family)
1da177e4 2156{
f0fd27d4 2157 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2158
55737fda 2159 spin_lock(&net_family_lock);
89bddce5 2160 net_families[family] = NULL;
55737fda
SH
2161 spin_unlock(&net_family_lock);
2162
2163 synchronize_rcu();
2164
89bddce5 2165 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2166}
2167
77d76ea3 2168static int __init sock_init(void)
1da177e4
LT
2169{
2170 /*
89bddce5 2171 * Initialize sock SLAB cache.
1da177e4 2172 */
89bddce5 2173
1da177e4
LT
2174 sk_init();
2175
1da177e4 2176 /*
89bddce5 2177 * Initialize skbuff SLAB cache
1da177e4
LT
2178 */
2179 skb_init();
1da177e4
LT
2180
2181 /*
89bddce5 2182 * Initialize the protocols module.
1da177e4
LT
2183 */
2184
2185 init_inodecache();
2186 register_filesystem(&sock_fs_type);
2187 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2188
2189 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2190 */
2191
2192#ifdef CONFIG_NETFILTER
2193 netfilter_init();
2194#endif
cbeb321a
DM
2195
2196 return 0;
1da177e4
LT
2197}
2198
77d76ea3
AK
2199core_initcall(sock_init); /* early initcall */
2200
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Print the "sockets: used N" line to @seq, where N is the sum of the
 * per-CPU sockets_in_use counters over all possible CPUs.
 */
void socket_seq_show(struct seq_file *seq)
{
	int cpu;
	int counter = 0;

	for_each_possible_cpu(cpu)
	    counter += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report zero in that case. */
	if (counter < 0)
		counter = 0;

	seq_printf(seq, "sockets: used %d\n", counter);
}
#endif				/* CONFIG_PROC_FS */
1da177e4 2217
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point for sockets.
 *
 * Tries the protocol's own compat_ioctl handler first, if it has one.
 * When the result is still -ENOIOCTLCMD and @cmd lies in the wireless
 * extensions range (SIOCIWFIRST..SIOCIWLAST), the request is passed to
 * compat_wext_handle_ioctl() for the socket's network namespace.
 * Returns -ENOIOCTLCMD when nobody handled the command.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	int ret = -ENOIOCTLCMD;
	struct sock *sk;
	struct net *net;

	sk = sock->sk;
	net = sock_net(sk);

	if (sock->ops->compat_ioctl)
		ret = sock->ops->compat_ioctl(sock, cmd, arg);

	if (ret == -ENOIOCTLCMD &&
	    (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
		ret = compat_wext_handle_ioctl(net, cmd, arg);

	return ret;
}
#endif
2240
ac5a488e
SS
2241int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2242{
2243 return sock->ops->bind(sock, addr, addrlen);
2244}
2245
2246int kernel_listen(struct socket *sock, int backlog)
2247{
2248 return sock->ops->listen(sock, backlog);
2249}
2250
2251int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2252{
2253 struct sock *sk = sock->sk;
2254 int err;
2255
2256 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2257 newsock);
2258 if (err < 0)
2259 goto done;
2260
2261 err = sock->ops->accept(sock, *newsock, flags);
2262 if (err < 0) {
2263 sock_release(*newsock);
fa8705b0 2264 *newsock = NULL;
ac5a488e
SS
2265 goto done;
2266 }
2267
2268 (*newsock)->ops = sock->ops;
2269
2270done:
2271 return err;
2272}
2273
2274int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2275 int flags)
ac5a488e
SS
2276{
2277 return sock->ops->connect(sock, addr, addrlen, flags);
2278}
2279
2280int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2281 int *addrlen)
2282{
2283 return sock->ops->getname(sock, addr, addrlen, 0);
2284}
2285
2286int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2287 int *addrlen)
2288{
2289 return sock->ops->getname(sock, addr, addrlen, 1);
2290}
2291
2292int kernel_getsockopt(struct socket *sock, int level, int optname,
2293 char *optval, int *optlen)
2294{
2295 mm_segment_t oldfs = get_fs();
2296 int err;
2297
2298 set_fs(KERNEL_DS);
2299 if (level == SOL_SOCKET)
2300 err = sock_getsockopt(sock, level, optname, optval, optlen);
2301 else
2302 err = sock->ops->getsockopt(sock, level, optname, optval,
2303 optlen);
2304 set_fs(oldfs);
2305 return err;
2306}
2307
2308int kernel_setsockopt(struct socket *sock, int level, int optname,
2309 char *optval, int optlen)
2310{
2311 mm_segment_t oldfs = get_fs();
2312 int err;
2313
2314 set_fs(KERNEL_DS);
2315 if (level == SOL_SOCKET)
2316 err = sock_setsockopt(sock, level, optname, optval, optlen);
2317 else
2318 err = sock->ops->setsockopt(sock, level, optname, optval,
2319 optlen);
2320 set_fs(oldfs);
2321 return err;
2322}
2323
2324int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2325 size_t size, int flags)
2326{
2327 if (sock->ops->sendpage)
2328 return sock->ops->sendpage(sock, page, offset, size, flags);
2329
2330 return sock_no_sendpage(sock, page, offset, size, flags);
2331}
2332
2333int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2334{
2335 mm_segment_t oldfs = get_fs();
2336 int err;
2337
2338 set_fs(KERNEL_DS);
2339 err = sock->ops->ioctl(sock, cmd, arg);
2340 set_fs(oldfs);
2341
2342 return err;
2343}
2344
91cf45f0
TM
2345int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
2346{
2347 return sock->ops->shutdown(sock, how);
2348}
2349
1da177e4
LT
/* Socket-layer symbols exported to modules. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);