Merge branch 'fixes-davem' of master.kernel.org:/pub/scm/linux/kernel/git/linville...
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1da177e4
LT
87
88#include <asm/uaccess.h>
89#include <asm/unistd.h>
90
91#include <net/compat.h>
92
93#include <net/sock.h>
94#include <linux/netfilter.h>
95
96static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
97static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
98 unsigned long nr_segs, loff_t pos);
99static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
100 unsigned long nr_segs, loff_t pos);
89bddce5 101static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
102
103static int sock_close(struct inode *inode, struct file *file);
104static unsigned int sock_poll(struct file *file,
105 struct poll_table_struct *wait);
89bddce5 106static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
107#ifdef CONFIG_COMPAT
108static long compat_sock_ioctl(struct file *file,
89bddce5 109 unsigned int cmd, unsigned long arg);
89bbfc95 110#endif
1da177e4 111static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
112static ssize_t sock_sendpage(struct file *file, struct page *page,
113 int offset, size_t size, loff_t *ppos, int more);
114
1da177e4
LT
115/*
116 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
117 * in the operation structures but are done directly via the socketcall() multiplexor.
118 */
119
da7071d7 120static const struct file_operations socket_file_ops = {
1da177e4
LT
121 .owner = THIS_MODULE,
122 .llseek = no_llseek,
123 .aio_read = sock_aio_read,
124 .aio_write = sock_aio_write,
125 .poll = sock_poll,
126 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
127#ifdef CONFIG_COMPAT
128 .compat_ioctl = compat_sock_ioctl,
129#endif
1da177e4
LT
130 .mmap = sock_mmap,
131 .open = sock_no_open, /* special open code to disallow open via /proc */
132 .release = sock_close,
133 .fasync = sock_fasync,
5274f052
JA
134 .sendpage = sock_sendpage,
135 .splice_write = generic_splice_sendpage,
1da177e4
LT
136};
137
138/*
139 * The protocol list. Each protocol is registered in here.
140 */
141
1da177e4 142static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 143static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 144
1da177e4
LT
145/*
146 * Statistics counters of the socket lists
147 */
148
149static DEFINE_PER_CPU(int, sockets_in_use) = 0;
150
151/*
89bddce5
SH
152 * Support routines.
153 * Move socket addresses back and forth across the kernel/user
154 * divide and look after the messy bits.
1da177e4
LT
155 */
156
89bddce5 157#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
158 16 for IP, 16 for IPX,
159 24 for IPv6,
89bddce5 160 about 80 for AX.25
1da177e4
LT
161 must be at least one bigger than
162 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 163 :unix_mkname()).
1da177e4 164 */
89bddce5 165
1da177e4
LT
166/**
167 * move_addr_to_kernel - copy a socket address into kernel space
168 * @uaddr: Address in user space
169 * @kaddr: Address in kernel space
170 * @ulen: Length in user space
171 *
172 * The address is copied into kernel space. If the provided address is
173 * too long an error code of -EINVAL is returned. If the copy gives
174 * invalid addresses -EFAULT is returned. On a success 0 is returned.
175 */
176
177int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
178{
89bddce5 179 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 180 return -EINVAL;
89bddce5 181 if (ulen == 0)
1da177e4 182 return 0;
89bddce5 183 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 184 return -EFAULT;
3ec3b2fb 185 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
186}
187
188/**
189 * move_addr_to_user - copy an address to user space
190 * @kaddr: kernel space address
191 * @klen: length of address in kernel
192 * @uaddr: user space address
193 * @ulen: pointer to user length field
194 *
195 * The value pointed to by ulen on entry is the buffer length available.
196 * This is overwritten with the buffer space used. -EINVAL is returned
197 * if an overlong buffer is specified or a negative buffer size. -EFAULT
198 * is returned if either the buffer or the length field are not
199 * accessible.
200 * After copying the data up to the limit the user specifies, the true
201 * length of the data is written over the length limit the user
202 * specified. Zero is returned for a success.
203 */
89bddce5
SH
204
205int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
206 int __user *ulen)
1da177e4
LT
207{
208 int err;
209 int len;
210
89bddce5
SH
211 err = get_user(len, ulen);
212 if (err)
1da177e4 213 return err;
89bddce5
SH
214 if (len > klen)
215 len = klen;
216 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 217 return -EINVAL;
89bddce5 218 if (len) {
d6fe3945
SG
219 if (audit_sockaddr(klen, kaddr))
220 return -ENOMEM;
89bddce5 221 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
222 return -EFAULT;
223 }
224 /*
89bddce5
SH
225 * "fromlen shall refer to the value before truncation.."
226 * 1003.1g
1da177e4
LT
227 */
228 return __put_user(klen, ulen);
229}
230
231#define SOCKFS_MAGIC 0x534F434B
232
e18b890b 233static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
234
235static struct inode *sock_alloc_inode(struct super_block *sb)
236{
237 struct socket_alloc *ei;
89bddce5 238
e94b1766 239 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
240 if (!ei)
241 return NULL;
242 init_waitqueue_head(&ei->socket.wait);
89bddce5 243
1da177e4
LT
244 ei->socket.fasync_list = NULL;
245 ei->socket.state = SS_UNCONNECTED;
246 ei->socket.flags = 0;
247 ei->socket.ops = NULL;
248 ei->socket.sk = NULL;
249 ei->socket.file = NULL;
1da177e4
LT
250
251 return &ei->vfs_inode;
252}
253
254static void sock_destroy_inode(struct inode *inode)
255{
256 kmem_cache_free(sock_inode_cachep,
257 container_of(inode, struct socket_alloc, vfs_inode));
258}
259
e18b890b 260static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
1da177e4 261{
89bddce5 262 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 263
a35afb83 264 inode_init_once(&ei->vfs_inode);
1da177e4 265}
89bddce5 266
1da177e4
LT
267static int init_inodecache(void)
268{
269 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
270 sizeof(struct socket_alloc),
271 0,
272 (SLAB_HWCACHE_ALIGN |
273 SLAB_RECLAIM_ACCOUNT |
274 SLAB_MEM_SPREAD),
20c2df83 275 init_once);
1da177e4
LT
276 if (sock_inode_cachep == NULL)
277 return -ENOMEM;
278 return 0;
279}
280
281static struct super_operations sockfs_ops = {
282 .alloc_inode = sock_alloc_inode,
283 .destroy_inode =sock_destroy_inode,
284 .statfs = simple_statfs,
285};
286
454e2398 287static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
288 int flags, const char *dev_name, void *data,
289 struct vfsmount *mnt)
1da177e4 290{
454e2398
DH
291 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
292 mnt);
1da177e4
LT
293}
294
ba89966c 295static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
296
297static struct file_system_type sock_fs_type = {
298 .name = "sockfs",
299 .get_sb = sockfs_get_sb,
300 .kill_sb = kill_anon_super,
301};
89bddce5 302
1da177e4
LT
303static int sockfs_delete_dentry(struct dentry *dentry)
304{
304e61e6
ED
305 /*
306 * At creation time, we pretended this dentry was hashed
307 * (by clearing DCACHE_UNHASHED bit in d_flags)
308 * At delete time, we restore the truth : not hashed.
309 * (so that dput() can proceed correctly)
310 */
311 dentry->d_flags |= DCACHE_UNHASHED;
312 return 0;
1da177e4 313}
c23fbb6b
ED
314
315/*
316 * sockfs_dname() is called from d_path().
317 */
318static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
319{
320 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
321 dentry->d_inode->i_ino);
322}
323
1da177e4 324static struct dentry_operations sockfs_dentry_operations = {
89bddce5 325 .d_delete = sockfs_delete_dentry,
c23fbb6b 326 .d_dname = sockfs_dname,
1da177e4
LT
327};
328
329/*
330 * Obtains the first available file descriptor and sets it up for use.
331 *
39d8c1b6
DM
332 * These functions create file structures and maps them to fd space
333 * of the current process. On success it returns file descriptor
1da177e4
LT
334 * and file struct implicitly stored in sock->file.
335 * Note that another thread may close file descriptor before we return
336 * from this function. We use the fact that now we do not refer
337 * to socket after mapping. If one day we will need it, this
338 * function will increment ref. count on file by 1.
339 *
340 * In any case returned fd MAY BE not valid!
341 * This race condition is unavoidable
342 * with shared fd spaces, we cannot solve it inside kernel,
343 * but we take care of internal coherence yet.
344 */
345
39d8c1b6 346static int sock_alloc_fd(struct file **filep)
1da177e4
LT
347{
348 int fd;
1da177e4
LT
349
350 fd = get_unused_fd();
39d8c1b6 351 if (likely(fd >= 0)) {
1da177e4
LT
352 struct file *file = get_empty_filp();
353
39d8c1b6
DM
354 *filep = file;
355 if (unlikely(!file)) {
1da177e4 356 put_unused_fd(fd);
39d8c1b6 357 return -ENFILE;
1da177e4 358 }
39d8c1b6
DM
359 } else
360 *filep = NULL;
361 return fd;
362}
1da177e4 363
39d8c1b6
DM
364static int sock_attach_fd(struct socket *sock, struct file *file)
365{
c23fbb6b 366 struct qstr name = { .name = "" };
39d8c1b6 367
c23fbb6b 368 file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
3126a42c 369 if (unlikely(!file->f_path.dentry))
39d8c1b6
DM
370 return -ENOMEM;
371
3126a42c 372 file->f_path.dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
373 /*
374 * We dont want to push this dentry into global dentry hash table.
375 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
376 * This permits a working /proc/$pid/fd/XXX on sockets
377 */
3126a42c
JS
378 file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
379 d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
380 file->f_path.mnt = mntget(sock_mnt);
381 file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
39d8c1b6
DM
382
383 sock->file = file;
384 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
385 file->f_mode = FMODE_READ | FMODE_WRITE;
386 file->f_flags = O_RDWR;
387 file->f_pos = 0;
388 file->private_data = sock;
1da177e4 389
39d8c1b6
DM
390 return 0;
391}
392
/*
 *	Allocate a file descriptor and file struct for @sock, attach the
 *	two and install the file in the current fd table. Returns the new
 *	fd, or a negative error from sock_alloc_fd()/sock_attach_fd(); on
 *	an attach failure both the file struct and the fd are released.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (likely(err >= 0)) {
		fd_install(fd, newfile);
		return fd;
	}

	put_filp(newfile);
	put_unused_fd(fd);
	return err;
}
410
6cb153ca
BL
411static struct socket *sock_from_file(struct file *file, int *err)
412{
6cb153ca
BL
413 if (file->f_op == &socket_file_ops)
414 return file->private_data; /* set in sock_map_fd */
415
23bb80d2
ED
416 *err = -ENOTSOCK;
417 return NULL;
6cb153ca
BL
418}
419
1da177e4
LT
420/**
421 * sockfd_lookup - Go from a file number to its socket slot
422 * @fd: file handle
423 * @err: pointer to an error code return
424 *
425 * The file handle passed in is locked and the socket it is bound
426 * too is returned. If an error occurs the err pointer is overwritten
427 * with a negative errno code and NULL is returned. The function checks
428 * for both invalid handles and passing a handle which is not a socket.
429 *
430 * On a success the socket object pointer is returned.
431 */
432
433struct socket *sockfd_lookup(int fd, int *err)
434{
435 struct file *file;
1da177e4
LT
436 struct socket *sock;
437
89bddce5
SH
438 file = fget(fd);
439 if (!file) {
1da177e4
LT
440 *err = -EBADF;
441 return NULL;
442 }
89bddce5 443
6cb153ca
BL
444 sock = sock_from_file(file, err);
445 if (!sock)
1da177e4 446 fput(file);
6cb153ca
BL
447 return sock;
448}
1da177e4 449
6cb153ca
BL
450static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
451{
452 struct file *file;
453 struct socket *sock;
454
3672558c 455 *err = -EBADF;
6cb153ca
BL
456 file = fget_light(fd, fput_needed);
457 if (file) {
458 sock = sock_from_file(file, err);
459 if (sock)
460 return sock;
461 fput_light(file, *fput_needed);
1da177e4 462 }
6cb153ca 463 return NULL;
1da177e4
LT
464}
465
466/**
467 * sock_alloc - allocate a socket
89bddce5 468 *
1da177e4
LT
469 * Allocate a new inode and socket object. The two are bound together
470 * and initialised. The socket is then returned. If we are out of inodes
471 * NULL is returned.
472 */
473
474static struct socket *sock_alloc(void)
475{
89bddce5
SH
476 struct inode *inode;
477 struct socket *sock;
1da177e4
LT
478
479 inode = new_inode(sock_mnt->mnt_sb);
480 if (!inode)
481 return NULL;
482
483 sock = SOCKET_I(inode);
484
89bddce5 485 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
486 inode->i_uid = current->fsuid;
487 inode->i_gid = current->fsgid;
488
489 get_cpu_var(sockets_in_use)++;
490 put_cpu_var(sockets_in_use);
491 return sock;
492}
493
/*
 *	Socket inodes are not supposed to be opened directly, but /proc
 *	offers a back door to them. Keep that door shut: every open
 *	attempt is refused with -ENXIO, whatever the inode or file.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	/* Both arguments are deliberately ignored. */
	return -ENXIO;
}
504
4b6f5d20 505const struct file_operations bad_sock_fops = {
1da177e4
LT
506 .owner = THIS_MODULE,
507 .open = sock_no_open,
508};
509
510/**
511 * sock_release - close a socket
512 * @sock: socket to close
513 *
514 * The socket is released from the protocol stack if it has a release
515 * callback, and the inode is then released if the socket is bound to
89bddce5 516 * an inode not a file.
1da177e4 517 */
89bddce5 518
1da177e4
LT
519void sock_release(struct socket *sock)
520{
521 if (sock->ops) {
522 struct module *owner = sock->ops->owner;
523
524 sock->ops->release(sock);
525 sock->ops = NULL;
526 module_put(owner);
527 }
528
529 if (sock->fasync_list)
530 printk(KERN_ERR "sock_release: fasync list not empty!\n");
531
532 get_cpu_var(sockets_in_use)--;
533 put_cpu_var(sockets_in_use);
534 if (!sock->file) {
535 iput(SOCK_INODE(sock));
536 return;
537 }
89bddce5 538 sock->file = NULL;
1da177e4
LT
539}
540
89bddce5 541static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
542 struct msghdr *msg, size_t size)
543{
544 struct sock_iocb *si = kiocb_to_siocb(iocb);
545 int err;
546
547 si->sock = sock;
548 si->scm = NULL;
549 si->msg = msg;
550 si->size = size;
551
552 err = security_socket_sendmsg(sock, msg, size);
553 if (err)
554 return err;
555
556 return sock->ops->sendmsg(iocb, sock, msg, size);
557}
558
559int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
560{
561 struct kiocb iocb;
562 struct sock_iocb siocb;
563 int ret;
564
565 init_sync_kiocb(&iocb, NULL);
566 iocb.private = &siocb;
567 ret = __sock_sendmsg(&iocb, sock, msg, size);
568 if (-EIOCBQUEUED == ret)
569 ret = wait_on_sync_kiocb(&iocb);
570 return ret;
571}
572
573int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
574 struct kvec *vec, size_t num, size_t size)
575{
576 mm_segment_t oldfs = get_fs();
577 int result;
578
579 set_fs(KERNEL_DS);
580 /*
581 * the following is safe, since for compiler definitions of kvec and
582 * iovec are identical, yielding the same in-core layout and alignment
583 */
89bddce5 584 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
585 msg->msg_iovlen = num;
586 result = sock_sendmsg(sock, msg, size);
587 set_fs(oldfs);
588 return result;
589}
590
92f37fd2
ED
591/*
592 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
593 */
594void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
595 struct sk_buff *skb)
596{
597 ktime_t kt = skb->tstamp;
598
599 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
600 struct timeval tv;
601 /* Race occurred between timestamp enabling and packet
602 receiving. Fill in the current time for now. */
603 if (kt.tv64 == 0)
604 kt = ktime_get_real();
605 skb->tstamp = kt;
606 tv = ktime_to_timeval(kt);
607 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
608 } else {
609 struct timespec ts;
610 /* Race occurred between timestamp enabling and packet
611 receiving. Fill in the current time for now. */
612 if (kt.tv64 == 0)
613 kt = ktime_get_real();
614 skb->tstamp = kt;
615 ts = ktime_to_timespec(kt);
616 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
617 }
618}
619
7c81fd8b
ACM
620EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
621
89bddce5 622static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
623 struct msghdr *msg, size_t size, int flags)
624{
625 int err;
626 struct sock_iocb *si = kiocb_to_siocb(iocb);
627
628 si->sock = sock;
629 si->scm = NULL;
630 si->msg = msg;
631 si->size = size;
632 si->flags = flags;
633
634 err = security_socket_recvmsg(sock, msg, size, flags);
635 if (err)
636 return err;
637
638 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
639}
640
89bddce5 641int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
642 size_t size, int flags)
643{
644 struct kiocb iocb;
645 struct sock_iocb siocb;
646 int ret;
647
89bddce5 648 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
649 iocb.private = &siocb;
650 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
651 if (-EIOCBQUEUED == ret)
652 ret = wait_on_sync_kiocb(&iocb);
653 return ret;
654}
655
89bddce5
SH
656int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
657 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
658{
659 mm_segment_t oldfs = get_fs();
660 int result;
661
662 set_fs(KERNEL_DS);
663 /*
664 * the following is safe, since for compiler definitions of kvec and
665 * iovec are identical, yielding the same in-core layout and alignment
666 */
89bddce5 667 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
668 result = sock_recvmsg(sock, msg, size, flags);
669 set_fs(oldfs);
670 return result;
671}
672
673static void sock_aio_dtor(struct kiocb *iocb)
674{
675 kfree(iocb->private);
676}
677
ce1d4d3e
CH
678static ssize_t sock_sendpage(struct file *file, struct page *page,
679 int offset, size_t size, loff_t *ppos, int more)
1da177e4 680{
1da177e4
LT
681 struct socket *sock;
682 int flags;
683
ce1d4d3e
CH
684 sock = file->private_data;
685
686 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
687 if (more)
688 flags |= MSG_MORE;
689
690 return sock->ops->sendpage(sock, page, offset, size, flags);
691}
1da177e4 692
ce1d4d3e 693static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 694 struct sock_iocb *siocb)
ce1d4d3e
CH
695{
696 if (!is_sync_kiocb(iocb)) {
697 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
698 if (!siocb)
699 return NULL;
1da177e4
LT
700 iocb->ki_dtor = sock_aio_dtor;
701 }
1da177e4 702
ce1d4d3e 703 siocb->kiocb = iocb;
ce1d4d3e
CH
704 iocb->private = siocb;
705 return siocb;
1da177e4
LT
706}
707
ce1d4d3e 708static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
709 struct file *file, const struct iovec *iov,
710 unsigned long nr_segs)
ce1d4d3e
CH
711{
712 struct socket *sock = file->private_data;
713 size_t size = 0;
714 int i;
1da177e4 715
89bddce5
SH
716 for (i = 0; i < nr_segs; i++)
717 size += iov[i].iov_len;
1da177e4 718
ce1d4d3e
CH
719 msg->msg_name = NULL;
720 msg->msg_namelen = 0;
721 msg->msg_control = NULL;
722 msg->msg_controllen = 0;
89bddce5 723 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
724 msg->msg_iovlen = nr_segs;
725 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
726
727 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
728}
729
027445c3
BP
730static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
731 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
732{
733 struct sock_iocb siocb, *x;
734
1da177e4
LT
735 if (pos != 0)
736 return -ESPIPE;
027445c3
BP
737
738 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
739 return 0;
740
027445c3
BP
741
742 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
743 if (!x)
744 return -ENOMEM;
027445c3 745 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
746}
747
ce1d4d3e 748static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
749 struct file *file, const struct iovec *iov,
750 unsigned long nr_segs)
1da177e4 751{
ce1d4d3e
CH
752 struct socket *sock = file->private_data;
753 size_t size = 0;
754 int i;
1da177e4 755
89bddce5
SH
756 for (i = 0; i < nr_segs; i++)
757 size += iov[i].iov_len;
1da177e4 758
ce1d4d3e
CH
759 msg->msg_name = NULL;
760 msg->msg_namelen = 0;
761 msg->msg_control = NULL;
762 msg->msg_controllen = 0;
89bddce5 763 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
764 msg->msg_iovlen = nr_segs;
765 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
766 if (sock->type == SOCK_SEQPACKET)
767 msg->msg_flags |= MSG_EOR;
1da177e4 768
ce1d4d3e 769 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
770}
771
027445c3
BP
772static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
773 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
774{
775 struct sock_iocb siocb, *x;
1da177e4 776
ce1d4d3e
CH
777 if (pos != 0)
778 return -ESPIPE;
027445c3
BP
779
780 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
ce1d4d3e 781 return 0;
1da177e4 782
027445c3 783 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
784 if (!x)
785 return -ENOMEM;
1da177e4 786
027445c3 787 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
788}
789
1da177e4
LT
790/*
791 * Atomic setting of ioctl hooks to avoid race
792 * with module unload.
793 */
794
4a3e2f71 795static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 796static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 797
89bddce5 798void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 799{
4a3e2f71 800 mutex_lock(&br_ioctl_mutex);
1da177e4 801 br_ioctl_hook = hook;
4a3e2f71 802 mutex_unlock(&br_ioctl_mutex);
1da177e4 803}
89bddce5 804
1da177e4
LT
805EXPORT_SYMBOL(brioctl_set);
806
4a3e2f71 807static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 808static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 809
89bddce5 810void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 811{
4a3e2f71 812 mutex_lock(&vlan_ioctl_mutex);
1da177e4 813 vlan_ioctl_hook = hook;
4a3e2f71 814 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 815}
89bddce5 816
1da177e4
LT
817EXPORT_SYMBOL(vlan_ioctl_set);
818
4a3e2f71 819static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 820static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 821
89bddce5 822void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 823{
4a3e2f71 824 mutex_lock(&dlci_ioctl_mutex);
1da177e4 825 dlci_ioctl_hook = hook;
4a3e2f71 826 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 827}
89bddce5 828
1da177e4
LT
829EXPORT_SYMBOL(dlci_ioctl_set);
830
831/*
832 * With an ioctl, arg may well be a user mode pointer, but we don't know
833 * what to do with it - that's up to the protocol still.
834 */
835
836static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
837{
838 struct socket *sock;
839 void __user *argp = (void __user *)arg;
840 int pid, err;
841
b69aee04 842 sock = file->private_data;
1da177e4
LT
843 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
844 err = dev_ioctl(cmd, argp);
845 } else
d86b5e0e 846#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
847 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
848 err = dev_ioctl(cmd, argp);
849 } else
89bddce5
SH
850#endif /* CONFIG_WIRELESS_EXT */
851 switch (cmd) {
1da177e4
LT
852 case FIOSETOWN:
853 case SIOCSPGRP:
854 err = -EFAULT;
855 if (get_user(pid, (int __user *)argp))
856 break;
857 err = f_setown(sock->file, pid, 1);
858 break;
859 case FIOGETOWN:
860 case SIOCGPGRP:
609d7fa9 861 err = put_user(f_getown(sock->file),
89bddce5 862 (int __user *)argp);
1da177e4
LT
863 break;
864 case SIOCGIFBR:
865 case SIOCSIFBR:
866 case SIOCBRADDBR:
867 case SIOCBRDELBR:
868 err = -ENOPKG;
869 if (!br_ioctl_hook)
870 request_module("bridge");
871
4a3e2f71 872 mutex_lock(&br_ioctl_mutex);
89bddce5 873 if (br_ioctl_hook)
1da177e4 874 err = br_ioctl_hook(cmd, argp);
4a3e2f71 875 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
876 break;
877 case SIOCGIFVLAN:
878 case SIOCSIFVLAN:
879 err = -ENOPKG;
880 if (!vlan_ioctl_hook)
881 request_module("8021q");
882
4a3e2f71 883 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
884 if (vlan_ioctl_hook)
885 err = vlan_ioctl_hook(argp);
4a3e2f71 886 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 887 break;
1da177e4
LT
888 case SIOCADDDLCI:
889 case SIOCDELDLCI:
890 err = -ENOPKG;
891 if (!dlci_ioctl_hook)
892 request_module("dlci");
893
894 if (dlci_ioctl_hook) {
4a3e2f71 895 mutex_lock(&dlci_ioctl_mutex);
1da177e4 896 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 897 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
898 }
899 break;
900 default:
901 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
902
903 /*
904 * If this ioctl is unknown try to hand it down
905 * to the NIC driver.
906 */
907 if (err == -ENOIOCTLCMD)
908 err = dev_ioctl(cmd, argp);
1da177e4 909 break;
89bddce5 910 }
1da177e4
LT
911 return err;
912}
913
914int sock_create_lite(int family, int type, int protocol, struct socket **res)
915{
916 int err;
917 struct socket *sock = NULL;
89bddce5 918
1da177e4
LT
919 err = security_socket_create(family, type, protocol, 1);
920 if (err)
921 goto out;
922
923 sock = sock_alloc();
924 if (!sock) {
925 err = -ENOMEM;
926 goto out;
927 }
928
1da177e4 929 sock->type = type;
7420ed23
VY
930 err = security_socket_post_create(sock, family, type, protocol, 1);
931 if (err)
932 goto out_release;
933
1da177e4
LT
934out:
935 *res = sock;
936 return err;
7420ed23
VY
937out_release:
938 sock_release(sock);
939 sock = NULL;
940 goto out;
1da177e4
LT
941}
942
943/* No kernel lock held - perfect */
89bddce5 944static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
945{
946 struct socket *sock;
947
948 /*
89bddce5 949 * We can't return errors to poll, so it's either yes or no.
1da177e4 950 */
b69aee04 951 sock = file->private_data;
1da177e4
LT
952 return sock->ops->poll(file, sock, wait);
953}
954
89bddce5 955static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 956{
b69aee04 957 struct socket *sock = file->private_data;
1da177e4
LT
958
959 return sock->ops->mmap(file, sock, vma);
960}
961
20380731 962static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
963{
964 /*
89bddce5
SH
965 * It was possible the inode is NULL we were
966 * closing an unfinished socket.
1da177e4
LT
967 */
968
89bddce5 969 if (!inode) {
1da177e4
LT
970 printk(KERN_DEBUG "sock_close: NULL inode\n");
971 return 0;
972 }
973 sock_fasync(-1, filp, 0);
974 sock_release(SOCKET_I(inode));
975 return 0;
976}
977
978/*
979 * Update the socket async list
980 *
981 * Fasync_list locking strategy.
982 *
983 * 1. fasync_list is modified only under process context socket lock
984 * i.e. under semaphore.
985 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
986 * or under socket lock.
987 * 3. fasync_list can be used from softirq context, so that
988 * modification under socket lock have to be enhanced with
989 * write_lock_bh(&sk->sk_callback_lock).
990 * --ANK (990710)
991 */
992
993static int sock_fasync(int fd, struct file *filp, int on)
994{
89bddce5 995 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
996 struct socket *sock;
997 struct sock *sk;
998
89bddce5 999 if (on) {
8b3a7005 1000 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1001 if (fna == NULL)
1da177e4
LT
1002 return -ENOMEM;
1003 }
1004
b69aee04 1005 sock = filp->private_data;
1da177e4 1006
89bddce5
SH
1007 sk = sock->sk;
1008 if (sk == NULL) {
1da177e4
LT
1009 kfree(fna);
1010 return -EINVAL;
1011 }
1012
1013 lock_sock(sk);
1014
89bddce5 1015 prev = &(sock->fasync_list);
1da177e4 1016
89bddce5
SH
1017 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1018 if (fa->fa_file == filp)
1da177e4
LT
1019 break;
1020
89bddce5
SH
1021 if (on) {
1022 if (fa != NULL) {
1da177e4 1023 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1024 fa->fa_fd = fd;
1da177e4
LT
1025 write_unlock_bh(&sk->sk_callback_lock);
1026
1027 kfree(fna);
1028 goto out;
1029 }
89bddce5
SH
1030 fna->fa_file = filp;
1031 fna->fa_fd = fd;
1032 fna->magic = FASYNC_MAGIC;
1033 fna->fa_next = sock->fasync_list;
1da177e4 1034 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1035 sock->fasync_list = fna;
1da177e4 1036 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1037 } else {
1038 if (fa != NULL) {
1da177e4 1039 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1040 *prev = fa->fa_next;
1da177e4
LT
1041 write_unlock_bh(&sk->sk_callback_lock);
1042 kfree(fa);
1043 }
1044 }
1045
1046out:
1047 release_sock(sock->sk);
1048 return 0;
1049}
1050
1051/* This function may be called only under socket lock or callback_lock */
1052
1053int sock_wake_async(struct socket *sock, int how, int band)
1054{
1055 if (!sock || !sock->fasync_list)
1056 return -1;
89bddce5 1057 switch (how) {
1da177e4 1058 case 1:
89bddce5 1059
1da177e4
LT
1060 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1061 break;
1062 goto call_kill;
1063 case 2:
1064 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1065 break;
1066 /* fall through */
1067 case 0:
89bddce5 1068call_kill:
1da177e4
LT
1069 __kill_fasync(sock->fasync_list, SIGIO, band);
1070 break;
1071 case 3:
1072 __kill_fasync(sock->fasync_list, SIGURG, band);
1073 }
1074 return 0;
1075}
1076
89bddce5
SH
1077static int __sock_create(int family, int type, int protocol,
1078 struct socket **res, int kern)
1da177e4
LT
1079{
1080 int err;
1081 struct socket *sock;
55737fda 1082 const struct net_proto_family *pf;
1da177e4
LT
1083
1084 /*
89bddce5 1085 * Check protocol is in range
1da177e4
LT
1086 */
1087 if (family < 0 || family >= NPROTO)
1088 return -EAFNOSUPPORT;
1089 if (type < 0 || type >= SOCK_MAX)
1090 return -EINVAL;
1091
1092 /* Compatibility.
1093
1094 This uglymoron is moved from INET layer to here to avoid
1095 deadlock in module load.
1096 */
1097 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1098 static int warned;
1da177e4
LT
1099 if (!warned) {
1100 warned = 1;
89bddce5
SH
1101 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1102 current->comm);
1da177e4
LT
1103 }
1104 family = PF_PACKET;
1105 }
1106
1107 err = security_socket_create(family, type, protocol, kern);
1108 if (err)
1109 return err;
89bddce5 1110
55737fda
SH
1111 /*
1112 * Allocate the socket and allow the family to set things up. if
1113 * the protocol is 0, the family is instructed to select an appropriate
1114 * default.
1115 */
1116 sock = sock_alloc();
1117 if (!sock) {
1118 if (net_ratelimit())
1119 printk(KERN_WARNING "socket: no more sockets\n");
1120 return -ENFILE; /* Not exactly a match, but its the
1121 closest posix thing */
1122 }
1123
1124 sock->type = type;
1125
1da177e4 1126#if defined(CONFIG_KMOD)
89bddce5
SH
1127 /* Attempt to load a protocol module if the find failed.
1128 *
1129 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1130 * requested real, full-featured networking support upon configuration.
1131 * Otherwise module support will break!
1132 */
55737fda 1133 if (net_families[family] == NULL)
89bddce5 1134 request_module("net-pf-%d", family);
1da177e4
LT
1135#endif
1136
55737fda
SH
1137 rcu_read_lock();
1138 pf = rcu_dereference(net_families[family]);
1139 err = -EAFNOSUPPORT;
1140 if (!pf)
1141 goto out_release;
1da177e4
LT
1142
1143 /*
1144 * We will call the ->create function, that possibly is in a loadable
1145 * module, so we have to bump that loadable module refcnt first.
1146 */
55737fda 1147 if (!try_module_get(pf->owner))
1da177e4
LT
1148 goto out_release;
1149
55737fda
SH
1150 /* Now protected by module ref count */
1151 rcu_read_unlock();
1152
1153 err = pf->create(sock, protocol);
1154 if (err < 0)
1da177e4 1155 goto out_module_put;
a79af59e 1156
1da177e4
LT
1157 /*
1158 * Now to bump the refcnt of the [loadable] module that owns this
1159 * socket at sock_release time we decrement its refcnt.
1160 */
55737fda
SH
1161 if (!try_module_get(sock->ops->owner))
1162 goto out_module_busy;
1163
1da177e4
LT
1164 /*
1165 * Now that we're done with the ->create function, the [loadable]
1166 * module can have its refcnt decremented
1167 */
55737fda 1168 module_put(pf->owner);
7420ed23
VY
1169 err = security_socket_post_create(sock, family, type, protocol, kern);
1170 if (err)
1171 goto out_release;
55737fda 1172 *res = sock;
1da177e4 1173
55737fda
SH
1174 return 0;
1175
1176out_module_busy:
1177 err = -EAFNOSUPPORT;
1da177e4 1178out_module_put:
55737fda
SH
1179 sock->ops = NULL;
1180 module_put(pf->owner);
1181out_sock_release:
1da177e4 1182 sock_release(sock);
55737fda
SH
1183 return err;
1184
1185out_release:
1186 rcu_read_unlock();
1187 goto out_sock_release;
1da177e4
LT
1188}
1189
/*
 *	Create a socket via __sock_create() with the 'kern' argument set to 0.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1194
/*
 *	Create a socket via __sock_create() with the 'kern' argument set to 1.
 */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1199
1200asmlinkage long sys_socket(int family, int type, int protocol)
1201{
1202 int retval;
1203 struct socket *sock;
1204
1205 retval = sock_create(family, type, protocol, &sock);
1206 if (retval < 0)
1207 goto out;
1208
1209 retval = sock_map_fd(sock);
1210 if (retval < 0)
1211 goto out_release;
1212
1213out:
1214 /* It may be already another descriptor 8) Not kernel problem. */
1215 return retval;
1216
1217out_release:
1218 sock_release(sock);
1219 return retval;
1220}
1221
1222/*
1223 * Create a pair of connected sockets.
1224 */
1225
89bddce5
SH
1226asmlinkage long sys_socketpair(int family, int type, int protocol,
1227 int __user *usockvec)
1da177e4
LT
1228{
1229 struct socket *sock1, *sock2;
1230 int fd1, fd2, err;
db349509 1231 struct file *newfile1, *newfile2;
1da177e4
LT
1232
1233 /*
1234 * Obtain the first socket and check if the underlying protocol
1235 * supports the socketpair call.
1236 */
1237
1238 err = sock_create(family, type, protocol, &sock1);
1239 if (err < 0)
1240 goto out;
1241
1242 err = sock_create(family, type, protocol, &sock2);
1243 if (err < 0)
1244 goto out_release_1;
1245
1246 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1247 if (err < 0)
1da177e4
LT
1248 goto out_release_both;
1249
db349509
AV
1250 fd1 = sock_alloc_fd(&newfile1);
1251 if (unlikely(fd1 < 0))
1252 goto out_release_both;
1da177e4 1253
db349509
AV
1254 fd2 = sock_alloc_fd(&newfile2);
1255 if (unlikely(fd2 < 0)) {
1256 put_filp(newfile1);
1257 put_unused_fd(fd1);
1da177e4 1258 goto out_release_both;
db349509 1259 }
1da177e4 1260
db349509
AV
1261 err = sock_attach_fd(sock1, newfile1);
1262 if (unlikely(err < 0)) {
1263 goto out_fd2;
1264 }
1265
1266 err = sock_attach_fd(sock2, newfile2);
1267 if (unlikely(err < 0)) {
1268 fput(newfile1);
1269 goto out_fd1;
1270 }
1271
1272 err = audit_fd_pair(fd1, fd2);
1273 if (err < 0) {
1274 fput(newfile1);
1275 fput(newfile2);
1276 goto out_fd;
1277 }
1da177e4 1278
db349509
AV
1279 fd_install(fd1, newfile1);
1280 fd_install(fd2, newfile2);
1da177e4
LT
1281 /* fd1 and fd2 may be already another descriptors.
1282 * Not kernel problem.
1283 */
1284
89bddce5 1285 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1286 if (!err)
1287 err = put_user(fd2, &usockvec[1]);
1288 if (!err)
1289 return 0;
1290
1291 sys_close(fd2);
1292 sys_close(fd1);
1293 return err;
1294
1da177e4 1295out_release_both:
89bddce5 1296 sock_release(sock2);
1da177e4 1297out_release_1:
89bddce5 1298 sock_release(sock1);
1da177e4
LT
1299out:
1300 return err;
db349509
AV
1301
1302out_fd2:
1303 put_filp(newfile1);
1304 sock_release(sock1);
1305out_fd1:
1306 put_filp(newfile2);
1307 sock_release(sock2);
1308out_fd:
1309 put_unused_fd(fd1);
1310 put_unused_fd(fd2);
1311 goto out;
1da177e4
LT
1312}
1313
1da177e4
LT
1314/*
1315 * Bind a name to a socket. Nothing much to do here since it's
1316 * the protocol's responsibility to handle the local address.
1317 *
1318 * We move the socket address to kernel space before we call
1319 * the protocol layer (having also checked the address is ok).
1320 */
1321
1322asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1323{
1324 struct socket *sock;
1325 char address[MAX_SOCK_ADDR];
6cb153ca 1326 int err, fput_needed;
1da177e4 1327
89bddce5 1328 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1329 if (sock) {
89bddce5
SH
1330 err = move_addr_to_kernel(umyaddr, addrlen, address);
1331 if (err >= 0) {
1332 err = security_socket_bind(sock,
1333 (struct sockaddr *)address,
1334 addrlen);
6cb153ca
BL
1335 if (!err)
1336 err = sock->ops->bind(sock,
89bddce5
SH
1337 (struct sockaddr *)
1338 address, addrlen);
1da177e4 1339 }
6cb153ca 1340 fput_light(sock->file, fput_needed);
89bddce5 1341 }
1da177e4
LT
1342 return err;
1343}
1344
1da177e4
LT
1345/*
1346 * Perform a listen. Basically, we allow the protocol to do anything
1347 * necessary for a listen, and if that works, we mark the socket as
1348 * ready for listening.
1349 */
1350
7a42c217 1351int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1352
1353asmlinkage long sys_listen(int fd, int backlog)
1354{
1355 struct socket *sock;
6cb153ca 1356 int err, fput_needed;
89bddce5
SH
1357
1358 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1359 if (sock) {
1360 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1361 backlog = sysctl_somaxconn;
1362
1363 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1364 if (!err)
1365 err = sock->ops->listen(sock, backlog);
1da177e4 1366
6cb153ca 1367 fput_light(sock->file, fput_needed);
1da177e4
LT
1368 }
1369 return err;
1370}
1371
1da177e4
LT
1372/*
1373 * For accept, we attempt to create a new socket, set up the link
1374 * with the client, wake up the client, then return the new
1375 * connected fd. We collect the address of the connector in kernel
1376 * space and move it to user at the very end. This is unclean because
1377 * we open the socket then return an error.
1378 *
1379 * 1003.1g adds the ability to recvmsg() to query connection pending
1380 * status to recvmsg. We need to add that support in a way thats
1381 * clean when we restucture accept also.
1382 */
1383
89bddce5
SH
1384asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1385 int __user *upeer_addrlen)
1da177e4
LT
1386{
1387 struct socket *sock, *newsock;
39d8c1b6 1388 struct file *newfile;
6cb153ca 1389 int err, len, newfd, fput_needed;
1da177e4
LT
1390 char address[MAX_SOCK_ADDR];
1391
6cb153ca 1392 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1393 if (!sock)
1394 goto out;
1395
1396 err = -ENFILE;
89bddce5 1397 if (!(newsock = sock_alloc()))
1da177e4
LT
1398 goto out_put;
1399
1400 newsock->type = sock->type;
1401 newsock->ops = sock->ops;
1402
1da177e4
LT
1403 /*
1404 * We don't need try_module_get here, as the listening socket (sock)
1405 * has the protocol module (sock->ops->owner) held.
1406 */
1407 __module_get(newsock->ops->owner);
1408
39d8c1b6
DM
1409 newfd = sock_alloc_fd(&newfile);
1410 if (unlikely(newfd < 0)) {
1411 err = newfd;
9a1875e6
DM
1412 sock_release(newsock);
1413 goto out_put;
39d8c1b6
DM
1414 }
1415
1416 err = sock_attach_fd(newsock, newfile);
1417 if (err < 0)
79f4f642 1418 goto out_fd_simple;
39d8c1b6 1419
a79af59e
FF
1420 err = security_socket_accept(sock, newsock);
1421 if (err)
39d8c1b6 1422 goto out_fd;
a79af59e 1423
1da177e4
LT
1424 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1425 if (err < 0)
39d8c1b6 1426 goto out_fd;
1da177e4
LT
1427
1428 if (upeer_sockaddr) {
89bddce5
SH
1429 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1430 &len, 2) < 0) {
1da177e4 1431 err = -ECONNABORTED;
39d8c1b6 1432 goto out_fd;
1da177e4 1433 }
89bddce5
SH
1434 err = move_addr_to_user(address, len, upeer_sockaddr,
1435 upeer_addrlen);
1da177e4 1436 if (err < 0)
39d8c1b6 1437 goto out_fd;
1da177e4
LT
1438 }
1439
1440 /* File flags are not inherited via accept() unlike another OSes. */
1441
39d8c1b6
DM
1442 fd_install(newfd, newfile);
1443 err = newfd;
1da177e4
LT
1444
1445 security_socket_post_accept(sock, newsock);
1446
1447out_put:
6cb153ca 1448 fput_light(sock->file, fput_needed);
1da177e4
LT
1449out:
1450 return err;
79f4f642
AD
1451out_fd_simple:
1452 sock_release(newsock);
1453 put_filp(newfile);
1454 put_unused_fd(newfd);
1455 goto out_put;
39d8c1b6 1456out_fd:
9606a216 1457 fput(newfile);
39d8c1b6 1458 put_unused_fd(newfd);
1da177e4
LT
1459 goto out_put;
1460}
1461
1da177e4
LT
1462/*
1463 * Attempt to connect to a socket with the server address. The address
1464 * is in user space so we verify it is OK and move it to kernel space.
1465 *
1466 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1467 * break bindings
1468 *
1469 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1470 * other SEQPACKET protocols that take time to connect() as it doesn't
1471 * include the -EINPROGRESS status for such sockets.
1472 */
1473
89bddce5
SH
1474asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1475 int addrlen)
1da177e4
LT
1476{
1477 struct socket *sock;
1478 char address[MAX_SOCK_ADDR];
6cb153ca 1479 int err, fput_needed;
1da177e4 1480
6cb153ca 1481 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1482 if (!sock)
1483 goto out;
1484 err = move_addr_to_kernel(uservaddr, addrlen, address);
1485 if (err < 0)
1486 goto out_put;
1487
89bddce5
SH
1488 err =
1489 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1490 if (err)
1491 goto out_put;
1492
89bddce5 1493 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1494 sock->file->f_flags);
1495out_put:
6cb153ca 1496 fput_light(sock->file, fput_needed);
1da177e4
LT
1497out:
1498 return err;
1499}
1500
1501/*
1502 * Get the local address ('name') of a socket object. Move the obtained
1503 * name to user space.
1504 */
1505
89bddce5
SH
1506asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1507 int __user *usockaddr_len)
1da177e4
LT
1508{
1509 struct socket *sock;
1510 char address[MAX_SOCK_ADDR];
6cb153ca 1511 int len, err, fput_needed;
89bddce5 1512
6cb153ca 1513 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1514 if (!sock)
1515 goto out;
1516
1517 err = security_socket_getsockname(sock);
1518 if (err)
1519 goto out_put;
1520
1521 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1522 if (err)
1523 goto out_put;
1524 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1525
1526out_put:
6cb153ca 1527 fput_light(sock->file, fput_needed);
1da177e4
LT
1528out:
1529 return err;
1530}
1531
1532/*
1533 * Get the remote address ('name') of a socket object. Move the obtained
1534 * name to user space.
1535 */
1536
89bddce5
SH
1537asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1538 int __user *usockaddr_len)
1da177e4
LT
1539{
1540 struct socket *sock;
1541 char address[MAX_SOCK_ADDR];
6cb153ca 1542 int len, err, fput_needed;
1da177e4 1543
89bddce5
SH
1544 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1545 if (sock != NULL) {
1da177e4
LT
1546 err = security_socket_getpeername(sock);
1547 if (err) {
6cb153ca 1548 fput_light(sock->file, fput_needed);
1da177e4
LT
1549 return err;
1550 }
1551
89bddce5
SH
1552 err =
1553 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1554 1);
1da177e4 1555 if (!err)
89bddce5
SH
1556 err = move_addr_to_user(address, len, usockaddr,
1557 usockaddr_len);
6cb153ca 1558 fput_light(sock->file, fput_needed);
1da177e4
LT
1559 }
1560 return err;
1561}
1562
1563/*
1564 * Send a datagram to a given address. We move the address into kernel
1565 * space and check the user space data area is readable before invoking
1566 * the protocol.
1567 */
1568
89bddce5
SH
1569asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1570 unsigned flags, struct sockaddr __user *addr,
1571 int addr_len)
1da177e4
LT
1572{
1573 struct socket *sock;
1574 char address[MAX_SOCK_ADDR];
1575 int err;
1576 struct msghdr msg;
1577 struct iovec iov;
6cb153ca
BL
1578 int fput_needed;
1579 struct file *sock_file;
1580
1581 sock_file = fget_light(fd, &fput_needed);
4387ff75 1582 err = -EBADF;
6cb153ca 1583 if (!sock_file)
4387ff75 1584 goto out;
6cb153ca
BL
1585
1586 sock = sock_from_file(sock_file, &err);
1da177e4 1587 if (!sock)
6cb153ca 1588 goto out_put;
89bddce5
SH
1589 iov.iov_base = buff;
1590 iov.iov_len = len;
1591 msg.msg_name = NULL;
1592 msg.msg_iov = &iov;
1593 msg.msg_iovlen = 1;
1594 msg.msg_control = NULL;
1595 msg.msg_controllen = 0;
1596 msg.msg_namelen = 0;
6cb153ca 1597 if (addr) {
1da177e4
LT
1598 err = move_addr_to_kernel(addr, addr_len, address);
1599 if (err < 0)
1600 goto out_put;
89bddce5
SH
1601 msg.msg_name = address;
1602 msg.msg_namelen = addr_len;
1da177e4
LT
1603 }
1604 if (sock->file->f_flags & O_NONBLOCK)
1605 flags |= MSG_DONTWAIT;
1606 msg.msg_flags = flags;
1607 err = sock_sendmsg(sock, &msg, len);
1608
89bddce5 1609out_put:
6cb153ca 1610 fput_light(sock_file, fput_needed);
4387ff75 1611out:
1da177e4
LT
1612 return err;
1613}
1614
1615/*
89bddce5 1616 * Send a datagram down a socket.
1da177e4
LT
1617 */
1618
89bddce5 1619asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1620{
1621 return sys_sendto(fd, buff, len, flags, NULL, 0);
1622}
1623
1624/*
89bddce5 1625 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1626 * sender. We verify the buffers are writable and if needed move the
1627 * sender address from kernel to user space.
1628 */
1629
89bddce5
SH
1630asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1631 unsigned flags, struct sockaddr __user *addr,
1632 int __user *addr_len)
1da177e4
LT
1633{
1634 struct socket *sock;
1635 struct iovec iov;
1636 struct msghdr msg;
1637 char address[MAX_SOCK_ADDR];
89bddce5 1638 int err, err2;
6cb153ca
BL
1639 struct file *sock_file;
1640 int fput_needed;
1641
1642 sock_file = fget_light(fd, &fput_needed);
4387ff75 1643 err = -EBADF;
6cb153ca 1644 if (!sock_file)
4387ff75 1645 goto out;
1da177e4 1646
6cb153ca 1647 sock = sock_from_file(sock_file, &err);
1da177e4 1648 if (!sock)
4387ff75 1649 goto out_put;
1da177e4 1650
89bddce5
SH
1651 msg.msg_control = NULL;
1652 msg.msg_controllen = 0;
1653 msg.msg_iovlen = 1;
1654 msg.msg_iov = &iov;
1655 iov.iov_len = size;
1656 iov.iov_base = ubuf;
1657 msg.msg_name = address;
1658 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1659 if (sock->file->f_flags & O_NONBLOCK)
1660 flags |= MSG_DONTWAIT;
89bddce5 1661 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1662
89bddce5
SH
1663 if (err >= 0 && addr != NULL) {
1664 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1665 if (err2 < 0)
1666 err = err2;
1da177e4 1667 }
4387ff75 1668out_put:
6cb153ca 1669 fput_light(sock_file, fput_needed);
4387ff75 1670out:
1da177e4
LT
1671 return err;
1672}
1673
1674/*
89bddce5 1675 * Receive a datagram from a socket.
1da177e4
LT
1676 */
1677
89bddce5
SH
1678asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1679 unsigned flags)
1da177e4
LT
1680{
1681 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1682}
1683
1684/*
1685 * Set a socket option. Because we don't know the option lengths we have
1686 * to pass the user mode parameter for the protocols to sort out.
1687 */
1688
89bddce5
SH
1689asmlinkage long sys_setsockopt(int fd, int level, int optname,
1690 char __user *optval, int optlen)
1da177e4 1691{
6cb153ca 1692 int err, fput_needed;
1da177e4
LT
1693 struct socket *sock;
1694
1695 if (optlen < 0)
1696 return -EINVAL;
89bddce5
SH
1697
1698 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1699 if (sock != NULL) {
1700 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1701 if (err)
1702 goto out_put;
1da177e4
LT
1703
1704 if (level == SOL_SOCKET)
89bddce5
SH
1705 err =
1706 sock_setsockopt(sock, level, optname, optval,
1707 optlen);
1da177e4 1708 else
89bddce5
SH
1709 err =
1710 sock->ops->setsockopt(sock, level, optname, optval,
1711 optlen);
6cb153ca
BL
1712out_put:
1713 fput_light(sock->file, fput_needed);
1da177e4
LT
1714 }
1715 return err;
1716}
1717
1718/*
1719 * Get a socket option. Because we don't know the option lengths we have
1720 * to pass a user mode parameter for the protocols to sort out.
1721 */
1722
89bddce5
SH
1723asmlinkage long sys_getsockopt(int fd, int level, int optname,
1724 char __user *optval, int __user *optlen)
1da177e4 1725{
6cb153ca 1726 int err, fput_needed;
1da177e4
LT
1727 struct socket *sock;
1728
89bddce5
SH
1729 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1730 if (sock != NULL) {
6cb153ca
BL
1731 err = security_socket_getsockopt(sock, level, optname);
1732 if (err)
1733 goto out_put;
1da177e4
LT
1734
1735 if (level == SOL_SOCKET)
89bddce5
SH
1736 err =
1737 sock_getsockopt(sock, level, optname, optval,
1738 optlen);
1da177e4 1739 else
89bddce5
SH
1740 err =
1741 sock->ops->getsockopt(sock, level, optname, optval,
1742 optlen);
6cb153ca
BL
1743out_put:
1744 fput_light(sock->file, fput_needed);
1da177e4
LT
1745 }
1746 return err;
1747}
1748
1da177e4
LT
1749/*
1750 * Shutdown a socket.
1751 */
1752
1753asmlinkage long sys_shutdown(int fd, int how)
1754{
6cb153ca 1755 int err, fput_needed;
1da177e4
LT
1756 struct socket *sock;
1757
89bddce5
SH
1758 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1759 if (sock != NULL) {
1da177e4 1760 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1761 if (!err)
1762 err = sock->ops->shutdown(sock, how);
1763 fput_light(sock->file, fput_needed);
1da177e4
LT
1764 }
1765 return err;
1766}
1767
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * They rely on names in the caller's scope: a 'flags' variable and, next
 * to the 'msg' pointer, a matching 'msg_compat' pointer.
 */
#define COMPAT_MSG(msg, member)				\
	((MSG_CMSG_COMPAT & flags)			\
		? &msg##_compat->member			\
		: &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1774
1da177e4
LT
1775/*
1776 * BSD sendmsg interface
1777 */
1778
1779asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1780{
89bddce5
SH
1781 struct compat_msghdr __user *msg_compat =
1782 (struct compat_msghdr __user *)msg;
1da177e4
LT
1783 struct socket *sock;
1784 char address[MAX_SOCK_ADDR];
1785 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1786 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1787 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1788 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1789 unsigned char *ctl_buf = ctl;
1790 struct msghdr msg_sys;
1791 int err, ctl_len, iov_size, total_len;
6cb153ca 1792 int fput_needed;
89bddce5 1793
1da177e4
LT
1794 err = -EFAULT;
1795 if (MSG_CMSG_COMPAT & flags) {
1796 if (get_compat_msghdr(&msg_sys, msg_compat))
1797 return -EFAULT;
89bddce5
SH
1798 }
1799 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1800 return -EFAULT;
1801
6cb153ca 1802 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1803 if (!sock)
1da177e4
LT
1804 goto out;
1805
1806 /* do not move before msg_sys is valid */
1807 err = -EMSGSIZE;
1808 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1809 goto out_put;
1810
89bddce5 1811 /* Check whether to allocate the iovec area */
1da177e4
LT
1812 err = -ENOMEM;
1813 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1814 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1815 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1816 if (!iov)
1817 goto out_put;
1818 }
1819
1820 /* This will also move the address data into kernel space */
1821 if (MSG_CMSG_COMPAT & flags) {
1822 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1823 } else
1824 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1825 if (err < 0)
1da177e4
LT
1826 goto out_freeiov;
1827 total_len = err;
1828
1829 err = -ENOBUFS;
1830
1831 if (msg_sys.msg_controllen > INT_MAX)
1832 goto out_freeiov;
89bddce5 1833 ctl_len = msg_sys.msg_controllen;
1da177e4 1834 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1835 err =
1836 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1837 sizeof(ctl));
1da177e4
LT
1838 if (err)
1839 goto out_freeiov;
1840 ctl_buf = msg_sys.msg_control;
8920e8f9 1841 ctl_len = msg_sys.msg_controllen;
1da177e4 1842 } else if (ctl_len) {
89bddce5 1843 if (ctl_len > sizeof(ctl)) {
1da177e4 1844 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1845 if (ctl_buf == NULL)
1da177e4
LT
1846 goto out_freeiov;
1847 }
1848 err = -EFAULT;
1849 /*
1850 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1851 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1852 * checking falls down on this.
1853 */
89bddce5
SH
1854 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1855 ctl_len))
1da177e4
LT
1856 goto out_freectl;
1857 msg_sys.msg_control = ctl_buf;
1858 }
1859 msg_sys.msg_flags = flags;
1860
1861 if (sock->file->f_flags & O_NONBLOCK)
1862 msg_sys.msg_flags |= MSG_DONTWAIT;
1863 err = sock_sendmsg(sock, &msg_sys, total_len);
1864
1865out_freectl:
89bddce5 1866 if (ctl_buf != ctl)
1da177e4
LT
1867 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1868out_freeiov:
1869 if (iov != iovstack)
1870 sock_kfree_s(sock->sk, iov, iov_size);
1871out_put:
6cb153ca 1872 fput_light(sock->file, fput_needed);
89bddce5 1873out:
1da177e4
LT
1874 return err;
1875}
1876
1877/*
1878 * BSD recvmsg interface
1879 */
1880
89bddce5
SH
1881asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1882 unsigned int flags)
1da177e4 1883{
89bddce5
SH
1884 struct compat_msghdr __user *msg_compat =
1885 (struct compat_msghdr __user *)msg;
1da177e4
LT
1886 struct socket *sock;
1887 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1888 struct iovec *iov = iovstack;
1da177e4
LT
1889 struct msghdr msg_sys;
1890 unsigned long cmsg_ptr;
1891 int err, iov_size, total_len, len;
6cb153ca 1892 int fput_needed;
1da177e4
LT
1893
1894 /* kernel mode address */
1895 char addr[MAX_SOCK_ADDR];
1896
1897 /* user mode address pointers */
1898 struct sockaddr __user *uaddr;
1899 int __user *uaddr_len;
89bddce5 1900
1da177e4
LT
1901 if (MSG_CMSG_COMPAT & flags) {
1902 if (get_compat_msghdr(&msg_sys, msg_compat))
1903 return -EFAULT;
89bddce5
SH
1904 }
1905 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1906 return -EFAULT;
1da177e4 1907
6cb153ca 1908 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1909 if (!sock)
1910 goto out;
1911
1912 err = -EMSGSIZE;
1913 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1914 goto out_put;
89bddce5
SH
1915
1916 /* Check whether to allocate the iovec area */
1da177e4
LT
1917 err = -ENOMEM;
1918 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1919 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1920 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1921 if (!iov)
1922 goto out_put;
1923 }
1924
1925 /*
89bddce5
SH
1926 * Save the user-mode address (verify_iovec will change the
1927 * kernel msghdr to use the kernel address space)
1da177e4 1928 */
89bddce5
SH
1929
1930 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1931 uaddr_len = COMPAT_NAMELEN(msg);
1932 if (MSG_CMSG_COMPAT & flags) {
1933 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1934 } else
1935 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1936 if (err < 0)
1937 goto out_freeiov;
89bddce5 1938 total_len = err;
1da177e4
LT
1939
1940 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 1941 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 1942
1da177e4
LT
1943 if (sock->file->f_flags & O_NONBLOCK)
1944 flags |= MSG_DONTWAIT;
1945 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1946 if (err < 0)
1947 goto out_freeiov;
1948 len = err;
1949
1950 if (uaddr != NULL) {
89bddce5
SH
1951 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1952 uaddr_len);
1da177e4
LT
1953 if (err < 0)
1954 goto out_freeiov;
1955 }
37f7f421
DM
1956 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1957 COMPAT_FLAGS(msg));
1da177e4
LT
1958 if (err)
1959 goto out_freeiov;
1960 if (MSG_CMSG_COMPAT & flags)
89bddce5 1961 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1962 &msg_compat->msg_controllen);
1963 else
89bddce5 1964 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1965 &msg->msg_controllen);
1966 if (err)
1967 goto out_freeiov;
1968 err = len;
1969
1970out_freeiov:
1971 if (iov != iovstack)
1972 sock_kfree_s(sock->sk, iov, iov_size);
1973out_put:
6cb153ca 1974 fput_light(sock->file, fput_needed);
1da177e4
LT
1975out:
1976 return err;
1977}
1978
#ifdef __ARCH_WANT_SYS_SOCKETCALL

/*
 * Argument list sizes for sys_socketcall: nargs[call] is the number of
 * bytes of arguments to fetch from user space for that call number.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3)
};

#undef AL

/*
 *	System call vectors.
 *
 *	Argument checking cleaned up. Saved 20% in size.
 *	This function doesn't need to set the kernel lock because
 *	it is set by the callees.
 *
 *	@call: socket call number, valid range 1..SYS_RECVMSG
 *	@args: user array holding the arguments of the selected call
 *
 *	Returns -EINVAL for an unknown call, -EFAULT if the arguments cannot
 *	be read, the audit hook's error, or the selected call's result.
 */

asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
	unsigned long a[6];
	int err;

	if (call < 1 || call > SYS_RECVMSG)
		return -EINVAL;

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(a, args, nargs[call]))
		return -EFAULT;

	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
	if (err)
		return err;

	switch (call) {
	case SYS_SOCKET:
		err = sys_socket(a[0], a[1], a[2]);
		break;
	case SYS_BIND:
		err = sys_bind(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_CONNECT:
		err = sys_connect(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_LISTEN:
		err = sys_listen(a[0], a[1]);
		break;
	case SYS_ACCEPT:
		err = sys_accept(a[0], (struct sockaddr __user *)a[1],
				 (int __user *)a[2]);
		break;
	case SYS_GETSOCKNAME:
		err = sys_getsockname(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_GETPEERNAME:
		err = sys_getpeername(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_SOCKETPAIR:
		err = sys_socketpair(a[0], a[1], a[2], (int __user *)a[3]);
		break;
	case SYS_SEND:
		err = sys_send(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_SENDTO:
		err = sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				 (struct sockaddr __user *)a[4], a[5]);
		break;
	case SYS_RECV:
		err = sys_recv(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_RECVFROM:
		err = sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				   (struct sockaddr __user *)a[4],
				   (int __user *)a[5]);
		break;
	case SYS_SHUTDOWN:
		err = sys_shutdown(a[0], a[1]);
		break;
	case SYS_SETSOCKOPT:
		err = sys_setsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     a[4]);
		break;
	case SYS_GETSOCKOPT:
		err = sys_getsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     (int __user *)a[4]);
		break;
	case SYS_SENDMSG:
		err = sys_sendmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	case SYS_RECVMSG:
		err = sys_recvmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	default:
		err = -EINVAL;
		break;
	}
	return err;
}

#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2090
55737fda
SH
2091/**
2092 * sock_register - add a socket protocol handler
2093 * @ops: description of protocol
2094 *
1da177e4
LT
2095 * This function is called by a protocol handler that wants to
2096 * advertise its address family, and have it linked into the
55737fda
SH
2097 * socket interface. The value ops->family coresponds to the
2098 * socket system call protocol family.
1da177e4 2099 */
f0fd27d4 2100int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2101{
2102 int err;
2103
2104 if (ops->family >= NPROTO) {
89bddce5
SH
2105 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2106 NPROTO);
1da177e4
LT
2107 return -ENOBUFS;
2108 }
55737fda
SH
2109
2110 spin_lock(&net_family_lock);
2111 if (net_families[ops->family])
2112 err = -EEXIST;
2113 else {
89bddce5 2114 net_families[ops->family] = ops;
1da177e4
LT
2115 err = 0;
2116 }
55737fda
SH
2117 spin_unlock(&net_family_lock);
2118
89bddce5 2119 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2120 return err;
2121}
2122
55737fda
SH
2123/**
2124 * sock_unregister - remove a protocol handler
2125 * @family: protocol family to remove
2126 *
1da177e4
LT
2127 * This function is called by a protocol handler that wants to
2128 * remove its address family, and have it unlinked from the
55737fda
SH
2129 * new socket creation.
2130 *
2131 * If protocol handler is a module, then it can use module reference
2132 * counts to protect against new references. If protocol handler is not
2133 * a module then it needs to provide its own protection in
2134 * the ops->create routine.
1da177e4 2135 */
f0fd27d4 2136void sock_unregister(int family)
1da177e4 2137{
f0fd27d4 2138 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2139
55737fda 2140 spin_lock(&net_family_lock);
89bddce5 2141 net_families[family] = NULL;
55737fda
SH
2142 spin_unlock(&net_family_lock);
2143
2144 synchronize_rcu();
2145
89bddce5 2146 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2147}
2148
77d76ea3 2149static int __init sock_init(void)
1da177e4
LT
2150{
2151 /*
89bddce5 2152 * Initialize sock SLAB cache.
1da177e4 2153 */
89bddce5 2154
1da177e4
LT
2155 sk_init();
2156
1da177e4 2157 /*
89bddce5 2158 * Initialize skbuff SLAB cache
1da177e4
LT
2159 */
2160 skb_init();
1da177e4
LT
2161
2162 /*
89bddce5 2163 * Initialize the protocols module.
1da177e4
LT
2164 */
2165
2166 init_inodecache();
2167 register_filesystem(&sock_fs_type);
2168 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2169
2170 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2171 */
2172
2173#ifdef CONFIG_NETFILTER
2174 netfilter_init();
2175#endif
cbeb321a
DM
2176
2177 return 0;
1da177e4
LT
2178}
2179
77d76ea3
AK
2180core_initcall(sock_init); /* early initcall */
2181
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Print the number of sockets in use to @seq, summed over the per-CPU
 * sockets_in_use counters of every possible CPU.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif /* CONFIG_PROC_FS */
1da177e4 2198
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point for socket files: forward the request to the
 * protocol's compat_ioctl operation, or return -ENOIOCTLCMD when the
 * protocol does not provide one.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	int ret;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	ret = sock->ops->compat_ioctl(sock, cmd, arg);
	return ret;
}
#endif
2212
ac5a488e
SS
2213int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2214{
2215 return sock->ops->bind(sock, addr, addrlen);
2216}
2217
2218int kernel_listen(struct socket *sock, int backlog)
2219{
2220 return sock->ops->listen(sock, backlog);
2221}
2222
2223int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2224{
2225 struct sock *sk = sock->sk;
2226 int err;
2227
2228 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2229 newsock);
2230 if (err < 0)
2231 goto done;
2232
2233 err = sock->ops->accept(sock, *newsock, flags);
2234 if (err < 0) {
2235 sock_release(*newsock);
2236 goto done;
2237 }
2238
2239 (*newsock)->ops = sock->ops;
2240
2241done:
2242 return err;
2243}
2244
2245int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2246 int flags)
ac5a488e
SS
2247{
2248 return sock->ops->connect(sock, addr, addrlen, flags);
2249}
2250
2251int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2252 int *addrlen)
2253{
2254 return sock->ops->getname(sock, addr, addrlen, 0);
2255}
2256
2257int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2258 int *addrlen)
2259{
2260 return sock->ops->getname(sock, addr, addrlen, 1);
2261}
2262
2263int kernel_getsockopt(struct socket *sock, int level, int optname,
2264 char *optval, int *optlen)
2265{
2266 mm_segment_t oldfs = get_fs();
2267 int err;
2268
2269 set_fs(KERNEL_DS);
2270 if (level == SOL_SOCKET)
2271 err = sock_getsockopt(sock, level, optname, optval, optlen);
2272 else
2273 err = sock->ops->getsockopt(sock, level, optname, optval,
2274 optlen);
2275 set_fs(oldfs);
2276 return err;
2277}
2278
2279int kernel_setsockopt(struct socket *sock, int level, int optname,
2280 char *optval, int optlen)
2281{
2282 mm_segment_t oldfs = get_fs();
2283 int err;
2284
2285 set_fs(KERNEL_DS);
2286 if (level == SOL_SOCKET)
2287 err = sock_setsockopt(sock, level, optname, optval, optlen);
2288 else
2289 err = sock->ops->setsockopt(sock, level, optname, optval,
2290 optlen);
2291 set_fs(oldfs);
2292 return err;
2293}
2294
2295int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2296 size_t size, int flags)
2297{
2298 if (sock->ops->sendpage)
2299 return sock->ops->sendpage(sock, page, offset, size, flags);
2300
2301 return sock_no_sendpage(sock, page, offset, size, flags);
2302}
2303
2304int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2305{
2306 mm_segment_t oldfs = get_fs();
2307 int err;
2308
2309 set_fs(KERNEL_DS);
2310 err = sock->ops->ioctl(sock, cmd, arg);
2311 set_fs(oldfs);
2312
2313 return err;
2314}
2315
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket creation, registration, lookup and message helpers */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() wrappers for in-kernel socket users */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
2341EXPORT_SYMBOL(kernel_sock_ioctl);