netconsole: avoid null pointer dereference at show_local_mac()
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and Mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
93
94#include <net/sock.h>
95#include <linux/netfilter.h>
96
97static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
98static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
99 unsigned long nr_segs, loff_t pos);
100static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
101 unsigned long nr_segs, loff_t pos);
89bddce5 102static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
103
104static int sock_close(struct inode *inode, struct file *file);
105static unsigned int sock_poll(struct file *file,
106 struct poll_table_struct *wait);
89bddce5 107static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
108#ifdef CONFIG_COMPAT
109static long compat_sock_ioctl(struct file *file,
89bddce5 110 unsigned int cmd, unsigned long arg);
89bbfc95 111#endif
1da177e4 112static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
113static ssize_t sock_sendpage(struct file *file, struct page *page,
114 int offset, size_t size, loff_t *ppos, int more);
9c55e01c
JA
115static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
116 struct pipe_inode_info *pipe, size_t len,
117 unsigned int flags);
1da177e4 118
1da177e4
LT
119/*
120 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
121 * in the operation structures but are done directly via the socketcall() multiplexor.
122 */
123
da7071d7 124static const struct file_operations socket_file_ops = {
1da177e4
LT
125 .owner = THIS_MODULE,
126 .llseek = no_llseek,
127 .aio_read = sock_aio_read,
128 .aio_write = sock_aio_write,
129 .poll = sock_poll,
130 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
131#ifdef CONFIG_COMPAT
132 .compat_ioctl = compat_sock_ioctl,
133#endif
1da177e4
LT
134 .mmap = sock_mmap,
135 .open = sock_no_open, /* special open code to disallow open via /proc */
136 .release = sock_close,
137 .fasync = sock_fasync,
5274f052
JA
138 .sendpage = sock_sendpage,
139 .splice_write = generic_splice_sendpage,
9c55e01c 140 .splice_read = sock_splice_read,
1da177e4
LT
141};
142
143/*
144 * The protocol list. Each protocol is registered in here.
145 */
146
1da177e4 147static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 148static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 149
1da177e4
LT
150/*
151 * Statistics counters of the socket lists
152 */
153
154static DEFINE_PER_CPU(int, sockets_in_use) = 0;
155
156/*
89bddce5
SH
157 * Support routines.
158 * Move socket addresses back and forth across the kernel/user
159 * divide and look after the messy bits.
1da177e4
LT
160 */
161
/*
 * Largest socket address, in bytes, moved across the kernel/user divide:
 * 108 for Unix domain, 16 for IP, 16 for IPX, 24 for IPv6 and about 80
 * for AX.25.  Must be at least one bigger than the AF_UNIX size
 * (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 170
1da177e4
LT
171/**
172 * move_addr_to_kernel - copy a socket address into kernel space
173 * @uaddr: Address in user space
174 * @kaddr: Address in kernel space
175 * @ulen: Length in user space
176 *
177 * The address is copied into kernel space. If the provided address is
178 * too long an error code of -EINVAL is returned. If the copy gives
179 * invalid addresses -EFAULT is returned. On a success 0 is returned.
180 */
181
182int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
183{
89bddce5 184 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 185 return -EINVAL;
89bddce5 186 if (ulen == 0)
1da177e4 187 return 0;
89bddce5 188 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 189 return -EFAULT;
3ec3b2fb 190 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
191}
192
193/**
194 * move_addr_to_user - copy an address to user space
195 * @kaddr: kernel space address
196 * @klen: length of address in kernel
197 * @uaddr: user space address
198 * @ulen: pointer to user length field
199 *
200 * The value pointed to by ulen on entry is the buffer length available.
201 * This is overwritten with the buffer space used. -EINVAL is returned
202 * if an overlong buffer is specified or a negative buffer size. -EFAULT
203 * is returned if either the buffer or the length field are not
204 * accessible.
205 * After copying the data up to the limit the user specifies, the true
206 * length of the data is written over the length limit the user
207 * specified. Zero is returned for a success.
208 */
89bddce5
SH
209
210int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
211 int __user *ulen)
1da177e4
LT
212{
213 int err;
214 int len;
215
89bddce5
SH
216 err = get_user(len, ulen);
217 if (err)
1da177e4 218 return err;
89bddce5
SH
219 if (len > klen)
220 len = klen;
221 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 222 return -EINVAL;
89bddce5 223 if (len) {
d6fe3945
SG
224 if (audit_sockaddr(klen, kaddr))
225 return -ENOMEM;
89bddce5 226 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
227 return -EFAULT;
228 }
229 /*
89bddce5
SH
230 * "fromlen shall refer to the value before truncation.."
231 * 1003.1g
1da177e4
LT
232 */
233 return __put_user(klen, ulen);
234}
235
236#define SOCKFS_MAGIC 0x534F434B
237
e18b890b 238static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
239
240static struct inode *sock_alloc_inode(struct super_block *sb)
241{
242 struct socket_alloc *ei;
89bddce5 243
e94b1766 244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
245 if (!ei)
246 return NULL;
247 init_waitqueue_head(&ei->socket.wait);
89bddce5 248
1da177e4
LT
249 ei->socket.fasync_list = NULL;
250 ei->socket.state = SS_UNCONNECTED;
251 ei->socket.flags = 0;
252 ei->socket.ops = NULL;
253 ei->socket.sk = NULL;
254 ei->socket.file = NULL;
1da177e4
LT
255
256 return &ei->vfs_inode;
257}
258
259static void sock_destroy_inode(struct inode *inode)
260{
261 kmem_cache_free(sock_inode_cachep,
262 container_of(inode, struct socket_alloc, vfs_inode));
263}
264
4ba9b9d0 265static void init_once(struct kmem_cache *cachep, void *foo)
1da177e4 266{
89bddce5 267 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 268
a35afb83 269 inode_init_once(&ei->vfs_inode);
1da177e4 270}
89bddce5 271
1da177e4
LT
272static int init_inodecache(void)
273{
274 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
275 sizeof(struct socket_alloc),
276 0,
277 (SLAB_HWCACHE_ALIGN |
278 SLAB_RECLAIM_ACCOUNT |
279 SLAB_MEM_SPREAD),
20c2df83 280 init_once);
1da177e4
LT
281 if (sock_inode_cachep == NULL)
282 return -ENOMEM;
283 return 0;
284}
285
286static struct super_operations sockfs_ops = {
287 .alloc_inode = sock_alloc_inode,
288 .destroy_inode =sock_destroy_inode,
289 .statfs = simple_statfs,
290};
291
454e2398 292static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
293 int flags, const char *dev_name, void *data,
294 struct vfsmount *mnt)
1da177e4 295{
454e2398
DH
296 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
297 mnt);
1da177e4
LT
298}
299
ba89966c 300static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
301
302static struct file_system_type sock_fs_type = {
303 .name = "sockfs",
304 .get_sb = sockfs_get_sb,
305 .kill_sb = kill_anon_super,
306};
89bddce5 307
1da177e4
LT
308static int sockfs_delete_dentry(struct dentry *dentry)
309{
304e61e6
ED
310 /*
311 * At creation time, we pretended this dentry was hashed
312 * (by clearing DCACHE_UNHASHED bit in d_flags)
313 * At delete time, we restore the truth : not hashed.
314 * (so that dput() can proceed correctly)
315 */
316 dentry->d_flags |= DCACHE_UNHASHED;
317 return 0;
1da177e4 318}
c23fbb6b
ED
319
320/*
321 * sockfs_dname() is called from d_path().
322 */
323static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
324{
325 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
326 dentry->d_inode->i_ino);
327}
328
1da177e4 329static struct dentry_operations sockfs_dentry_operations = {
89bddce5 330 .d_delete = sockfs_delete_dentry,
c23fbb6b 331 .d_dname = sockfs_dname,
1da177e4
LT
332};
333
334/*
335 * Obtains the first available file descriptor and sets it up for use.
336 *
39d8c1b6
DM
337 * These functions create file structures and map them to fd space
338 * of the current process. On success it returns file descriptor
1da177e4
LT
339 * and file struct implicitly stored in sock->file.
340 * Note that another thread may close file descriptor before we return
341 * from this function. We use the fact that now we do not refer
342 * to socket after mapping. If one day we will need it, this
343 * function will increment ref. count on file by 1.
344 *
345 * In any case returned fd MAY BE not valid!
346 * This race condition is unavoidable
347 * with shared fd spaces, we cannot solve it inside kernel,
348 * but we take care of internal coherence yet.
349 */
350
39d8c1b6 351static int sock_alloc_fd(struct file **filep)
1da177e4
LT
352{
353 int fd;
1da177e4
LT
354
355 fd = get_unused_fd();
39d8c1b6 356 if (likely(fd >= 0)) {
1da177e4
LT
357 struct file *file = get_empty_filp();
358
39d8c1b6
DM
359 *filep = file;
360 if (unlikely(!file)) {
1da177e4 361 put_unused_fd(fd);
39d8c1b6 362 return -ENFILE;
1da177e4 363 }
39d8c1b6
DM
364 } else
365 *filep = NULL;
366 return fd;
367}
1da177e4 368
39d8c1b6
DM
369static int sock_attach_fd(struct socket *sock, struct file *file)
370{
ce8d2cdf 371 struct dentry *dentry;
c23fbb6b 372 struct qstr name = { .name = "" };
39d8c1b6 373
ce8d2cdf
DH
374 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
375 if (unlikely(!dentry))
39d8c1b6
DM
376 return -ENOMEM;
377
ce8d2cdf 378 dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
379 /*
380 * We dont want to push this dentry into global dentry hash table.
381 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
382 * This permits a working /proc/$pid/fd/XXX on sockets
383 */
ce8d2cdf
DH
384 dentry->d_flags &= ~DCACHE_UNHASHED;
385 d_instantiate(dentry, SOCK_INODE(sock));
39d8c1b6
DM
386
387 sock->file = file;
ce8d2cdf
DH
388 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
389 &socket_file_ops);
390 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6
DM
391 file->f_flags = O_RDWR;
392 file->f_pos = 0;
393 file->private_data = sock;
1da177e4 394
39d8c1b6
DM
395 return 0;
396}
397
/*
 * Allocate a file descriptor and struct file for @sock and install the
 * file in the current process's descriptor table.
 *
 * Returns the new descriptor, or a negative errno from
 * sock_alloc_fd()/sock_attach_fd(). If attaching fails, both the file
 * and the reserved descriptor are released again.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd = sock_alloc_fd(&newfile);

	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
415
6cb153ca
BL
416static struct socket *sock_from_file(struct file *file, int *err)
417{
6cb153ca
BL
418 if (file->f_op == &socket_file_ops)
419 return file->private_data; /* set in sock_map_fd */
420
23bb80d2
ED
421 *err = -ENOTSOCK;
422 return NULL;
6cb153ca
BL
423}
424
1da177e4
LT
425/**
426 * sockfd_lookup - Go from a file number to its socket slot
427 * @fd: file handle
428 * @err: pointer to an error code return
429 *
430 * The file handle passed in is locked and the socket it is bound
431 * too is returned. If an error occurs the err pointer is overwritten
432 * with a negative errno code and NULL is returned. The function checks
433 * for both invalid handles and passing a handle which is not a socket.
434 *
435 * On a success the socket object pointer is returned.
436 */
437
438struct socket *sockfd_lookup(int fd, int *err)
439{
440 struct file *file;
1da177e4
LT
441 struct socket *sock;
442
89bddce5
SH
443 file = fget(fd);
444 if (!file) {
1da177e4
LT
445 *err = -EBADF;
446 return NULL;
447 }
89bddce5 448
6cb153ca
BL
449 sock = sock_from_file(file, err);
450 if (!sock)
1da177e4 451 fput(file);
6cb153ca
BL
452 return sock;
453}
1da177e4 454
6cb153ca
BL
455static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
456{
457 struct file *file;
458 struct socket *sock;
459
3672558c 460 *err = -EBADF;
6cb153ca
BL
461 file = fget_light(fd, fput_needed);
462 if (file) {
463 sock = sock_from_file(file, err);
464 if (sock)
465 return sock;
466 fput_light(file, *fput_needed);
1da177e4 467 }
6cb153ca 468 return NULL;
1da177e4
LT
469}
470
471/**
472 * sock_alloc - allocate a socket
89bddce5 473 *
1da177e4
LT
474 * Allocate a new inode and socket object. The two are bound together
475 * and initialised. The socket is then returned. If we are out of inodes
476 * NULL is returned.
477 */
478
479static struct socket *sock_alloc(void)
480{
89bddce5
SH
481 struct inode *inode;
482 struct socket *sock;
1da177e4
LT
483
484 inode = new_inode(sock_mnt->mnt_sb);
485 if (!inode)
486 return NULL;
487
488 sock = SOCKET_I(inode);
489
89bddce5 490 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
491 inode->i_uid = current->fsuid;
492 inode->i_gid = current->fsgid;
493
494 get_cpu_var(sockets_in_use)++;
495 put_cpu_var(sockets_in_use);
496 return sock;
497}
498
499/*
500 * In theory you can't get an open on this inode, but /proc provides
501 * a back door. Remember to keep it shut otherwise you'll let the
502 * creepy crawlies in.
503 */
89bddce5 504
1da177e4
LT
505static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
506{
507 return -ENXIO;
508}
509
4b6f5d20 510const struct file_operations bad_sock_fops = {
1da177e4
LT
511 .owner = THIS_MODULE,
512 .open = sock_no_open,
513};
514
515/**
516 * sock_release - close a socket
517 * @sock: socket to close
518 *
519 * The socket is released from the protocol stack if it has a release
520 * callback, and the inode is then released if the socket is bound to
89bddce5 521 * an inode not a file.
1da177e4 522 */
89bddce5 523
1da177e4
LT
524void sock_release(struct socket *sock)
525{
526 if (sock->ops) {
527 struct module *owner = sock->ops->owner;
528
529 sock->ops->release(sock);
530 sock->ops = NULL;
531 module_put(owner);
532 }
533
534 if (sock->fasync_list)
535 printk(KERN_ERR "sock_release: fasync list not empty!\n");
536
537 get_cpu_var(sockets_in_use)--;
538 put_cpu_var(sockets_in_use);
539 if (!sock->file) {
540 iput(SOCK_INODE(sock));
541 return;
542 }
89bddce5 543 sock->file = NULL;
1da177e4
LT
544}
545
89bddce5 546static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
547 struct msghdr *msg, size_t size)
548{
549 struct sock_iocb *si = kiocb_to_siocb(iocb);
550 int err;
551
552 si->sock = sock;
553 si->scm = NULL;
554 si->msg = msg;
555 si->size = size;
556
557 err = security_socket_sendmsg(sock, msg, size);
558 if (err)
559 return err;
560
561 return sock->ops->sendmsg(iocb, sock, msg, size);
562}
563
564int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
565{
566 struct kiocb iocb;
567 struct sock_iocb siocb;
568 int ret;
569
570 init_sync_kiocb(&iocb, NULL);
571 iocb.private = &siocb;
572 ret = __sock_sendmsg(&iocb, sock, msg, size);
573 if (-EIOCBQUEUED == ret)
574 ret = wait_on_sync_kiocb(&iocb);
575 return ret;
576}
577
578int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
579 struct kvec *vec, size_t num, size_t size)
580{
581 mm_segment_t oldfs = get_fs();
582 int result;
583
584 set_fs(KERNEL_DS);
585 /*
586 * the following is safe, since for compiler definitions of kvec and
587 * iovec are identical, yielding the same in-core layout and alignment
588 */
89bddce5 589 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
590 msg->msg_iovlen = num;
591 result = sock_sendmsg(sock, msg, size);
592 set_fs(oldfs);
593 return result;
594}
595
92f37fd2
ED
596/*
597 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
598 */
599void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
600 struct sk_buff *skb)
601{
602 ktime_t kt = skb->tstamp;
603
604 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
605 struct timeval tv;
606 /* Race occurred between timestamp enabling and packet
607 receiving. Fill in the current time for now. */
608 if (kt.tv64 == 0)
609 kt = ktime_get_real();
610 skb->tstamp = kt;
611 tv = ktime_to_timeval(kt);
612 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
613 } else {
614 struct timespec ts;
615 /* Race occurred between timestamp enabling and packet
616 receiving. Fill in the current time for now. */
617 if (kt.tv64 == 0)
618 kt = ktime_get_real();
619 skb->tstamp = kt;
620 ts = ktime_to_timespec(kt);
621 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
622 }
623}
624
7c81fd8b
ACM
625EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
626
89bddce5 627static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
628 struct msghdr *msg, size_t size, int flags)
629{
630 int err;
631 struct sock_iocb *si = kiocb_to_siocb(iocb);
632
633 si->sock = sock;
634 si->scm = NULL;
635 si->msg = msg;
636 si->size = size;
637 si->flags = flags;
638
639 err = security_socket_recvmsg(sock, msg, size, flags);
640 if (err)
641 return err;
642
643 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
644}
645
89bddce5 646int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
647 size_t size, int flags)
648{
649 struct kiocb iocb;
650 struct sock_iocb siocb;
651 int ret;
652
89bddce5 653 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
654 iocb.private = &siocb;
655 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
656 if (-EIOCBQUEUED == ret)
657 ret = wait_on_sync_kiocb(&iocb);
658 return ret;
659}
660
89bddce5
SH
661int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
662 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
663{
664 mm_segment_t oldfs = get_fs();
665 int result;
666
667 set_fs(KERNEL_DS);
668 /*
669 * the following is safe, since for compiler definitions of kvec and
670 * iovec are identical, yielding the same in-core layout and alignment
671 */
89bddce5 672 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
673 result = sock_recvmsg(sock, msg, size, flags);
674 set_fs(oldfs);
675 return result;
676}
677
678static void sock_aio_dtor(struct kiocb *iocb)
679{
680 kfree(iocb->private);
681}
682
ce1d4d3e
CH
683static ssize_t sock_sendpage(struct file *file, struct page *page,
684 int offset, size_t size, loff_t *ppos, int more)
1da177e4 685{
1da177e4
LT
686 struct socket *sock;
687 int flags;
688
ce1d4d3e
CH
689 sock = file->private_data;
690
691 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
692 if (more)
693 flags |= MSG_MORE;
694
695 return sock->ops->sendpage(sock, page, offset, size, flags);
696}
1da177e4 697
9c55e01c
JA
698static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
699 struct pipe_inode_info *pipe, size_t len,
700 unsigned int flags)
701{
702 struct socket *sock = file->private_data;
703
704 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
705}
706
ce1d4d3e 707static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 708 struct sock_iocb *siocb)
ce1d4d3e
CH
709{
710 if (!is_sync_kiocb(iocb)) {
711 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
712 if (!siocb)
713 return NULL;
1da177e4
LT
714 iocb->ki_dtor = sock_aio_dtor;
715 }
1da177e4 716
ce1d4d3e 717 siocb->kiocb = iocb;
ce1d4d3e
CH
718 iocb->private = siocb;
719 return siocb;
1da177e4
LT
720}
721
ce1d4d3e 722static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
723 struct file *file, const struct iovec *iov,
724 unsigned long nr_segs)
ce1d4d3e
CH
725{
726 struct socket *sock = file->private_data;
727 size_t size = 0;
728 int i;
1da177e4 729
89bddce5
SH
730 for (i = 0; i < nr_segs; i++)
731 size += iov[i].iov_len;
1da177e4 732
ce1d4d3e
CH
733 msg->msg_name = NULL;
734 msg->msg_namelen = 0;
735 msg->msg_control = NULL;
736 msg->msg_controllen = 0;
89bddce5 737 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
738 msg->msg_iovlen = nr_segs;
739 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
740
741 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
742}
743
027445c3
BP
744static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
745 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
746{
747 struct sock_iocb siocb, *x;
748
1da177e4
LT
749 if (pos != 0)
750 return -ESPIPE;
027445c3
BP
751
752 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
753 return 0;
754
027445c3
BP
755
756 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
757 if (!x)
758 return -ENOMEM;
027445c3 759 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
760}
761
ce1d4d3e 762static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
763 struct file *file, const struct iovec *iov,
764 unsigned long nr_segs)
1da177e4 765{
ce1d4d3e
CH
766 struct socket *sock = file->private_data;
767 size_t size = 0;
768 int i;
1da177e4 769
89bddce5
SH
770 for (i = 0; i < nr_segs; i++)
771 size += iov[i].iov_len;
1da177e4 772
ce1d4d3e
CH
773 msg->msg_name = NULL;
774 msg->msg_namelen = 0;
775 msg->msg_control = NULL;
776 msg->msg_controllen = 0;
89bddce5 777 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
778 msg->msg_iovlen = nr_segs;
779 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
780 if (sock->type == SOCK_SEQPACKET)
781 msg->msg_flags |= MSG_EOR;
1da177e4 782
ce1d4d3e 783 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
784}
785
027445c3
BP
786static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
787 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
788{
789 struct sock_iocb siocb, *x;
1da177e4 790
ce1d4d3e
CH
791 if (pos != 0)
792 return -ESPIPE;
027445c3 793
027445c3 794 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
795 if (!x)
796 return -ENOMEM;
1da177e4 797
027445c3 798 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
799}
800
1da177e4
LT
801/*
802 * Atomic setting of ioctl hooks to avoid race
803 * with module unload.
804 */
805
4a3e2f71 806static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 807static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 808
881d966b 809void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 810{
4a3e2f71 811 mutex_lock(&br_ioctl_mutex);
1da177e4 812 br_ioctl_hook = hook;
4a3e2f71 813 mutex_unlock(&br_ioctl_mutex);
1da177e4 814}
89bddce5 815
1da177e4
LT
816EXPORT_SYMBOL(brioctl_set);
817
4a3e2f71 818static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 819static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 820
881d966b 821void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 822{
4a3e2f71 823 mutex_lock(&vlan_ioctl_mutex);
1da177e4 824 vlan_ioctl_hook = hook;
4a3e2f71 825 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 826}
89bddce5 827
1da177e4
LT
828EXPORT_SYMBOL(vlan_ioctl_set);
829
4a3e2f71 830static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 831static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 832
89bddce5 833void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 834{
4a3e2f71 835 mutex_lock(&dlci_ioctl_mutex);
1da177e4 836 dlci_ioctl_hook = hook;
4a3e2f71 837 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 838}
89bddce5 839
1da177e4
LT
840EXPORT_SYMBOL(dlci_ioctl_set);
841
842/*
843 * With an ioctl, arg may well be a user mode pointer, but we don't know
844 * what to do with it - that's up to the protocol still.
845 */
846
847static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
848{
849 struct socket *sock;
881d966b 850 struct sock *sk;
1da177e4
LT
851 void __user *argp = (void __user *)arg;
852 int pid, err;
881d966b 853 struct net *net;
1da177e4 854
b69aee04 855 sock = file->private_data;
881d966b
EB
856 sk = sock->sk;
857 net = sk->sk_net;
1da177e4 858 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 859 err = dev_ioctl(net, cmd, argp);
1da177e4 860 } else
d86b5e0e 861#ifdef CONFIG_WIRELESS_EXT
1da177e4 862 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 863 err = dev_ioctl(net, cmd, argp);
1da177e4 864 } else
89bddce5
SH
865#endif /* CONFIG_WIRELESS_EXT */
866 switch (cmd) {
1da177e4
LT
867 case FIOSETOWN:
868 case SIOCSPGRP:
869 err = -EFAULT;
870 if (get_user(pid, (int __user *)argp))
871 break;
872 err = f_setown(sock->file, pid, 1);
873 break;
874 case FIOGETOWN:
875 case SIOCGPGRP:
609d7fa9 876 err = put_user(f_getown(sock->file),
89bddce5 877 (int __user *)argp);
1da177e4
LT
878 break;
879 case SIOCGIFBR:
880 case SIOCSIFBR:
881 case SIOCBRADDBR:
882 case SIOCBRDELBR:
883 err = -ENOPKG;
884 if (!br_ioctl_hook)
885 request_module("bridge");
886
4a3e2f71 887 mutex_lock(&br_ioctl_mutex);
89bddce5 888 if (br_ioctl_hook)
881d966b 889 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 890 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
891 break;
892 case SIOCGIFVLAN:
893 case SIOCSIFVLAN:
894 err = -ENOPKG;
895 if (!vlan_ioctl_hook)
896 request_module("8021q");
897
4a3e2f71 898 mutex_lock(&vlan_ioctl_mutex);
1da177e4 899 if (vlan_ioctl_hook)
881d966b 900 err = vlan_ioctl_hook(net, argp);
4a3e2f71 901 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 902 break;
1da177e4
LT
903 case SIOCADDDLCI:
904 case SIOCDELDLCI:
905 err = -ENOPKG;
906 if (!dlci_ioctl_hook)
907 request_module("dlci");
908
909 if (dlci_ioctl_hook) {
4a3e2f71 910 mutex_lock(&dlci_ioctl_mutex);
1da177e4 911 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 912 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
913 }
914 break;
915 default:
916 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
917
918 /*
919 * If this ioctl is unknown try to hand it down
920 * to the NIC driver.
921 */
922 if (err == -ENOIOCTLCMD)
881d966b 923 err = dev_ioctl(net, cmd, argp);
1da177e4 924 break;
89bddce5 925 }
1da177e4
LT
926 return err;
927}
928
929int sock_create_lite(int family, int type, int protocol, struct socket **res)
930{
931 int err;
932 struct socket *sock = NULL;
89bddce5 933
1da177e4
LT
934 err = security_socket_create(family, type, protocol, 1);
935 if (err)
936 goto out;
937
938 sock = sock_alloc();
939 if (!sock) {
940 err = -ENOMEM;
941 goto out;
942 }
943
1da177e4 944 sock->type = type;
7420ed23
VY
945 err = security_socket_post_create(sock, family, type, protocol, 1);
946 if (err)
947 goto out_release;
948
1da177e4
LT
949out:
950 *res = sock;
951 return err;
7420ed23
VY
952out_release:
953 sock_release(sock);
954 sock = NULL;
955 goto out;
1da177e4
LT
956}
957
958/* No kernel lock held - perfect */
89bddce5 959static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
960{
961 struct socket *sock;
962
963 /*
89bddce5 964 * We can't return errors to poll, so it's either yes or no.
1da177e4 965 */
b69aee04 966 sock = file->private_data;
1da177e4
LT
967 return sock->ops->poll(file, sock, wait);
968}
969
89bddce5 970static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 971{
b69aee04 972 struct socket *sock = file->private_data;
1da177e4
LT
973
974 return sock->ops->mmap(file, sock, vma);
975}
976
20380731 977static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
978{
979 /*
89bddce5
SH
980 * It was possible the inode is NULL we were
981 * closing an unfinished socket.
1da177e4
LT
982 */
983
89bddce5 984 if (!inode) {
1da177e4
LT
985 printk(KERN_DEBUG "sock_close: NULL inode\n");
986 return 0;
987 }
988 sock_fasync(-1, filp, 0);
989 sock_release(SOCKET_I(inode));
990 return 0;
991}
992
993/*
994 * Update the socket async list
995 *
996 * Fasync_list locking strategy.
997 *
998 * 1. fasync_list is modified only under process context socket lock
999 * i.e. under semaphore.
1000 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1001 * or under socket lock.
1002 * 3. fasync_list can be used from softirq context, so that
1003 * modification under socket lock have to be enhanced with
1004 * write_lock_bh(&sk->sk_callback_lock).
1005 * --ANK (990710)
1006 */
1007
1008static int sock_fasync(int fd, struct file *filp, int on)
1009{
89bddce5 1010 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1011 struct socket *sock;
1012 struct sock *sk;
1013
89bddce5 1014 if (on) {
8b3a7005 1015 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1016 if (fna == NULL)
1da177e4
LT
1017 return -ENOMEM;
1018 }
1019
b69aee04 1020 sock = filp->private_data;
1da177e4 1021
89bddce5
SH
1022 sk = sock->sk;
1023 if (sk == NULL) {
1da177e4
LT
1024 kfree(fna);
1025 return -EINVAL;
1026 }
1027
1028 lock_sock(sk);
1029
89bddce5 1030 prev = &(sock->fasync_list);
1da177e4 1031
89bddce5
SH
1032 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1033 if (fa->fa_file == filp)
1da177e4
LT
1034 break;
1035
89bddce5
SH
1036 if (on) {
1037 if (fa != NULL) {
1da177e4 1038 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1039 fa->fa_fd = fd;
1da177e4
LT
1040 write_unlock_bh(&sk->sk_callback_lock);
1041
1042 kfree(fna);
1043 goto out;
1044 }
89bddce5
SH
1045 fna->fa_file = filp;
1046 fna->fa_fd = fd;
1047 fna->magic = FASYNC_MAGIC;
1048 fna->fa_next = sock->fasync_list;
1da177e4 1049 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1050 sock->fasync_list = fna;
1da177e4 1051 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1052 } else {
1053 if (fa != NULL) {
1da177e4 1054 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1055 *prev = fa->fa_next;
1da177e4
LT
1056 write_unlock_bh(&sk->sk_callback_lock);
1057 kfree(fa);
1058 }
1059 }
1060
1061out:
1062 release_sock(sock->sk);
1063 return 0;
1064}
1065
1066/* This function may be called only under socket lock or callback_lock */
1067
1068int sock_wake_async(struct socket *sock, int how, int band)
1069{
1070 if (!sock || !sock->fasync_list)
1071 return -1;
89bddce5 1072 switch (how) {
8d8ad9d7 1073 case SOCK_WAKE_WAITD:
1da177e4
LT
1074 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1075 break;
1076 goto call_kill;
8d8ad9d7 1077 case SOCK_WAKE_SPACE:
1da177e4
LT
1078 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1079 break;
1080 /* fall through */
8d8ad9d7 1081 case SOCK_WAKE_IO:
89bddce5 1082call_kill:
1da177e4
LT
1083 __kill_fasync(sock->fasync_list, SIGIO, band);
1084 break;
8d8ad9d7 1085 case SOCK_WAKE_URG:
1da177e4
LT
1086 __kill_fasync(sock->fasync_list, SIGURG, band);
1087 }
1088 return 0;
1089}
1090
1b8d7ae4 1091static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1092 struct socket **res, int kern)
1da177e4
LT
1093{
1094 int err;
1095 struct socket *sock;
55737fda 1096 const struct net_proto_family *pf;
1da177e4
LT
1097
1098 /*
89bddce5 1099 * Check protocol is in range
1da177e4
LT
1100 */
1101 if (family < 0 || family >= NPROTO)
1102 return -EAFNOSUPPORT;
1103 if (type < 0 || type >= SOCK_MAX)
1104 return -EINVAL;
1105
1106 /* Compatibility.
1107
1108 This uglymoron is moved from INET layer to here to avoid
1109 deadlock in module load.
1110 */
1111 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1112 static int warned;
1da177e4
LT
1113 if (!warned) {
1114 warned = 1;
89bddce5
SH
1115 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1116 current->comm);
1da177e4
LT
1117 }
1118 family = PF_PACKET;
1119 }
1120
1121 err = security_socket_create(family, type, protocol, kern);
1122 if (err)
1123 return err;
89bddce5 1124
55737fda
SH
1125 /*
1126 * Allocate the socket and allow the family to set things up. if
1127 * the protocol is 0, the family is instructed to select an appropriate
1128 * default.
1129 */
1130 sock = sock_alloc();
1131 if (!sock) {
1132 if (net_ratelimit())
1133 printk(KERN_WARNING "socket: no more sockets\n");
1134 return -ENFILE; /* Not exactly a match, but its the
1135 closest posix thing */
1136 }
1137
1138 sock->type = type;
1139
1da177e4 1140#if defined(CONFIG_KMOD)
89bddce5
SH
1141 /* Attempt to load a protocol module if the find failed.
1142 *
1143 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1144 * requested real, full-featured networking support upon configuration.
1145 * Otherwise module support will break!
1146 */
55737fda 1147 if (net_families[family] == NULL)
89bddce5 1148 request_module("net-pf-%d", family);
1da177e4
LT
1149#endif
1150
55737fda
SH
1151 rcu_read_lock();
1152 pf = rcu_dereference(net_families[family]);
1153 err = -EAFNOSUPPORT;
1154 if (!pf)
1155 goto out_release;
1da177e4
LT
1156
1157 /*
1158 * We will call the ->create function, that possibly is in a loadable
1159 * module, so we have to bump that loadable module refcnt first.
1160 */
55737fda 1161 if (!try_module_get(pf->owner))
1da177e4
LT
1162 goto out_release;
1163
55737fda
SH
1164 /* Now protected by module ref count */
1165 rcu_read_unlock();
1166
1b8d7ae4 1167 err = pf->create(net, sock, protocol);
55737fda 1168 if (err < 0)
1da177e4 1169 goto out_module_put;
a79af59e 1170
1da177e4
LT
1171 /*
1172 * Now to bump the refcnt of the [loadable] module that owns this
1173 * socket at sock_release time we decrement its refcnt.
1174 */
55737fda
SH
1175 if (!try_module_get(sock->ops->owner))
1176 goto out_module_busy;
1177
1da177e4
LT
1178 /*
1179 * Now that we're done with the ->create function, the [loadable]
1180 * module can have its refcnt decremented
1181 */
55737fda 1182 module_put(pf->owner);
7420ed23
VY
1183 err = security_socket_post_create(sock, family, type, protocol, kern);
1184 if (err)
3b185525 1185 goto out_sock_release;
55737fda 1186 *res = sock;
1da177e4 1187
55737fda
SH
1188 return 0;
1189
1190out_module_busy:
1191 err = -EAFNOSUPPORT;
1da177e4 1192out_module_put:
55737fda
SH
1193 sock->ops = NULL;
1194 module_put(pf->owner);
1195out_sock_release:
1da177e4 1196 sock_release(sock);
55737fda
SH
1197 return err;
1198
1199out_release:
1200 rcu_read_unlock();
1201 goto out_sock_release;
1da177e4
LT
1202}
1203
1204int sock_create(int family, int type, int protocol, struct socket **res)
1205{
1b8d7ae4 1206 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1207}
1208
1209int sock_create_kern(int family, int type, int protocol, struct socket **res)
1210{
1b8d7ae4 1211 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1212}
1213
1214asmlinkage long sys_socket(int family, int type, int protocol)
1215{
1216 int retval;
1217 struct socket *sock;
1218
1219 retval = sock_create(family, type, protocol, &sock);
1220 if (retval < 0)
1221 goto out;
1222
1223 retval = sock_map_fd(sock);
1224 if (retval < 0)
1225 goto out_release;
1226
1227out:
1228 /* It may be already another descriptor 8) Not kernel problem. */
1229 return retval;
1230
1231out_release:
1232 sock_release(sock);
1233 return retval;
1234}
1235
1236/*
1237 * Create a pair of connected sockets.
1238 */
1239
89bddce5
SH
1240asmlinkage long sys_socketpair(int family, int type, int protocol,
1241 int __user *usockvec)
1da177e4
LT
1242{
1243 struct socket *sock1, *sock2;
1244 int fd1, fd2, err;
db349509 1245 struct file *newfile1, *newfile2;
1da177e4
LT
1246
1247 /*
1248 * Obtain the first socket and check if the underlying protocol
1249 * supports the socketpair call.
1250 */
1251
1252 err = sock_create(family, type, protocol, &sock1);
1253 if (err < 0)
1254 goto out;
1255
1256 err = sock_create(family, type, protocol, &sock2);
1257 if (err < 0)
1258 goto out_release_1;
1259
1260 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1261 if (err < 0)
1da177e4
LT
1262 goto out_release_both;
1263
db349509 1264 fd1 = sock_alloc_fd(&newfile1);
bf3c23d1
DM
1265 if (unlikely(fd1 < 0)) {
1266 err = fd1;
db349509 1267 goto out_release_both;
bf3c23d1 1268 }
1da177e4 1269
db349509
AV
1270 fd2 = sock_alloc_fd(&newfile2);
1271 if (unlikely(fd2 < 0)) {
bf3c23d1 1272 err = fd2;
db349509
AV
1273 put_filp(newfile1);
1274 put_unused_fd(fd1);
1da177e4 1275 goto out_release_both;
db349509 1276 }
1da177e4 1277
db349509
AV
1278 err = sock_attach_fd(sock1, newfile1);
1279 if (unlikely(err < 0)) {
1280 goto out_fd2;
1281 }
1282
1283 err = sock_attach_fd(sock2, newfile2);
1284 if (unlikely(err < 0)) {
1285 fput(newfile1);
1286 goto out_fd1;
1287 }
1288
1289 err = audit_fd_pair(fd1, fd2);
1290 if (err < 0) {
1291 fput(newfile1);
1292 fput(newfile2);
1293 goto out_fd;
1294 }
1da177e4 1295
db349509
AV
1296 fd_install(fd1, newfile1);
1297 fd_install(fd2, newfile2);
1da177e4
LT
1298 /* fd1 and fd2 may be already another descriptors.
1299 * Not kernel problem.
1300 */
1301
89bddce5 1302 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1303 if (!err)
1304 err = put_user(fd2, &usockvec[1]);
1305 if (!err)
1306 return 0;
1307
1308 sys_close(fd2);
1309 sys_close(fd1);
1310 return err;
1311
1da177e4 1312out_release_both:
89bddce5 1313 sock_release(sock2);
1da177e4 1314out_release_1:
89bddce5 1315 sock_release(sock1);
1da177e4
LT
1316out:
1317 return err;
db349509
AV
1318
1319out_fd2:
1320 put_filp(newfile1);
1321 sock_release(sock1);
1322out_fd1:
1323 put_filp(newfile2);
1324 sock_release(sock2);
1325out_fd:
1326 put_unused_fd(fd1);
1327 put_unused_fd(fd2);
1328 goto out;
1da177e4
LT
1329}
1330
1da177e4
LT
1331/*
1332 * Bind a name to a socket. Nothing much to do here since it's
1333 * the protocol's responsibility to handle the local address.
1334 *
1335 * We move the socket address to kernel space before we call
1336 * the protocol layer (having also checked the address is ok).
1337 */
1338
1339asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1340{
1341 struct socket *sock;
1342 char address[MAX_SOCK_ADDR];
6cb153ca 1343 int err, fput_needed;
1da177e4 1344
89bddce5 1345 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1346 if (sock) {
89bddce5
SH
1347 err = move_addr_to_kernel(umyaddr, addrlen, address);
1348 if (err >= 0) {
1349 err = security_socket_bind(sock,
1350 (struct sockaddr *)address,
1351 addrlen);
6cb153ca
BL
1352 if (!err)
1353 err = sock->ops->bind(sock,
89bddce5
SH
1354 (struct sockaddr *)
1355 address, addrlen);
1da177e4 1356 }
6cb153ca 1357 fput_light(sock->file, fput_needed);
89bddce5 1358 }
1da177e4
LT
1359 return err;
1360}
1361
1da177e4
LT
1362/*
1363 * Perform a listen. Basically, we allow the protocol to do anything
1364 * necessary for a listen, and if that works, we mark the socket as
1365 * ready for listening.
1366 */
1367
1da177e4
LT
1368asmlinkage long sys_listen(int fd, int backlog)
1369{
1370 struct socket *sock;
6cb153ca 1371 int err, fput_needed;
b8e1f9b5 1372 int somaxconn;
89bddce5
SH
1373
1374 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1375 if (sock) {
b8e1f9b5
PE
1376 somaxconn = sock->sk->sk_net->sysctl_somaxconn;
1377 if ((unsigned)backlog > somaxconn)
1378 backlog = somaxconn;
1da177e4
LT
1379
1380 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1381 if (!err)
1382 err = sock->ops->listen(sock, backlog);
1da177e4 1383
6cb153ca 1384 fput_light(sock->file, fput_needed);
1da177e4
LT
1385 }
1386 return err;
1387}
1388
1da177e4
LT
1389/*
1390 * For accept, we attempt to create a new socket, set up the link
1391 * with the client, wake up the client, then return the new
1392 * connected fd. We collect the address of the connector in kernel
1393 * space and move it to user at the very end. This is unclean because
1394 * we open the socket then return an error.
1395 *
1396 * 1003.1g adds the ability to recvmsg() to query connection pending
1397 * status to recvmsg. We need to add that support in a way thats
1398 * clean when we restucture accept also.
1399 */
1400
89bddce5
SH
1401asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1402 int __user *upeer_addrlen)
1da177e4
LT
1403{
1404 struct socket *sock, *newsock;
39d8c1b6 1405 struct file *newfile;
6cb153ca 1406 int err, len, newfd, fput_needed;
1da177e4
LT
1407 char address[MAX_SOCK_ADDR];
1408
6cb153ca 1409 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1410 if (!sock)
1411 goto out;
1412
1413 err = -ENFILE;
89bddce5 1414 if (!(newsock = sock_alloc()))
1da177e4
LT
1415 goto out_put;
1416
1417 newsock->type = sock->type;
1418 newsock->ops = sock->ops;
1419
1da177e4
LT
1420 /*
1421 * We don't need try_module_get here, as the listening socket (sock)
1422 * has the protocol module (sock->ops->owner) held.
1423 */
1424 __module_get(newsock->ops->owner);
1425
39d8c1b6
DM
1426 newfd = sock_alloc_fd(&newfile);
1427 if (unlikely(newfd < 0)) {
1428 err = newfd;
9a1875e6
DM
1429 sock_release(newsock);
1430 goto out_put;
39d8c1b6
DM
1431 }
1432
1433 err = sock_attach_fd(newsock, newfile);
1434 if (err < 0)
79f4f642 1435 goto out_fd_simple;
39d8c1b6 1436
a79af59e
FF
1437 err = security_socket_accept(sock, newsock);
1438 if (err)
39d8c1b6 1439 goto out_fd;
a79af59e 1440
1da177e4
LT
1441 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1442 if (err < 0)
39d8c1b6 1443 goto out_fd;
1da177e4
LT
1444
1445 if (upeer_sockaddr) {
89bddce5
SH
1446 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1447 &len, 2) < 0) {
1da177e4 1448 err = -ECONNABORTED;
39d8c1b6 1449 goto out_fd;
1da177e4 1450 }
89bddce5
SH
1451 err = move_addr_to_user(address, len, upeer_sockaddr,
1452 upeer_addrlen);
1da177e4 1453 if (err < 0)
39d8c1b6 1454 goto out_fd;
1da177e4
LT
1455 }
1456
1457 /* File flags are not inherited via accept() unlike another OSes. */
1458
39d8c1b6
DM
1459 fd_install(newfd, newfile);
1460 err = newfd;
1da177e4
LT
1461
1462 security_socket_post_accept(sock, newsock);
1463
1464out_put:
6cb153ca 1465 fput_light(sock->file, fput_needed);
1da177e4
LT
1466out:
1467 return err;
79f4f642
AD
1468out_fd_simple:
1469 sock_release(newsock);
1470 put_filp(newfile);
1471 put_unused_fd(newfd);
1472 goto out_put;
39d8c1b6 1473out_fd:
9606a216 1474 fput(newfile);
39d8c1b6 1475 put_unused_fd(newfd);
1da177e4
LT
1476 goto out_put;
1477}
1478
1da177e4
LT
1479/*
1480 * Attempt to connect to a socket with the server address. The address
1481 * is in user space so we verify it is OK and move it to kernel space.
1482 *
1483 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1484 * break bindings
1485 *
1486 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1487 * other SEQPACKET protocols that take time to connect() as it doesn't
1488 * include the -EINPROGRESS status for such sockets.
1489 */
1490
89bddce5
SH
1491asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1492 int addrlen)
1da177e4
LT
1493{
1494 struct socket *sock;
1495 char address[MAX_SOCK_ADDR];
6cb153ca 1496 int err, fput_needed;
1da177e4 1497
6cb153ca 1498 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1499 if (!sock)
1500 goto out;
1501 err = move_addr_to_kernel(uservaddr, addrlen, address);
1502 if (err < 0)
1503 goto out_put;
1504
89bddce5
SH
1505 err =
1506 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1507 if (err)
1508 goto out_put;
1509
89bddce5 1510 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1511 sock->file->f_flags);
1512out_put:
6cb153ca 1513 fput_light(sock->file, fput_needed);
1da177e4
LT
1514out:
1515 return err;
1516}
1517
1518/*
1519 * Get the local address ('name') of a socket object. Move the obtained
1520 * name to user space.
1521 */
1522
89bddce5
SH
1523asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1524 int __user *usockaddr_len)
1da177e4
LT
1525{
1526 struct socket *sock;
1527 char address[MAX_SOCK_ADDR];
6cb153ca 1528 int len, err, fput_needed;
89bddce5 1529
6cb153ca 1530 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1531 if (!sock)
1532 goto out;
1533
1534 err = security_socket_getsockname(sock);
1535 if (err)
1536 goto out_put;
1537
1538 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1539 if (err)
1540 goto out_put;
1541 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1542
1543out_put:
6cb153ca 1544 fput_light(sock->file, fput_needed);
1da177e4
LT
1545out:
1546 return err;
1547}
1548
1549/*
1550 * Get the remote address ('name') of a socket object. Move the obtained
1551 * name to user space.
1552 */
1553
89bddce5
SH
1554asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1555 int __user *usockaddr_len)
1da177e4
LT
1556{
1557 struct socket *sock;
1558 char address[MAX_SOCK_ADDR];
6cb153ca 1559 int len, err, fput_needed;
1da177e4 1560
89bddce5
SH
1561 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1562 if (sock != NULL) {
1da177e4
LT
1563 err = security_socket_getpeername(sock);
1564 if (err) {
6cb153ca 1565 fput_light(sock->file, fput_needed);
1da177e4
LT
1566 return err;
1567 }
1568
89bddce5
SH
1569 err =
1570 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1571 1);
1da177e4 1572 if (!err)
89bddce5
SH
1573 err = move_addr_to_user(address, len, usockaddr,
1574 usockaddr_len);
6cb153ca 1575 fput_light(sock->file, fput_needed);
1da177e4
LT
1576 }
1577 return err;
1578}
1579
1580/*
1581 * Send a datagram to a given address. We move the address into kernel
1582 * space and check the user space data area is readable before invoking
1583 * the protocol.
1584 */
1585
89bddce5
SH
1586asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1587 unsigned flags, struct sockaddr __user *addr,
1588 int addr_len)
1da177e4
LT
1589{
1590 struct socket *sock;
1591 char address[MAX_SOCK_ADDR];
1592 int err;
1593 struct msghdr msg;
1594 struct iovec iov;
6cb153ca 1595 int fput_needed;
6cb153ca 1596
de0fa95c
PE
1597 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1598 if (!sock)
4387ff75 1599 goto out;
6cb153ca 1600
89bddce5
SH
1601 iov.iov_base = buff;
1602 iov.iov_len = len;
1603 msg.msg_name = NULL;
1604 msg.msg_iov = &iov;
1605 msg.msg_iovlen = 1;
1606 msg.msg_control = NULL;
1607 msg.msg_controllen = 0;
1608 msg.msg_namelen = 0;
6cb153ca 1609 if (addr) {
1da177e4
LT
1610 err = move_addr_to_kernel(addr, addr_len, address);
1611 if (err < 0)
1612 goto out_put;
89bddce5
SH
1613 msg.msg_name = address;
1614 msg.msg_namelen = addr_len;
1da177e4
LT
1615 }
1616 if (sock->file->f_flags & O_NONBLOCK)
1617 flags |= MSG_DONTWAIT;
1618 msg.msg_flags = flags;
1619 err = sock_sendmsg(sock, &msg, len);
1620
89bddce5 1621out_put:
de0fa95c 1622 fput_light(sock->file, fput_needed);
4387ff75 1623out:
1da177e4
LT
1624 return err;
1625}
1626
1627/*
89bddce5 1628 * Send a datagram down a socket.
1da177e4
LT
1629 */
1630
89bddce5 1631asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1632{
1633 return sys_sendto(fd, buff, len, flags, NULL, 0);
1634}
1635
1636/*
89bddce5 1637 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1638 * sender. We verify the buffers are writable and if needed move the
1639 * sender address from kernel to user space.
1640 */
1641
89bddce5
SH
1642asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1643 unsigned flags, struct sockaddr __user *addr,
1644 int __user *addr_len)
1da177e4
LT
1645{
1646 struct socket *sock;
1647 struct iovec iov;
1648 struct msghdr msg;
1649 char address[MAX_SOCK_ADDR];
89bddce5 1650 int err, err2;
6cb153ca
BL
1651 int fput_needed;
1652
de0fa95c 1653 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1654 if (!sock)
de0fa95c 1655 goto out;
1da177e4 1656
89bddce5
SH
1657 msg.msg_control = NULL;
1658 msg.msg_controllen = 0;
1659 msg.msg_iovlen = 1;
1660 msg.msg_iov = &iov;
1661 iov.iov_len = size;
1662 iov.iov_base = ubuf;
1663 msg.msg_name = address;
1664 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1665 if (sock->file->f_flags & O_NONBLOCK)
1666 flags |= MSG_DONTWAIT;
89bddce5 1667 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1668
89bddce5
SH
1669 if (err >= 0 && addr != NULL) {
1670 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1671 if (err2 < 0)
1672 err = err2;
1da177e4 1673 }
de0fa95c
PE
1674
1675 fput_light(sock->file, fput_needed);
4387ff75 1676out:
1da177e4
LT
1677 return err;
1678}
1679
1680/*
89bddce5 1681 * Receive a datagram from a socket.
1da177e4
LT
1682 */
1683
89bddce5
SH
1684asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1685 unsigned flags)
1da177e4
LT
1686{
1687 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1688}
1689
1690/*
1691 * Set a socket option. Because we don't know the option lengths we have
1692 * to pass the user mode parameter for the protocols to sort out.
1693 */
1694
89bddce5
SH
1695asmlinkage long sys_setsockopt(int fd, int level, int optname,
1696 char __user *optval, int optlen)
1da177e4 1697{
6cb153ca 1698 int err, fput_needed;
1da177e4
LT
1699 struct socket *sock;
1700
1701 if (optlen < 0)
1702 return -EINVAL;
89bddce5
SH
1703
1704 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1705 if (sock != NULL) {
1706 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1707 if (err)
1708 goto out_put;
1da177e4
LT
1709
1710 if (level == SOL_SOCKET)
89bddce5
SH
1711 err =
1712 sock_setsockopt(sock, level, optname, optval,
1713 optlen);
1da177e4 1714 else
89bddce5
SH
1715 err =
1716 sock->ops->setsockopt(sock, level, optname, optval,
1717 optlen);
6cb153ca
BL
1718out_put:
1719 fput_light(sock->file, fput_needed);
1da177e4
LT
1720 }
1721 return err;
1722}
1723
1724/*
1725 * Get a socket option. Because we don't know the option lengths we have
1726 * to pass a user mode parameter for the protocols to sort out.
1727 */
1728
89bddce5
SH
1729asmlinkage long sys_getsockopt(int fd, int level, int optname,
1730 char __user *optval, int __user *optlen)
1da177e4 1731{
6cb153ca 1732 int err, fput_needed;
1da177e4
LT
1733 struct socket *sock;
1734
89bddce5
SH
1735 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1736 if (sock != NULL) {
6cb153ca
BL
1737 err = security_socket_getsockopt(sock, level, optname);
1738 if (err)
1739 goto out_put;
1da177e4
LT
1740
1741 if (level == SOL_SOCKET)
89bddce5
SH
1742 err =
1743 sock_getsockopt(sock, level, optname, optval,
1744 optlen);
1da177e4 1745 else
89bddce5
SH
1746 err =
1747 sock->ops->getsockopt(sock, level, optname, optval,
1748 optlen);
6cb153ca
BL
1749out_put:
1750 fput_light(sock->file, fput_needed);
1da177e4
LT
1751 }
1752 return err;
1753}
1754
1da177e4
LT
1755/*
1756 * Shutdown a socket.
1757 */
1758
1759asmlinkage long sys_shutdown(int fd, int how)
1760{
6cb153ca 1761 int err, fput_needed;
1da177e4
LT
1762 struct socket *sock;
1763
89bddce5
SH
1764 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1765 if (sock != NULL) {
1da177e4 1766 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1767 if (!err)
1768 err = sock->ops->shutdown(sock, how);
1769 fput_light(sock->file, fput_needed);
1da177e4
LT
1770 }
1771 return err;
1772}
1773
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * msghdr fields which are the same type (int / unsigned) on our platforms.
 *
 * They pick the compat or the native layout based on MSG_CMSG_COMPAT and
 * expect `flags` and a `<msg>_compat` pointer to be in scope where used.
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1780
1da177e4
LT
1781/*
1782 * BSD sendmsg interface
1783 */
1784
1785asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1786{
89bddce5
SH
1787 struct compat_msghdr __user *msg_compat =
1788 (struct compat_msghdr __user *)msg;
1da177e4
LT
1789 struct socket *sock;
1790 char address[MAX_SOCK_ADDR];
1791 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1792 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1793 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1794 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1795 unsigned char *ctl_buf = ctl;
1796 struct msghdr msg_sys;
1797 int err, ctl_len, iov_size, total_len;
6cb153ca 1798 int fput_needed;
89bddce5 1799
1da177e4
LT
1800 err = -EFAULT;
1801 if (MSG_CMSG_COMPAT & flags) {
1802 if (get_compat_msghdr(&msg_sys, msg_compat))
1803 return -EFAULT;
89bddce5
SH
1804 }
1805 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1806 return -EFAULT;
1807
6cb153ca 1808 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1809 if (!sock)
1da177e4
LT
1810 goto out;
1811
1812 /* do not move before msg_sys is valid */
1813 err = -EMSGSIZE;
1814 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1815 goto out_put;
1816
89bddce5 1817 /* Check whether to allocate the iovec area */
1da177e4
LT
1818 err = -ENOMEM;
1819 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1820 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1821 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1822 if (!iov)
1823 goto out_put;
1824 }
1825
1826 /* This will also move the address data into kernel space */
1827 if (MSG_CMSG_COMPAT & flags) {
1828 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1829 } else
1830 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1831 if (err < 0)
1da177e4
LT
1832 goto out_freeiov;
1833 total_len = err;
1834
1835 err = -ENOBUFS;
1836
1837 if (msg_sys.msg_controllen > INT_MAX)
1838 goto out_freeiov;
89bddce5 1839 ctl_len = msg_sys.msg_controllen;
1da177e4 1840 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1841 err =
1842 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1843 sizeof(ctl));
1da177e4
LT
1844 if (err)
1845 goto out_freeiov;
1846 ctl_buf = msg_sys.msg_control;
8920e8f9 1847 ctl_len = msg_sys.msg_controllen;
1da177e4 1848 } else if (ctl_len) {
89bddce5 1849 if (ctl_len > sizeof(ctl)) {
1da177e4 1850 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1851 if (ctl_buf == NULL)
1da177e4
LT
1852 goto out_freeiov;
1853 }
1854 err = -EFAULT;
1855 /*
1856 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1857 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1858 * checking falls down on this.
1859 */
89bddce5
SH
1860 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1861 ctl_len))
1da177e4
LT
1862 goto out_freectl;
1863 msg_sys.msg_control = ctl_buf;
1864 }
1865 msg_sys.msg_flags = flags;
1866
1867 if (sock->file->f_flags & O_NONBLOCK)
1868 msg_sys.msg_flags |= MSG_DONTWAIT;
1869 err = sock_sendmsg(sock, &msg_sys, total_len);
1870
1871out_freectl:
89bddce5 1872 if (ctl_buf != ctl)
1da177e4
LT
1873 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1874out_freeiov:
1875 if (iov != iovstack)
1876 sock_kfree_s(sock->sk, iov, iov_size);
1877out_put:
6cb153ca 1878 fput_light(sock->file, fput_needed);
89bddce5 1879out:
1da177e4
LT
1880 return err;
1881}
1882
1883/*
1884 * BSD recvmsg interface
1885 */
1886
89bddce5
SH
1887asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1888 unsigned int flags)
1da177e4 1889{
89bddce5
SH
1890 struct compat_msghdr __user *msg_compat =
1891 (struct compat_msghdr __user *)msg;
1da177e4
LT
1892 struct socket *sock;
1893 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1894 struct iovec *iov = iovstack;
1da177e4
LT
1895 struct msghdr msg_sys;
1896 unsigned long cmsg_ptr;
1897 int err, iov_size, total_len, len;
6cb153ca 1898 int fput_needed;
1da177e4
LT
1899
1900 /* kernel mode address */
1901 char addr[MAX_SOCK_ADDR];
1902
1903 /* user mode address pointers */
1904 struct sockaddr __user *uaddr;
1905 int __user *uaddr_len;
89bddce5 1906
1da177e4
LT
1907 if (MSG_CMSG_COMPAT & flags) {
1908 if (get_compat_msghdr(&msg_sys, msg_compat))
1909 return -EFAULT;
89bddce5
SH
1910 }
1911 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1912 return -EFAULT;
1da177e4 1913
6cb153ca 1914 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1915 if (!sock)
1916 goto out;
1917
1918 err = -EMSGSIZE;
1919 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1920 goto out_put;
89bddce5
SH
1921
1922 /* Check whether to allocate the iovec area */
1da177e4
LT
1923 err = -ENOMEM;
1924 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1925 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1926 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1927 if (!iov)
1928 goto out_put;
1929 }
1930
1931 /*
89bddce5
SH
1932 * Save the user-mode address (verify_iovec will change the
1933 * kernel msghdr to use the kernel address space)
1da177e4 1934 */
89bddce5 1935
cfcabdcc 1936 uaddr = (__force void __user *)msg_sys.msg_name;
1da177e4
LT
1937 uaddr_len = COMPAT_NAMELEN(msg);
1938 if (MSG_CMSG_COMPAT & flags) {
1939 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1940 } else
1941 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1942 if (err < 0)
1943 goto out_freeiov;
89bddce5 1944 total_len = err;
1da177e4
LT
1945
1946 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 1947 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 1948
1da177e4
LT
1949 if (sock->file->f_flags & O_NONBLOCK)
1950 flags |= MSG_DONTWAIT;
1951 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1952 if (err < 0)
1953 goto out_freeiov;
1954 len = err;
1955
1956 if (uaddr != NULL) {
89bddce5
SH
1957 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1958 uaddr_len);
1da177e4
LT
1959 if (err < 0)
1960 goto out_freeiov;
1961 }
37f7f421
DM
1962 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1963 COMPAT_FLAGS(msg));
1da177e4
LT
1964 if (err)
1965 goto out_freeiov;
1966 if (MSG_CMSG_COMPAT & flags)
89bddce5 1967 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1968 &msg_compat->msg_controllen);
1969 else
89bddce5 1970 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1971 &msg->msg_controllen);
1972 if (err)
1973 goto out_freeiov;
1974 err = len;
1975
1976out_freeiov:
1977 if (iov != iovstack)
1978 sock_kfree_s(sock->sk, iov, iov_size);
1979out_put:
6cb153ca 1980 fput_light(sock->file, fput_needed);
1da177e4
LT
1981out:
1982 return err;
1983}
1984
1985#ifdef __ARCH_WANT_SYS_SOCKETCALL
1986
/*
 * Argument list sizes for sys_socketcall, in bytes; the table is indexed
 * by the call number.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0), AL(3), AL(3),
	AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4),
	AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5),
	AL(5), AL(3), AL(3)
};

#undef AL
1996
1997/*
89bddce5 1998 * System call vectors.
1da177e4
LT
1999 *
2000 * Argument checking cleaned up. Saved 20% in size.
2001 * This function doesn't need to set the kernel lock because
89bddce5 2002 * it is set by the callees.
1da177e4
LT
2003 */
2004
2005asmlinkage long sys_socketcall(int call, unsigned long __user *args)
2006{
2007 unsigned long a[6];
89bddce5 2008 unsigned long a0, a1;
1da177e4
LT
2009 int err;
2010
89bddce5 2011 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
2012 return -EINVAL;
2013
2014 /* copy_from_user should be SMP safe. */
2015 if (copy_from_user(a, args, nargs[call]))
2016 return -EFAULT;
3ec3b2fb 2017
89bddce5 2018 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
2019 if (err)
2020 return err;
2021
89bddce5
SH
2022 a0 = a[0];
2023 a1 = a[1];
2024
2025 switch (call) {
2026 case SYS_SOCKET:
2027 err = sys_socket(a0, a1, a[2]);
2028 break;
2029 case SYS_BIND:
2030 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2031 break;
2032 case SYS_CONNECT:
2033 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2034 break;
2035 case SYS_LISTEN:
2036 err = sys_listen(a0, a1);
2037 break;
2038 case SYS_ACCEPT:
2039 err =
2040 sys_accept(a0, (struct sockaddr __user *)a1,
2041 (int __user *)a[2]);
2042 break;
2043 case SYS_GETSOCKNAME:
2044 err =
2045 sys_getsockname(a0, (struct sockaddr __user *)a1,
2046 (int __user *)a[2]);
2047 break;
2048 case SYS_GETPEERNAME:
2049 err =
2050 sys_getpeername(a0, (struct sockaddr __user *)a1,
2051 (int __user *)a[2]);
2052 break;
2053 case SYS_SOCKETPAIR:
2054 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2055 break;
2056 case SYS_SEND:
2057 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2058 break;
2059 case SYS_SENDTO:
2060 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2061 (struct sockaddr __user *)a[4], a[5]);
2062 break;
2063 case SYS_RECV:
2064 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2065 break;
2066 case SYS_RECVFROM:
2067 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2068 (struct sockaddr __user *)a[4],
2069 (int __user *)a[5]);
2070 break;
2071 case SYS_SHUTDOWN:
2072 err = sys_shutdown(a0, a1);
2073 break;
2074 case SYS_SETSOCKOPT:
2075 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2076 break;
2077 case SYS_GETSOCKOPT:
2078 err =
2079 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2080 (int __user *)a[4]);
2081 break;
2082 case SYS_SENDMSG:
2083 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2084 break;
2085 case SYS_RECVMSG:
2086 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2087 break;
2088 default:
2089 err = -EINVAL;
2090 break;
1da177e4
LT
2091 }
2092 return err;
2093}
2094
89bddce5 2095#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2096
55737fda
SH
2097/**
2098 * sock_register - add a socket protocol handler
2099 * @ops: description of protocol
2100 *
1da177e4
LT
2101 * This function is called by a protocol handler that wants to
2102 * advertise its address family, and have it linked into the
55737fda
SH
2103 * socket interface. The value ops->family coresponds to the
2104 * socket system call protocol family.
1da177e4 2105 */
f0fd27d4 2106int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2107{
2108 int err;
2109
2110 if (ops->family >= NPROTO) {
89bddce5
SH
2111 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2112 NPROTO);
1da177e4
LT
2113 return -ENOBUFS;
2114 }
55737fda
SH
2115
2116 spin_lock(&net_family_lock);
2117 if (net_families[ops->family])
2118 err = -EEXIST;
2119 else {
89bddce5 2120 net_families[ops->family] = ops;
1da177e4
LT
2121 err = 0;
2122 }
55737fda
SH
2123 spin_unlock(&net_family_lock);
2124
89bddce5 2125 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2126 return err;
2127}
2128
55737fda
SH
2129/**
2130 * sock_unregister - remove a protocol handler
2131 * @family: protocol family to remove
2132 *
1da177e4
LT
2133 * This function is called by a protocol handler that wants to
2134 * remove its address family, and have it unlinked from the
55737fda
SH
2135 * new socket creation.
2136 *
2137 * If protocol handler is a module, then it can use module reference
2138 * counts to protect against new references. If protocol handler is not
2139 * a module then it needs to provide its own protection in
2140 * the ops->create routine.
1da177e4 2141 */
f0fd27d4 2142void sock_unregister(int family)
1da177e4 2143{
f0fd27d4 2144 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2145
55737fda 2146 spin_lock(&net_family_lock);
89bddce5 2147 net_families[family] = NULL;
55737fda
SH
2148 spin_unlock(&net_family_lock);
2149
2150 synchronize_rcu();
2151
89bddce5 2152 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2153}
2154
77d76ea3 2155static int __init sock_init(void)
1da177e4
LT
2156{
2157 /*
89bddce5 2158 * Initialize sock SLAB cache.
1da177e4 2159 */
89bddce5 2160
1da177e4
LT
2161 sk_init();
2162
1da177e4 2163 /*
89bddce5 2164 * Initialize skbuff SLAB cache
1da177e4
LT
2165 */
2166 skb_init();
1da177e4
LT
2167
2168 /*
89bddce5 2169 * Initialize the protocols module.
1da177e4
LT
2170 */
2171
2172 init_inodecache();
2173 register_filesystem(&sock_fs_type);
2174 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2175
2176 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2177 */
2178
2179#ifdef CONFIG_NETFILTER
2180 netfilter_init();
2181#endif
cbeb321a
DM
2182
2183 return 0;
1da177e4
LT
2184}
2185
77d76ea3
AK
2186core_initcall(sock_init); /* early initcall */
2187
1da177e4
LT
2188#ifdef CONFIG_PROC_FS
2189void socket_seq_show(struct seq_file *seq)
2190{
2191 int cpu;
2192 int counter = 0;
2193
6f912042 2194 for_each_possible_cpu(cpu)
89bddce5 2195 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2196
2197 /* It can be negative, by the way. 8) */
2198 if (counter < 0)
2199 counter = 0;
2200
2201 seq_printf(seq, "sockets: used %d\n", counter);
2202}
89bddce5 2203#endif /* CONFIG_PROC_FS */
1da177e4 2204
89bbfc95
SP
2205#ifdef CONFIG_COMPAT
2206static long compat_sock_ioctl(struct file *file, unsigned cmd,
89bddce5 2207 unsigned long arg)
89bbfc95
SP
2208{
2209 struct socket *sock = file->private_data;
2210 int ret = -ENOIOCTLCMD;
2211
2212 if (sock->ops->compat_ioctl)
2213 ret = sock->ops->compat_ioctl(sock, cmd, arg);
2214
2215 return ret;
2216}
2217#endif
2218
ac5a488e
SS
2219int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2220{
2221 return sock->ops->bind(sock, addr, addrlen);
2222}
2223
2224int kernel_listen(struct socket *sock, int backlog)
2225{
2226 return sock->ops->listen(sock, backlog);
2227}
2228
2229int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2230{
2231 struct sock *sk = sock->sk;
2232 int err;
2233
2234 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2235 newsock);
2236 if (err < 0)
2237 goto done;
2238
2239 err = sock->ops->accept(sock, *newsock, flags);
2240 if (err < 0) {
2241 sock_release(*newsock);
fa8705b0 2242 *newsock = NULL;
ac5a488e
SS
2243 goto done;
2244 }
2245
2246 (*newsock)->ops = sock->ops;
2247
2248done:
2249 return err;
2250}
2251
2252int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2253 int flags)
ac5a488e
SS
2254{
2255 return sock->ops->connect(sock, addr, addrlen, flags);
2256}
2257
2258int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2259 int *addrlen)
2260{
2261 return sock->ops->getname(sock, addr, addrlen, 0);
2262}
2263
2264int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2265 int *addrlen)
2266{
2267 return sock->ops->getname(sock, addr, addrlen, 1);
2268}
2269
2270int kernel_getsockopt(struct socket *sock, int level, int optname,
2271 char *optval, int *optlen)
2272{
2273 mm_segment_t oldfs = get_fs();
2274 int err;
2275
2276 set_fs(KERNEL_DS);
2277 if (level == SOL_SOCKET)
2278 err = sock_getsockopt(sock, level, optname, optval, optlen);
2279 else
2280 err = sock->ops->getsockopt(sock, level, optname, optval,
2281 optlen);
2282 set_fs(oldfs);
2283 return err;
2284}
2285
2286int kernel_setsockopt(struct socket *sock, int level, int optname,
2287 char *optval, int optlen)
2288{
2289 mm_segment_t oldfs = get_fs();
2290 int err;
2291
2292 set_fs(KERNEL_DS);
2293 if (level == SOL_SOCKET)
2294 err = sock_setsockopt(sock, level, optname, optval, optlen);
2295 else
2296 err = sock->ops->setsockopt(sock, level, optname, optval,
2297 optlen);
2298 set_fs(oldfs);
2299 return err;
2300}
2301
2302int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2303 size_t size, int flags)
2304{
2305 if (sock->ops->sendpage)
2306 return sock->ops->sendpage(sock, page, offset, size, flags);
2307
2308 return sock_no_sendpage(sock, page, offset, size, flags);
2309}
2310
2311int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2312{
2313 mm_segment_t oldfs = get_fs();
2314 int err;
2315
2316 set_fs(KERNEL_DS);
2317 err = sock->ops->ioctl(sock, cmd, arg);
2318 set_fs(oldfs);
2319
2320 return err;
2321}
2322
91cf45f0
TM
2323int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
2324{
2325 return sock->ops->shutdown(sock, how);
2326}
2327
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket creation, registration and message-passing entry points */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() wrappers for in-kernel socket users */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);