[NET] NETNS: Omit net_device->nd_net without CONFIG_NET_NS.
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
93
94#include <net/sock.h>
95#include <linux/netfilter.h>
96
97static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
98static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
99 unsigned long nr_segs, loff_t pos);
100static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
101 unsigned long nr_segs, loff_t pos);
89bddce5 102static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
103
104static int sock_close(struct inode *inode, struct file *file);
105static unsigned int sock_poll(struct file *file,
106 struct poll_table_struct *wait);
89bddce5 107static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
108#ifdef CONFIG_COMPAT
109static long compat_sock_ioctl(struct file *file,
89bddce5 110 unsigned int cmd, unsigned long arg);
89bbfc95 111#endif
1da177e4 112static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
113static ssize_t sock_sendpage(struct file *file, struct page *page,
114 int offset, size_t size, loff_t *ppos, int more);
9c55e01c
JA
115static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
116 struct pipe_inode_info *pipe, size_t len,
117 unsigned int flags);
1da177e4 118
1da177e4
LT
119/*
120 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
121 * in the operation structures but are done directly via the socketcall() multiplexor.
122 */
123
da7071d7 124static const struct file_operations socket_file_ops = {
1da177e4
LT
125 .owner = THIS_MODULE,
126 .llseek = no_llseek,
127 .aio_read = sock_aio_read,
128 .aio_write = sock_aio_write,
129 .poll = sock_poll,
130 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
131#ifdef CONFIG_COMPAT
132 .compat_ioctl = compat_sock_ioctl,
133#endif
1da177e4
LT
134 .mmap = sock_mmap,
135 .open = sock_no_open, /* special open code to disallow open via /proc */
136 .release = sock_close,
137 .fasync = sock_fasync,
5274f052
JA
138 .sendpage = sock_sendpage,
139 .splice_write = generic_splice_sendpage,
9c55e01c 140 .splice_read = sock_splice_read,
1da177e4
LT
141};
142
143/*
144 * The protocol list. Each protocol is registered in here.
145 */
146
1da177e4 147static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 148static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 149
1da177e4
LT
150/*
151 * Statistics counters of the socket lists
152 */
153
154static DEFINE_PER_CPU(int, sockets_in_use) = 0;
155
/*
 * Support routines.
 * Move socket addresses back and forth across the kernel/user
 * divide and look after the messy bits.
 */
161
/*
 * Largest socket address accepted from or returned to user space:
 * 108 for Unix domain, 16 for IP, 16 for IPX, 24 for IPv6, about 80
 * for AX.25. Must be at least one bigger than the AF_UNIX size
 * (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 170
1da177e4
LT
171/**
172 * move_addr_to_kernel - copy a socket address into kernel space
173 * @uaddr: Address in user space
174 * @kaddr: Address in kernel space
175 * @ulen: Length in user space
176 *
177 * The address is copied into kernel space. If the provided address is
178 * too long an error code of -EINVAL is returned. If the copy gives
179 * invalid addresses -EFAULT is returned. On a success 0 is returned.
180 */
181
182int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
183{
89bddce5 184 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 185 return -EINVAL;
89bddce5 186 if (ulen == 0)
1da177e4 187 return 0;
89bddce5 188 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 189 return -EFAULT;
3ec3b2fb 190 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
191}
192
193/**
194 * move_addr_to_user - copy an address to user space
195 * @kaddr: kernel space address
196 * @klen: length of address in kernel
197 * @uaddr: user space address
198 * @ulen: pointer to user length field
199 *
200 * The value pointed to by ulen on entry is the buffer length available.
201 * This is overwritten with the buffer space used. -EINVAL is returned
202 * if an overlong buffer is specified or a negative buffer size. -EFAULT
203 * is returned if either the buffer or the length field are not
204 * accessible.
205 * After copying the data up to the limit the user specifies, the true
206 * length of the data is written over the length limit the user
207 * specified. Zero is returned for a success.
208 */
89bddce5
SH
209
210int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
211 int __user *ulen)
1da177e4
LT
212{
213 int err;
214 int len;
215
89bddce5
SH
216 err = get_user(len, ulen);
217 if (err)
1da177e4 218 return err;
89bddce5
SH
219 if (len > klen)
220 len = klen;
221 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 222 return -EINVAL;
89bddce5 223 if (len) {
d6fe3945
SG
224 if (audit_sockaddr(klen, kaddr))
225 return -ENOMEM;
89bddce5 226 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
227 return -EFAULT;
228 }
229 /*
89bddce5
SH
230 * "fromlen shall refer to the value before truncation.."
231 * 1003.1g
1da177e4
LT
232 */
233 return __put_user(klen, ulen);
234}
235
236#define SOCKFS_MAGIC 0x534F434B
237
e18b890b 238static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
239
240static struct inode *sock_alloc_inode(struct super_block *sb)
241{
242 struct socket_alloc *ei;
89bddce5 243
e94b1766 244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
245 if (!ei)
246 return NULL;
247 init_waitqueue_head(&ei->socket.wait);
89bddce5 248
1da177e4
LT
249 ei->socket.fasync_list = NULL;
250 ei->socket.state = SS_UNCONNECTED;
251 ei->socket.flags = 0;
252 ei->socket.ops = NULL;
253 ei->socket.sk = NULL;
254 ei->socket.file = NULL;
1da177e4
LT
255
256 return &ei->vfs_inode;
257}
258
259static void sock_destroy_inode(struct inode *inode)
260{
261 kmem_cache_free(sock_inode_cachep,
262 container_of(inode, struct socket_alloc, vfs_inode));
263}
264
4ba9b9d0 265static void init_once(struct kmem_cache *cachep, void *foo)
1da177e4 266{
89bddce5 267 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 268
a35afb83 269 inode_init_once(&ei->vfs_inode);
1da177e4 270}
89bddce5 271
1da177e4
LT
272static int init_inodecache(void)
273{
274 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
275 sizeof(struct socket_alloc),
276 0,
277 (SLAB_HWCACHE_ALIGN |
278 SLAB_RECLAIM_ACCOUNT |
279 SLAB_MEM_SPREAD),
20c2df83 280 init_once);
1da177e4
LT
281 if (sock_inode_cachep == NULL)
282 return -ENOMEM;
283 return 0;
284}
285
286static struct super_operations sockfs_ops = {
287 .alloc_inode = sock_alloc_inode,
288 .destroy_inode =sock_destroy_inode,
289 .statfs = simple_statfs,
290};
291
454e2398 292static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
293 int flags, const char *dev_name, void *data,
294 struct vfsmount *mnt)
1da177e4 295{
454e2398
DH
296 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
297 mnt);
1da177e4
LT
298}
299
ba89966c 300static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
301
302static struct file_system_type sock_fs_type = {
303 .name = "sockfs",
304 .get_sb = sockfs_get_sb,
305 .kill_sb = kill_anon_super,
306};
89bddce5 307
1da177e4
LT
308static int sockfs_delete_dentry(struct dentry *dentry)
309{
304e61e6
ED
310 /*
311 * At creation time, we pretended this dentry was hashed
312 * (by clearing DCACHE_UNHASHED bit in d_flags)
313 * At delete time, we restore the truth : not hashed.
314 * (so that dput() can proceed correctly)
315 */
316 dentry->d_flags |= DCACHE_UNHASHED;
317 return 0;
1da177e4 318}
c23fbb6b
ED
319
320/*
321 * sockfs_dname() is called from d_path().
322 */
323static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
324{
325 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
326 dentry->d_inode->i_ino);
327}
328
1da177e4 329static struct dentry_operations sockfs_dentry_operations = {
89bddce5 330 .d_delete = sockfs_delete_dentry,
c23fbb6b 331 .d_dname = sockfs_dname,
1da177e4
LT
332};
333
334/*
335 * Obtains the first available file descriptor and sets it up for use.
336 *
39d8c1b6
DM
337 * These functions create file structures and maps them to fd space
338 * of the current process. On success it returns file descriptor
1da177e4
LT
339 * and file struct implicitly stored in sock->file.
340 * Note that another thread may close file descriptor before we return
341 * from this function. We use the fact that now we do not refer
342 * to socket after mapping. If one day we will need it, this
343 * function will increment ref. count on file by 1.
344 *
345 * In any case returned fd MAY BE not valid!
346 * This race condition is unavoidable
347 * with shared fd spaces, we cannot solve it inside kernel,
348 * but we take care of internal coherence yet.
349 */
350
39d8c1b6 351static int sock_alloc_fd(struct file **filep)
1da177e4
LT
352{
353 int fd;
1da177e4
LT
354
355 fd = get_unused_fd();
39d8c1b6 356 if (likely(fd >= 0)) {
1da177e4
LT
357 struct file *file = get_empty_filp();
358
39d8c1b6
DM
359 *filep = file;
360 if (unlikely(!file)) {
1da177e4 361 put_unused_fd(fd);
39d8c1b6 362 return -ENFILE;
1da177e4 363 }
39d8c1b6
DM
364 } else
365 *filep = NULL;
366 return fd;
367}
1da177e4 368
39d8c1b6
DM
369static int sock_attach_fd(struct socket *sock, struct file *file)
370{
ce8d2cdf 371 struct dentry *dentry;
c23fbb6b 372 struct qstr name = { .name = "" };
39d8c1b6 373
ce8d2cdf
DH
374 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
375 if (unlikely(!dentry))
39d8c1b6
DM
376 return -ENOMEM;
377
ce8d2cdf 378 dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
379 /*
380 * We dont want to push this dentry into global dentry hash table.
381 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
382 * This permits a working /proc/$pid/fd/XXX on sockets
383 */
ce8d2cdf
DH
384 dentry->d_flags &= ~DCACHE_UNHASHED;
385 d_instantiate(dentry, SOCK_INODE(sock));
39d8c1b6
DM
386
387 sock->file = file;
ce8d2cdf
DH
388 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
389 &socket_file_ops);
390 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6
DM
391 file->f_flags = O_RDWR;
392 file->f_pos = 0;
393 file->private_data = sock;
1da177e4 394
39d8c1b6
DM
395 return 0;
396}
397
/*
 * Allocate a descriptor and file for @sock, attach them and install the
 * file into the current process. Returns the new descriptor, or the
 * negative error from sock_alloc_fd()/sock_attach_fd(); on attach
 * failure the file and descriptor are released again.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
415
6cb153ca
BL
416static struct socket *sock_from_file(struct file *file, int *err)
417{
6cb153ca
BL
418 if (file->f_op == &socket_file_ops)
419 return file->private_data; /* set in sock_map_fd */
420
23bb80d2
ED
421 *err = -ENOTSOCK;
422 return NULL;
6cb153ca
BL
423}
424
1da177e4
LT
425/**
426 * sockfd_lookup - Go from a file number to its socket slot
427 * @fd: file handle
428 * @err: pointer to an error code return
429 *
430 * The file handle passed in is locked and the socket it is bound
431 * too is returned. If an error occurs the err pointer is overwritten
432 * with a negative errno code and NULL is returned. The function checks
433 * for both invalid handles and passing a handle which is not a socket.
434 *
435 * On a success the socket object pointer is returned.
436 */
437
438struct socket *sockfd_lookup(int fd, int *err)
439{
440 struct file *file;
1da177e4
LT
441 struct socket *sock;
442
89bddce5
SH
443 file = fget(fd);
444 if (!file) {
1da177e4
LT
445 *err = -EBADF;
446 return NULL;
447 }
89bddce5 448
6cb153ca
BL
449 sock = sock_from_file(file, err);
450 if (!sock)
1da177e4 451 fput(file);
6cb153ca
BL
452 return sock;
453}
1da177e4 454
6cb153ca
BL
455static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
456{
457 struct file *file;
458 struct socket *sock;
459
3672558c 460 *err = -EBADF;
6cb153ca
BL
461 file = fget_light(fd, fput_needed);
462 if (file) {
463 sock = sock_from_file(file, err);
464 if (sock)
465 return sock;
466 fput_light(file, *fput_needed);
1da177e4 467 }
6cb153ca 468 return NULL;
1da177e4
LT
469}
470
471/**
472 * sock_alloc - allocate a socket
89bddce5 473 *
1da177e4
LT
474 * Allocate a new inode and socket object. The two are bound together
475 * and initialised. The socket is then returned. If we are out of inodes
476 * NULL is returned.
477 */
478
479static struct socket *sock_alloc(void)
480{
89bddce5
SH
481 struct inode *inode;
482 struct socket *sock;
1da177e4
LT
483
484 inode = new_inode(sock_mnt->mnt_sb);
485 if (!inode)
486 return NULL;
487
488 sock = SOCKET_I(inode);
489
89bddce5 490 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
491 inode->i_uid = current->fsuid;
492 inode->i_gid = current->fsgid;
493
494 get_cpu_var(sockets_in_use)++;
495 put_cpu_var(sockets_in_use);
496 return sock;
497}
498
499/*
500 * In theory you can't get an open on this inode, but /proc provides
501 * a back door. Remember to keep it shut otherwise you'll let the
502 * creepy crawlies in.
503 */
89bddce5 504
1da177e4
LT
505static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
506{
507 return -ENXIO;
508}
509
4b6f5d20 510const struct file_operations bad_sock_fops = {
1da177e4
LT
511 .owner = THIS_MODULE,
512 .open = sock_no_open,
513};
514
515/**
516 * sock_release - close a socket
517 * @sock: socket to close
518 *
519 * The socket is released from the protocol stack if it has a release
520 * callback, and the inode is then released if the socket is bound to
89bddce5 521 * an inode not a file.
1da177e4 522 */
89bddce5 523
1da177e4
LT
524void sock_release(struct socket *sock)
525{
526 if (sock->ops) {
527 struct module *owner = sock->ops->owner;
528
529 sock->ops->release(sock);
530 sock->ops = NULL;
531 module_put(owner);
532 }
533
534 if (sock->fasync_list)
535 printk(KERN_ERR "sock_release: fasync list not empty!\n");
536
537 get_cpu_var(sockets_in_use)--;
538 put_cpu_var(sockets_in_use);
539 if (!sock->file) {
540 iput(SOCK_INODE(sock));
541 return;
542 }
89bddce5 543 sock->file = NULL;
1da177e4
LT
544}
545
89bddce5 546static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
547 struct msghdr *msg, size_t size)
548{
549 struct sock_iocb *si = kiocb_to_siocb(iocb);
550 int err;
551
552 si->sock = sock;
553 si->scm = NULL;
554 si->msg = msg;
555 si->size = size;
556
557 err = security_socket_sendmsg(sock, msg, size);
558 if (err)
559 return err;
560
561 return sock->ops->sendmsg(iocb, sock, msg, size);
562}
563
564int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
565{
566 struct kiocb iocb;
567 struct sock_iocb siocb;
568 int ret;
569
570 init_sync_kiocb(&iocb, NULL);
571 iocb.private = &siocb;
572 ret = __sock_sendmsg(&iocb, sock, msg, size);
573 if (-EIOCBQUEUED == ret)
574 ret = wait_on_sync_kiocb(&iocb);
575 return ret;
576}
577
578int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
579 struct kvec *vec, size_t num, size_t size)
580{
581 mm_segment_t oldfs = get_fs();
582 int result;
583
584 set_fs(KERNEL_DS);
585 /*
586 * the following is safe, since for compiler definitions of kvec and
587 * iovec are identical, yielding the same in-core layout and alignment
588 */
89bddce5 589 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
590 msg->msg_iovlen = num;
591 result = sock_sendmsg(sock, msg, size);
592 set_fs(oldfs);
593 return result;
594}
595
92f37fd2
ED
596/*
597 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
598 */
599void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
600 struct sk_buff *skb)
601{
602 ktime_t kt = skb->tstamp;
603
604 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
605 struct timeval tv;
606 /* Race occurred between timestamp enabling and packet
607 receiving. Fill in the current time for now. */
608 if (kt.tv64 == 0)
609 kt = ktime_get_real();
610 skb->tstamp = kt;
611 tv = ktime_to_timeval(kt);
612 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
613 } else {
614 struct timespec ts;
615 /* Race occurred between timestamp enabling and packet
616 receiving. Fill in the current time for now. */
617 if (kt.tv64 == 0)
618 kt = ktime_get_real();
619 skb->tstamp = kt;
620 ts = ktime_to_timespec(kt);
621 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
622 }
623}
624
7c81fd8b
ACM
625EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
626
89bddce5 627static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
628 struct msghdr *msg, size_t size, int flags)
629{
630 int err;
631 struct sock_iocb *si = kiocb_to_siocb(iocb);
632
633 si->sock = sock;
634 si->scm = NULL;
635 si->msg = msg;
636 si->size = size;
637 si->flags = flags;
638
639 err = security_socket_recvmsg(sock, msg, size, flags);
640 if (err)
641 return err;
642
643 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
644}
645
89bddce5 646int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
647 size_t size, int flags)
648{
649 struct kiocb iocb;
650 struct sock_iocb siocb;
651 int ret;
652
89bddce5 653 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
654 iocb.private = &siocb;
655 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
656 if (-EIOCBQUEUED == ret)
657 ret = wait_on_sync_kiocb(&iocb);
658 return ret;
659}
660
89bddce5
SH
661int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
662 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
663{
664 mm_segment_t oldfs = get_fs();
665 int result;
666
667 set_fs(KERNEL_DS);
668 /*
669 * the following is safe, since for compiler definitions of kvec and
670 * iovec are identical, yielding the same in-core layout and alignment
671 */
89bddce5 672 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
673 result = sock_recvmsg(sock, msg, size, flags);
674 set_fs(oldfs);
675 return result;
676}
677
678static void sock_aio_dtor(struct kiocb *iocb)
679{
680 kfree(iocb->private);
681}
682
ce1d4d3e
CH
683static ssize_t sock_sendpage(struct file *file, struct page *page,
684 int offset, size_t size, loff_t *ppos, int more)
1da177e4 685{
1da177e4
LT
686 struct socket *sock;
687 int flags;
688
ce1d4d3e
CH
689 sock = file->private_data;
690
691 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
692 if (more)
693 flags |= MSG_MORE;
694
695 return sock->ops->sendpage(sock, page, offset, size, flags);
696}
1da177e4 697
9c55e01c
JA
698static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
699 struct pipe_inode_info *pipe, size_t len,
700 unsigned int flags)
701{
702 struct socket *sock = file->private_data;
703
997b37da
RDC
704 if (unlikely(!sock->ops->splice_read))
705 return -EINVAL;
706
9c55e01c
JA
707 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
708}
709
ce1d4d3e 710static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 711 struct sock_iocb *siocb)
ce1d4d3e
CH
712{
713 if (!is_sync_kiocb(iocb)) {
714 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
715 if (!siocb)
716 return NULL;
1da177e4
LT
717 iocb->ki_dtor = sock_aio_dtor;
718 }
1da177e4 719
ce1d4d3e 720 siocb->kiocb = iocb;
ce1d4d3e
CH
721 iocb->private = siocb;
722 return siocb;
1da177e4
LT
723}
724
ce1d4d3e 725static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
726 struct file *file, const struct iovec *iov,
727 unsigned long nr_segs)
ce1d4d3e
CH
728{
729 struct socket *sock = file->private_data;
730 size_t size = 0;
731 int i;
1da177e4 732
89bddce5
SH
733 for (i = 0; i < nr_segs; i++)
734 size += iov[i].iov_len;
1da177e4 735
ce1d4d3e
CH
736 msg->msg_name = NULL;
737 msg->msg_namelen = 0;
738 msg->msg_control = NULL;
739 msg->msg_controllen = 0;
89bddce5 740 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
741 msg->msg_iovlen = nr_segs;
742 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
743
744 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
745}
746
027445c3
BP
747static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
748 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
749{
750 struct sock_iocb siocb, *x;
751
1da177e4
LT
752 if (pos != 0)
753 return -ESPIPE;
027445c3
BP
754
755 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
756 return 0;
757
027445c3
BP
758
759 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
760 if (!x)
761 return -ENOMEM;
027445c3 762 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
763}
764
ce1d4d3e 765static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
766 struct file *file, const struct iovec *iov,
767 unsigned long nr_segs)
1da177e4 768{
ce1d4d3e
CH
769 struct socket *sock = file->private_data;
770 size_t size = 0;
771 int i;
1da177e4 772
89bddce5
SH
773 for (i = 0; i < nr_segs; i++)
774 size += iov[i].iov_len;
1da177e4 775
ce1d4d3e
CH
776 msg->msg_name = NULL;
777 msg->msg_namelen = 0;
778 msg->msg_control = NULL;
779 msg->msg_controllen = 0;
89bddce5 780 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
781 msg->msg_iovlen = nr_segs;
782 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
783 if (sock->type == SOCK_SEQPACKET)
784 msg->msg_flags |= MSG_EOR;
1da177e4 785
ce1d4d3e 786 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
787}
788
027445c3
BP
789static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
790 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
791{
792 struct sock_iocb siocb, *x;
1da177e4 793
ce1d4d3e
CH
794 if (pos != 0)
795 return -ESPIPE;
027445c3 796
027445c3 797 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
798 if (!x)
799 return -ENOMEM;
1da177e4 800
027445c3 801 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
802}
803
1da177e4
LT
804/*
805 * Atomic setting of ioctl hooks to avoid race
806 * with module unload.
807 */
808
4a3e2f71 809static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 810static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 811
881d966b 812void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 813{
4a3e2f71 814 mutex_lock(&br_ioctl_mutex);
1da177e4 815 br_ioctl_hook = hook;
4a3e2f71 816 mutex_unlock(&br_ioctl_mutex);
1da177e4 817}
89bddce5 818
1da177e4
LT
819EXPORT_SYMBOL(brioctl_set);
820
4a3e2f71 821static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 822static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 823
881d966b 824void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 825{
4a3e2f71 826 mutex_lock(&vlan_ioctl_mutex);
1da177e4 827 vlan_ioctl_hook = hook;
4a3e2f71 828 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 829}
89bddce5 830
1da177e4
LT
831EXPORT_SYMBOL(vlan_ioctl_set);
832
4a3e2f71 833static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 834static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 835
89bddce5 836void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 837{
4a3e2f71 838 mutex_lock(&dlci_ioctl_mutex);
1da177e4 839 dlci_ioctl_hook = hook;
4a3e2f71 840 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 841}
89bddce5 842
1da177e4
LT
843EXPORT_SYMBOL(dlci_ioctl_set);
844
845/*
846 * With an ioctl, arg may well be a user mode pointer, but we don't know
847 * what to do with it - that's up to the protocol still.
848 */
849
850static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
851{
852 struct socket *sock;
881d966b 853 struct sock *sk;
1da177e4
LT
854 void __user *argp = (void __user *)arg;
855 int pid, err;
881d966b 856 struct net *net;
1da177e4 857
b69aee04 858 sock = file->private_data;
881d966b
EB
859 sk = sock->sk;
860 net = sk->sk_net;
1da177e4 861 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 862 err = dev_ioctl(net, cmd, argp);
1da177e4 863 } else
d86b5e0e 864#ifdef CONFIG_WIRELESS_EXT
1da177e4 865 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 866 err = dev_ioctl(net, cmd, argp);
1da177e4 867 } else
89bddce5
SH
868#endif /* CONFIG_WIRELESS_EXT */
869 switch (cmd) {
1da177e4
LT
870 case FIOSETOWN:
871 case SIOCSPGRP:
872 err = -EFAULT;
873 if (get_user(pid, (int __user *)argp))
874 break;
875 err = f_setown(sock->file, pid, 1);
876 break;
877 case FIOGETOWN:
878 case SIOCGPGRP:
609d7fa9 879 err = put_user(f_getown(sock->file),
89bddce5 880 (int __user *)argp);
1da177e4
LT
881 break;
882 case SIOCGIFBR:
883 case SIOCSIFBR:
884 case SIOCBRADDBR:
885 case SIOCBRDELBR:
886 err = -ENOPKG;
887 if (!br_ioctl_hook)
888 request_module("bridge");
889
4a3e2f71 890 mutex_lock(&br_ioctl_mutex);
89bddce5 891 if (br_ioctl_hook)
881d966b 892 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 893 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
894 break;
895 case SIOCGIFVLAN:
896 case SIOCSIFVLAN:
897 err = -ENOPKG;
898 if (!vlan_ioctl_hook)
899 request_module("8021q");
900
4a3e2f71 901 mutex_lock(&vlan_ioctl_mutex);
1da177e4 902 if (vlan_ioctl_hook)
881d966b 903 err = vlan_ioctl_hook(net, argp);
4a3e2f71 904 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 905 break;
1da177e4
LT
906 case SIOCADDDLCI:
907 case SIOCDELDLCI:
908 err = -ENOPKG;
909 if (!dlci_ioctl_hook)
910 request_module("dlci");
911
7512cbf6
PE
912 mutex_lock(&dlci_ioctl_mutex);
913 if (dlci_ioctl_hook)
1da177e4 914 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 915 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
916 break;
917 default:
918 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
919
920 /*
921 * If this ioctl is unknown try to hand it down
922 * to the NIC driver.
923 */
924 if (err == -ENOIOCTLCMD)
881d966b 925 err = dev_ioctl(net, cmd, argp);
1da177e4 926 break;
89bddce5 927 }
1da177e4
LT
928 return err;
929}
930
931int sock_create_lite(int family, int type, int protocol, struct socket **res)
932{
933 int err;
934 struct socket *sock = NULL;
89bddce5 935
1da177e4
LT
936 err = security_socket_create(family, type, protocol, 1);
937 if (err)
938 goto out;
939
940 sock = sock_alloc();
941 if (!sock) {
942 err = -ENOMEM;
943 goto out;
944 }
945
1da177e4 946 sock->type = type;
7420ed23
VY
947 err = security_socket_post_create(sock, family, type, protocol, 1);
948 if (err)
949 goto out_release;
950
1da177e4
LT
951out:
952 *res = sock;
953 return err;
7420ed23
VY
954out_release:
955 sock_release(sock);
956 sock = NULL;
957 goto out;
1da177e4
LT
958}
959
960/* No kernel lock held - perfect */
89bddce5 961static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
962{
963 struct socket *sock;
964
965 /*
89bddce5 966 * We can't return errors to poll, so it's either yes or no.
1da177e4 967 */
b69aee04 968 sock = file->private_data;
1da177e4
LT
969 return sock->ops->poll(file, sock, wait);
970}
971
89bddce5 972static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 973{
b69aee04 974 struct socket *sock = file->private_data;
1da177e4
LT
975
976 return sock->ops->mmap(file, sock, vma);
977}
978
20380731 979static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
980{
981 /*
89bddce5
SH
982 * It was possible the inode is NULL we were
983 * closing an unfinished socket.
1da177e4
LT
984 */
985
89bddce5 986 if (!inode) {
1da177e4
LT
987 printk(KERN_DEBUG "sock_close: NULL inode\n");
988 return 0;
989 }
990 sock_fasync(-1, filp, 0);
991 sock_release(SOCKET_I(inode));
992 return 0;
993}
994
995/*
996 * Update the socket async list
997 *
998 * Fasync_list locking strategy.
999 *
1000 * 1. fasync_list is modified only under process context socket lock
1001 * i.e. under semaphore.
1002 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1003 * or under socket lock.
1004 * 3. fasync_list can be used from softirq context, so that
1005 * modification under socket lock have to be enhanced with
1006 * write_lock_bh(&sk->sk_callback_lock).
1007 * --ANK (990710)
1008 */
1009
1010static int sock_fasync(int fd, struct file *filp, int on)
1011{
89bddce5 1012 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1013 struct socket *sock;
1014 struct sock *sk;
1015
89bddce5 1016 if (on) {
8b3a7005 1017 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1018 if (fna == NULL)
1da177e4
LT
1019 return -ENOMEM;
1020 }
1021
b69aee04 1022 sock = filp->private_data;
1da177e4 1023
89bddce5
SH
1024 sk = sock->sk;
1025 if (sk == NULL) {
1da177e4
LT
1026 kfree(fna);
1027 return -EINVAL;
1028 }
1029
1030 lock_sock(sk);
1031
89bddce5 1032 prev = &(sock->fasync_list);
1da177e4 1033
89bddce5
SH
1034 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1035 if (fa->fa_file == filp)
1da177e4
LT
1036 break;
1037
89bddce5
SH
1038 if (on) {
1039 if (fa != NULL) {
1da177e4 1040 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1041 fa->fa_fd = fd;
1da177e4
LT
1042 write_unlock_bh(&sk->sk_callback_lock);
1043
1044 kfree(fna);
1045 goto out;
1046 }
89bddce5
SH
1047 fna->fa_file = filp;
1048 fna->fa_fd = fd;
1049 fna->magic = FASYNC_MAGIC;
1050 fna->fa_next = sock->fasync_list;
1da177e4 1051 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1052 sock->fasync_list = fna;
1da177e4 1053 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1054 } else {
1055 if (fa != NULL) {
1da177e4 1056 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1057 *prev = fa->fa_next;
1da177e4
LT
1058 write_unlock_bh(&sk->sk_callback_lock);
1059 kfree(fa);
1060 }
1061 }
1062
1063out:
1064 release_sock(sock->sk);
1065 return 0;
1066}
1067
1068/* This function may be called only under socket lock or callback_lock */
1069
1070int sock_wake_async(struct socket *sock, int how, int band)
1071{
1072 if (!sock || !sock->fasync_list)
1073 return -1;
89bddce5 1074 switch (how) {
8d8ad9d7 1075 case SOCK_WAKE_WAITD:
1da177e4
LT
1076 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1077 break;
1078 goto call_kill;
8d8ad9d7 1079 case SOCK_WAKE_SPACE:
1da177e4
LT
1080 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1081 break;
1082 /* fall through */
8d8ad9d7 1083 case SOCK_WAKE_IO:
89bddce5 1084call_kill:
1da177e4
LT
1085 __kill_fasync(sock->fasync_list, SIGIO, band);
1086 break;
8d8ad9d7 1087 case SOCK_WAKE_URG:
1da177e4
LT
1088 __kill_fasync(sock->fasync_list, SIGURG, band);
1089 }
1090 return 0;
1091}
1092
1b8d7ae4 1093static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1094 struct socket **res, int kern)
1da177e4
LT
1095{
1096 int err;
1097 struct socket *sock;
55737fda 1098 const struct net_proto_family *pf;
1da177e4
LT
1099
1100 /*
89bddce5 1101 * Check protocol is in range
1da177e4
LT
1102 */
1103 if (family < 0 || family >= NPROTO)
1104 return -EAFNOSUPPORT;
1105 if (type < 0 || type >= SOCK_MAX)
1106 return -EINVAL;
1107
1108 /* Compatibility.
1109
1110 This uglymoron is moved from INET layer to here to avoid
1111 deadlock in module load.
1112 */
1113 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1114 static int warned;
1da177e4
LT
1115 if (!warned) {
1116 warned = 1;
89bddce5
SH
1117 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1118 current->comm);
1da177e4
LT
1119 }
1120 family = PF_PACKET;
1121 }
1122
1123 err = security_socket_create(family, type, protocol, kern);
1124 if (err)
1125 return err;
89bddce5 1126
55737fda
SH
1127 /*
1128 * Allocate the socket and allow the family to set things up. if
1129 * the protocol is 0, the family is instructed to select an appropriate
1130 * default.
1131 */
1132 sock = sock_alloc();
1133 if (!sock) {
1134 if (net_ratelimit())
1135 printk(KERN_WARNING "socket: no more sockets\n");
1136 return -ENFILE; /* Not exactly a match, but its the
1137 closest posix thing */
1138 }
1139
1140 sock->type = type;
1141
1da177e4 1142#if defined(CONFIG_KMOD)
89bddce5
SH
1143 /* Attempt to load a protocol module if the find failed.
1144 *
1145 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1146 * requested real, full-featured networking support upon configuration.
1147 * Otherwise module support will break!
1148 */
55737fda 1149 if (net_families[family] == NULL)
89bddce5 1150 request_module("net-pf-%d", family);
1da177e4
LT
1151#endif
1152
55737fda
SH
1153 rcu_read_lock();
1154 pf = rcu_dereference(net_families[family]);
1155 err = -EAFNOSUPPORT;
1156 if (!pf)
1157 goto out_release;
1da177e4
LT
1158
1159 /*
1160 * We will call the ->create function, that possibly is in a loadable
1161 * module, so we have to bump that loadable module refcnt first.
1162 */
55737fda 1163 if (!try_module_get(pf->owner))
1da177e4
LT
1164 goto out_release;
1165
55737fda
SH
1166 /* Now protected by module ref count */
1167 rcu_read_unlock();
1168
1b8d7ae4 1169 err = pf->create(net, sock, protocol);
55737fda 1170 if (err < 0)
1da177e4 1171 goto out_module_put;
a79af59e 1172
1da177e4
LT
1173 /*
1174 * Now to bump the refcnt of the [loadable] module that owns this
1175 * socket at sock_release time we decrement its refcnt.
1176 */
55737fda
SH
1177 if (!try_module_get(sock->ops->owner))
1178 goto out_module_busy;
1179
1da177e4
LT
1180 /*
1181 * Now that we're done with the ->create function, the [loadable]
1182 * module can have its refcnt decremented
1183 */
55737fda 1184 module_put(pf->owner);
7420ed23
VY
1185 err = security_socket_post_create(sock, family, type, protocol, kern);
1186 if (err)
3b185525 1187 goto out_sock_release;
55737fda 1188 *res = sock;
1da177e4 1189
55737fda
SH
1190 return 0;
1191
1192out_module_busy:
1193 err = -EAFNOSUPPORT;
1da177e4 1194out_module_put:
55737fda
SH
1195 sock->ops = NULL;
1196 module_put(pf->owner);
1197out_sock_release:
1da177e4 1198 sock_release(sock);
55737fda
SH
1199 return err;
1200
1201out_release:
1202 rcu_read_unlock();
1203 goto out_sock_release;
1da177e4
LT
1204}
1205
1206int sock_create(int family, int type, int protocol, struct socket **res)
1207{
1b8d7ae4 1208 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1209}
1210
1211int sock_create_kern(int family, int type, int protocol, struct socket **res)
1212{
1b8d7ae4 1213 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1214}
1215
1216asmlinkage long sys_socket(int family, int type, int protocol)
1217{
1218 int retval;
1219 struct socket *sock;
1220
1221 retval = sock_create(family, type, protocol, &sock);
1222 if (retval < 0)
1223 goto out;
1224
1225 retval = sock_map_fd(sock);
1226 if (retval < 0)
1227 goto out_release;
1228
1229out:
1230 /* It may be already another descriptor 8) Not kernel problem. */
1231 return retval;
1232
1233out_release:
1234 sock_release(sock);
1235 return retval;
1236}
1237
1238/*
1239 * Create a pair of connected sockets.
1240 */
1241
89bddce5
SH
1242asmlinkage long sys_socketpair(int family, int type, int protocol,
1243 int __user *usockvec)
1da177e4
LT
1244{
1245 struct socket *sock1, *sock2;
1246 int fd1, fd2, err;
db349509 1247 struct file *newfile1, *newfile2;
1da177e4
LT
1248
1249 /*
1250 * Obtain the first socket and check if the underlying protocol
1251 * supports the socketpair call.
1252 */
1253
1254 err = sock_create(family, type, protocol, &sock1);
1255 if (err < 0)
1256 goto out;
1257
1258 err = sock_create(family, type, protocol, &sock2);
1259 if (err < 0)
1260 goto out_release_1;
1261
1262 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1263 if (err < 0)
1da177e4
LT
1264 goto out_release_both;
1265
db349509 1266 fd1 = sock_alloc_fd(&newfile1);
bf3c23d1
DM
1267 if (unlikely(fd1 < 0)) {
1268 err = fd1;
db349509 1269 goto out_release_both;
bf3c23d1 1270 }
1da177e4 1271
db349509
AV
1272 fd2 = sock_alloc_fd(&newfile2);
1273 if (unlikely(fd2 < 0)) {
bf3c23d1 1274 err = fd2;
db349509
AV
1275 put_filp(newfile1);
1276 put_unused_fd(fd1);
1da177e4 1277 goto out_release_both;
db349509 1278 }
1da177e4 1279
db349509
AV
1280 err = sock_attach_fd(sock1, newfile1);
1281 if (unlikely(err < 0)) {
1282 goto out_fd2;
1283 }
1284
1285 err = sock_attach_fd(sock2, newfile2);
1286 if (unlikely(err < 0)) {
1287 fput(newfile1);
1288 goto out_fd1;
1289 }
1290
1291 err = audit_fd_pair(fd1, fd2);
1292 if (err < 0) {
1293 fput(newfile1);
1294 fput(newfile2);
1295 goto out_fd;
1296 }
1da177e4 1297
db349509
AV
1298 fd_install(fd1, newfile1);
1299 fd_install(fd2, newfile2);
1da177e4
LT
1300 /* fd1 and fd2 may be already another descriptors.
1301 * Not kernel problem.
1302 */
1303
89bddce5 1304 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1305 if (!err)
1306 err = put_user(fd2, &usockvec[1]);
1307 if (!err)
1308 return 0;
1309
1310 sys_close(fd2);
1311 sys_close(fd1);
1312 return err;
1313
1da177e4 1314out_release_both:
89bddce5 1315 sock_release(sock2);
1da177e4 1316out_release_1:
89bddce5 1317 sock_release(sock1);
1da177e4
LT
1318out:
1319 return err;
db349509
AV
1320
1321out_fd2:
1322 put_filp(newfile1);
1323 sock_release(sock1);
1324out_fd1:
1325 put_filp(newfile2);
1326 sock_release(sock2);
1327out_fd:
1328 put_unused_fd(fd1);
1329 put_unused_fd(fd2);
1330 goto out;
1da177e4
LT
1331}
1332
1da177e4
LT
1333/*
1334 * Bind a name to a socket. Nothing much to do here since it's
1335 * the protocol's responsibility to handle the local address.
1336 *
1337 * We move the socket address to kernel space before we call
1338 * the protocol layer (having also checked the address is ok).
1339 */
1340
1341asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1342{
1343 struct socket *sock;
1344 char address[MAX_SOCK_ADDR];
6cb153ca 1345 int err, fput_needed;
1da177e4 1346
89bddce5 1347 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1348 if (sock) {
89bddce5
SH
1349 err = move_addr_to_kernel(umyaddr, addrlen, address);
1350 if (err >= 0) {
1351 err = security_socket_bind(sock,
1352 (struct sockaddr *)address,
1353 addrlen);
6cb153ca
BL
1354 if (!err)
1355 err = sock->ops->bind(sock,
89bddce5
SH
1356 (struct sockaddr *)
1357 address, addrlen);
1da177e4 1358 }
6cb153ca 1359 fput_light(sock->file, fput_needed);
89bddce5 1360 }
1da177e4
LT
1361 return err;
1362}
1363
1da177e4
LT
1364/*
1365 * Perform a listen. Basically, we allow the protocol to do anything
1366 * necessary for a listen, and if that works, we mark the socket as
1367 * ready for listening.
1368 */
1369
1da177e4
LT
1370asmlinkage long sys_listen(int fd, int backlog)
1371{
1372 struct socket *sock;
6cb153ca 1373 int err, fput_needed;
b8e1f9b5 1374 int somaxconn;
89bddce5
SH
1375
1376 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1377 if (sock) {
b8e1f9b5
PE
1378 somaxconn = sock->sk->sk_net->sysctl_somaxconn;
1379 if ((unsigned)backlog > somaxconn)
1380 backlog = somaxconn;
1da177e4
LT
1381
1382 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1383 if (!err)
1384 err = sock->ops->listen(sock, backlog);
1da177e4 1385
6cb153ca 1386 fput_light(sock->file, fput_needed);
1da177e4
LT
1387 }
1388 return err;
1389}
1390
1da177e4
LT
1391/*
1392 * For accept, we attempt to create a new socket, set up the link
1393 * with the client, wake up the client, then return the new
1394 * connected fd. We collect the address of the connector in kernel
1395 * space and move it to user at the very end. This is unclean because
1396 * we open the socket then return an error.
1397 *
1398 * 1003.1g adds the ability to recvmsg() to query connection pending
1399 * status to recvmsg. We need to add that support in a way thats
1400 * clean when we restucture accept also.
1401 */
1402
89bddce5
SH
1403asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1404 int __user *upeer_addrlen)
1da177e4
LT
1405{
1406 struct socket *sock, *newsock;
39d8c1b6 1407 struct file *newfile;
6cb153ca 1408 int err, len, newfd, fput_needed;
1da177e4
LT
1409 char address[MAX_SOCK_ADDR];
1410
6cb153ca 1411 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1412 if (!sock)
1413 goto out;
1414
1415 err = -ENFILE;
89bddce5 1416 if (!(newsock = sock_alloc()))
1da177e4
LT
1417 goto out_put;
1418
1419 newsock->type = sock->type;
1420 newsock->ops = sock->ops;
1421
1da177e4
LT
1422 /*
1423 * We don't need try_module_get here, as the listening socket (sock)
1424 * has the protocol module (sock->ops->owner) held.
1425 */
1426 __module_get(newsock->ops->owner);
1427
39d8c1b6
DM
1428 newfd = sock_alloc_fd(&newfile);
1429 if (unlikely(newfd < 0)) {
1430 err = newfd;
9a1875e6
DM
1431 sock_release(newsock);
1432 goto out_put;
39d8c1b6
DM
1433 }
1434
1435 err = sock_attach_fd(newsock, newfile);
1436 if (err < 0)
79f4f642 1437 goto out_fd_simple;
39d8c1b6 1438
a79af59e
FF
1439 err = security_socket_accept(sock, newsock);
1440 if (err)
39d8c1b6 1441 goto out_fd;
a79af59e 1442
1da177e4
LT
1443 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1444 if (err < 0)
39d8c1b6 1445 goto out_fd;
1da177e4
LT
1446
1447 if (upeer_sockaddr) {
89bddce5
SH
1448 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1449 &len, 2) < 0) {
1da177e4 1450 err = -ECONNABORTED;
39d8c1b6 1451 goto out_fd;
1da177e4 1452 }
89bddce5
SH
1453 err = move_addr_to_user(address, len, upeer_sockaddr,
1454 upeer_addrlen);
1da177e4 1455 if (err < 0)
39d8c1b6 1456 goto out_fd;
1da177e4
LT
1457 }
1458
1459 /* File flags are not inherited via accept() unlike another OSes. */
1460
39d8c1b6
DM
1461 fd_install(newfd, newfile);
1462 err = newfd;
1da177e4
LT
1463
1464 security_socket_post_accept(sock, newsock);
1465
1466out_put:
6cb153ca 1467 fput_light(sock->file, fput_needed);
1da177e4
LT
1468out:
1469 return err;
79f4f642
AD
1470out_fd_simple:
1471 sock_release(newsock);
1472 put_filp(newfile);
1473 put_unused_fd(newfd);
1474 goto out_put;
39d8c1b6 1475out_fd:
9606a216 1476 fput(newfile);
39d8c1b6 1477 put_unused_fd(newfd);
1da177e4
LT
1478 goto out_put;
1479}
1480
1da177e4
LT
1481/*
1482 * Attempt to connect to a socket with the server address. The address
1483 * is in user space so we verify it is OK and move it to kernel space.
1484 *
1485 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1486 * break bindings
1487 *
1488 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1489 * other SEQPACKET protocols that take time to connect() as it doesn't
1490 * include the -EINPROGRESS status for such sockets.
1491 */
1492
89bddce5
SH
1493asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1494 int addrlen)
1da177e4
LT
1495{
1496 struct socket *sock;
1497 char address[MAX_SOCK_ADDR];
6cb153ca 1498 int err, fput_needed;
1da177e4 1499
6cb153ca 1500 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1501 if (!sock)
1502 goto out;
1503 err = move_addr_to_kernel(uservaddr, addrlen, address);
1504 if (err < 0)
1505 goto out_put;
1506
89bddce5
SH
1507 err =
1508 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1509 if (err)
1510 goto out_put;
1511
89bddce5 1512 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1513 sock->file->f_flags);
1514out_put:
6cb153ca 1515 fput_light(sock->file, fput_needed);
1da177e4
LT
1516out:
1517 return err;
1518}
1519
1520/*
1521 * Get the local address ('name') of a socket object. Move the obtained
1522 * name to user space.
1523 */
1524
89bddce5
SH
1525asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1526 int __user *usockaddr_len)
1da177e4
LT
1527{
1528 struct socket *sock;
1529 char address[MAX_SOCK_ADDR];
6cb153ca 1530 int len, err, fput_needed;
89bddce5 1531
6cb153ca 1532 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1533 if (!sock)
1534 goto out;
1535
1536 err = security_socket_getsockname(sock);
1537 if (err)
1538 goto out_put;
1539
1540 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1541 if (err)
1542 goto out_put;
1543 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1544
1545out_put:
6cb153ca 1546 fput_light(sock->file, fput_needed);
1da177e4
LT
1547out:
1548 return err;
1549}
1550
1551/*
1552 * Get the remote address ('name') of a socket object. Move the obtained
1553 * name to user space.
1554 */
1555
89bddce5
SH
1556asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1557 int __user *usockaddr_len)
1da177e4
LT
1558{
1559 struct socket *sock;
1560 char address[MAX_SOCK_ADDR];
6cb153ca 1561 int len, err, fput_needed;
1da177e4 1562
89bddce5
SH
1563 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1564 if (sock != NULL) {
1da177e4
LT
1565 err = security_socket_getpeername(sock);
1566 if (err) {
6cb153ca 1567 fput_light(sock->file, fput_needed);
1da177e4
LT
1568 return err;
1569 }
1570
89bddce5
SH
1571 err =
1572 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1573 1);
1da177e4 1574 if (!err)
89bddce5
SH
1575 err = move_addr_to_user(address, len, usockaddr,
1576 usockaddr_len);
6cb153ca 1577 fput_light(sock->file, fput_needed);
1da177e4
LT
1578 }
1579 return err;
1580}
1581
1582/*
1583 * Send a datagram to a given address. We move the address into kernel
1584 * space and check the user space data area is readable before invoking
1585 * the protocol.
1586 */
1587
89bddce5
SH
1588asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1589 unsigned flags, struct sockaddr __user *addr,
1590 int addr_len)
1da177e4
LT
1591{
1592 struct socket *sock;
1593 char address[MAX_SOCK_ADDR];
1594 int err;
1595 struct msghdr msg;
1596 struct iovec iov;
6cb153ca 1597 int fput_needed;
6cb153ca 1598
de0fa95c
PE
1599 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1600 if (!sock)
4387ff75 1601 goto out;
6cb153ca 1602
89bddce5
SH
1603 iov.iov_base = buff;
1604 iov.iov_len = len;
1605 msg.msg_name = NULL;
1606 msg.msg_iov = &iov;
1607 msg.msg_iovlen = 1;
1608 msg.msg_control = NULL;
1609 msg.msg_controllen = 0;
1610 msg.msg_namelen = 0;
6cb153ca 1611 if (addr) {
1da177e4
LT
1612 err = move_addr_to_kernel(addr, addr_len, address);
1613 if (err < 0)
1614 goto out_put;
89bddce5
SH
1615 msg.msg_name = address;
1616 msg.msg_namelen = addr_len;
1da177e4
LT
1617 }
1618 if (sock->file->f_flags & O_NONBLOCK)
1619 flags |= MSG_DONTWAIT;
1620 msg.msg_flags = flags;
1621 err = sock_sendmsg(sock, &msg, len);
1622
89bddce5 1623out_put:
de0fa95c 1624 fput_light(sock->file, fput_needed);
4387ff75 1625out:
1da177e4
LT
1626 return err;
1627}
1628
1629/*
89bddce5 1630 * Send a datagram down a socket.
1da177e4
LT
1631 */
1632
89bddce5 1633asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1634{
1635 return sys_sendto(fd, buff, len, flags, NULL, 0);
1636}
1637
1638/*
89bddce5 1639 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1640 * sender. We verify the buffers are writable and if needed move the
1641 * sender address from kernel to user space.
1642 */
1643
89bddce5
SH
1644asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1645 unsigned flags, struct sockaddr __user *addr,
1646 int __user *addr_len)
1da177e4
LT
1647{
1648 struct socket *sock;
1649 struct iovec iov;
1650 struct msghdr msg;
1651 char address[MAX_SOCK_ADDR];
89bddce5 1652 int err, err2;
6cb153ca
BL
1653 int fput_needed;
1654
de0fa95c 1655 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1656 if (!sock)
de0fa95c 1657 goto out;
1da177e4 1658
89bddce5
SH
1659 msg.msg_control = NULL;
1660 msg.msg_controllen = 0;
1661 msg.msg_iovlen = 1;
1662 msg.msg_iov = &iov;
1663 iov.iov_len = size;
1664 iov.iov_base = ubuf;
1665 msg.msg_name = address;
1666 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1667 if (sock->file->f_flags & O_NONBLOCK)
1668 flags |= MSG_DONTWAIT;
89bddce5 1669 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1670
89bddce5
SH
1671 if (err >= 0 && addr != NULL) {
1672 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1673 if (err2 < 0)
1674 err = err2;
1da177e4 1675 }
de0fa95c
PE
1676
1677 fput_light(sock->file, fput_needed);
4387ff75 1678out:
1da177e4
LT
1679 return err;
1680}
1681
1682/*
89bddce5 1683 * Receive a datagram from a socket.
1da177e4
LT
1684 */
1685
89bddce5
SH
1686asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1687 unsigned flags)
1da177e4
LT
1688{
1689 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1690}
1691
1692/*
1693 * Set a socket option. Because we don't know the option lengths we have
1694 * to pass the user mode parameter for the protocols to sort out.
1695 */
1696
89bddce5
SH
1697asmlinkage long sys_setsockopt(int fd, int level, int optname,
1698 char __user *optval, int optlen)
1da177e4 1699{
6cb153ca 1700 int err, fput_needed;
1da177e4
LT
1701 struct socket *sock;
1702
1703 if (optlen < 0)
1704 return -EINVAL;
89bddce5
SH
1705
1706 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1707 if (sock != NULL) {
1708 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1709 if (err)
1710 goto out_put;
1da177e4
LT
1711
1712 if (level == SOL_SOCKET)
89bddce5
SH
1713 err =
1714 sock_setsockopt(sock, level, optname, optval,
1715 optlen);
1da177e4 1716 else
89bddce5
SH
1717 err =
1718 sock->ops->setsockopt(sock, level, optname, optval,
1719 optlen);
6cb153ca
BL
1720out_put:
1721 fput_light(sock->file, fput_needed);
1da177e4
LT
1722 }
1723 return err;
1724}
1725
1726/*
1727 * Get a socket option. Because we don't know the option lengths we have
1728 * to pass a user mode parameter for the protocols to sort out.
1729 */
1730
89bddce5
SH
1731asmlinkage long sys_getsockopt(int fd, int level, int optname,
1732 char __user *optval, int __user *optlen)
1da177e4 1733{
6cb153ca 1734 int err, fput_needed;
1da177e4
LT
1735 struct socket *sock;
1736
89bddce5
SH
1737 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1738 if (sock != NULL) {
6cb153ca
BL
1739 err = security_socket_getsockopt(sock, level, optname);
1740 if (err)
1741 goto out_put;
1da177e4
LT
1742
1743 if (level == SOL_SOCKET)
89bddce5
SH
1744 err =
1745 sock_getsockopt(sock, level, optname, optval,
1746 optlen);
1da177e4 1747 else
89bddce5
SH
1748 err =
1749 sock->ops->getsockopt(sock, level, optname, optval,
1750 optlen);
6cb153ca
BL
1751out_put:
1752 fput_light(sock->file, fput_needed);
1da177e4
LT
1753 }
1754 return err;
1755}
1756
1da177e4
LT
1757/*
1758 * Shutdown a socket.
1759 */
1760
1761asmlinkage long sys_shutdown(int fd, int how)
1762{
6cb153ca 1763 int err, fput_needed;
1da177e4
LT
1764 struct socket *sock;
1765
89bddce5
SH
1766 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1767 if (sock != NULL) {
1da177e4 1768 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1769 if (!err)
1770 err = sock->ops->shutdown(sock, how);
1771 fput_light(sock->file, fput_needed);
1da177e4
LT
1772 }
1773 return err;
1774}
1775
89bddce5 1776/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1777 * fields which are the same type (int / unsigned) on our platforms.
1778 */
/*
 * COMPAT_MSG() yields the address of <member> in the compat layout when
 * MSG_CMSG_COMPAT is set in `flags`, and in the native layout otherwise.
 * It is not self-contained: the expansion refers to a variable literally
 * named `flags` and to a pointer named <msg>_compat, so both must be in
 * scope wherever these macros are used.
 */
1779#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
1780#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1781#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1782
1da177e4
LT
1783/*
1784 * BSD sendmsg interface
1785 */
1786
1787asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1788{
89bddce5
SH
1789 struct compat_msghdr __user *msg_compat =
1790 (struct compat_msghdr __user *)msg;
1da177e4
LT
1791 struct socket *sock;
1792 char address[MAX_SOCK_ADDR];
1793 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1794 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1795 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1796 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1797 unsigned char *ctl_buf = ctl;
1798 struct msghdr msg_sys;
1799 int err, ctl_len, iov_size, total_len;
6cb153ca 1800 int fput_needed;
89bddce5 1801
1da177e4
LT
1802 err = -EFAULT;
1803 if (MSG_CMSG_COMPAT & flags) {
1804 if (get_compat_msghdr(&msg_sys, msg_compat))
1805 return -EFAULT;
89bddce5
SH
1806 }
1807 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1808 return -EFAULT;
1809
6cb153ca 1810 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1811 if (!sock)
1da177e4
LT
1812 goto out;
1813
1814 /* do not move before msg_sys is valid */
1815 err = -EMSGSIZE;
1816 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1817 goto out_put;
1818
89bddce5 1819 /* Check whether to allocate the iovec area */
1da177e4
LT
1820 err = -ENOMEM;
1821 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1822 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1823 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1824 if (!iov)
1825 goto out_put;
1826 }
1827
1828 /* This will also move the address data into kernel space */
1829 if (MSG_CMSG_COMPAT & flags) {
1830 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1831 } else
1832 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1833 if (err < 0)
1da177e4
LT
1834 goto out_freeiov;
1835 total_len = err;
1836
1837 err = -ENOBUFS;
1838
1839 if (msg_sys.msg_controllen > INT_MAX)
1840 goto out_freeiov;
89bddce5 1841 ctl_len = msg_sys.msg_controllen;
1da177e4 1842 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1843 err =
1844 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1845 sizeof(ctl));
1da177e4
LT
1846 if (err)
1847 goto out_freeiov;
1848 ctl_buf = msg_sys.msg_control;
8920e8f9 1849 ctl_len = msg_sys.msg_controllen;
1da177e4 1850 } else if (ctl_len) {
89bddce5 1851 if (ctl_len > sizeof(ctl)) {
1da177e4 1852 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1853 if (ctl_buf == NULL)
1da177e4
LT
1854 goto out_freeiov;
1855 }
1856 err = -EFAULT;
1857 /*
1858 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1859 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1860 * checking falls down on this.
1861 */
89bddce5
SH
1862 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1863 ctl_len))
1da177e4
LT
1864 goto out_freectl;
1865 msg_sys.msg_control = ctl_buf;
1866 }
1867 msg_sys.msg_flags = flags;
1868
1869 if (sock->file->f_flags & O_NONBLOCK)
1870 msg_sys.msg_flags |= MSG_DONTWAIT;
1871 err = sock_sendmsg(sock, &msg_sys, total_len);
1872
1873out_freectl:
89bddce5 1874 if (ctl_buf != ctl)
1da177e4
LT
1875 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1876out_freeiov:
1877 if (iov != iovstack)
1878 sock_kfree_s(sock->sk, iov, iov_size);
1879out_put:
6cb153ca 1880 fput_light(sock->file, fput_needed);
89bddce5 1881out:
1da177e4
LT
1882 return err;
1883}
1884
1885/*
1886 * BSD recvmsg interface
1887 */
1888
89bddce5
SH
1889asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1890 unsigned int flags)
1da177e4 1891{
89bddce5
SH
1892 struct compat_msghdr __user *msg_compat =
1893 (struct compat_msghdr __user *)msg;
1da177e4
LT
1894 struct socket *sock;
1895 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1896 struct iovec *iov = iovstack;
1da177e4
LT
1897 struct msghdr msg_sys;
1898 unsigned long cmsg_ptr;
1899 int err, iov_size, total_len, len;
6cb153ca 1900 int fput_needed;
1da177e4
LT
1901
1902 /* kernel mode address */
1903 char addr[MAX_SOCK_ADDR];
1904
1905 /* user mode address pointers */
1906 struct sockaddr __user *uaddr;
1907 int __user *uaddr_len;
89bddce5 1908
1da177e4
LT
1909 if (MSG_CMSG_COMPAT & flags) {
1910 if (get_compat_msghdr(&msg_sys, msg_compat))
1911 return -EFAULT;
89bddce5
SH
1912 }
1913 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1914 return -EFAULT;
1da177e4 1915
6cb153ca 1916 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1917 if (!sock)
1918 goto out;
1919
1920 err = -EMSGSIZE;
1921 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1922 goto out_put;
89bddce5
SH
1923
1924 /* Check whether to allocate the iovec area */
1da177e4
LT
1925 err = -ENOMEM;
1926 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1927 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1928 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1929 if (!iov)
1930 goto out_put;
1931 }
1932
1933 /*
89bddce5
SH
1934 * Save the user-mode address (verify_iovec will change the
1935 * kernel msghdr to use the kernel address space)
1da177e4 1936 */
89bddce5 1937
cfcabdcc 1938 uaddr = (__force void __user *)msg_sys.msg_name;
1da177e4
LT
1939 uaddr_len = COMPAT_NAMELEN(msg);
1940 if (MSG_CMSG_COMPAT & flags) {
1941 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1942 } else
1943 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1944 if (err < 0)
1945 goto out_freeiov;
89bddce5 1946 total_len = err;
1da177e4
LT
1947
1948 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 1949 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 1950
1da177e4
LT
1951 if (sock->file->f_flags & O_NONBLOCK)
1952 flags |= MSG_DONTWAIT;
1953 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1954 if (err < 0)
1955 goto out_freeiov;
1956 len = err;
1957
1958 if (uaddr != NULL) {
89bddce5
SH
1959 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1960 uaddr_len);
1da177e4
LT
1961 if (err < 0)
1962 goto out_freeiov;
1963 }
37f7f421
DM
1964 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1965 COMPAT_FLAGS(msg));
1da177e4
LT
1966 if (err)
1967 goto out_freeiov;
1968 if (MSG_CMSG_COMPAT & flags)
89bddce5 1969 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1970 &msg_compat->msg_controllen);
1971 else
89bddce5 1972 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1973 &msg->msg_controllen);
1974 if (err)
1975 goto out_freeiov;
1976 err = len;
1977
1978out_freeiov:
1979 if (iov != iovstack)
1980 sock_kfree_s(sock->sk, iov, iov_size);
1981out_put:
6cb153ca 1982 fput_light(sock->file, fput_needed);
1da177e4
LT
1983out:
1984 return err;
1985}
1986
1987#ifdef __ARCH_WANT_SYS_SOCKETCALL
1988
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call
 * number.  AL(n) is the size of n unsigned long arguments.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0), AL(3), AL(3),	/*  0 ..  2 */
	AL(3), AL(2), AL(3),	/*  3 ..  5 */
	AL(3), AL(3), AL(4),	/*  6 ..  8 */
	AL(4), AL(4), AL(6),	/*  9 .. 11 */
	AL(6), AL(2), AL(5),	/* 12 .. 14 */
	AL(5), AL(3), AL(3),	/* 15 .. 17 */
};

#undef AL
1998
1999/*
89bddce5 2000 * System call vectors.
1da177e4
LT
2001 *
2002 * Argument checking cleaned up. Saved 20% in size.
2003 * This function doesn't need to set the kernel lock because
89bddce5 2004 * it is set by the callees.
1da177e4
LT
2005 */
2006
2007asmlinkage long sys_socketcall(int call, unsigned long __user *args)
2008{
2009 unsigned long a[6];
89bddce5 2010 unsigned long a0, a1;
1da177e4
LT
2011 int err;
2012
89bddce5 2013 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
2014 return -EINVAL;
2015
2016 /* copy_from_user should be SMP safe. */
2017 if (copy_from_user(a, args, nargs[call]))
2018 return -EFAULT;
3ec3b2fb 2019
89bddce5 2020 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
2021 if (err)
2022 return err;
2023
89bddce5
SH
2024 a0 = a[0];
2025 a1 = a[1];
2026
2027 switch (call) {
2028 case SYS_SOCKET:
2029 err = sys_socket(a0, a1, a[2]);
2030 break;
2031 case SYS_BIND:
2032 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2033 break;
2034 case SYS_CONNECT:
2035 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2036 break;
2037 case SYS_LISTEN:
2038 err = sys_listen(a0, a1);
2039 break;
2040 case SYS_ACCEPT:
2041 err =
2042 sys_accept(a0, (struct sockaddr __user *)a1,
2043 (int __user *)a[2]);
2044 break;
2045 case SYS_GETSOCKNAME:
2046 err =
2047 sys_getsockname(a0, (struct sockaddr __user *)a1,
2048 (int __user *)a[2]);
2049 break;
2050 case SYS_GETPEERNAME:
2051 err =
2052 sys_getpeername(a0, (struct sockaddr __user *)a1,
2053 (int __user *)a[2]);
2054 break;
2055 case SYS_SOCKETPAIR:
2056 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2057 break;
2058 case SYS_SEND:
2059 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2060 break;
2061 case SYS_SENDTO:
2062 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2063 (struct sockaddr __user *)a[4], a[5]);
2064 break;
2065 case SYS_RECV:
2066 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2067 break;
2068 case SYS_RECVFROM:
2069 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2070 (struct sockaddr __user *)a[4],
2071 (int __user *)a[5]);
2072 break;
2073 case SYS_SHUTDOWN:
2074 err = sys_shutdown(a0, a1);
2075 break;
2076 case SYS_SETSOCKOPT:
2077 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2078 break;
2079 case SYS_GETSOCKOPT:
2080 err =
2081 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2082 (int __user *)a[4]);
2083 break;
2084 case SYS_SENDMSG:
2085 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2086 break;
2087 case SYS_RECVMSG:
2088 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2089 break;
2090 default:
2091 err = -EINVAL;
2092 break;
1da177e4
LT
2093 }
2094 return err;
2095}
2096
89bddce5 2097#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2098
55737fda
SH
2099/**
2100 * sock_register - add a socket protocol handler
2101 * @ops: description of protocol
2102 *
1da177e4
LT
2103 * This function is called by a protocol handler that wants to
2104 * advertise its address family, and have it linked into the
55737fda
SH
2105 * socket interface. The value ops->family coresponds to the
2106 * socket system call protocol family.
1da177e4 2107 */
f0fd27d4 2108int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2109{
2110 int err;
2111
2112 if (ops->family >= NPROTO) {
89bddce5
SH
2113 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2114 NPROTO);
1da177e4
LT
2115 return -ENOBUFS;
2116 }
55737fda
SH
2117
2118 spin_lock(&net_family_lock);
2119 if (net_families[ops->family])
2120 err = -EEXIST;
2121 else {
89bddce5 2122 net_families[ops->family] = ops;
1da177e4
LT
2123 err = 0;
2124 }
55737fda
SH
2125 spin_unlock(&net_family_lock);
2126
89bddce5 2127 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2128 return err;
2129}
2130
55737fda
SH
2131/**
2132 * sock_unregister - remove a protocol handler
2133 * @family: protocol family to remove
2134 *
1da177e4
LT
2135 * This function is called by a protocol handler that wants to
2136 * remove its address family, and have it unlinked from the
55737fda
SH
2137 * new socket creation.
2138 *
2139 * If protocol handler is a module, then it can use module reference
2140 * counts to protect against new references. If protocol handler is not
2141 * a module then it needs to provide its own protection in
2142 * the ops->create routine.
1da177e4 2143 */
f0fd27d4 2144void sock_unregister(int family)
1da177e4 2145{
f0fd27d4 2146 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2147
55737fda 2148 spin_lock(&net_family_lock);
89bddce5 2149 net_families[family] = NULL;
55737fda
SH
2150 spin_unlock(&net_family_lock);
2151
2152 synchronize_rcu();
2153
89bddce5 2154 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2155}
2156
77d76ea3 2157static int __init sock_init(void)
1da177e4
LT
2158{
2159 /*
89bddce5 2160 * Initialize sock SLAB cache.
1da177e4 2161 */
89bddce5 2162
1da177e4
LT
2163 sk_init();
2164
1da177e4 2165 /*
89bddce5 2166 * Initialize skbuff SLAB cache
1da177e4
LT
2167 */
2168 skb_init();
1da177e4
LT
2169
2170 /*
89bddce5 2171 * Initialize the protocols module.
1da177e4
LT
2172 */
2173
2174 init_inodecache();
2175 register_filesystem(&sock_fs_type);
2176 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2177
2178 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2179 */
2180
2181#ifdef CONFIG_NETFILTER
2182 netfilter_init();
2183#endif
cbeb321a
DM
2184
2185 return 0;
1da177e4
LT
2186}
2187
77d76ea3
AK
2188core_initcall(sock_init); /* early initcall */
2189
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * socket_seq_show - print the "sockets: used N" line into @seq.
 *
 * N is the sum of the per-CPU sockets_in_use counters over all possible
 * CPUs, reported as 0 if the sum comes out negative.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* It can be negative, by the way. 8) */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif /* CONFIG_PROC_FS */
1da177e4 2206
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * compat_sock_ioctl - compat ioctl entry point for socket files.
 *
 * Forwards to the socket's compat_ioctl handler when the protocol
 * provides one; otherwise returns -ENOIOCTLCMD.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	return sock->ops->compat_ioctl(sock, cmd, arg);
}
#endif
2220
ac5a488e
SS
2221int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2222{
2223 return sock->ops->bind(sock, addr, addrlen);
2224}
2225
2226int kernel_listen(struct socket *sock, int backlog)
2227{
2228 return sock->ops->listen(sock, backlog);
2229}
2230
2231int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2232{
2233 struct sock *sk = sock->sk;
2234 int err;
2235
2236 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2237 newsock);
2238 if (err < 0)
2239 goto done;
2240
2241 err = sock->ops->accept(sock, *newsock, flags);
2242 if (err < 0) {
2243 sock_release(*newsock);
fa8705b0 2244 *newsock = NULL;
ac5a488e
SS
2245 goto done;
2246 }
2247
2248 (*newsock)->ops = sock->ops;
2249
2250done:
2251 return err;
2252}
2253
2254int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2255 int flags)
ac5a488e
SS
2256{
2257 return sock->ops->connect(sock, addr, addrlen, flags);
2258}
2259
2260int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2261 int *addrlen)
2262{
2263 return sock->ops->getname(sock, addr, addrlen, 0);
2264}
2265
2266int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2267 int *addrlen)
2268{
2269 return sock->ops->getname(sock, addr, addrlen, 1);
2270}
2271
2272int kernel_getsockopt(struct socket *sock, int level, int optname,
2273 char *optval, int *optlen)
2274{
2275 mm_segment_t oldfs = get_fs();
2276 int err;
2277
2278 set_fs(KERNEL_DS);
2279 if (level == SOL_SOCKET)
2280 err = sock_getsockopt(sock, level, optname, optval, optlen);
2281 else
2282 err = sock->ops->getsockopt(sock, level, optname, optval,
2283 optlen);
2284 set_fs(oldfs);
2285 return err;
2286}
2287
2288int kernel_setsockopt(struct socket *sock, int level, int optname,
2289 char *optval, int optlen)
2290{
2291 mm_segment_t oldfs = get_fs();
2292 int err;
2293
2294 set_fs(KERNEL_DS);
2295 if (level == SOL_SOCKET)
2296 err = sock_setsockopt(sock, level, optname, optval, optlen);
2297 else
2298 err = sock->ops->setsockopt(sock, level, optname, optval,
2299 optlen);
2300 set_fs(oldfs);
2301 return err;
2302}
2303
2304int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2305 size_t size, int flags)
2306{
2307 if (sock->ops->sendpage)
2308 return sock->ops->sendpage(sock, page, offset, size, flags);
2309
2310 return sock_no_sendpage(sock, page, offset, size, flags);
2311}
2312
2313int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2314{
2315 mm_segment_t oldfs = get_fs();
2316 int err;
2317
2318 set_fs(KERNEL_DS);
2319 err = sock->ops->ioctl(sock, cmd, arg);
2320 set_fs(oldfs);
2321
2322 return err;
2323}
2324
91cf45f0
TM
2325int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
2326{
2327 return sock->ops->shutdown(sock, how);
2328}
2329
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket creation, registration and message entry points */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_* helpers */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);