add file position info to proc
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1da177e4
LT
87
88#include <asm/uaccess.h>
89#include <asm/unistd.h>
90
91#include <net/compat.h>
92
93#include <net/sock.h>
94#include <linux/netfilter.h>
95
96static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
97static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
98 unsigned long nr_segs, loff_t pos);
99static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
100 unsigned long nr_segs, loff_t pos);
89bddce5 101static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
102
103static int sock_close(struct inode *inode, struct file *file);
104static unsigned int sock_poll(struct file *file,
105 struct poll_table_struct *wait);
89bddce5 106static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
107#ifdef CONFIG_COMPAT
108static long compat_sock_ioctl(struct file *file,
89bddce5 109 unsigned int cmd, unsigned long arg);
89bbfc95 110#endif
1da177e4 111static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
112static ssize_t sock_sendpage(struct file *file, struct page *page,
113 int offset, size_t size, loff_t *ppos, int more);
114
1da177e4
LT
115/*
116 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
117 * in the operation structures but are done directly via the socketcall() multiplexor.
118 */
119
da7071d7 120static const struct file_operations socket_file_ops = {
1da177e4
LT
121 .owner = THIS_MODULE,
122 .llseek = no_llseek,
123 .aio_read = sock_aio_read,
124 .aio_write = sock_aio_write,
125 .poll = sock_poll,
126 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
127#ifdef CONFIG_COMPAT
128 .compat_ioctl = compat_sock_ioctl,
129#endif
1da177e4
LT
130 .mmap = sock_mmap,
131 .open = sock_no_open, /* special open code to disallow open via /proc */
132 .release = sock_close,
133 .fasync = sock_fasync,
5274f052
JA
134 .sendpage = sock_sendpage,
135 .splice_write = generic_splice_sendpage,
1da177e4
LT
136};
137
138/*
139 * The protocol list. Each protocol is registered in here.
140 */
141
1da177e4 142static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 143static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 144
1da177e4
LT
145/*
146 * Statistics counters of the socket lists
147 */
148
149static DEFINE_PER_CPU(int, sockets_in_use) = 0;
150
/*
 * Support routines.
 * Move socket addresses back and forth across the kernel/user
 * divide and look after the messy bits.
 */

/*
 * Largest socket address, in bytes, accepted by the helpers below:
 *	108 for Unix domain,
 *	16 for IP, 16 for IPX,
 *	24 for IPv6,
 *	about 80 for AX.25.
 * Must be at least one bigger than the AF_UNIX size
 * (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 165
1da177e4
LT
166/**
167 * move_addr_to_kernel - copy a socket address into kernel space
168 * @uaddr: Address in user space
169 * @kaddr: Address in kernel space
170 * @ulen: Length in user space
171 *
172 * The address is copied into kernel space. If the provided address is
173 * too long an error code of -EINVAL is returned. If the copy gives
174 * invalid addresses -EFAULT is returned. On a success 0 is returned.
175 */
176
177int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
178{
89bddce5 179 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 180 return -EINVAL;
89bddce5 181 if (ulen == 0)
1da177e4 182 return 0;
89bddce5 183 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 184 return -EFAULT;
3ec3b2fb 185 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
186}
187
188/**
189 * move_addr_to_user - copy an address to user space
190 * @kaddr: kernel space address
191 * @klen: length of address in kernel
192 * @uaddr: user space address
193 * @ulen: pointer to user length field
194 *
195 * The value pointed to by ulen on entry is the buffer length available.
196 * This is overwritten with the buffer space used. -EINVAL is returned
197 * if an overlong buffer is specified or a negative buffer size. -EFAULT
198 * is returned if either the buffer or the length field are not
199 * accessible.
200 * After copying the data up to the limit the user specifies, the true
201 * length of the data is written over the length limit the user
202 * specified. Zero is returned for a success.
203 */
89bddce5
SH
204
205int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
206 int __user *ulen)
1da177e4
LT
207{
208 int err;
209 int len;
210
89bddce5
SH
211 err = get_user(len, ulen);
212 if (err)
1da177e4 213 return err;
89bddce5
SH
214 if (len > klen)
215 len = klen;
216 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 217 return -EINVAL;
89bddce5 218 if (len) {
d6fe3945
SG
219 if (audit_sockaddr(klen, kaddr))
220 return -ENOMEM;
89bddce5 221 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
222 return -EFAULT;
223 }
224 /*
89bddce5
SH
225 * "fromlen shall refer to the value before truncation.."
226 * 1003.1g
1da177e4
LT
227 */
228 return __put_user(klen, ulen);
229}
230
231#define SOCKFS_MAGIC 0x534F434B
232
e18b890b 233static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
234
235static struct inode *sock_alloc_inode(struct super_block *sb)
236{
237 struct socket_alloc *ei;
89bddce5 238
e94b1766 239 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
240 if (!ei)
241 return NULL;
242 init_waitqueue_head(&ei->socket.wait);
89bddce5 243
1da177e4
LT
244 ei->socket.fasync_list = NULL;
245 ei->socket.state = SS_UNCONNECTED;
246 ei->socket.flags = 0;
247 ei->socket.ops = NULL;
248 ei->socket.sk = NULL;
249 ei->socket.file = NULL;
1da177e4
LT
250
251 return &ei->vfs_inode;
252}
253
254static void sock_destroy_inode(struct inode *inode)
255{
256 kmem_cache_free(sock_inode_cachep,
257 container_of(inode, struct socket_alloc, vfs_inode));
258}
259
e18b890b 260static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
1da177e4 261{
89bddce5 262 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 263
50953fe9 264 if (flags & SLAB_CTOR_CONSTRUCTOR)
1da177e4
LT
265 inode_init_once(&ei->vfs_inode);
266}
89bddce5 267
1da177e4
LT
268static int init_inodecache(void)
269{
270 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
271 sizeof(struct socket_alloc),
272 0,
273 (SLAB_HWCACHE_ALIGN |
274 SLAB_RECLAIM_ACCOUNT |
275 SLAB_MEM_SPREAD),
276 init_once,
277 NULL);
1da177e4
LT
278 if (sock_inode_cachep == NULL)
279 return -ENOMEM;
280 return 0;
281}
282
283static struct super_operations sockfs_ops = {
284 .alloc_inode = sock_alloc_inode,
285 .destroy_inode =sock_destroy_inode,
286 .statfs = simple_statfs,
287};
288
454e2398 289static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
290 int flags, const char *dev_name, void *data,
291 struct vfsmount *mnt)
1da177e4 292{
454e2398
DH
293 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
294 mnt);
1da177e4
LT
295}
296
ba89966c 297static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
298
299static struct file_system_type sock_fs_type = {
300 .name = "sockfs",
301 .get_sb = sockfs_get_sb,
302 .kill_sb = kill_anon_super,
303};
89bddce5 304
1da177e4
LT
305static int sockfs_delete_dentry(struct dentry *dentry)
306{
304e61e6
ED
307 /*
308 * At creation time, we pretended this dentry was hashed
309 * (by clearing DCACHE_UNHASHED bit in d_flags)
310 * At delete time, we restore the truth : not hashed.
311 * (so that dput() can proceed correctly)
312 */
313 dentry->d_flags |= DCACHE_UNHASHED;
314 return 0;
1da177e4
LT
315}
316static struct dentry_operations sockfs_dentry_operations = {
89bddce5 317 .d_delete = sockfs_delete_dentry,
1da177e4
LT
318};
319
320/*
321 * Obtains the first available file descriptor and sets it up for use.
322 *
39d8c1b6
DM
323 * These functions create file structures and maps them to fd space
324 * of the current process. On success it returns file descriptor
1da177e4
LT
325 * and file struct implicitly stored in sock->file.
326 * Note that another thread may close file descriptor before we return
327 * from this function. We use the fact that now we do not refer
328 * to socket after mapping. If one day we will need it, this
329 * function will increment ref. count on file by 1.
330 *
331 * In any case returned fd MAY BE not valid!
332 * This race condition is unavoidable
333 * with shared fd spaces, we cannot solve it inside kernel,
334 * but we take care of internal coherence yet.
335 */
336
39d8c1b6 337static int sock_alloc_fd(struct file **filep)
1da177e4
LT
338{
339 int fd;
1da177e4
LT
340
341 fd = get_unused_fd();
39d8c1b6 342 if (likely(fd >= 0)) {
1da177e4
LT
343 struct file *file = get_empty_filp();
344
39d8c1b6
DM
345 *filep = file;
346 if (unlikely(!file)) {
1da177e4 347 put_unused_fd(fd);
39d8c1b6 348 return -ENFILE;
1da177e4 349 }
39d8c1b6
DM
350 } else
351 *filep = NULL;
352 return fd;
353}
1da177e4 354
39d8c1b6
DM
355static int sock_attach_fd(struct socket *sock, struct file *file)
356{
357 struct qstr this;
358 char name[32];
359
360 this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
361 this.name = name;
304e61e6 362 this.hash = 0;
39d8c1b6 363
3126a42c
JS
364 file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
365 if (unlikely(!file->f_path.dentry))
39d8c1b6
DM
366 return -ENOMEM;
367
3126a42c 368 file->f_path.dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
369 /*
370 * We dont want to push this dentry into global dentry hash table.
371 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
372 * This permits a working /proc/$pid/fd/XXX on sockets
373 */
3126a42c
JS
374 file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
375 d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
376 file->f_path.mnt = mntget(sock_mnt);
377 file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
39d8c1b6
DM
378
379 sock->file = file;
380 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
381 file->f_mode = FMODE_READ | FMODE_WRITE;
382 file->f_flags = O_RDWR;
383 file->f_pos = 0;
384 file->private_data = sock;
1da177e4 385
39d8c1b6
DM
386 return 0;
387}
388
/*
 * Allocate a descriptor and file for @sock, attach the socket to the
 * file and install the file in the descriptor table.  Returns the new
 * descriptor, or a negative error from sock_alloc_fd() or
 * sock_attach_fd(); in the latter case the file and descriptor are
 * released before returning.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
406
6cb153ca
BL
407static struct socket *sock_from_file(struct file *file, int *err)
408{
6cb153ca
BL
409 if (file->f_op == &socket_file_ops)
410 return file->private_data; /* set in sock_map_fd */
411
23bb80d2
ED
412 *err = -ENOTSOCK;
413 return NULL;
6cb153ca
BL
414}
415
1da177e4
LT
416/**
417 * sockfd_lookup - Go from a file number to its socket slot
418 * @fd: file handle
419 * @err: pointer to an error code return
420 *
421 * The file handle passed in is locked and the socket it is bound
422 * too is returned. If an error occurs the err pointer is overwritten
423 * with a negative errno code and NULL is returned. The function checks
424 * for both invalid handles and passing a handle which is not a socket.
425 *
426 * On a success the socket object pointer is returned.
427 */
428
429struct socket *sockfd_lookup(int fd, int *err)
430{
431 struct file *file;
1da177e4
LT
432 struct socket *sock;
433
89bddce5
SH
434 file = fget(fd);
435 if (!file) {
1da177e4
LT
436 *err = -EBADF;
437 return NULL;
438 }
89bddce5 439
6cb153ca
BL
440 sock = sock_from_file(file, err);
441 if (!sock)
1da177e4 442 fput(file);
6cb153ca
BL
443 return sock;
444}
1da177e4 445
6cb153ca
BL
446static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
447{
448 struct file *file;
449 struct socket *sock;
450
3672558c 451 *err = -EBADF;
6cb153ca
BL
452 file = fget_light(fd, fput_needed);
453 if (file) {
454 sock = sock_from_file(file, err);
455 if (sock)
456 return sock;
457 fput_light(file, *fput_needed);
1da177e4 458 }
6cb153ca 459 return NULL;
1da177e4
LT
460}
461
462/**
463 * sock_alloc - allocate a socket
89bddce5 464 *
1da177e4
LT
465 * Allocate a new inode and socket object. The two are bound together
466 * and initialised. The socket is then returned. If we are out of inodes
467 * NULL is returned.
468 */
469
470static struct socket *sock_alloc(void)
471{
89bddce5
SH
472 struct inode *inode;
473 struct socket *sock;
1da177e4
LT
474
475 inode = new_inode(sock_mnt->mnt_sb);
476 if (!inode)
477 return NULL;
478
479 sock = SOCKET_I(inode);
480
89bddce5 481 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
482 inode->i_uid = current->fsuid;
483 inode->i_gid = current->fsgid;
484
485 get_cpu_var(sockets_in_use)++;
486 put_cpu_var(sockets_in_use);
487 return sock;
488}
489
490/*
491 * In theory you can't get an open on this inode, but /proc provides
492 * a back door. Remember to keep it shut otherwise you'll let the
493 * creepy crawlies in.
494 */
89bddce5 495
1da177e4
LT
496static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
497{
498 return -ENXIO;
499}
500
4b6f5d20 501const struct file_operations bad_sock_fops = {
1da177e4
LT
502 .owner = THIS_MODULE,
503 .open = sock_no_open,
504};
505
506/**
507 * sock_release - close a socket
508 * @sock: socket to close
509 *
510 * The socket is released from the protocol stack if it has a release
511 * callback, and the inode is then released if the socket is bound to
89bddce5 512 * an inode not a file.
1da177e4 513 */
89bddce5 514
1da177e4
LT
515void sock_release(struct socket *sock)
516{
517 if (sock->ops) {
518 struct module *owner = sock->ops->owner;
519
520 sock->ops->release(sock);
521 sock->ops = NULL;
522 module_put(owner);
523 }
524
525 if (sock->fasync_list)
526 printk(KERN_ERR "sock_release: fasync list not empty!\n");
527
528 get_cpu_var(sockets_in_use)--;
529 put_cpu_var(sockets_in_use);
530 if (!sock->file) {
531 iput(SOCK_INODE(sock));
532 return;
533 }
89bddce5 534 sock->file = NULL;
1da177e4
LT
535}
536
89bddce5 537static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
538 struct msghdr *msg, size_t size)
539{
540 struct sock_iocb *si = kiocb_to_siocb(iocb);
541 int err;
542
543 si->sock = sock;
544 si->scm = NULL;
545 si->msg = msg;
546 si->size = size;
547
548 err = security_socket_sendmsg(sock, msg, size);
549 if (err)
550 return err;
551
552 return sock->ops->sendmsg(iocb, sock, msg, size);
553}
554
555int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
556{
557 struct kiocb iocb;
558 struct sock_iocb siocb;
559 int ret;
560
561 init_sync_kiocb(&iocb, NULL);
562 iocb.private = &siocb;
563 ret = __sock_sendmsg(&iocb, sock, msg, size);
564 if (-EIOCBQUEUED == ret)
565 ret = wait_on_sync_kiocb(&iocb);
566 return ret;
567}
568
569int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
570 struct kvec *vec, size_t num, size_t size)
571{
572 mm_segment_t oldfs = get_fs();
573 int result;
574
575 set_fs(KERNEL_DS);
576 /*
577 * the following is safe, since for compiler definitions of kvec and
578 * iovec are identical, yielding the same in-core layout and alignment
579 */
89bddce5 580 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
581 msg->msg_iovlen = num;
582 result = sock_sendmsg(sock, msg, size);
583 set_fs(oldfs);
584 return result;
585}
586
92f37fd2
ED
587/*
588 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
589 */
590void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
591 struct sk_buff *skb)
592{
593 ktime_t kt = skb->tstamp;
594
595 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
596 struct timeval tv;
597 /* Race occurred between timestamp enabling and packet
598 receiving. Fill in the current time for now. */
599 if (kt.tv64 == 0)
600 kt = ktime_get_real();
601 skb->tstamp = kt;
602 tv = ktime_to_timeval(kt);
603 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
604 } else {
605 struct timespec ts;
606 /* Race occurred between timestamp enabling and packet
607 receiving. Fill in the current time for now. */
608 if (kt.tv64 == 0)
609 kt = ktime_get_real();
610 skb->tstamp = kt;
611 ts = ktime_to_timespec(kt);
612 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
613 }
614}
615
7c81fd8b
ACM
616EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
617
89bddce5 618static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
619 struct msghdr *msg, size_t size, int flags)
620{
621 int err;
622 struct sock_iocb *si = kiocb_to_siocb(iocb);
623
624 si->sock = sock;
625 si->scm = NULL;
626 si->msg = msg;
627 si->size = size;
628 si->flags = flags;
629
630 err = security_socket_recvmsg(sock, msg, size, flags);
631 if (err)
632 return err;
633
634 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
635}
636
89bddce5 637int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
638 size_t size, int flags)
639{
640 struct kiocb iocb;
641 struct sock_iocb siocb;
642 int ret;
643
89bddce5 644 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
645 iocb.private = &siocb;
646 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
647 if (-EIOCBQUEUED == ret)
648 ret = wait_on_sync_kiocb(&iocb);
649 return ret;
650}
651
89bddce5
SH
652int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
653 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
654{
655 mm_segment_t oldfs = get_fs();
656 int result;
657
658 set_fs(KERNEL_DS);
659 /*
660 * the following is safe, since for compiler definitions of kvec and
661 * iovec are identical, yielding the same in-core layout and alignment
662 */
89bddce5 663 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
664 result = sock_recvmsg(sock, msg, size, flags);
665 set_fs(oldfs);
666 return result;
667}
668
669static void sock_aio_dtor(struct kiocb *iocb)
670{
671 kfree(iocb->private);
672}
673
ce1d4d3e
CH
674static ssize_t sock_sendpage(struct file *file, struct page *page,
675 int offset, size_t size, loff_t *ppos, int more)
1da177e4 676{
1da177e4
LT
677 struct socket *sock;
678 int flags;
679
ce1d4d3e
CH
680 sock = file->private_data;
681
682 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
683 if (more)
684 flags |= MSG_MORE;
685
686 return sock->ops->sendpage(sock, page, offset, size, flags);
687}
1da177e4 688
ce1d4d3e 689static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 690 struct sock_iocb *siocb)
ce1d4d3e
CH
691{
692 if (!is_sync_kiocb(iocb)) {
693 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
694 if (!siocb)
695 return NULL;
1da177e4
LT
696 iocb->ki_dtor = sock_aio_dtor;
697 }
1da177e4 698
ce1d4d3e 699 siocb->kiocb = iocb;
ce1d4d3e
CH
700 iocb->private = siocb;
701 return siocb;
1da177e4
LT
702}
703
ce1d4d3e 704static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
705 struct file *file, const struct iovec *iov,
706 unsigned long nr_segs)
ce1d4d3e
CH
707{
708 struct socket *sock = file->private_data;
709 size_t size = 0;
710 int i;
1da177e4 711
89bddce5
SH
712 for (i = 0; i < nr_segs; i++)
713 size += iov[i].iov_len;
1da177e4 714
ce1d4d3e
CH
715 msg->msg_name = NULL;
716 msg->msg_namelen = 0;
717 msg->msg_control = NULL;
718 msg->msg_controllen = 0;
89bddce5 719 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
720 msg->msg_iovlen = nr_segs;
721 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
722
723 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
724}
725
027445c3
BP
726static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
727 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
728{
729 struct sock_iocb siocb, *x;
730
1da177e4
LT
731 if (pos != 0)
732 return -ESPIPE;
027445c3
BP
733
734 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
735 return 0;
736
027445c3
BP
737
738 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
739 if (!x)
740 return -ENOMEM;
027445c3 741 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
742}
743
ce1d4d3e 744static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
745 struct file *file, const struct iovec *iov,
746 unsigned long nr_segs)
1da177e4 747{
ce1d4d3e
CH
748 struct socket *sock = file->private_data;
749 size_t size = 0;
750 int i;
1da177e4 751
89bddce5
SH
752 for (i = 0; i < nr_segs; i++)
753 size += iov[i].iov_len;
1da177e4 754
ce1d4d3e
CH
755 msg->msg_name = NULL;
756 msg->msg_namelen = 0;
757 msg->msg_control = NULL;
758 msg->msg_controllen = 0;
89bddce5 759 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
760 msg->msg_iovlen = nr_segs;
761 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
762 if (sock->type == SOCK_SEQPACKET)
763 msg->msg_flags |= MSG_EOR;
1da177e4 764
ce1d4d3e 765 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
766}
767
027445c3
BP
768static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
769 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
770{
771 struct sock_iocb siocb, *x;
1da177e4 772
ce1d4d3e
CH
773 if (pos != 0)
774 return -ESPIPE;
027445c3
BP
775
776 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
ce1d4d3e 777 return 0;
1da177e4 778
027445c3 779 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
780 if (!x)
781 return -ENOMEM;
1da177e4 782
027445c3 783 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
784}
785
1da177e4
LT
786/*
787 * Atomic setting of ioctl hooks to avoid race
788 * with module unload.
789 */
790
4a3e2f71 791static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 792static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 793
89bddce5 794void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 795{
4a3e2f71 796 mutex_lock(&br_ioctl_mutex);
1da177e4 797 br_ioctl_hook = hook;
4a3e2f71 798 mutex_unlock(&br_ioctl_mutex);
1da177e4 799}
89bddce5 800
1da177e4
LT
801EXPORT_SYMBOL(brioctl_set);
802
4a3e2f71 803static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 804static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 805
89bddce5 806void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 807{
4a3e2f71 808 mutex_lock(&vlan_ioctl_mutex);
1da177e4 809 vlan_ioctl_hook = hook;
4a3e2f71 810 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 811}
89bddce5 812
1da177e4
LT
813EXPORT_SYMBOL(vlan_ioctl_set);
814
4a3e2f71 815static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 816static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 817
89bddce5 818void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 819{
4a3e2f71 820 mutex_lock(&dlci_ioctl_mutex);
1da177e4 821 dlci_ioctl_hook = hook;
4a3e2f71 822 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 823}
89bddce5 824
1da177e4
LT
825EXPORT_SYMBOL(dlci_ioctl_set);
826
827/*
828 * With an ioctl, arg may well be a user mode pointer, but we don't know
829 * what to do with it - that's up to the protocol still.
830 */
831
832static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
833{
834 struct socket *sock;
835 void __user *argp = (void __user *)arg;
836 int pid, err;
837
b69aee04 838 sock = file->private_data;
1da177e4
LT
839 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
840 err = dev_ioctl(cmd, argp);
841 } else
d86b5e0e 842#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
843 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
844 err = dev_ioctl(cmd, argp);
845 } else
89bddce5
SH
846#endif /* CONFIG_WIRELESS_EXT */
847 switch (cmd) {
1da177e4
LT
848 case FIOSETOWN:
849 case SIOCSPGRP:
850 err = -EFAULT;
851 if (get_user(pid, (int __user *)argp))
852 break;
853 err = f_setown(sock->file, pid, 1);
854 break;
855 case FIOGETOWN:
856 case SIOCGPGRP:
609d7fa9 857 err = put_user(f_getown(sock->file),
89bddce5 858 (int __user *)argp);
1da177e4
LT
859 break;
860 case SIOCGIFBR:
861 case SIOCSIFBR:
862 case SIOCBRADDBR:
863 case SIOCBRDELBR:
864 err = -ENOPKG;
865 if (!br_ioctl_hook)
866 request_module("bridge");
867
4a3e2f71 868 mutex_lock(&br_ioctl_mutex);
89bddce5 869 if (br_ioctl_hook)
1da177e4 870 err = br_ioctl_hook(cmd, argp);
4a3e2f71 871 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
872 break;
873 case SIOCGIFVLAN:
874 case SIOCSIFVLAN:
875 err = -ENOPKG;
876 if (!vlan_ioctl_hook)
877 request_module("8021q");
878
4a3e2f71 879 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
880 if (vlan_ioctl_hook)
881 err = vlan_ioctl_hook(argp);
4a3e2f71 882 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 883 break;
1da177e4
LT
884 case SIOCADDDLCI:
885 case SIOCDELDLCI:
886 err = -ENOPKG;
887 if (!dlci_ioctl_hook)
888 request_module("dlci");
889
890 if (dlci_ioctl_hook) {
4a3e2f71 891 mutex_lock(&dlci_ioctl_mutex);
1da177e4 892 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 893 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
894 }
895 break;
896 default:
897 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
898
899 /*
900 * If this ioctl is unknown try to hand it down
901 * to the NIC driver.
902 */
903 if (err == -ENOIOCTLCMD)
904 err = dev_ioctl(cmd, argp);
1da177e4 905 break;
89bddce5 906 }
1da177e4
LT
907 return err;
908}
909
910int sock_create_lite(int family, int type, int protocol, struct socket **res)
911{
912 int err;
913 struct socket *sock = NULL;
89bddce5 914
1da177e4
LT
915 err = security_socket_create(family, type, protocol, 1);
916 if (err)
917 goto out;
918
919 sock = sock_alloc();
920 if (!sock) {
921 err = -ENOMEM;
922 goto out;
923 }
924
1da177e4 925 sock->type = type;
7420ed23
VY
926 err = security_socket_post_create(sock, family, type, protocol, 1);
927 if (err)
928 goto out_release;
929
1da177e4
LT
930out:
931 *res = sock;
932 return err;
7420ed23
VY
933out_release:
934 sock_release(sock);
935 sock = NULL;
936 goto out;
1da177e4
LT
937}
938
939/* No kernel lock held - perfect */
89bddce5 940static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
941{
942 struct socket *sock;
943
944 /*
89bddce5 945 * We can't return errors to poll, so it's either yes or no.
1da177e4 946 */
b69aee04 947 sock = file->private_data;
1da177e4
LT
948 return sock->ops->poll(file, sock, wait);
949}
950
89bddce5 951static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 952{
b69aee04 953 struct socket *sock = file->private_data;
1da177e4
LT
954
955 return sock->ops->mmap(file, sock, vma);
956}
957
20380731 958static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
959{
960 /*
89bddce5
SH
961 * It was possible the inode is NULL we were
962 * closing an unfinished socket.
1da177e4
LT
963 */
964
89bddce5 965 if (!inode) {
1da177e4
LT
966 printk(KERN_DEBUG "sock_close: NULL inode\n");
967 return 0;
968 }
969 sock_fasync(-1, filp, 0);
970 sock_release(SOCKET_I(inode));
971 return 0;
972}
973
974/*
975 * Update the socket async list
976 *
977 * Fasync_list locking strategy.
978 *
979 * 1. fasync_list is modified only under process context socket lock
980 * i.e. under semaphore.
981 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
982 * or under socket lock.
983 * 3. fasync_list can be used from softirq context, so that
984 * modification under socket lock have to be enhanced with
985 * write_lock_bh(&sk->sk_callback_lock).
986 * --ANK (990710)
987 */
988
989static int sock_fasync(int fd, struct file *filp, int on)
990{
89bddce5 991 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
992 struct socket *sock;
993 struct sock *sk;
994
89bddce5 995 if (on) {
8b3a7005 996 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 997 if (fna == NULL)
1da177e4
LT
998 return -ENOMEM;
999 }
1000
b69aee04 1001 sock = filp->private_data;
1da177e4 1002
89bddce5
SH
1003 sk = sock->sk;
1004 if (sk == NULL) {
1da177e4
LT
1005 kfree(fna);
1006 return -EINVAL;
1007 }
1008
1009 lock_sock(sk);
1010
89bddce5 1011 prev = &(sock->fasync_list);
1da177e4 1012
89bddce5
SH
1013 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1014 if (fa->fa_file == filp)
1da177e4
LT
1015 break;
1016
89bddce5
SH
1017 if (on) {
1018 if (fa != NULL) {
1da177e4 1019 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1020 fa->fa_fd = fd;
1da177e4
LT
1021 write_unlock_bh(&sk->sk_callback_lock);
1022
1023 kfree(fna);
1024 goto out;
1025 }
89bddce5
SH
1026 fna->fa_file = filp;
1027 fna->fa_fd = fd;
1028 fna->magic = FASYNC_MAGIC;
1029 fna->fa_next = sock->fasync_list;
1da177e4 1030 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1031 sock->fasync_list = fna;
1da177e4 1032 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1033 } else {
1034 if (fa != NULL) {
1da177e4 1035 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1036 *prev = fa->fa_next;
1da177e4
LT
1037 write_unlock_bh(&sk->sk_callback_lock);
1038 kfree(fa);
1039 }
1040 }
1041
1042out:
1043 release_sock(sock->sk);
1044 return 0;
1045}
1046
1047/* This function may be called only under socket lock or callback_lock */
1048
1049int sock_wake_async(struct socket *sock, int how, int band)
1050{
1051 if (!sock || !sock->fasync_list)
1052 return -1;
89bddce5 1053 switch (how) {
1da177e4 1054 case 1:
89bddce5 1055
1da177e4
LT
1056 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1057 break;
1058 goto call_kill;
1059 case 2:
1060 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1061 break;
1062 /* fall through */
1063 case 0:
89bddce5 1064call_kill:
1da177e4
LT
1065 __kill_fasync(sock->fasync_list, SIGIO, band);
1066 break;
1067 case 3:
1068 __kill_fasync(sock->fasync_list, SIGURG, band);
1069 }
1070 return 0;
1071}
1072
89bddce5
SH
1073static int __sock_create(int family, int type, int protocol,
1074 struct socket **res, int kern)
1da177e4
LT
1075{
1076 int err;
1077 struct socket *sock;
55737fda 1078 const struct net_proto_family *pf;
1da177e4
LT
1079
1080 /*
89bddce5 1081 * Check protocol is in range
1da177e4
LT
1082 */
1083 if (family < 0 || family >= NPROTO)
1084 return -EAFNOSUPPORT;
1085 if (type < 0 || type >= SOCK_MAX)
1086 return -EINVAL;
1087
1088 /* Compatibility.
1089
1090 This uglymoron is moved from INET layer to here to avoid
1091 deadlock in module load.
1092 */
1093 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1094 static int warned;
1da177e4
LT
1095 if (!warned) {
1096 warned = 1;
89bddce5
SH
1097 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1098 current->comm);
1da177e4
LT
1099 }
1100 family = PF_PACKET;
1101 }
1102
1103 err = security_socket_create(family, type, protocol, kern);
1104 if (err)
1105 return err;
89bddce5 1106
55737fda
SH
1107 /*
1108 * Allocate the socket and allow the family to set things up. if
1109 * the protocol is 0, the family is instructed to select an appropriate
1110 * default.
1111 */
1112 sock = sock_alloc();
1113 if (!sock) {
1114 if (net_ratelimit())
1115 printk(KERN_WARNING "socket: no more sockets\n");
1116 return -ENFILE; /* Not exactly a match, but its the
1117 closest posix thing */
1118 }
1119
1120 sock->type = type;
1121
1da177e4 1122#if defined(CONFIG_KMOD)
89bddce5
SH
1123 /* Attempt to load a protocol module if the find failed.
1124 *
1125 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1126 * requested real, full-featured networking support upon configuration.
1127 * Otherwise module support will break!
1128 */
55737fda 1129 if (net_families[family] == NULL)
89bddce5 1130 request_module("net-pf-%d", family);
1da177e4
LT
1131#endif
1132
55737fda
SH
1133 rcu_read_lock();
1134 pf = rcu_dereference(net_families[family]);
1135 err = -EAFNOSUPPORT;
1136 if (!pf)
1137 goto out_release;
1da177e4
LT
1138
1139 /*
1140 * We will call the ->create function, that possibly is in a loadable
1141 * module, so we have to bump that loadable module refcnt first.
1142 */
55737fda 1143 if (!try_module_get(pf->owner))
1da177e4
LT
1144 goto out_release;
1145
55737fda
SH
1146 /* Now protected by module ref count */
1147 rcu_read_unlock();
1148
1149 err = pf->create(sock, protocol);
1150 if (err < 0)
1da177e4 1151 goto out_module_put;
a79af59e 1152
1da177e4
LT
1153 /*
1154 * Now to bump the refcnt of the [loadable] module that owns this
1155 * socket at sock_release time we decrement its refcnt.
1156 */
55737fda
SH
1157 if (!try_module_get(sock->ops->owner))
1158 goto out_module_busy;
1159
1da177e4
LT
1160 /*
1161 * Now that we're done with the ->create function, the [loadable]
1162 * module can have its refcnt decremented
1163 */
55737fda 1164 module_put(pf->owner);
7420ed23
VY
1165 err = security_socket_post_create(sock, family, type, protocol, kern);
1166 if (err)
1167 goto out_release;
55737fda 1168 *res = sock;
1da177e4 1169
55737fda
SH
1170 return 0;
1171
1172out_module_busy:
1173 err = -EAFNOSUPPORT;
1da177e4 1174out_module_put:
55737fda
SH
1175 sock->ops = NULL;
1176 module_put(pf->owner);
1177out_sock_release:
1da177e4 1178 sock_release(sock);
55737fda
SH
1179 return err;
1180
1181out_release:
1182 rcu_read_unlock();
1183 goto out_sock_release;
1da177e4
LT
1184}
1185
/* Create a socket via __sock_create() with its kern argument set to 0. */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1190
/* Create a socket via __sock_create() with its kern argument set to 1. */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1195
1196asmlinkage long sys_socket(int family, int type, int protocol)
1197{
1198 int retval;
1199 struct socket *sock;
1200
1201 retval = sock_create(family, type, protocol, &sock);
1202 if (retval < 0)
1203 goto out;
1204
1205 retval = sock_map_fd(sock);
1206 if (retval < 0)
1207 goto out_release;
1208
1209out:
1210 /* It may be already another descriptor 8) Not kernel problem. */
1211 return retval;
1212
1213out_release:
1214 sock_release(sock);
1215 return retval;
1216}
1217
1218/*
1219 * Create a pair of connected sockets.
1220 */
1221
89bddce5
SH
1222asmlinkage long sys_socketpair(int family, int type, int protocol,
1223 int __user *usockvec)
1da177e4
LT
1224{
1225 struct socket *sock1, *sock2;
1226 int fd1, fd2, err;
db349509 1227 struct file *newfile1, *newfile2;
1da177e4
LT
1228
1229 /*
1230 * Obtain the first socket and check if the underlying protocol
1231 * supports the socketpair call.
1232 */
1233
1234 err = sock_create(family, type, protocol, &sock1);
1235 if (err < 0)
1236 goto out;
1237
1238 err = sock_create(family, type, protocol, &sock2);
1239 if (err < 0)
1240 goto out_release_1;
1241
1242 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1243 if (err < 0)
1da177e4
LT
1244 goto out_release_both;
1245
db349509
AV
1246 fd1 = sock_alloc_fd(&newfile1);
1247 if (unlikely(fd1 < 0))
1248 goto out_release_both;
1da177e4 1249
db349509
AV
1250 fd2 = sock_alloc_fd(&newfile2);
1251 if (unlikely(fd2 < 0)) {
1252 put_filp(newfile1);
1253 put_unused_fd(fd1);
1da177e4 1254 goto out_release_both;
db349509 1255 }
1da177e4 1256
db349509
AV
1257 err = sock_attach_fd(sock1, newfile1);
1258 if (unlikely(err < 0)) {
1259 goto out_fd2;
1260 }
1261
1262 err = sock_attach_fd(sock2, newfile2);
1263 if (unlikely(err < 0)) {
1264 fput(newfile1);
1265 goto out_fd1;
1266 }
1267
1268 err = audit_fd_pair(fd1, fd2);
1269 if (err < 0) {
1270 fput(newfile1);
1271 fput(newfile2);
1272 goto out_fd;
1273 }
1da177e4 1274
db349509
AV
1275 fd_install(fd1, newfile1);
1276 fd_install(fd2, newfile2);
1da177e4
LT
1277 /* fd1 and fd2 may be already another descriptors.
1278 * Not kernel problem.
1279 */
1280
89bddce5 1281 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1282 if (!err)
1283 err = put_user(fd2, &usockvec[1]);
1284 if (!err)
1285 return 0;
1286
1287 sys_close(fd2);
1288 sys_close(fd1);
1289 return err;
1290
1da177e4 1291out_release_both:
89bddce5 1292 sock_release(sock2);
1da177e4 1293out_release_1:
89bddce5 1294 sock_release(sock1);
1da177e4
LT
1295out:
1296 return err;
db349509
AV
1297
1298out_fd2:
1299 put_filp(newfile1);
1300 sock_release(sock1);
1301out_fd1:
1302 put_filp(newfile2);
1303 sock_release(sock2);
1304out_fd:
1305 put_unused_fd(fd1);
1306 put_unused_fd(fd2);
1307 goto out;
1da177e4
LT
1308}
1309
1da177e4
LT
1310/*
1311 * Bind a name to a socket. Nothing much to do here since it's
1312 * the protocol's responsibility to handle the local address.
1313 *
1314 * We move the socket address to kernel space before we call
1315 * the protocol layer (having also checked the address is ok).
1316 */
1317
1318asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1319{
1320 struct socket *sock;
1321 char address[MAX_SOCK_ADDR];
6cb153ca 1322 int err, fput_needed;
1da177e4 1323
89bddce5 1324 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1325 if (sock) {
89bddce5
SH
1326 err = move_addr_to_kernel(umyaddr, addrlen, address);
1327 if (err >= 0) {
1328 err = security_socket_bind(sock,
1329 (struct sockaddr *)address,
1330 addrlen);
6cb153ca
BL
1331 if (!err)
1332 err = sock->ops->bind(sock,
89bddce5
SH
1333 (struct sockaddr *)
1334 address, addrlen);
1da177e4 1335 }
6cb153ca 1336 fput_light(sock->file, fput_needed);
89bddce5 1337 }
1da177e4
LT
1338 return err;
1339}
1340
1da177e4
LT
1341/*
1342 * Perform a listen. Basically, we allow the protocol to do anything
1343 * necessary for a listen, and if that works, we mark the socket as
1344 * ready for listening.
1345 */
1346
7a42c217 1347int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1348
1349asmlinkage long sys_listen(int fd, int backlog)
1350{
1351 struct socket *sock;
6cb153ca 1352 int err, fput_needed;
89bddce5
SH
1353
1354 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1355 if (sock) {
1356 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1357 backlog = sysctl_somaxconn;
1358
1359 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1360 if (!err)
1361 err = sock->ops->listen(sock, backlog);
1da177e4 1362
6cb153ca 1363 fput_light(sock->file, fput_needed);
1da177e4
LT
1364 }
1365 return err;
1366}
1367
1da177e4
LT
1368/*
1369 * For accept, we attempt to create a new socket, set up the link
1370 * with the client, wake up the client, then return the new
1371 * connected fd. We collect the address of the connector in kernel
1372 * space and move it to user at the very end. This is unclean because
1373 * we open the socket then return an error.
1374 *
1375 * 1003.1g adds the ability to recvmsg() to query connection pending
1376 * status to recvmsg. We need to add that support in a way thats
1377 * clean when we restucture accept also.
1378 */
1379
89bddce5
SH
1380asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1381 int __user *upeer_addrlen)
1da177e4
LT
1382{
1383 struct socket *sock, *newsock;
39d8c1b6 1384 struct file *newfile;
6cb153ca 1385 int err, len, newfd, fput_needed;
1da177e4
LT
1386 char address[MAX_SOCK_ADDR];
1387
6cb153ca 1388 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1389 if (!sock)
1390 goto out;
1391
1392 err = -ENFILE;
89bddce5 1393 if (!(newsock = sock_alloc()))
1da177e4
LT
1394 goto out_put;
1395
1396 newsock->type = sock->type;
1397 newsock->ops = sock->ops;
1398
1da177e4
LT
1399 /*
1400 * We don't need try_module_get here, as the listening socket (sock)
1401 * has the protocol module (sock->ops->owner) held.
1402 */
1403 __module_get(newsock->ops->owner);
1404
39d8c1b6
DM
1405 newfd = sock_alloc_fd(&newfile);
1406 if (unlikely(newfd < 0)) {
1407 err = newfd;
9a1875e6
DM
1408 sock_release(newsock);
1409 goto out_put;
39d8c1b6
DM
1410 }
1411
1412 err = sock_attach_fd(newsock, newfile);
1413 if (err < 0)
79f4f642 1414 goto out_fd_simple;
39d8c1b6 1415
a79af59e
FF
1416 err = security_socket_accept(sock, newsock);
1417 if (err)
39d8c1b6 1418 goto out_fd;
a79af59e 1419
1da177e4
LT
1420 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1421 if (err < 0)
39d8c1b6 1422 goto out_fd;
1da177e4
LT
1423
1424 if (upeer_sockaddr) {
89bddce5
SH
1425 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1426 &len, 2) < 0) {
1da177e4 1427 err = -ECONNABORTED;
39d8c1b6 1428 goto out_fd;
1da177e4 1429 }
89bddce5
SH
1430 err = move_addr_to_user(address, len, upeer_sockaddr,
1431 upeer_addrlen);
1da177e4 1432 if (err < 0)
39d8c1b6 1433 goto out_fd;
1da177e4
LT
1434 }
1435
1436 /* File flags are not inherited via accept() unlike another OSes. */
1437
39d8c1b6
DM
1438 fd_install(newfd, newfile);
1439 err = newfd;
1da177e4
LT
1440
1441 security_socket_post_accept(sock, newsock);
1442
1443out_put:
6cb153ca 1444 fput_light(sock->file, fput_needed);
1da177e4
LT
1445out:
1446 return err;
79f4f642
AD
1447out_fd_simple:
1448 sock_release(newsock);
1449 put_filp(newfile);
1450 put_unused_fd(newfd);
1451 goto out_put;
39d8c1b6 1452out_fd:
9606a216 1453 fput(newfile);
39d8c1b6 1454 put_unused_fd(newfd);
1da177e4
LT
1455 goto out_put;
1456}
1457
1da177e4
LT
1458/*
1459 * Attempt to connect to a socket with the server address. The address
1460 * is in user space so we verify it is OK and move it to kernel space.
1461 *
1462 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1463 * break bindings
1464 *
1465 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1466 * other SEQPACKET protocols that take time to connect() as it doesn't
1467 * include the -EINPROGRESS status for such sockets.
1468 */
1469
89bddce5
SH
1470asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1471 int addrlen)
1da177e4
LT
1472{
1473 struct socket *sock;
1474 char address[MAX_SOCK_ADDR];
6cb153ca 1475 int err, fput_needed;
1da177e4 1476
6cb153ca 1477 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1478 if (!sock)
1479 goto out;
1480 err = move_addr_to_kernel(uservaddr, addrlen, address);
1481 if (err < 0)
1482 goto out_put;
1483
89bddce5
SH
1484 err =
1485 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1486 if (err)
1487 goto out_put;
1488
89bddce5 1489 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1490 sock->file->f_flags);
1491out_put:
6cb153ca 1492 fput_light(sock->file, fput_needed);
1da177e4
LT
1493out:
1494 return err;
1495}
1496
1497/*
1498 * Get the local address ('name') of a socket object. Move the obtained
1499 * name to user space.
1500 */
1501
89bddce5
SH
1502asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1503 int __user *usockaddr_len)
1da177e4
LT
1504{
1505 struct socket *sock;
1506 char address[MAX_SOCK_ADDR];
6cb153ca 1507 int len, err, fput_needed;
89bddce5 1508
6cb153ca 1509 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1510 if (!sock)
1511 goto out;
1512
1513 err = security_socket_getsockname(sock);
1514 if (err)
1515 goto out_put;
1516
1517 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1518 if (err)
1519 goto out_put;
1520 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1521
1522out_put:
6cb153ca 1523 fput_light(sock->file, fput_needed);
1da177e4
LT
1524out:
1525 return err;
1526}
1527
1528/*
1529 * Get the remote address ('name') of a socket object. Move the obtained
1530 * name to user space.
1531 */
1532
89bddce5
SH
1533asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1534 int __user *usockaddr_len)
1da177e4
LT
1535{
1536 struct socket *sock;
1537 char address[MAX_SOCK_ADDR];
6cb153ca 1538 int len, err, fput_needed;
1da177e4 1539
89bddce5
SH
1540 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1541 if (sock != NULL) {
1da177e4
LT
1542 err = security_socket_getpeername(sock);
1543 if (err) {
6cb153ca 1544 fput_light(sock->file, fput_needed);
1da177e4
LT
1545 return err;
1546 }
1547
89bddce5
SH
1548 err =
1549 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1550 1);
1da177e4 1551 if (!err)
89bddce5
SH
1552 err = move_addr_to_user(address, len, usockaddr,
1553 usockaddr_len);
6cb153ca 1554 fput_light(sock->file, fput_needed);
1da177e4
LT
1555 }
1556 return err;
1557}
1558
1559/*
1560 * Send a datagram to a given address. We move the address into kernel
1561 * space and check the user space data area is readable before invoking
1562 * the protocol.
1563 */
1564
89bddce5
SH
1565asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1566 unsigned flags, struct sockaddr __user *addr,
1567 int addr_len)
1da177e4
LT
1568{
1569 struct socket *sock;
1570 char address[MAX_SOCK_ADDR];
1571 int err;
1572 struct msghdr msg;
1573 struct iovec iov;
6cb153ca
BL
1574 int fput_needed;
1575 struct file *sock_file;
1576
1577 sock_file = fget_light(fd, &fput_needed);
4387ff75 1578 err = -EBADF;
6cb153ca 1579 if (!sock_file)
4387ff75 1580 goto out;
6cb153ca
BL
1581
1582 sock = sock_from_file(sock_file, &err);
1da177e4 1583 if (!sock)
6cb153ca 1584 goto out_put;
89bddce5
SH
1585 iov.iov_base = buff;
1586 iov.iov_len = len;
1587 msg.msg_name = NULL;
1588 msg.msg_iov = &iov;
1589 msg.msg_iovlen = 1;
1590 msg.msg_control = NULL;
1591 msg.msg_controllen = 0;
1592 msg.msg_namelen = 0;
6cb153ca 1593 if (addr) {
1da177e4
LT
1594 err = move_addr_to_kernel(addr, addr_len, address);
1595 if (err < 0)
1596 goto out_put;
89bddce5
SH
1597 msg.msg_name = address;
1598 msg.msg_namelen = addr_len;
1da177e4
LT
1599 }
1600 if (sock->file->f_flags & O_NONBLOCK)
1601 flags |= MSG_DONTWAIT;
1602 msg.msg_flags = flags;
1603 err = sock_sendmsg(sock, &msg, len);
1604
89bddce5 1605out_put:
6cb153ca 1606 fput_light(sock_file, fput_needed);
4387ff75 1607out:
1da177e4
LT
1608 return err;
1609}
1610
1611/*
89bddce5 1612 * Send a datagram down a socket.
1da177e4
LT
1613 */
1614
89bddce5 1615asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1616{
1617 return sys_sendto(fd, buff, len, flags, NULL, 0);
1618}
1619
1620/*
89bddce5 1621 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1622 * sender. We verify the buffers are writable and if needed move the
1623 * sender address from kernel to user space.
1624 */
1625
89bddce5
SH
1626asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1627 unsigned flags, struct sockaddr __user *addr,
1628 int __user *addr_len)
1da177e4
LT
1629{
1630 struct socket *sock;
1631 struct iovec iov;
1632 struct msghdr msg;
1633 char address[MAX_SOCK_ADDR];
89bddce5 1634 int err, err2;
6cb153ca
BL
1635 struct file *sock_file;
1636 int fput_needed;
1637
1638 sock_file = fget_light(fd, &fput_needed);
4387ff75 1639 err = -EBADF;
6cb153ca 1640 if (!sock_file)
4387ff75 1641 goto out;
1da177e4 1642
6cb153ca 1643 sock = sock_from_file(sock_file, &err);
1da177e4 1644 if (!sock)
4387ff75 1645 goto out_put;
1da177e4 1646
89bddce5
SH
1647 msg.msg_control = NULL;
1648 msg.msg_controllen = 0;
1649 msg.msg_iovlen = 1;
1650 msg.msg_iov = &iov;
1651 iov.iov_len = size;
1652 iov.iov_base = ubuf;
1653 msg.msg_name = address;
1654 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1655 if (sock->file->f_flags & O_NONBLOCK)
1656 flags |= MSG_DONTWAIT;
89bddce5 1657 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1658
89bddce5
SH
1659 if (err >= 0 && addr != NULL) {
1660 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1661 if (err2 < 0)
1662 err = err2;
1da177e4 1663 }
4387ff75 1664out_put:
6cb153ca 1665 fput_light(sock_file, fput_needed);
4387ff75 1666out:
1da177e4
LT
1667 return err;
1668}
1669
1670/*
89bddce5 1671 * Receive a datagram from a socket.
1da177e4
LT
1672 */
1673
89bddce5
SH
1674asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1675 unsigned flags)
1da177e4
LT
1676{
1677 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1678}
1679
1680/*
1681 * Set a socket option. Because we don't know the option lengths we have
1682 * to pass the user mode parameter for the protocols to sort out.
1683 */
1684
89bddce5
SH
1685asmlinkage long sys_setsockopt(int fd, int level, int optname,
1686 char __user *optval, int optlen)
1da177e4 1687{
6cb153ca 1688 int err, fput_needed;
1da177e4
LT
1689 struct socket *sock;
1690
1691 if (optlen < 0)
1692 return -EINVAL;
89bddce5
SH
1693
1694 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1695 if (sock != NULL) {
1696 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1697 if (err)
1698 goto out_put;
1da177e4
LT
1699
1700 if (level == SOL_SOCKET)
89bddce5
SH
1701 err =
1702 sock_setsockopt(sock, level, optname, optval,
1703 optlen);
1da177e4 1704 else
89bddce5
SH
1705 err =
1706 sock->ops->setsockopt(sock, level, optname, optval,
1707 optlen);
6cb153ca
BL
1708out_put:
1709 fput_light(sock->file, fput_needed);
1da177e4
LT
1710 }
1711 return err;
1712}
1713
1714/*
1715 * Get a socket option. Because we don't know the option lengths we have
1716 * to pass a user mode parameter for the protocols to sort out.
1717 */
1718
89bddce5
SH
1719asmlinkage long sys_getsockopt(int fd, int level, int optname,
1720 char __user *optval, int __user *optlen)
1da177e4 1721{
6cb153ca 1722 int err, fput_needed;
1da177e4
LT
1723 struct socket *sock;
1724
89bddce5
SH
1725 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1726 if (sock != NULL) {
6cb153ca
BL
1727 err = security_socket_getsockopt(sock, level, optname);
1728 if (err)
1729 goto out_put;
1da177e4
LT
1730
1731 if (level == SOL_SOCKET)
89bddce5
SH
1732 err =
1733 sock_getsockopt(sock, level, optname, optval,
1734 optlen);
1da177e4 1735 else
89bddce5
SH
1736 err =
1737 sock->ops->getsockopt(sock, level, optname, optval,
1738 optlen);
6cb153ca
BL
1739out_put:
1740 fput_light(sock->file, fput_needed);
1da177e4
LT
1741 }
1742 return err;
1743}
1744
1da177e4
LT
1745/*
1746 * Shutdown a socket.
1747 */
1748
1749asmlinkage long sys_shutdown(int fd, int how)
1750{
6cb153ca 1751 int err, fput_needed;
1da177e4
LT
1752 struct socket *sock;
1753
89bddce5
SH
1754 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1755 if (sock != NULL) {
1da177e4 1756 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1757 if (!err)
1758 err = sock->ops->shutdown(sock, how);
1759 fput_light(sock->file, fput_needed);
1da177e4
LT
1760 }
1761 return err;
1762}
1763
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * They expand in the caller's scope and expect a 'flags' variable plus a
 * '<msg>_compat' pointer next to the '<msg>' pointer passed in.
 */
#define COMPAT_MSG(msg, member)					\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1770
1da177e4
LT
1771/*
1772 * BSD sendmsg interface
1773 */
1774
1775asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1776{
89bddce5
SH
1777 struct compat_msghdr __user *msg_compat =
1778 (struct compat_msghdr __user *)msg;
1da177e4
LT
1779 struct socket *sock;
1780 char address[MAX_SOCK_ADDR];
1781 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1782 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1783 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1784 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1785 unsigned char *ctl_buf = ctl;
1786 struct msghdr msg_sys;
1787 int err, ctl_len, iov_size, total_len;
6cb153ca 1788 int fput_needed;
89bddce5 1789
1da177e4
LT
1790 err = -EFAULT;
1791 if (MSG_CMSG_COMPAT & flags) {
1792 if (get_compat_msghdr(&msg_sys, msg_compat))
1793 return -EFAULT;
89bddce5
SH
1794 }
1795 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1796 return -EFAULT;
1797
6cb153ca 1798 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1799 if (!sock)
1da177e4
LT
1800 goto out;
1801
1802 /* do not move before msg_sys is valid */
1803 err = -EMSGSIZE;
1804 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1805 goto out_put;
1806
89bddce5 1807 /* Check whether to allocate the iovec area */
1da177e4
LT
1808 err = -ENOMEM;
1809 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1810 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1811 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1812 if (!iov)
1813 goto out_put;
1814 }
1815
1816 /* This will also move the address data into kernel space */
1817 if (MSG_CMSG_COMPAT & flags) {
1818 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1819 } else
1820 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1821 if (err < 0)
1da177e4
LT
1822 goto out_freeiov;
1823 total_len = err;
1824
1825 err = -ENOBUFS;
1826
1827 if (msg_sys.msg_controllen > INT_MAX)
1828 goto out_freeiov;
89bddce5 1829 ctl_len = msg_sys.msg_controllen;
1da177e4 1830 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1831 err =
1832 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1833 sizeof(ctl));
1da177e4
LT
1834 if (err)
1835 goto out_freeiov;
1836 ctl_buf = msg_sys.msg_control;
8920e8f9 1837 ctl_len = msg_sys.msg_controllen;
1da177e4 1838 } else if (ctl_len) {
89bddce5 1839 if (ctl_len > sizeof(ctl)) {
1da177e4 1840 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1841 if (ctl_buf == NULL)
1da177e4
LT
1842 goto out_freeiov;
1843 }
1844 err = -EFAULT;
1845 /*
1846 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1847 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1848 * checking falls down on this.
1849 */
89bddce5
SH
1850 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1851 ctl_len))
1da177e4
LT
1852 goto out_freectl;
1853 msg_sys.msg_control = ctl_buf;
1854 }
1855 msg_sys.msg_flags = flags;
1856
1857 if (sock->file->f_flags & O_NONBLOCK)
1858 msg_sys.msg_flags |= MSG_DONTWAIT;
1859 err = sock_sendmsg(sock, &msg_sys, total_len);
1860
1861out_freectl:
89bddce5 1862 if (ctl_buf != ctl)
1da177e4
LT
1863 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1864out_freeiov:
1865 if (iov != iovstack)
1866 sock_kfree_s(sock->sk, iov, iov_size);
1867out_put:
6cb153ca 1868 fput_light(sock->file, fput_needed);
89bddce5 1869out:
1da177e4
LT
1870 return err;
1871}
1872
1873/*
1874 * BSD recvmsg interface
1875 */
1876
89bddce5
SH
1877asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1878 unsigned int flags)
1da177e4 1879{
89bddce5
SH
1880 struct compat_msghdr __user *msg_compat =
1881 (struct compat_msghdr __user *)msg;
1da177e4
LT
1882 struct socket *sock;
1883 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1884 struct iovec *iov = iovstack;
1da177e4
LT
1885 struct msghdr msg_sys;
1886 unsigned long cmsg_ptr;
1887 int err, iov_size, total_len, len;
6cb153ca 1888 int fput_needed;
1da177e4
LT
1889
1890 /* kernel mode address */
1891 char addr[MAX_SOCK_ADDR];
1892
1893 /* user mode address pointers */
1894 struct sockaddr __user *uaddr;
1895 int __user *uaddr_len;
89bddce5 1896
1da177e4
LT
1897 if (MSG_CMSG_COMPAT & flags) {
1898 if (get_compat_msghdr(&msg_sys, msg_compat))
1899 return -EFAULT;
89bddce5
SH
1900 }
1901 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1902 return -EFAULT;
1da177e4 1903
6cb153ca 1904 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1905 if (!sock)
1906 goto out;
1907
1908 err = -EMSGSIZE;
1909 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1910 goto out_put;
89bddce5
SH
1911
1912 /* Check whether to allocate the iovec area */
1da177e4
LT
1913 err = -ENOMEM;
1914 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1915 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1916 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1917 if (!iov)
1918 goto out_put;
1919 }
1920
1921 /*
89bddce5
SH
1922 * Save the user-mode address (verify_iovec will change the
1923 * kernel msghdr to use the kernel address space)
1da177e4 1924 */
89bddce5
SH
1925
1926 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1927 uaddr_len = COMPAT_NAMELEN(msg);
1928 if (MSG_CMSG_COMPAT & flags) {
1929 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1930 } else
1931 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1932 if (err < 0)
1933 goto out_freeiov;
89bddce5 1934 total_len = err;
1da177e4
LT
1935
1936 cmsg_ptr = (unsigned long)msg_sys.msg_control;
1937 msg_sys.msg_flags = 0;
1938 if (MSG_CMSG_COMPAT & flags)
1939 msg_sys.msg_flags = MSG_CMSG_COMPAT;
89bddce5 1940
1da177e4
LT
1941 if (sock->file->f_flags & O_NONBLOCK)
1942 flags |= MSG_DONTWAIT;
1943 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1944 if (err < 0)
1945 goto out_freeiov;
1946 len = err;
1947
1948 if (uaddr != NULL) {
89bddce5
SH
1949 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1950 uaddr_len);
1da177e4
LT
1951 if (err < 0)
1952 goto out_freeiov;
1953 }
37f7f421
DM
1954 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1955 COMPAT_FLAGS(msg));
1da177e4
LT
1956 if (err)
1957 goto out_freeiov;
1958 if (MSG_CMSG_COMPAT & flags)
89bddce5 1959 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1960 &msg_compat->msg_controllen);
1961 else
89bddce5 1962 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1963 &msg->msg_controllen);
1964 if (err)
1965 goto out_freeiov;
1966 err = len;
1967
1968out_freeiov:
1969 if (iov != iovstack)
1970 sock_kfree_s(sock->sk, iov, iov_size);
1971out_put:
6cb153ca 1972 fput_light(sock->file, fput_needed);
1da177e4
LT
1973out:
1974 return err;
1975}
1976
1977#ifdef __ARCH_WANT_SYS_SOCKETCALL
1978
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by the call
 * number.  Entry 0 is never used: sys_socketcall() rejects call < 1.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3)
};

#undef AL
1988
1989/*
89bddce5 1990 * System call vectors.
1da177e4
LT
1991 *
1992 * Argument checking cleaned up. Saved 20% in size.
1993 * This function doesn't need to set the kernel lock because
89bddce5 1994 * it is set by the callees.
1da177e4
LT
1995 */
1996
1997asmlinkage long sys_socketcall(int call, unsigned long __user *args)
1998{
1999 unsigned long a[6];
89bddce5 2000 unsigned long a0, a1;
1da177e4
LT
2001 int err;
2002
89bddce5 2003 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
2004 return -EINVAL;
2005
2006 /* copy_from_user should be SMP safe. */
2007 if (copy_from_user(a, args, nargs[call]))
2008 return -EFAULT;
3ec3b2fb 2009
89bddce5 2010 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
2011 if (err)
2012 return err;
2013
89bddce5
SH
2014 a0 = a[0];
2015 a1 = a[1];
2016
2017 switch (call) {
2018 case SYS_SOCKET:
2019 err = sys_socket(a0, a1, a[2]);
2020 break;
2021 case SYS_BIND:
2022 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2023 break;
2024 case SYS_CONNECT:
2025 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2026 break;
2027 case SYS_LISTEN:
2028 err = sys_listen(a0, a1);
2029 break;
2030 case SYS_ACCEPT:
2031 err =
2032 sys_accept(a0, (struct sockaddr __user *)a1,
2033 (int __user *)a[2]);
2034 break;
2035 case SYS_GETSOCKNAME:
2036 err =
2037 sys_getsockname(a0, (struct sockaddr __user *)a1,
2038 (int __user *)a[2]);
2039 break;
2040 case SYS_GETPEERNAME:
2041 err =
2042 sys_getpeername(a0, (struct sockaddr __user *)a1,
2043 (int __user *)a[2]);
2044 break;
2045 case SYS_SOCKETPAIR:
2046 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2047 break;
2048 case SYS_SEND:
2049 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2050 break;
2051 case SYS_SENDTO:
2052 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2053 (struct sockaddr __user *)a[4], a[5]);
2054 break;
2055 case SYS_RECV:
2056 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2057 break;
2058 case SYS_RECVFROM:
2059 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2060 (struct sockaddr __user *)a[4],
2061 (int __user *)a[5]);
2062 break;
2063 case SYS_SHUTDOWN:
2064 err = sys_shutdown(a0, a1);
2065 break;
2066 case SYS_SETSOCKOPT:
2067 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2068 break;
2069 case SYS_GETSOCKOPT:
2070 err =
2071 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2072 (int __user *)a[4]);
2073 break;
2074 case SYS_SENDMSG:
2075 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2076 break;
2077 case SYS_RECVMSG:
2078 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2079 break;
2080 default:
2081 err = -EINVAL;
2082 break;
1da177e4
LT
2083 }
2084 return err;
2085}
2086
89bddce5 2087#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2088
55737fda
SH
2089/**
2090 * sock_register - add a socket protocol handler
2091 * @ops: description of protocol
2092 *
1da177e4
LT
2093 * This function is called by a protocol handler that wants to
2094 * advertise its address family, and have it linked into the
55737fda
SH
2095 * socket interface. The value ops->family coresponds to the
2096 * socket system call protocol family.
1da177e4 2097 */
f0fd27d4 2098int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2099{
2100 int err;
2101
2102 if (ops->family >= NPROTO) {
89bddce5
SH
2103 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2104 NPROTO);
1da177e4
LT
2105 return -ENOBUFS;
2106 }
55737fda
SH
2107
2108 spin_lock(&net_family_lock);
2109 if (net_families[ops->family])
2110 err = -EEXIST;
2111 else {
89bddce5 2112 net_families[ops->family] = ops;
1da177e4
LT
2113 err = 0;
2114 }
55737fda
SH
2115 spin_unlock(&net_family_lock);
2116
89bddce5 2117 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2118 return err;
2119}
2120
55737fda
SH
2121/**
2122 * sock_unregister - remove a protocol handler
2123 * @family: protocol family to remove
2124 *
1da177e4
LT
2125 * This function is called by a protocol handler that wants to
2126 * remove its address family, and have it unlinked from the
55737fda
SH
2127 * new socket creation.
2128 *
2129 * If protocol handler is a module, then it can use module reference
2130 * counts to protect against new references. If protocol handler is not
2131 * a module then it needs to provide its own protection in
2132 * the ops->create routine.
1da177e4 2133 */
f0fd27d4 2134void sock_unregister(int family)
1da177e4 2135{
f0fd27d4 2136 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2137
55737fda 2138 spin_lock(&net_family_lock);
89bddce5 2139 net_families[family] = NULL;
55737fda
SH
2140 spin_unlock(&net_family_lock);
2141
2142 synchronize_rcu();
2143
89bddce5 2144 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2145}
2146
77d76ea3 2147static int __init sock_init(void)
1da177e4
LT
2148{
2149 /*
89bddce5 2150 * Initialize sock SLAB cache.
1da177e4 2151 */
89bddce5 2152
1da177e4
LT
2153 sk_init();
2154
1da177e4 2155 /*
89bddce5 2156 * Initialize skbuff SLAB cache
1da177e4
LT
2157 */
2158 skb_init();
1da177e4
LT
2159
2160 /*
89bddce5 2161 * Initialize the protocols module.
1da177e4
LT
2162 */
2163
2164 init_inodecache();
2165 register_filesystem(&sock_fs_type);
2166 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2167
2168 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2169 */
2170
2171#ifdef CONFIG_NETFILTER
2172 netfilter_init();
2173#endif
cbeb321a
DM
2174
2175 return 0;
1da177e4
LT
2176}
2177
77d76ea3
AK
2178core_initcall(sock_init); /* early initcall */
2179
1da177e4
LT
2180#ifdef CONFIG_PROC_FS
2181void socket_seq_show(struct seq_file *seq)
2182{
2183 int cpu;
2184 int counter = 0;
2185
6f912042 2186 for_each_possible_cpu(cpu)
89bddce5 2187 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2188
2189 /* It can be negative, by the way. 8) */
2190 if (counter < 0)
2191 counter = 0;
2192
2193 seq_printf(seq, "sockets: used %d\n", counter);
2194}
89bddce5 2195#endif /* CONFIG_PROC_FS */
1da177e4 2196
89bbfc95
SP
2197#ifdef CONFIG_COMPAT
2198static long compat_sock_ioctl(struct file *file, unsigned cmd,
89bddce5 2199 unsigned long arg)
89bbfc95
SP
2200{
2201 struct socket *sock = file->private_data;
2202 int ret = -ENOIOCTLCMD;
2203
2204 if (sock->ops->compat_ioctl)
2205 ret = sock->ops->compat_ioctl(sock, cmd, arg);
2206
2207 return ret;
2208}
2209#endif
2210
ac5a488e
SS
2211int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2212{
2213 return sock->ops->bind(sock, addr, addrlen);
2214}
2215
2216int kernel_listen(struct socket *sock, int backlog)
2217{
2218 return sock->ops->listen(sock, backlog);
2219}
2220
2221int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2222{
2223 struct sock *sk = sock->sk;
2224 int err;
2225
2226 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2227 newsock);
2228 if (err < 0)
2229 goto done;
2230
2231 err = sock->ops->accept(sock, *newsock, flags);
2232 if (err < 0) {
2233 sock_release(*newsock);
2234 goto done;
2235 }
2236
2237 (*newsock)->ops = sock->ops;
2238
2239done:
2240 return err;
2241}
2242
2243int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2244 int flags)
ac5a488e
SS
2245{
2246 return sock->ops->connect(sock, addr, addrlen, flags);
2247}
2248
2249int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2250 int *addrlen)
2251{
2252 return sock->ops->getname(sock, addr, addrlen, 0);
2253}
2254
2255int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2256 int *addrlen)
2257{
2258 return sock->ops->getname(sock, addr, addrlen, 1);
2259}
2260
2261int kernel_getsockopt(struct socket *sock, int level, int optname,
2262 char *optval, int *optlen)
2263{
2264 mm_segment_t oldfs = get_fs();
2265 int err;
2266
2267 set_fs(KERNEL_DS);
2268 if (level == SOL_SOCKET)
2269 err = sock_getsockopt(sock, level, optname, optval, optlen);
2270 else
2271 err = sock->ops->getsockopt(sock, level, optname, optval,
2272 optlen);
2273 set_fs(oldfs);
2274 return err;
2275}
2276
2277int kernel_setsockopt(struct socket *sock, int level, int optname,
2278 char *optval, int optlen)
2279{
2280 mm_segment_t oldfs = get_fs();
2281 int err;
2282
2283 set_fs(KERNEL_DS);
2284 if (level == SOL_SOCKET)
2285 err = sock_setsockopt(sock, level, optname, optval, optlen);
2286 else
2287 err = sock->ops->setsockopt(sock, level, optname, optval,
2288 optlen);
2289 set_fs(oldfs);
2290 return err;
2291}
2292
2293int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2294 size_t size, int flags)
2295{
2296 if (sock->ops->sendpage)
2297 return sock->ops->sendpage(sock, page, offset, size, flags);
2298
2299 return sock_no_sendpage(sock, page, offset, size, flags);
2300}
2301
2302int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2303{
2304 mm_segment_t oldfs = get_fs();
2305 int err;
2306
2307 set_fs(KERNEL_DS);
2308 err = sock->ops->ioctl(sock, cmd, arg);
2309 set_fs(oldfs);
2310
2311 return err;
2312}
2313
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket creation, registration and generic send/receive entry points */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() helpers for in-kernel socket users */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
2339EXPORT_SYMBOL(kernel_sock_ioctl);