[NET]: Adding SO_TIMESTAMPNS / SCM_TIMESTAMPNS support
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1da177e4
LT
87
88#include <asm/uaccess.h>
89#include <asm/unistd.h>
90
91#include <net/compat.h>
92
93#include <net/sock.h>
94#include <linux/netfilter.h>
95
96static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
97static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
98 unsigned long nr_segs, loff_t pos);
99static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
100 unsigned long nr_segs, loff_t pos);
89bddce5 101static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
102
103static int sock_close(struct inode *inode, struct file *file);
104static unsigned int sock_poll(struct file *file,
105 struct poll_table_struct *wait);
89bddce5 106static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
107#ifdef CONFIG_COMPAT
108static long compat_sock_ioctl(struct file *file,
89bddce5 109 unsigned int cmd, unsigned long arg);
89bbfc95 110#endif
1da177e4 111static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
112static ssize_t sock_sendpage(struct file *file, struct page *page,
113 int offset, size_t size, loff_t *ppos, int more);
114
1da177e4
LT
115/*
116 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
117 * in the operation structures but are done directly via the socketcall() multiplexor.
118 */
119
da7071d7 120static const struct file_operations socket_file_ops = {
1da177e4
LT
121 .owner = THIS_MODULE,
122 .llseek = no_llseek,
123 .aio_read = sock_aio_read,
124 .aio_write = sock_aio_write,
125 .poll = sock_poll,
126 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
127#ifdef CONFIG_COMPAT
128 .compat_ioctl = compat_sock_ioctl,
129#endif
1da177e4
LT
130 .mmap = sock_mmap,
131 .open = sock_no_open, /* special open code to disallow open via /proc */
132 .release = sock_close,
133 .fasync = sock_fasync,
5274f052
JA
134 .sendpage = sock_sendpage,
135 .splice_write = generic_splice_sendpage,
1da177e4
LT
136};
137
138/*
139 * The protocol list. Each protocol is registered in here.
140 */
141
1da177e4 142static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 143static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 144
1da177e4
LT
145/*
146 * Statistics counters of the socket lists
147 */
148
149static DEFINE_PER_CPU(int, sockets_in_use) = 0;
150
151/*
89bddce5
SH
152 * Support routines.
153 * Move socket addresses back and forth across the kernel/user
154 * divide and look after the messy bits.
1da177e4
LT
155 */
156
/*
 * Largest socket address, in bytes, that may cross the kernel/user
 * boundary: 108 for Unix domain, 16 for IP, 16 for IPX, 24 for IPv6,
 * about 80 for AX.25.  Must be at least one bigger than the AF_UNIX
 * size (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 165
1da177e4
LT
166/**
167 * move_addr_to_kernel - copy a socket address into kernel space
168 * @uaddr: Address in user space
169 * @kaddr: Address in kernel space
170 * @ulen: Length in user space
171 *
172 * The address is copied into kernel space. If the provided address is
173 * too long an error code of -EINVAL is returned. If the copy gives
174 * invalid addresses -EFAULT is returned. On a success 0 is returned.
175 */
176
177int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
178{
89bddce5 179 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 180 return -EINVAL;
89bddce5 181 if (ulen == 0)
1da177e4 182 return 0;
89bddce5 183 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 184 return -EFAULT;
3ec3b2fb 185 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
186}
187
188/**
189 * move_addr_to_user - copy an address to user space
190 * @kaddr: kernel space address
191 * @klen: length of address in kernel
192 * @uaddr: user space address
193 * @ulen: pointer to user length field
194 *
195 * The value pointed to by ulen on entry is the buffer length available.
196 * This is overwritten with the buffer space used. -EINVAL is returned
197 * if an overlong buffer is specified or a negative buffer size. -EFAULT
198 * is returned if either the buffer or the length field are not
199 * accessible.
200 * After copying the data up to the limit the user specifies, the true
201 * length of the data is written over the length limit the user
202 * specified. Zero is returned for a success.
203 */
89bddce5
SH
204
205int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
206 int __user *ulen)
1da177e4
LT
207{
208 int err;
209 int len;
210
89bddce5
SH
211 err = get_user(len, ulen);
212 if (err)
1da177e4 213 return err;
89bddce5
SH
214 if (len > klen)
215 len = klen;
216 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 217 return -EINVAL;
89bddce5 218 if (len) {
d6fe3945
SG
219 if (audit_sockaddr(klen, kaddr))
220 return -ENOMEM;
89bddce5 221 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
222 return -EFAULT;
223 }
224 /*
89bddce5
SH
225 * "fromlen shall refer to the value before truncation.."
226 * 1003.1g
1da177e4
LT
227 */
228 return __put_user(klen, ulen);
229}
230
231#define SOCKFS_MAGIC 0x534F434B
232
e18b890b 233static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
234
235static struct inode *sock_alloc_inode(struct super_block *sb)
236{
237 struct socket_alloc *ei;
89bddce5 238
e94b1766 239 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
240 if (!ei)
241 return NULL;
242 init_waitqueue_head(&ei->socket.wait);
89bddce5 243
1da177e4
LT
244 ei->socket.fasync_list = NULL;
245 ei->socket.state = SS_UNCONNECTED;
246 ei->socket.flags = 0;
247 ei->socket.ops = NULL;
248 ei->socket.sk = NULL;
249 ei->socket.file = NULL;
1da177e4
LT
250
251 return &ei->vfs_inode;
252}
253
254static void sock_destroy_inode(struct inode *inode)
255{
256 kmem_cache_free(sock_inode_cachep,
257 container_of(inode, struct socket_alloc, vfs_inode));
258}
259
e18b890b 260static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
1da177e4 261{
89bddce5 262 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 263
89bddce5
SH
264 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
265 == SLAB_CTOR_CONSTRUCTOR)
1da177e4
LT
266 inode_init_once(&ei->vfs_inode);
267}
89bddce5 268
1da177e4
LT
269static int init_inodecache(void)
270{
271 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
272 sizeof(struct socket_alloc),
273 0,
274 (SLAB_HWCACHE_ALIGN |
275 SLAB_RECLAIM_ACCOUNT |
276 SLAB_MEM_SPREAD),
277 init_once,
278 NULL);
1da177e4
LT
279 if (sock_inode_cachep == NULL)
280 return -ENOMEM;
281 return 0;
282}
283
284static struct super_operations sockfs_ops = {
285 .alloc_inode = sock_alloc_inode,
286 .destroy_inode =sock_destroy_inode,
287 .statfs = simple_statfs,
288};
289
454e2398 290static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
291 int flags, const char *dev_name, void *data,
292 struct vfsmount *mnt)
1da177e4 293{
454e2398
DH
294 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
295 mnt);
1da177e4
LT
296}
297
ba89966c 298static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
299
300static struct file_system_type sock_fs_type = {
301 .name = "sockfs",
302 .get_sb = sockfs_get_sb,
303 .kill_sb = kill_anon_super,
304};
89bddce5 305
1da177e4
LT
306static int sockfs_delete_dentry(struct dentry *dentry)
307{
304e61e6
ED
308 /*
309 * At creation time, we pretended this dentry was hashed
310 * (by clearing DCACHE_UNHASHED bit in d_flags)
311 * At delete time, we restore the truth : not hashed.
312 * (so that dput() can proceed correctly)
313 */
314 dentry->d_flags |= DCACHE_UNHASHED;
315 return 0;
1da177e4
LT
316}
317static struct dentry_operations sockfs_dentry_operations = {
89bddce5 318 .d_delete = sockfs_delete_dentry,
1da177e4
LT
319};
320
321/*
322 * Obtains the first available file descriptor and sets it up for use.
323 *
39d8c1b6
DM
324 * These functions create file structures and maps them to fd space
325 * of the current process. On success it returns file descriptor
1da177e4
LT
326 * and file struct implicitly stored in sock->file.
327 * Note that another thread may close file descriptor before we return
328 * from this function. We use the fact that now we do not refer
329 * to socket after mapping. If one day we will need it, this
330 * function will increment ref. count on file by 1.
331 *
332 * In any case returned fd MAY BE not valid!
333 * This race condition is unavoidable
334 * with shared fd spaces, we cannot solve it inside kernel,
335 * but we take care of internal coherence yet.
336 */
337
39d8c1b6 338static int sock_alloc_fd(struct file **filep)
1da177e4
LT
339{
340 int fd;
1da177e4
LT
341
342 fd = get_unused_fd();
39d8c1b6 343 if (likely(fd >= 0)) {
1da177e4
LT
344 struct file *file = get_empty_filp();
345
39d8c1b6
DM
346 *filep = file;
347 if (unlikely(!file)) {
1da177e4 348 put_unused_fd(fd);
39d8c1b6 349 return -ENFILE;
1da177e4 350 }
39d8c1b6
DM
351 } else
352 *filep = NULL;
353 return fd;
354}
1da177e4 355
39d8c1b6
DM
356static int sock_attach_fd(struct socket *sock, struct file *file)
357{
358 struct qstr this;
359 char name[32];
360
361 this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
362 this.name = name;
304e61e6 363 this.hash = 0;
39d8c1b6 364
3126a42c
JS
365 file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
366 if (unlikely(!file->f_path.dentry))
39d8c1b6
DM
367 return -ENOMEM;
368
3126a42c 369 file->f_path.dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
370 /*
371 * We dont want to push this dentry into global dentry hash table.
372 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
373 * This permits a working /proc/$pid/fd/XXX on sockets
374 */
3126a42c
JS
375 file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
376 d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
377 file->f_path.mnt = mntget(sock_mnt);
378 file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
39d8c1b6
DM
379
380 sock->file = file;
381 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
382 file->f_mode = FMODE_READ | FMODE_WRITE;
383 file->f_flags = O_RDWR;
384 file->f_pos = 0;
385 file->private_data = sock;
1da177e4 386
39d8c1b6
DM
387 return 0;
388}
389
/*
 * Allocate a descriptor and a file for @sock, attach the socket to the
 * file, and install the file in the descriptor table.
 *
 * Returns the new descriptor, or a negative errno.  If attaching fails,
 * both the file and the descriptor are released first.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
407
6cb153ca
BL
408static struct socket *sock_from_file(struct file *file, int *err)
409{
6cb153ca
BL
410 if (file->f_op == &socket_file_ops)
411 return file->private_data; /* set in sock_map_fd */
412
23bb80d2
ED
413 *err = -ENOTSOCK;
414 return NULL;
6cb153ca
BL
415}
416
1da177e4
LT
417/**
418 * sockfd_lookup - Go from a file number to its socket slot
419 * @fd: file handle
420 * @err: pointer to an error code return
421 *
422 * The file handle passed in is locked and the socket it is bound
423 * too is returned. If an error occurs the err pointer is overwritten
424 * with a negative errno code and NULL is returned. The function checks
425 * for both invalid handles and passing a handle which is not a socket.
426 *
427 * On a success the socket object pointer is returned.
428 */
429
430struct socket *sockfd_lookup(int fd, int *err)
431{
432 struct file *file;
1da177e4
LT
433 struct socket *sock;
434
89bddce5
SH
435 file = fget(fd);
436 if (!file) {
1da177e4
LT
437 *err = -EBADF;
438 return NULL;
439 }
89bddce5 440
6cb153ca
BL
441 sock = sock_from_file(file, err);
442 if (!sock)
1da177e4 443 fput(file);
6cb153ca
BL
444 return sock;
445}
1da177e4 446
6cb153ca
BL
447static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
448{
449 struct file *file;
450 struct socket *sock;
451
3672558c 452 *err = -EBADF;
6cb153ca
BL
453 file = fget_light(fd, fput_needed);
454 if (file) {
455 sock = sock_from_file(file, err);
456 if (sock)
457 return sock;
458 fput_light(file, *fput_needed);
1da177e4 459 }
6cb153ca 460 return NULL;
1da177e4
LT
461}
462
463/**
464 * sock_alloc - allocate a socket
89bddce5 465 *
1da177e4
LT
466 * Allocate a new inode and socket object. The two are bound together
467 * and initialised. The socket is then returned. If we are out of inodes
468 * NULL is returned.
469 */
470
471static struct socket *sock_alloc(void)
472{
89bddce5
SH
473 struct inode *inode;
474 struct socket *sock;
1da177e4
LT
475
476 inode = new_inode(sock_mnt->mnt_sb);
477 if (!inode)
478 return NULL;
479
480 sock = SOCKET_I(inode);
481
89bddce5 482 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
483 inode->i_uid = current->fsuid;
484 inode->i_gid = current->fsgid;
485
486 get_cpu_var(sockets_in_use)++;
487 put_cpu_var(sockets_in_use);
488 return sock;
489}
490
491/*
492 * In theory you can't get an open on this inode, but /proc provides
493 * a back door. Remember to keep it shut otherwise you'll let the
494 * creepy crawlies in.
495 */
89bddce5 496
1da177e4
LT
497static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
498{
499 return -ENXIO;
500}
501
4b6f5d20 502const struct file_operations bad_sock_fops = {
1da177e4
LT
503 .owner = THIS_MODULE,
504 .open = sock_no_open,
505};
506
507/**
508 * sock_release - close a socket
509 * @sock: socket to close
510 *
511 * The socket is released from the protocol stack if it has a release
512 * callback, and the inode is then released if the socket is bound to
89bddce5 513 * an inode not a file.
1da177e4 514 */
89bddce5 515
1da177e4
LT
516void sock_release(struct socket *sock)
517{
518 if (sock->ops) {
519 struct module *owner = sock->ops->owner;
520
521 sock->ops->release(sock);
522 sock->ops = NULL;
523 module_put(owner);
524 }
525
526 if (sock->fasync_list)
527 printk(KERN_ERR "sock_release: fasync list not empty!\n");
528
529 get_cpu_var(sockets_in_use)--;
530 put_cpu_var(sockets_in_use);
531 if (!sock->file) {
532 iput(SOCK_INODE(sock));
533 return;
534 }
89bddce5 535 sock->file = NULL;
1da177e4
LT
536}
537
89bddce5 538static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
539 struct msghdr *msg, size_t size)
540{
541 struct sock_iocb *si = kiocb_to_siocb(iocb);
542 int err;
543
544 si->sock = sock;
545 si->scm = NULL;
546 si->msg = msg;
547 si->size = size;
548
549 err = security_socket_sendmsg(sock, msg, size);
550 if (err)
551 return err;
552
553 return sock->ops->sendmsg(iocb, sock, msg, size);
554}
555
556int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
557{
558 struct kiocb iocb;
559 struct sock_iocb siocb;
560 int ret;
561
562 init_sync_kiocb(&iocb, NULL);
563 iocb.private = &siocb;
564 ret = __sock_sendmsg(&iocb, sock, msg, size);
565 if (-EIOCBQUEUED == ret)
566 ret = wait_on_sync_kiocb(&iocb);
567 return ret;
568}
569
570int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
571 struct kvec *vec, size_t num, size_t size)
572{
573 mm_segment_t oldfs = get_fs();
574 int result;
575
576 set_fs(KERNEL_DS);
577 /*
578 * the following is safe, since for compiler definitions of kvec and
579 * iovec are identical, yielding the same in-core layout and alignment
580 */
89bddce5 581 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
582 msg->msg_iovlen = num;
583 result = sock_sendmsg(sock, msg, size);
584 set_fs(oldfs);
585 return result;
586}
587
92f37fd2
ED
588/*
589 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
590 */
591void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
592 struct sk_buff *skb)
593{
594 ktime_t kt = skb->tstamp;
595
596 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
597 struct timeval tv;
598 /* Race occurred between timestamp enabling and packet
599 receiving. Fill in the current time for now. */
600 if (kt.tv64 == 0)
601 kt = ktime_get_real();
602 skb->tstamp = kt;
603 tv = ktime_to_timeval(kt);
604 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
605 } else {
606 struct timespec ts;
607 /* Race occurred between timestamp enabling and packet
608 receiving. Fill in the current time for now. */
609 if (kt.tv64 == 0)
610 kt = ktime_get_real();
611 skb->tstamp = kt;
612 ts = ktime_to_timespec(kt);
613 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
614 }
615}
616
89bddce5 617static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
618 struct msghdr *msg, size_t size, int flags)
619{
620 int err;
621 struct sock_iocb *si = kiocb_to_siocb(iocb);
622
623 si->sock = sock;
624 si->scm = NULL;
625 si->msg = msg;
626 si->size = size;
627 si->flags = flags;
628
629 err = security_socket_recvmsg(sock, msg, size, flags);
630 if (err)
631 return err;
632
633 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
634}
635
89bddce5 636int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
637 size_t size, int flags)
638{
639 struct kiocb iocb;
640 struct sock_iocb siocb;
641 int ret;
642
89bddce5 643 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
644 iocb.private = &siocb;
645 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
646 if (-EIOCBQUEUED == ret)
647 ret = wait_on_sync_kiocb(&iocb);
648 return ret;
649}
650
89bddce5
SH
651int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
652 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
653{
654 mm_segment_t oldfs = get_fs();
655 int result;
656
657 set_fs(KERNEL_DS);
658 /*
659 * the following is safe, since for compiler definitions of kvec and
660 * iovec are identical, yielding the same in-core layout and alignment
661 */
89bddce5 662 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
663 result = sock_recvmsg(sock, msg, size, flags);
664 set_fs(oldfs);
665 return result;
666}
667
668static void sock_aio_dtor(struct kiocb *iocb)
669{
670 kfree(iocb->private);
671}
672
ce1d4d3e
CH
673static ssize_t sock_sendpage(struct file *file, struct page *page,
674 int offset, size_t size, loff_t *ppos, int more)
1da177e4 675{
1da177e4
LT
676 struct socket *sock;
677 int flags;
678
ce1d4d3e
CH
679 sock = file->private_data;
680
681 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
682 if (more)
683 flags |= MSG_MORE;
684
685 return sock->ops->sendpage(sock, page, offset, size, flags);
686}
1da177e4 687
ce1d4d3e 688static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 689 struct sock_iocb *siocb)
ce1d4d3e
CH
690{
691 if (!is_sync_kiocb(iocb)) {
692 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
693 if (!siocb)
694 return NULL;
1da177e4
LT
695 iocb->ki_dtor = sock_aio_dtor;
696 }
1da177e4 697
ce1d4d3e 698 siocb->kiocb = iocb;
ce1d4d3e
CH
699 iocb->private = siocb;
700 return siocb;
1da177e4
LT
701}
702
ce1d4d3e 703static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
704 struct file *file, const struct iovec *iov,
705 unsigned long nr_segs)
ce1d4d3e
CH
706{
707 struct socket *sock = file->private_data;
708 size_t size = 0;
709 int i;
1da177e4 710
89bddce5
SH
711 for (i = 0; i < nr_segs; i++)
712 size += iov[i].iov_len;
1da177e4 713
ce1d4d3e
CH
714 msg->msg_name = NULL;
715 msg->msg_namelen = 0;
716 msg->msg_control = NULL;
717 msg->msg_controllen = 0;
89bddce5 718 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
719 msg->msg_iovlen = nr_segs;
720 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
721
722 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
723}
724
027445c3
BP
725static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
726 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
727{
728 struct sock_iocb siocb, *x;
729
1da177e4
LT
730 if (pos != 0)
731 return -ESPIPE;
027445c3
BP
732
733 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
734 return 0;
735
027445c3
BP
736
737 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
738 if (!x)
739 return -ENOMEM;
027445c3 740 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
741}
742
ce1d4d3e 743static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
744 struct file *file, const struct iovec *iov,
745 unsigned long nr_segs)
1da177e4 746{
ce1d4d3e
CH
747 struct socket *sock = file->private_data;
748 size_t size = 0;
749 int i;
1da177e4 750
89bddce5
SH
751 for (i = 0; i < nr_segs; i++)
752 size += iov[i].iov_len;
1da177e4 753
ce1d4d3e
CH
754 msg->msg_name = NULL;
755 msg->msg_namelen = 0;
756 msg->msg_control = NULL;
757 msg->msg_controllen = 0;
89bddce5 758 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
759 msg->msg_iovlen = nr_segs;
760 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
761 if (sock->type == SOCK_SEQPACKET)
762 msg->msg_flags |= MSG_EOR;
1da177e4 763
ce1d4d3e 764 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
765}
766
027445c3
BP
767static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
768 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
769{
770 struct sock_iocb siocb, *x;
1da177e4 771
ce1d4d3e
CH
772 if (pos != 0)
773 return -ESPIPE;
027445c3
BP
774
775 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
ce1d4d3e 776 return 0;
1da177e4 777
027445c3 778 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
779 if (!x)
780 return -ENOMEM;
1da177e4 781
027445c3 782 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
783}
784
1da177e4
LT
785/*
786 * Atomic setting of ioctl hooks to avoid race
787 * with module unload.
788 */
789
4a3e2f71 790static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 791static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 792
89bddce5 793void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 794{
4a3e2f71 795 mutex_lock(&br_ioctl_mutex);
1da177e4 796 br_ioctl_hook = hook;
4a3e2f71 797 mutex_unlock(&br_ioctl_mutex);
1da177e4 798}
89bddce5 799
1da177e4
LT
800EXPORT_SYMBOL(brioctl_set);
801
4a3e2f71 802static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 803static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 804
89bddce5 805void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 806{
4a3e2f71 807 mutex_lock(&vlan_ioctl_mutex);
1da177e4 808 vlan_ioctl_hook = hook;
4a3e2f71 809 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 810}
89bddce5 811
1da177e4
LT
812EXPORT_SYMBOL(vlan_ioctl_set);
813
4a3e2f71 814static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 815static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 816
89bddce5 817void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 818{
4a3e2f71 819 mutex_lock(&dlci_ioctl_mutex);
1da177e4 820 dlci_ioctl_hook = hook;
4a3e2f71 821 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 822}
89bddce5 823
1da177e4
LT
824EXPORT_SYMBOL(dlci_ioctl_set);
825
826/*
827 * With an ioctl, arg may well be a user mode pointer, but we don't know
828 * what to do with it - that's up to the protocol still.
829 */
830
831static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
832{
833 struct socket *sock;
834 void __user *argp = (void __user *)arg;
835 int pid, err;
836
b69aee04 837 sock = file->private_data;
1da177e4
LT
838 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
839 err = dev_ioctl(cmd, argp);
840 } else
d86b5e0e 841#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
842 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
843 err = dev_ioctl(cmd, argp);
844 } else
89bddce5
SH
845#endif /* CONFIG_WIRELESS_EXT */
846 switch (cmd) {
1da177e4
LT
847 case FIOSETOWN:
848 case SIOCSPGRP:
849 err = -EFAULT;
850 if (get_user(pid, (int __user *)argp))
851 break;
852 err = f_setown(sock->file, pid, 1);
853 break;
854 case FIOGETOWN:
855 case SIOCGPGRP:
609d7fa9 856 err = put_user(f_getown(sock->file),
89bddce5 857 (int __user *)argp);
1da177e4
LT
858 break;
859 case SIOCGIFBR:
860 case SIOCSIFBR:
861 case SIOCBRADDBR:
862 case SIOCBRDELBR:
863 err = -ENOPKG;
864 if (!br_ioctl_hook)
865 request_module("bridge");
866
4a3e2f71 867 mutex_lock(&br_ioctl_mutex);
89bddce5 868 if (br_ioctl_hook)
1da177e4 869 err = br_ioctl_hook(cmd, argp);
4a3e2f71 870 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
871 break;
872 case SIOCGIFVLAN:
873 case SIOCSIFVLAN:
874 err = -ENOPKG;
875 if (!vlan_ioctl_hook)
876 request_module("8021q");
877
4a3e2f71 878 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
879 if (vlan_ioctl_hook)
880 err = vlan_ioctl_hook(argp);
4a3e2f71 881 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 882 break;
1da177e4
LT
883 case SIOCADDDLCI:
884 case SIOCDELDLCI:
885 err = -ENOPKG;
886 if (!dlci_ioctl_hook)
887 request_module("dlci");
888
889 if (dlci_ioctl_hook) {
4a3e2f71 890 mutex_lock(&dlci_ioctl_mutex);
1da177e4 891 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 892 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
893 }
894 break;
895 default:
896 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
897
898 /*
899 * If this ioctl is unknown try to hand it down
900 * to the NIC driver.
901 */
902 if (err == -ENOIOCTLCMD)
903 err = dev_ioctl(cmd, argp);
1da177e4 904 break;
89bddce5 905 }
1da177e4
LT
906 return err;
907}
908
909int sock_create_lite(int family, int type, int protocol, struct socket **res)
910{
911 int err;
912 struct socket *sock = NULL;
89bddce5 913
1da177e4
LT
914 err = security_socket_create(family, type, protocol, 1);
915 if (err)
916 goto out;
917
918 sock = sock_alloc();
919 if (!sock) {
920 err = -ENOMEM;
921 goto out;
922 }
923
1da177e4 924 sock->type = type;
7420ed23
VY
925 err = security_socket_post_create(sock, family, type, protocol, 1);
926 if (err)
927 goto out_release;
928
1da177e4
LT
929out:
930 *res = sock;
931 return err;
7420ed23
VY
932out_release:
933 sock_release(sock);
934 sock = NULL;
935 goto out;
1da177e4
LT
936}
937
938/* No kernel lock held - perfect */
89bddce5 939static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
940{
941 struct socket *sock;
942
943 /*
89bddce5 944 * We can't return errors to poll, so it's either yes or no.
1da177e4 945 */
b69aee04 946 sock = file->private_data;
1da177e4
LT
947 return sock->ops->poll(file, sock, wait);
948}
949
89bddce5 950static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 951{
b69aee04 952 struct socket *sock = file->private_data;
1da177e4
LT
953
954 return sock->ops->mmap(file, sock, vma);
955}
956
20380731 957static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
958{
959 /*
89bddce5
SH
960 * It was possible the inode is NULL we were
961 * closing an unfinished socket.
1da177e4
LT
962 */
963
89bddce5 964 if (!inode) {
1da177e4
LT
965 printk(KERN_DEBUG "sock_close: NULL inode\n");
966 return 0;
967 }
968 sock_fasync(-1, filp, 0);
969 sock_release(SOCKET_I(inode));
970 return 0;
971}
972
973/*
974 * Update the socket async list
975 *
976 * Fasync_list locking strategy.
977 *
978 * 1. fasync_list is modified only under process context socket lock
979 * i.e. under semaphore.
980 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
981 * or under socket lock.
982 * 3. fasync_list can be used from softirq context, so that
983 * modification under socket lock have to be enhanced with
984 * write_lock_bh(&sk->sk_callback_lock).
985 * --ANK (990710)
986 */
987
988static int sock_fasync(int fd, struct file *filp, int on)
989{
89bddce5 990 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
991 struct socket *sock;
992 struct sock *sk;
993
89bddce5 994 if (on) {
8b3a7005 995 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 996 if (fna == NULL)
1da177e4
LT
997 return -ENOMEM;
998 }
999
b69aee04 1000 sock = filp->private_data;
1da177e4 1001
89bddce5
SH
1002 sk = sock->sk;
1003 if (sk == NULL) {
1da177e4
LT
1004 kfree(fna);
1005 return -EINVAL;
1006 }
1007
1008 lock_sock(sk);
1009
89bddce5 1010 prev = &(sock->fasync_list);
1da177e4 1011
89bddce5
SH
1012 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1013 if (fa->fa_file == filp)
1da177e4
LT
1014 break;
1015
89bddce5
SH
1016 if (on) {
1017 if (fa != NULL) {
1da177e4 1018 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1019 fa->fa_fd = fd;
1da177e4
LT
1020 write_unlock_bh(&sk->sk_callback_lock);
1021
1022 kfree(fna);
1023 goto out;
1024 }
89bddce5
SH
1025 fna->fa_file = filp;
1026 fna->fa_fd = fd;
1027 fna->magic = FASYNC_MAGIC;
1028 fna->fa_next = sock->fasync_list;
1da177e4 1029 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1030 sock->fasync_list = fna;
1da177e4 1031 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1032 } else {
1033 if (fa != NULL) {
1da177e4 1034 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1035 *prev = fa->fa_next;
1da177e4
LT
1036 write_unlock_bh(&sk->sk_callback_lock);
1037 kfree(fa);
1038 }
1039 }
1040
1041out:
1042 release_sock(sock->sk);
1043 return 0;
1044}
1045
1046/* This function may be called only under socket lock or callback_lock */
1047
1048int sock_wake_async(struct socket *sock, int how, int band)
1049{
1050 if (!sock || !sock->fasync_list)
1051 return -1;
89bddce5 1052 switch (how) {
1da177e4 1053 case 1:
89bddce5 1054
1da177e4
LT
1055 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1056 break;
1057 goto call_kill;
1058 case 2:
1059 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1060 break;
1061 /* fall through */
1062 case 0:
89bddce5 1063call_kill:
1da177e4
LT
1064 __kill_fasync(sock->fasync_list, SIGIO, band);
1065 break;
1066 case 3:
1067 __kill_fasync(sock->fasync_list, SIGURG, band);
1068 }
1069 return 0;
1070}
1071
89bddce5
SH
1072static int __sock_create(int family, int type, int protocol,
1073 struct socket **res, int kern)
1da177e4
LT
1074{
1075 int err;
1076 struct socket *sock;
55737fda 1077 const struct net_proto_family *pf;
1da177e4
LT
1078
1079 /*
89bddce5 1080 * Check protocol is in range
1da177e4
LT
1081 */
1082 if (family < 0 || family >= NPROTO)
1083 return -EAFNOSUPPORT;
1084 if (type < 0 || type >= SOCK_MAX)
1085 return -EINVAL;
1086
1087 /* Compatibility.
1088
1089 This uglymoron is moved from INET layer to here to avoid
1090 deadlock in module load.
1091 */
1092 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1093 static int warned;
1da177e4
LT
1094 if (!warned) {
1095 warned = 1;
89bddce5
SH
1096 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1097 current->comm);
1da177e4
LT
1098 }
1099 family = PF_PACKET;
1100 }
1101
1102 err = security_socket_create(family, type, protocol, kern);
1103 if (err)
1104 return err;
89bddce5 1105
55737fda
SH
1106 /*
1107 * Allocate the socket and allow the family to set things up. if
1108 * the protocol is 0, the family is instructed to select an appropriate
1109 * default.
1110 */
1111 sock = sock_alloc();
1112 if (!sock) {
1113 if (net_ratelimit())
1114 printk(KERN_WARNING "socket: no more sockets\n");
1115 return -ENFILE; /* Not exactly a match, but its the
1116 closest posix thing */
1117 }
1118
1119 sock->type = type;
1120
1da177e4 1121#if defined(CONFIG_KMOD)
89bddce5
SH
1122 /* Attempt to load a protocol module if the find failed.
1123 *
1124 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1125 * requested real, full-featured networking support upon configuration.
1126 * Otherwise module support will break!
1127 */
55737fda 1128 if (net_families[family] == NULL)
89bddce5 1129 request_module("net-pf-%d", family);
1da177e4
LT
1130#endif
1131
55737fda
SH
1132 rcu_read_lock();
1133 pf = rcu_dereference(net_families[family]);
1134 err = -EAFNOSUPPORT;
1135 if (!pf)
1136 goto out_release;
1da177e4
LT
1137
1138 /*
1139 * We will call the ->create function, that possibly is in a loadable
1140 * module, so we have to bump that loadable module refcnt first.
1141 */
55737fda 1142 if (!try_module_get(pf->owner))
1da177e4
LT
1143 goto out_release;
1144
55737fda
SH
1145 /* Now protected by module ref count */
1146 rcu_read_unlock();
1147
1148 err = pf->create(sock, protocol);
1149 if (err < 0)
1da177e4 1150 goto out_module_put;
a79af59e 1151
1da177e4
LT
1152 /*
1153 * Now to bump the refcnt of the [loadable] module that owns this
1154 * socket at sock_release time we decrement its refcnt.
1155 */
55737fda
SH
1156 if (!try_module_get(sock->ops->owner))
1157 goto out_module_busy;
1158
1da177e4
LT
1159 /*
1160 * Now that we're done with the ->create function, the [loadable]
1161 * module can have its refcnt decremented
1162 */
55737fda 1163 module_put(pf->owner);
7420ed23
VY
1164 err = security_socket_post_create(sock, family, type, protocol, kern);
1165 if (err)
1166 goto out_release;
55737fda 1167 *res = sock;
1da177e4 1168
55737fda
SH
1169 return 0;
1170
1171out_module_busy:
1172 err = -EAFNOSUPPORT;
1da177e4 1173out_module_put:
55737fda
SH
1174 sock->ops = NULL;
1175 module_put(pf->owner);
1176out_sock_release:
1da177e4 1177 sock_release(sock);
55737fda
SH
1178 return err;
1179
1180out_release:
1181 rcu_read_unlock();
1182 goto out_sock_release;
1da177e4
LT
1183}
1184
/*
 * Create a socket through __sock_create() with its @kern argument set to 0.
 * Returns whatever __sock_create() returns.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1189
/*
 * Create a socket through __sock_create() with its @kern argument set to 1.
 * Returns whatever __sock_create() returns.
 */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1194
1195asmlinkage long sys_socket(int family, int type, int protocol)
1196{
1197 int retval;
1198 struct socket *sock;
1199
1200 retval = sock_create(family, type, protocol, &sock);
1201 if (retval < 0)
1202 goto out;
1203
1204 retval = sock_map_fd(sock);
1205 if (retval < 0)
1206 goto out_release;
1207
1208out:
1209 /* It may be already another descriptor 8) Not kernel problem. */
1210 return retval;
1211
1212out_release:
1213 sock_release(sock);
1214 return retval;
1215}
1216
1217/*
1218 * Create a pair of connected sockets.
1219 */
1220
89bddce5
SH
1221asmlinkage long sys_socketpair(int family, int type, int protocol,
1222 int __user *usockvec)
1da177e4
LT
1223{
1224 struct socket *sock1, *sock2;
1225 int fd1, fd2, err;
db349509 1226 struct file *newfile1, *newfile2;
1da177e4
LT
1227
1228 /*
1229 * Obtain the first socket and check if the underlying protocol
1230 * supports the socketpair call.
1231 */
1232
1233 err = sock_create(family, type, protocol, &sock1);
1234 if (err < 0)
1235 goto out;
1236
1237 err = sock_create(family, type, protocol, &sock2);
1238 if (err < 0)
1239 goto out_release_1;
1240
1241 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1242 if (err < 0)
1da177e4
LT
1243 goto out_release_both;
1244
db349509
AV
1245 fd1 = sock_alloc_fd(&newfile1);
1246 if (unlikely(fd1 < 0))
1247 goto out_release_both;
1da177e4 1248
db349509
AV
1249 fd2 = sock_alloc_fd(&newfile2);
1250 if (unlikely(fd2 < 0)) {
1251 put_filp(newfile1);
1252 put_unused_fd(fd1);
1da177e4 1253 goto out_release_both;
db349509 1254 }
1da177e4 1255
db349509
AV
1256 err = sock_attach_fd(sock1, newfile1);
1257 if (unlikely(err < 0)) {
1258 goto out_fd2;
1259 }
1260
1261 err = sock_attach_fd(sock2, newfile2);
1262 if (unlikely(err < 0)) {
1263 fput(newfile1);
1264 goto out_fd1;
1265 }
1266
1267 err = audit_fd_pair(fd1, fd2);
1268 if (err < 0) {
1269 fput(newfile1);
1270 fput(newfile2);
1271 goto out_fd;
1272 }
1da177e4 1273
db349509
AV
1274 fd_install(fd1, newfile1);
1275 fd_install(fd2, newfile2);
1da177e4
LT
1276 /* fd1 and fd2 may be already another descriptors.
1277 * Not kernel problem.
1278 */
1279
89bddce5 1280 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1281 if (!err)
1282 err = put_user(fd2, &usockvec[1]);
1283 if (!err)
1284 return 0;
1285
1286 sys_close(fd2);
1287 sys_close(fd1);
1288 return err;
1289
1da177e4 1290out_release_both:
89bddce5 1291 sock_release(sock2);
1da177e4 1292out_release_1:
89bddce5 1293 sock_release(sock1);
1da177e4
LT
1294out:
1295 return err;
db349509
AV
1296
1297out_fd2:
1298 put_filp(newfile1);
1299 sock_release(sock1);
1300out_fd1:
1301 put_filp(newfile2);
1302 sock_release(sock2);
1303out_fd:
1304 put_unused_fd(fd1);
1305 put_unused_fd(fd2);
1306 goto out;
1da177e4
LT
1307}
1308
1da177e4
LT
1309/*
1310 * Bind a name to a socket. Nothing much to do here since it's
1311 * the protocol's responsibility to handle the local address.
1312 *
1313 * We move the socket address to kernel space before we call
1314 * the protocol layer (having also checked the address is ok).
1315 */
1316
1317asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1318{
1319 struct socket *sock;
1320 char address[MAX_SOCK_ADDR];
6cb153ca 1321 int err, fput_needed;
1da177e4 1322
89bddce5 1323 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1324 if (sock) {
89bddce5
SH
1325 err = move_addr_to_kernel(umyaddr, addrlen, address);
1326 if (err >= 0) {
1327 err = security_socket_bind(sock,
1328 (struct sockaddr *)address,
1329 addrlen);
6cb153ca
BL
1330 if (!err)
1331 err = sock->ops->bind(sock,
89bddce5
SH
1332 (struct sockaddr *)
1333 address, addrlen);
1da177e4 1334 }
6cb153ca 1335 fput_light(sock->file, fput_needed);
89bddce5 1336 }
1da177e4
LT
1337 return err;
1338}
1339
1da177e4
LT
1340/*
1341 * Perform a listen. Basically, we allow the protocol to do anything
1342 * necessary for a listen, and if that works, we mark the socket as
1343 * ready for listening.
1344 */
1345
7a42c217 1346int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1347
1348asmlinkage long sys_listen(int fd, int backlog)
1349{
1350 struct socket *sock;
6cb153ca 1351 int err, fput_needed;
89bddce5
SH
1352
1353 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1354 if (sock) {
1355 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1356 backlog = sysctl_somaxconn;
1357
1358 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1359 if (!err)
1360 err = sock->ops->listen(sock, backlog);
1da177e4 1361
6cb153ca 1362 fput_light(sock->file, fput_needed);
1da177e4
LT
1363 }
1364 return err;
1365}
1366
1da177e4
LT
1367/*
1368 * For accept, we attempt to create a new socket, set up the link
1369 * with the client, wake up the client, then return the new
1370 * connected fd. We collect the address of the connector in kernel
1371 * space and move it to user at the very end. This is unclean because
1372 * we open the socket then return an error.
1373 *
1374 * 1003.1g adds the ability to recvmsg() to query connection pending
1375 * status to recvmsg. We need to add that support in a way thats
1376 * clean when we restucture accept also.
1377 */
1378
89bddce5
SH
1379asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1380 int __user *upeer_addrlen)
1da177e4
LT
1381{
1382 struct socket *sock, *newsock;
39d8c1b6 1383 struct file *newfile;
6cb153ca 1384 int err, len, newfd, fput_needed;
1da177e4
LT
1385 char address[MAX_SOCK_ADDR];
1386
6cb153ca 1387 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1388 if (!sock)
1389 goto out;
1390
1391 err = -ENFILE;
89bddce5 1392 if (!(newsock = sock_alloc()))
1da177e4
LT
1393 goto out_put;
1394
1395 newsock->type = sock->type;
1396 newsock->ops = sock->ops;
1397
1da177e4
LT
1398 /*
1399 * We don't need try_module_get here, as the listening socket (sock)
1400 * has the protocol module (sock->ops->owner) held.
1401 */
1402 __module_get(newsock->ops->owner);
1403
39d8c1b6
DM
1404 newfd = sock_alloc_fd(&newfile);
1405 if (unlikely(newfd < 0)) {
1406 err = newfd;
9a1875e6
DM
1407 sock_release(newsock);
1408 goto out_put;
39d8c1b6
DM
1409 }
1410
1411 err = sock_attach_fd(newsock, newfile);
1412 if (err < 0)
79f4f642 1413 goto out_fd_simple;
39d8c1b6 1414
a79af59e
FF
1415 err = security_socket_accept(sock, newsock);
1416 if (err)
39d8c1b6 1417 goto out_fd;
a79af59e 1418
1da177e4
LT
1419 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1420 if (err < 0)
39d8c1b6 1421 goto out_fd;
1da177e4
LT
1422
1423 if (upeer_sockaddr) {
89bddce5
SH
1424 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1425 &len, 2) < 0) {
1da177e4 1426 err = -ECONNABORTED;
39d8c1b6 1427 goto out_fd;
1da177e4 1428 }
89bddce5
SH
1429 err = move_addr_to_user(address, len, upeer_sockaddr,
1430 upeer_addrlen);
1da177e4 1431 if (err < 0)
39d8c1b6 1432 goto out_fd;
1da177e4
LT
1433 }
1434
1435 /* File flags are not inherited via accept() unlike another OSes. */
1436
39d8c1b6
DM
1437 fd_install(newfd, newfile);
1438 err = newfd;
1da177e4
LT
1439
1440 security_socket_post_accept(sock, newsock);
1441
1442out_put:
6cb153ca 1443 fput_light(sock->file, fput_needed);
1da177e4
LT
1444out:
1445 return err;
79f4f642
AD
1446out_fd_simple:
1447 sock_release(newsock);
1448 put_filp(newfile);
1449 put_unused_fd(newfd);
1450 goto out_put;
39d8c1b6 1451out_fd:
9606a216 1452 fput(newfile);
39d8c1b6 1453 put_unused_fd(newfd);
1da177e4
LT
1454 goto out_put;
1455}
1456
1da177e4
LT
1457/*
1458 * Attempt to connect to a socket with the server address. The address
1459 * is in user space so we verify it is OK and move it to kernel space.
1460 *
1461 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1462 * break bindings
1463 *
1464 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1465 * other SEQPACKET protocols that take time to connect() as it doesn't
1466 * include the -EINPROGRESS status for such sockets.
1467 */
1468
89bddce5
SH
1469asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1470 int addrlen)
1da177e4
LT
1471{
1472 struct socket *sock;
1473 char address[MAX_SOCK_ADDR];
6cb153ca 1474 int err, fput_needed;
1da177e4 1475
6cb153ca 1476 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1477 if (!sock)
1478 goto out;
1479 err = move_addr_to_kernel(uservaddr, addrlen, address);
1480 if (err < 0)
1481 goto out_put;
1482
89bddce5
SH
1483 err =
1484 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1485 if (err)
1486 goto out_put;
1487
89bddce5 1488 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1489 sock->file->f_flags);
1490out_put:
6cb153ca 1491 fput_light(sock->file, fput_needed);
1da177e4
LT
1492out:
1493 return err;
1494}
1495
1496/*
1497 * Get the local address ('name') of a socket object. Move the obtained
1498 * name to user space.
1499 */
1500
89bddce5
SH
1501asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1502 int __user *usockaddr_len)
1da177e4
LT
1503{
1504 struct socket *sock;
1505 char address[MAX_SOCK_ADDR];
6cb153ca 1506 int len, err, fput_needed;
89bddce5 1507
6cb153ca 1508 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1509 if (!sock)
1510 goto out;
1511
1512 err = security_socket_getsockname(sock);
1513 if (err)
1514 goto out_put;
1515
1516 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1517 if (err)
1518 goto out_put;
1519 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1520
1521out_put:
6cb153ca 1522 fput_light(sock->file, fput_needed);
1da177e4
LT
1523out:
1524 return err;
1525}
1526
1527/*
1528 * Get the remote address ('name') of a socket object. Move the obtained
1529 * name to user space.
1530 */
1531
89bddce5
SH
1532asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1533 int __user *usockaddr_len)
1da177e4
LT
1534{
1535 struct socket *sock;
1536 char address[MAX_SOCK_ADDR];
6cb153ca 1537 int len, err, fput_needed;
1da177e4 1538
89bddce5
SH
1539 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1540 if (sock != NULL) {
1da177e4
LT
1541 err = security_socket_getpeername(sock);
1542 if (err) {
6cb153ca 1543 fput_light(sock->file, fput_needed);
1da177e4
LT
1544 return err;
1545 }
1546
89bddce5
SH
1547 err =
1548 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1549 1);
1da177e4 1550 if (!err)
89bddce5
SH
1551 err = move_addr_to_user(address, len, usockaddr,
1552 usockaddr_len);
6cb153ca 1553 fput_light(sock->file, fput_needed);
1da177e4
LT
1554 }
1555 return err;
1556}
1557
1558/*
1559 * Send a datagram to a given address. We move the address into kernel
1560 * space and check the user space data area is readable before invoking
1561 * the protocol.
1562 */
1563
89bddce5
SH
1564asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1565 unsigned flags, struct sockaddr __user *addr,
1566 int addr_len)
1da177e4
LT
1567{
1568 struct socket *sock;
1569 char address[MAX_SOCK_ADDR];
1570 int err;
1571 struct msghdr msg;
1572 struct iovec iov;
6cb153ca
BL
1573 int fput_needed;
1574 struct file *sock_file;
1575
1576 sock_file = fget_light(fd, &fput_needed);
4387ff75 1577 err = -EBADF;
6cb153ca 1578 if (!sock_file)
4387ff75 1579 goto out;
6cb153ca
BL
1580
1581 sock = sock_from_file(sock_file, &err);
1da177e4 1582 if (!sock)
6cb153ca 1583 goto out_put;
89bddce5
SH
1584 iov.iov_base = buff;
1585 iov.iov_len = len;
1586 msg.msg_name = NULL;
1587 msg.msg_iov = &iov;
1588 msg.msg_iovlen = 1;
1589 msg.msg_control = NULL;
1590 msg.msg_controllen = 0;
1591 msg.msg_namelen = 0;
6cb153ca 1592 if (addr) {
1da177e4
LT
1593 err = move_addr_to_kernel(addr, addr_len, address);
1594 if (err < 0)
1595 goto out_put;
89bddce5
SH
1596 msg.msg_name = address;
1597 msg.msg_namelen = addr_len;
1da177e4
LT
1598 }
1599 if (sock->file->f_flags & O_NONBLOCK)
1600 flags |= MSG_DONTWAIT;
1601 msg.msg_flags = flags;
1602 err = sock_sendmsg(sock, &msg, len);
1603
89bddce5 1604out_put:
6cb153ca 1605 fput_light(sock_file, fput_needed);
4387ff75 1606out:
1da177e4
LT
1607 return err;
1608}
1609
1610/*
89bddce5 1611 * Send a datagram down a socket.
1da177e4
LT
1612 */
1613
89bddce5 1614asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1615{
1616 return sys_sendto(fd, buff, len, flags, NULL, 0);
1617}
1618
1619/*
89bddce5 1620 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1621 * sender. We verify the buffers are writable and if needed move the
1622 * sender address from kernel to user space.
1623 */
1624
89bddce5
SH
1625asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1626 unsigned flags, struct sockaddr __user *addr,
1627 int __user *addr_len)
1da177e4
LT
1628{
1629 struct socket *sock;
1630 struct iovec iov;
1631 struct msghdr msg;
1632 char address[MAX_SOCK_ADDR];
89bddce5 1633 int err, err2;
6cb153ca
BL
1634 struct file *sock_file;
1635 int fput_needed;
1636
1637 sock_file = fget_light(fd, &fput_needed);
4387ff75 1638 err = -EBADF;
6cb153ca 1639 if (!sock_file)
4387ff75 1640 goto out;
1da177e4 1641
6cb153ca 1642 sock = sock_from_file(sock_file, &err);
1da177e4 1643 if (!sock)
4387ff75 1644 goto out_put;
1da177e4 1645
89bddce5
SH
1646 msg.msg_control = NULL;
1647 msg.msg_controllen = 0;
1648 msg.msg_iovlen = 1;
1649 msg.msg_iov = &iov;
1650 iov.iov_len = size;
1651 iov.iov_base = ubuf;
1652 msg.msg_name = address;
1653 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1654 if (sock->file->f_flags & O_NONBLOCK)
1655 flags |= MSG_DONTWAIT;
89bddce5 1656 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1657
89bddce5
SH
1658 if (err >= 0 && addr != NULL) {
1659 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1660 if (err2 < 0)
1661 err = err2;
1da177e4 1662 }
4387ff75 1663out_put:
6cb153ca 1664 fput_light(sock_file, fput_needed);
4387ff75 1665out:
1da177e4
LT
1666 return err;
1667}
1668
1669/*
89bddce5 1670 * Receive a datagram from a socket.
1da177e4
LT
1671 */
1672
89bddce5
SH
1673asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1674 unsigned flags)
1da177e4
LT
1675{
1676 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1677}
1678
1679/*
1680 * Set a socket option. Because we don't know the option lengths we have
1681 * to pass the user mode parameter for the protocols to sort out.
1682 */
1683
89bddce5
SH
1684asmlinkage long sys_setsockopt(int fd, int level, int optname,
1685 char __user *optval, int optlen)
1da177e4 1686{
6cb153ca 1687 int err, fput_needed;
1da177e4
LT
1688 struct socket *sock;
1689
1690 if (optlen < 0)
1691 return -EINVAL;
89bddce5
SH
1692
1693 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1694 if (sock != NULL) {
1695 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1696 if (err)
1697 goto out_put;
1da177e4
LT
1698
1699 if (level == SOL_SOCKET)
89bddce5
SH
1700 err =
1701 sock_setsockopt(sock, level, optname, optval,
1702 optlen);
1da177e4 1703 else
89bddce5
SH
1704 err =
1705 sock->ops->setsockopt(sock, level, optname, optval,
1706 optlen);
6cb153ca
BL
1707out_put:
1708 fput_light(sock->file, fput_needed);
1da177e4
LT
1709 }
1710 return err;
1711}
1712
1713/*
1714 * Get a socket option. Because we don't know the option lengths we have
1715 * to pass a user mode parameter for the protocols to sort out.
1716 */
1717
89bddce5
SH
1718asmlinkage long sys_getsockopt(int fd, int level, int optname,
1719 char __user *optval, int __user *optlen)
1da177e4 1720{
6cb153ca 1721 int err, fput_needed;
1da177e4
LT
1722 struct socket *sock;
1723
89bddce5
SH
1724 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1725 if (sock != NULL) {
6cb153ca
BL
1726 err = security_socket_getsockopt(sock, level, optname);
1727 if (err)
1728 goto out_put;
1da177e4
LT
1729
1730 if (level == SOL_SOCKET)
89bddce5
SH
1731 err =
1732 sock_getsockopt(sock, level, optname, optval,
1733 optlen);
1da177e4 1734 else
89bddce5
SH
1735 err =
1736 sock->ops->getsockopt(sock, level, optname, optval,
1737 optlen);
6cb153ca
BL
1738out_put:
1739 fput_light(sock->file, fput_needed);
1da177e4
LT
1740 }
1741 return err;
1742}
1743
1da177e4
LT
1744/*
1745 * Shutdown a socket.
1746 */
1747
1748asmlinkage long sys_shutdown(int fd, int how)
1749{
6cb153ca 1750 int err, fput_needed;
1da177e4
LT
1751 struct socket *sock;
1752
89bddce5
SH
1753 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1754 if (sock != NULL) {
1da177e4 1755 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1756 if (!err)
1757 err = sock->ops->shutdown(sock, how);
1758 fput_light(sock->file, fput_needed);
1da177e4
LT
1759 }
1760 return err;
1761}
1762
/*
 * Helper macros for taking the address of the 32/64 bit msghdr fields
 * which are the same type (int / unsigned) on our platforms.
 *
 * They expand to an expression that reads a variable named `flags` and,
 * for an argument `msg`, a variable named `msg_compat`; both must be in
 * scope at the point of use.
 */
#define COMPAT_MSG(msg, member)	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1769
1da177e4
LT
1770/*
1771 * BSD sendmsg interface
1772 */
1773
1774asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1775{
89bddce5
SH
1776 struct compat_msghdr __user *msg_compat =
1777 (struct compat_msghdr __user *)msg;
1da177e4
LT
1778 struct socket *sock;
1779 char address[MAX_SOCK_ADDR];
1780 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1781 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1782 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1783 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1784 unsigned char *ctl_buf = ctl;
1785 struct msghdr msg_sys;
1786 int err, ctl_len, iov_size, total_len;
6cb153ca 1787 int fput_needed;
89bddce5 1788
1da177e4
LT
1789 err = -EFAULT;
1790 if (MSG_CMSG_COMPAT & flags) {
1791 if (get_compat_msghdr(&msg_sys, msg_compat))
1792 return -EFAULT;
89bddce5
SH
1793 }
1794 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1795 return -EFAULT;
1796
6cb153ca 1797 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1798 if (!sock)
1da177e4
LT
1799 goto out;
1800
1801 /* do not move before msg_sys is valid */
1802 err = -EMSGSIZE;
1803 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1804 goto out_put;
1805
89bddce5 1806 /* Check whether to allocate the iovec area */
1da177e4
LT
1807 err = -ENOMEM;
1808 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1809 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1810 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1811 if (!iov)
1812 goto out_put;
1813 }
1814
1815 /* This will also move the address data into kernel space */
1816 if (MSG_CMSG_COMPAT & flags) {
1817 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1818 } else
1819 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1820 if (err < 0)
1da177e4
LT
1821 goto out_freeiov;
1822 total_len = err;
1823
1824 err = -ENOBUFS;
1825
1826 if (msg_sys.msg_controllen > INT_MAX)
1827 goto out_freeiov;
89bddce5 1828 ctl_len = msg_sys.msg_controllen;
1da177e4 1829 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1830 err =
1831 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1832 sizeof(ctl));
1da177e4
LT
1833 if (err)
1834 goto out_freeiov;
1835 ctl_buf = msg_sys.msg_control;
8920e8f9 1836 ctl_len = msg_sys.msg_controllen;
1da177e4 1837 } else if (ctl_len) {
89bddce5 1838 if (ctl_len > sizeof(ctl)) {
1da177e4 1839 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1840 if (ctl_buf == NULL)
1da177e4
LT
1841 goto out_freeiov;
1842 }
1843 err = -EFAULT;
1844 /*
1845 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1846 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1847 * checking falls down on this.
1848 */
89bddce5
SH
1849 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1850 ctl_len))
1da177e4
LT
1851 goto out_freectl;
1852 msg_sys.msg_control = ctl_buf;
1853 }
1854 msg_sys.msg_flags = flags;
1855
1856 if (sock->file->f_flags & O_NONBLOCK)
1857 msg_sys.msg_flags |= MSG_DONTWAIT;
1858 err = sock_sendmsg(sock, &msg_sys, total_len);
1859
1860out_freectl:
89bddce5 1861 if (ctl_buf != ctl)
1da177e4
LT
1862 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1863out_freeiov:
1864 if (iov != iovstack)
1865 sock_kfree_s(sock->sk, iov, iov_size);
1866out_put:
6cb153ca 1867 fput_light(sock->file, fput_needed);
89bddce5 1868out:
1da177e4
LT
1869 return err;
1870}
1871
1872/*
1873 * BSD recvmsg interface
1874 */
1875
89bddce5
SH
1876asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1877 unsigned int flags)
1da177e4 1878{
89bddce5
SH
1879 struct compat_msghdr __user *msg_compat =
1880 (struct compat_msghdr __user *)msg;
1da177e4
LT
1881 struct socket *sock;
1882 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1883 struct iovec *iov = iovstack;
1da177e4
LT
1884 struct msghdr msg_sys;
1885 unsigned long cmsg_ptr;
1886 int err, iov_size, total_len, len;
6cb153ca 1887 int fput_needed;
1da177e4
LT
1888
1889 /* kernel mode address */
1890 char addr[MAX_SOCK_ADDR];
1891
1892 /* user mode address pointers */
1893 struct sockaddr __user *uaddr;
1894 int __user *uaddr_len;
89bddce5 1895
1da177e4
LT
1896 if (MSG_CMSG_COMPAT & flags) {
1897 if (get_compat_msghdr(&msg_sys, msg_compat))
1898 return -EFAULT;
89bddce5
SH
1899 }
1900 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1901 return -EFAULT;
1da177e4 1902
6cb153ca 1903 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1904 if (!sock)
1905 goto out;
1906
1907 err = -EMSGSIZE;
1908 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1909 goto out_put;
89bddce5
SH
1910
1911 /* Check whether to allocate the iovec area */
1da177e4
LT
1912 err = -ENOMEM;
1913 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1914 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1915 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1916 if (!iov)
1917 goto out_put;
1918 }
1919
1920 /*
89bddce5
SH
1921 * Save the user-mode address (verify_iovec will change the
1922 * kernel msghdr to use the kernel address space)
1da177e4 1923 */
89bddce5
SH
1924
1925 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1926 uaddr_len = COMPAT_NAMELEN(msg);
1927 if (MSG_CMSG_COMPAT & flags) {
1928 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1929 } else
1930 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1931 if (err < 0)
1932 goto out_freeiov;
89bddce5 1933 total_len = err;
1da177e4
LT
1934
1935 cmsg_ptr = (unsigned long)msg_sys.msg_control;
1936 msg_sys.msg_flags = 0;
1937 if (MSG_CMSG_COMPAT & flags)
1938 msg_sys.msg_flags = MSG_CMSG_COMPAT;
89bddce5 1939
1da177e4
LT
1940 if (sock->file->f_flags & O_NONBLOCK)
1941 flags |= MSG_DONTWAIT;
1942 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1943 if (err < 0)
1944 goto out_freeiov;
1945 len = err;
1946
1947 if (uaddr != NULL) {
89bddce5
SH
1948 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1949 uaddr_len);
1da177e4
LT
1950 if (err < 0)
1951 goto out_freeiov;
1952 }
37f7f421
DM
1953 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1954 COMPAT_FLAGS(msg));
1da177e4
LT
1955 if (err)
1956 goto out_freeiov;
1957 if (MSG_CMSG_COMPAT & flags)
89bddce5 1958 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1959 &msg_compat->msg_controllen);
1960 else
89bddce5 1961 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1962 &msg->msg_controllen);
1963 if (err)
1964 goto out_freeiov;
1965 err = len;
1966
1967out_freeiov:
1968 if (iov != iovstack)
1969 sock_kfree_s(sock->sk, iov, iov_size);
1970out_put:
6cb153ca 1971 fput_light(sock->file, fput_needed);
1da177e4
LT
1972out:
1973 return err;
1974}
1975
#ifdef __ARCH_WANT_SYS_SOCKETCALL

/*
 * Argument list sizes for sys_socketcall: nargs[call] is the number of
 * bytes of user arguments to copy in for that call number.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3)
};

#undef AL

/*
 *	System call vectors.
 *
 *	Argument checking cleaned up. Saved 20% in size.
 *	This function doesn't need to set the kernel lock because
 *	it is set by the callees.
 *
 *	@call selects the socket system call, @args points at its argument
 *	words in user space.  Returns -EINVAL for a call number outside
 *	[1, SYS_RECVMSG], -EFAULT if the arguments cannot be copied in, the
 *	audit hook's error, or the selected system call's result.
 */
asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
	unsigned long a[6];
	int err;

	if (call < 1 || call > SYS_RECVMSG)
		return -EINVAL;

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(a, args, nargs[call]))
		return -EFAULT;

	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
	if (err)
		return err;

	switch (call) {
	case SYS_SOCKET:
		err = sys_socket(a[0], a[1], a[2]);
		break;
	case SYS_BIND:
		err = sys_bind(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_CONNECT:
		err = sys_connect(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_LISTEN:
		err = sys_listen(a[0], a[1]);
		break;
	case SYS_ACCEPT:
		err = sys_accept(a[0], (struct sockaddr __user *)a[1],
				 (int __user *)a[2]);
		break;
	case SYS_GETSOCKNAME:
		err = sys_getsockname(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_GETPEERNAME:
		err = sys_getpeername(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_SOCKETPAIR:
		err = sys_socketpair(a[0], a[1], a[2], (int __user *)a[3]);
		break;
	case SYS_SEND:
		err = sys_send(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_SENDTO:
		err = sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				 (struct sockaddr __user *)a[4], a[5]);
		break;
	case SYS_RECV:
		err = sys_recv(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_RECVFROM:
		err = sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				   (struct sockaddr __user *)a[4],
				   (int __user *)a[5]);
		break;
	case SYS_SHUTDOWN:
		err = sys_shutdown(a[0], a[1]);
		break;
	case SYS_SETSOCKOPT:
		err = sys_setsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     a[4]);
		break;
	case SYS_GETSOCKOPT:
		err = sys_getsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     (int __user *)a[4]);
		break;
	case SYS_SENDMSG:
		err = sys_sendmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	case SYS_RECVMSG:
		err = sys_recvmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	default:
		err = -EINVAL;
		break;
	}
	return err;
}

#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2087
55737fda
SH
2088/**
2089 * sock_register - add a socket protocol handler
2090 * @ops: description of protocol
2091 *
1da177e4
LT
2092 * This function is called by a protocol handler that wants to
2093 * advertise its address family, and have it linked into the
55737fda
SH
2094 * socket interface. The value ops->family coresponds to the
2095 * socket system call protocol family.
1da177e4 2096 */
f0fd27d4 2097int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2098{
2099 int err;
2100
2101 if (ops->family >= NPROTO) {
89bddce5
SH
2102 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2103 NPROTO);
1da177e4
LT
2104 return -ENOBUFS;
2105 }
55737fda
SH
2106
2107 spin_lock(&net_family_lock);
2108 if (net_families[ops->family])
2109 err = -EEXIST;
2110 else {
89bddce5 2111 net_families[ops->family] = ops;
1da177e4
LT
2112 err = 0;
2113 }
55737fda
SH
2114 spin_unlock(&net_family_lock);
2115
89bddce5 2116 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2117 return err;
2118}
2119
55737fda
SH
2120/**
2121 * sock_unregister - remove a protocol handler
2122 * @family: protocol family to remove
2123 *
1da177e4
LT
2124 * This function is called by a protocol handler that wants to
2125 * remove its address family, and have it unlinked from the
55737fda
SH
2126 * new socket creation.
2127 *
2128 * If protocol handler is a module, then it can use module reference
2129 * counts to protect against new references. If protocol handler is not
2130 * a module then it needs to provide its own protection in
2131 * the ops->create routine.
1da177e4 2132 */
f0fd27d4 2133void sock_unregister(int family)
1da177e4 2134{
f0fd27d4 2135 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2136
55737fda 2137 spin_lock(&net_family_lock);
89bddce5 2138 net_families[family] = NULL;
55737fda
SH
2139 spin_unlock(&net_family_lock);
2140
2141 synchronize_rcu();
2142
89bddce5 2143 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2144}
2145
77d76ea3 2146static int __init sock_init(void)
1da177e4
LT
2147{
2148 /*
89bddce5 2149 * Initialize sock SLAB cache.
1da177e4 2150 */
89bddce5 2151
1da177e4
LT
2152 sk_init();
2153
1da177e4 2154 /*
89bddce5 2155 * Initialize skbuff SLAB cache
1da177e4
LT
2156 */
2157 skb_init();
1da177e4
LT
2158
2159 /*
89bddce5 2160 * Initialize the protocols module.
1da177e4
LT
2161 */
2162
2163 init_inodecache();
2164 register_filesystem(&sock_fs_type);
2165 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2166
2167 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2168 */
2169
2170#ifdef CONFIG_NETFILTER
2171 netfilter_init();
2172#endif
cbeb321a
DM
2173
2174 return 0;
1da177e4
LT
2175}
2176
77d76ea3
AK
2177core_initcall(sock_init); /* early initcall */
2178
1da177e4
LT
2179#ifdef CONFIG_PROC_FS
2180void socket_seq_show(struct seq_file *seq)
2181{
2182 int cpu;
2183 int counter = 0;
2184
6f912042 2185 for_each_possible_cpu(cpu)
89bddce5 2186 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2187
2188 /* It can be negative, by the way. 8) */
2189 if (counter < 0)
2190 counter = 0;
2191
2192 seq_printf(seq, "sockets: used %d\n", counter);
2193}
89bddce5 2194#endif /* CONFIG_PROC_FS */
1da177e4 2195
89bbfc95
SP
2196#ifdef CONFIG_COMPAT
2197static long compat_sock_ioctl(struct file *file, unsigned cmd,
89bddce5 2198 unsigned long arg)
89bbfc95
SP
2199{
2200 struct socket *sock = file->private_data;
2201 int ret = -ENOIOCTLCMD;
2202
2203 if (sock->ops->compat_ioctl)
2204 ret = sock->ops->compat_ioctl(sock, cmd, arg);
2205
2206 return ret;
2207}
2208#endif
2209
ac5a488e
SS
2210int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2211{
2212 return sock->ops->bind(sock, addr, addrlen);
2213}
2214
2215int kernel_listen(struct socket *sock, int backlog)
2216{
2217 return sock->ops->listen(sock, backlog);
2218}
2219
2220int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2221{
2222 struct sock *sk = sock->sk;
2223 int err;
2224
2225 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2226 newsock);
2227 if (err < 0)
2228 goto done;
2229
2230 err = sock->ops->accept(sock, *newsock, flags);
2231 if (err < 0) {
2232 sock_release(*newsock);
2233 goto done;
2234 }
2235
2236 (*newsock)->ops = sock->ops;
2237
2238done:
2239 return err;
2240}
2241
2242int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2243 int flags)
ac5a488e
SS
2244{
2245 return sock->ops->connect(sock, addr, addrlen, flags);
2246}
2247
2248int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2249 int *addrlen)
2250{
2251 return sock->ops->getname(sock, addr, addrlen, 0);
2252}
2253
2254int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2255 int *addrlen)
2256{
2257 return sock->ops->getname(sock, addr, addrlen, 1);
2258}
2259
2260int kernel_getsockopt(struct socket *sock, int level, int optname,
2261 char *optval, int *optlen)
2262{
2263 mm_segment_t oldfs = get_fs();
2264 int err;
2265
2266 set_fs(KERNEL_DS);
2267 if (level == SOL_SOCKET)
2268 err = sock_getsockopt(sock, level, optname, optval, optlen);
2269 else
2270 err = sock->ops->getsockopt(sock, level, optname, optval,
2271 optlen);
2272 set_fs(oldfs);
2273 return err;
2274}
2275
2276int kernel_setsockopt(struct socket *sock, int level, int optname,
2277 char *optval, int optlen)
2278{
2279 mm_segment_t oldfs = get_fs();
2280 int err;
2281
2282 set_fs(KERNEL_DS);
2283 if (level == SOL_SOCKET)
2284 err = sock_setsockopt(sock, level, optname, optval, optlen);
2285 else
2286 err = sock->ops->setsockopt(sock, level, optname, optval,
2287 optlen);
2288 set_fs(oldfs);
2289 return err;
2290}
2291
2292int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2293 size_t size, int flags)
2294{
2295 if (sock->ops->sendpage)
2296 return sock->ops->sendpage(sock, page, offset, size, flags);
2297
2298 return sock_no_sendpage(sock, page, offset, size, flags);
2299}
2300
2301int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2302{
2303 mm_segment_t oldfs = get_fs();
2304 int err;
2305
2306 set_fs(KERNEL_DS);
2307 err = sock->ops->ioctl(sock, cmd, arg);
2308 set_fs(oldfs);
2309
2310 return err;
2311}
2312
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() socket helpers */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);