9p connect fixes
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4
LT
72#include <linux/wanrouter.h>
73#include <linux/if_bridge.h>
20380731
ACM
74#include <linux/if_frad.h>
75#include <linux/if_vlan.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
1da177e4
LT
90
91#include <asm/uaccess.h>
92#include <asm/unistd.h>
93
94#include <net/compat.h>
87de87d5 95#include <net/wext.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
6b96018b
AB
103#include <linux/sockios.h>
104#include <linux/atalk.h>
105
1da177e4 106static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
107static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
108 unsigned long nr_segs, loff_t pos);
109static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
110 unsigned long nr_segs, loff_t pos);
89bddce5 111static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
112
113static int sock_close(struct inode *inode, struct file *file);
114static unsigned int sock_poll(struct file *file,
115 struct poll_table_struct *wait);
89bddce5 116static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
117#ifdef CONFIG_COMPAT
118static long compat_sock_ioctl(struct file *file,
89bddce5 119 unsigned int cmd, unsigned long arg);
89bbfc95 120#endif
1da177e4 121static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
122static ssize_t sock_sendpage(struct file *file, struct page *page,
123 int offset, size_t size, loff_t *ppos, int more);
9c55e01c
JA
124static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
125 struct pipe_inode_info *pipe, size_t len,
126 unsigned int flags);
1da177e4 127
1da177e4
LT
128/*
129 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
130 * in the operation structures but are done directly via the socketcall() multiplexor.
131 */
132
da7071d7 133static const struct file_operations socket_file_ops = {
1da177e4
LT
134 .owner = THIS_MODULE,
135 .llseek = no_llseek,
136 .aio_read = sock_aio_read,
137 .aio_write = sock_aio_write,
138 .poll = sock_poll,
139 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
140#ifdef CONFIG_COMPAT
141 .compat_ioctl = compat_sock_ioctl,
142#endif
1da177e4
LT
143 .mmap = sock_mmap,
144 .open = sock_no_open, /* special open code to disallow open via /proc */
145 .release = sock_close,
146 .fasync = sock_fasync,
5274f052
JA
147 .sendpage = sock_sendpage,
148 .splice_write = generic_splice_sendpage,
9c55e01c 149 .splice_read = sock_splice_read,
1da177e4
LT
150};
151
152/*
153 * The protocol list. Each protocol is registered in here.
154 */
155
1da177e4 156static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 157static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 158
1da177e4
LT
159/*
160 * Statistics counters of the socket lists
161 */
162
163static DEFINE_PER_CPU(int, sockets_in_use) = 0;
164
165/*
89bddce5
SH
166 * Support routines.
167 * Move socket addresses back and forth across the kernel/user
168 * divide and look after the messy bits.
1da177e4
LT
169 */
170
89bddce5 171#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
172 16 for IP, 16 for IPX,
173 24 for IPv6,
89bddce5 174 about 80 for AX.25
1da177e4
LT
175 must be at least one bigger than
176 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 177 :unix_mkname()).
1da177e4 178 */
89bddce5 179
1da177e4
LT
180/**
181 * move_addr_to_kernel - copy a socket address into kernel space
182 * @uaddr: Address in user space
183 * @kaddr: Address in kernel space
184 * @ulen: Length in user space
185 *
186 * The address is copied into kernel space. If the provided address is
187 * too long an error code of -EINVAL is returned. If the copy gives
188 * invalid addresses -EFAULT is returned. On a success 0 is returned.
189 */
190
230b1839 191int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
1da177e4 192{
230b1839 193 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 194 return -EINVAL;
89bddce5 195 if (ulen == 0)
1da177e4 196 return 0;
89bddce5 197 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 198 return -EFAULT;
3ec3b2fb 199 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
200}
201
202/**
203 * move_addr_to_user - copy an address to user space
204 * @kaddr: kernel space address
205 * @klen: length of address in kernel
206 * @uaddr: user space address
207 * @ulen: pointer to user length field
208 *
209 * The value pointed to by ulen on entry is the buffer length available.
210 * This is overwritten with the buffer space used. -EINVAL is returned
211 * if an overlong buffer is specified or a negative buffer size. -EFAULT
212 * is returned if either the buffer or the length field are not
213 * accessible.
214 * After copying the data up to the limit the user specifies, the true
215 * length of the data is written over the length limit the user
216 * specified. Zero is returned for a success.
217 */
89bddce5 218
230b1839 219int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr,
89bddce5 220 int __user *ulen)
1da177e4
LT
221{
222 int err;
223 int len;
224
89bddce5
SH
225 err = get_user(len, ulen);
226 if (err)
1da177e4 227 return err;
89bddce5
SH
228 if (len > klen)
229 len = klen;
230b1839 230 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 231 return -EINVAL;
89bddce5 232 if (len) {
d6fe3945
SG
233 if (audit_sockaddr(klen, kaddr))
234 return -ENOMEM;
89bddce5 235 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
236 return -EFAULT;
237 }
238 /*
89bddce5
SH
239 * "fromlen shall refer to the value before truncation.."
240 * 1003.1g
1da177e4
LT
241 */
242 return __put_user(klen, ulen);
243}
244
e18b890b 245static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
246
247static struct inode *sock_alloc_inode(struct super_block *sb)
248{
249 struct socket_alloc *ei;
89bddce5 250
e94b1766 251 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
252 if (!ei)
253 return NULL;
254 init_waitqueue_head(&ei->socket.wait);
89bddce5 255
1da177e4
LT
256 ei->socket.fasync_list = NULL;
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
268 kmem_cache_free(sock_inode_cachep,
269 container_of(inode, struct socket_alloc, vfs_inode));
270}
271
51cc5068 272static void init_once(void *foo)
1da177e4 273{
89bddce5 274 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 275
a35afb83 276 inode_init_once(&ei->vfs_inode);
1da177e4 277}
89bddce5 278
1da177e4
LT
279static int init_inodecache(void)
280{
281 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
282 sizeof(struct socket_alloc),
283 0,
284 (SLAB_HWCACHE_ALIGN |
285 SLAB_RECLAIM_ACCOUNT |
286 SLAB_MEM_SPREAD),
20c2df83 287 init_once);
1da177e4
LT
288 if (sock_inode_cachep == NULL)
289 return -ENOMEM;
290 return 0;
291}
292
b87221de 293static const struct super_operations sockfs_ops = {
1da177e4
LT
294 .alloc_inode = sock_alloc_inode,
295 .destroy_inode =sock_destroy_inode,
296 .statfs = simple_statfs,
297};
298
454e2398 299static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
300 int flags, const char *dev_name, void *data,
301 struct vfsmount *mnt)
1da177e4 302{
454e2398
DH
303 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
304 mnt);
1da177e4
LT
305}
306
ba89966c 307static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
308
309static struct file_system_type sock_fs_type = {
310 .name = "sockfs",
311 .get_sb = sockfs_get_sb,
312 .kill_sb = kill_anon_super,
313};
89bddce5 314
1da177e4
LT
315static int sockfs_delete_dentry(struct dentry *dentry)
316{
304e61e6
ED
317 /*
318 * At creation time, we pretended this dentry was hashed
319 * (by clearing DCACHE_UNHASHED bit in d_flags)
320 * At delete time, we restore the truth : not hashed.
321 * (so that dput() can proceed correctly)
322 */
323 dentry->d_flags |= DCACHE_UNHASHED;
324 return 0;
1da177e4 325}
c23fbb6b
ED
326
327/*
328 * sockfs_dname() is called from d_path().
329 */
330static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
331{
332 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
333 dentry->d_inode->i_ino);
334}
335
3ba13d17 336static const struct dentry_operations sockfs_dentry_operations = {
89bddce5 337 .d_delete = sockfs_delete_dentry,
c23fbb6b 338 .d_dname = sockfs_dname,
1da177e4
LT
339};
340
341/*
342 * Obtains the first available file descriptor and sets it up for use.
343 *
39d8c1b6
DM
344 * These functions create file structures and map them to fd space
345 * of the current process. On success it returns file descriptor
1da177e4
LT
346 * and file struct implicitly stored in sock->file.
347 * Note that another thread may close file descriptor before we return
348 * from this function. We use the fact that now we do not refer
349 * to socket after mapping. If one day we will need it, this
350 * function will increment ref. count on file by 1.
351 *
352 * In any case returned fd MAY BE not valid!
353 * This race condition is unavoidable
354 * with shared fd spaces, we cannot solve it inside kernel,
355 * but we take care of internal coherence yet.
356 */
357
7cbe66b6 358static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
1da177e4 359{
7cbe66b6
AV
360 struct qstr name = { .name = "" };
361 struct dentry *dentry;
362 struct file *file;
1da177e4 363 int fd;
1da177e4 364
a677a039 365 fd = get_unused_fd_flags(flags);
7cbe66b6
AV
366 if (unlikely(fd < 0))
367 return fd;
1da177e4 368
7cbe66b6 369 file = get_empty_filp();
1da177e4 370
7cbe66b6
AV
371 if (unlikely(!file)) {
372 put_unused_fd(fd);
373 return -ENFILE;
374 }
39d8c1b6 375
ce8d2cdf 376 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
7cbe66b6
AV
377 if (unlikely(!dentry)) {
378 put_filp(file);
379 put_unused_fd(fd);
39d8c1b6 380 return -ENOMEM;
7cbe66b6 381 }
39d8c1b6 382
ce8d2cdf 383 dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
384 /*
385 * We dont want to push this dentry into global dentry hash table.
386 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
387 * This permits a working /proc/$pid/fd/XXX on sockets
388 */
ce8d2cdf
DH
389 dentry->d_flags &= ~DCACHE_UNHASHED;
390 d_instantiate(dentry, SOCK_INODE(sock));
39d8c1b6
DM
391
392 sock->file = file;
ce8d2cdf
DH
393 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
394 &socket_file_ops);
395 SOCK_INODE(sock)->i_fop = &socket_file_ops;
77d27200 396 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6
DM
397 file->f_pos = 0;
398 file->private_data = sock;
1da177e4 399
7cbe66b6
AV
400 *f = file;
401 return fd;
39d8c1b6
DM
402}
403
/*
 * Allocate a file for @sock and install it in the descriptor table.
 * Returns the descriptor, or the negative errno from sock_alloc_file().
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *filp;
	int fd;

	fd = sock_alloc_file(sock, &filp, flags);
	if (unlikely(fd < 0))
		return fd;

	fd_install(fd, filp);
	return fd;
}
414
6cb153ca
BL
415static struct socket *sock_from_file(struct file *file, int *err)
416{
6cb153ca
BL
417 if (file->f_op == &socket_file_ops)
418 return file->private_data; /* set in sock_map_fd */
419
23bb80d2
ED
420 *err = -ENOTSOCK;
421 return NULL;
6cb153ca
BL
422}
423
1da177e4
LT
424/**
425 * sockfd_lookup - Go from a file number to its socket slot
426 * @fd: file handle
427 * @err: pointer to an error code return
428 *
429 * The file handle passed in is locked and the socket it is bound
430 * to is returned. If an error occurs the err pointer is overwritten
431 * with a negative errno code and NULL is returned. The function checks
432 * for both invalid handles and passing a handle which is not a socket.
433 *
434 * On a success the socket object pointer is returned.
435 */
436
437struct socket *sockfd_lookup(int fd, int *err)
438{
439 struct file *file;
1da177e4
LT
440 struct socket *sock;
441
89bddce5
SH
442 file = fget(fd);
443 if (!file) {
1da177e4
LT
444 *err = -EBADF;
445 return NULL;
446 }
89bddce5 447
6cb153ca
BL
448 sock = sock_from_file(file, err);
449 if (!sock)
1da177e4 450 fput(file);
6cb153ca
BL
451 return sock;
452}
1da177e4 453
6cb153ca
BL
454static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
455{
456 struct file *file;
457 struct socket *sock;
458
3672558c 459 *err = -EBADF;
6cb153ca
BL
460 file = fget_light(fd, fput_needed);
461 if (file) {
462 sock = sock_from_file(file, err);
463 if (sock)
464 return sock;
465 fput_light(file, *fput_needed);
1da177e4 466 }
6cb153ca 467 return NULL;
1da177e4
LT
468}
469
470/**
471 * sock_alloc - allocate a socket
89bddce5 472 *
1da177e4
LT
473 * Allocate a new inode and socket object. The two are bound together
474 * and initialised. The socket is then returned. If we are out of inodes
475 * NULL is returned.
476 */
477
478static struct socket *sock_alloc(void)
479{
89bddce5
SH
480 struct inode *inode;
481 struct socket *sock;
1da177e4
LT
482
483 inode = new_inode(sock_mnt->mnt_sb);
484 if (!inode)
485 return NULL;
486
487 sock = SOCKET_I(inode);
488
29a020d3 489 kmemcheck_annotate_bitfield(sock, type);
89bddce5 490 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
491 inode->i_uid = current_fsuid();
492 inode->i_gid = current_fsgid();
1da177e4 493
4e69489a 494 percpu_add(sockets_in_use, 1);
1da177e4
LT
495 return sock;
496}
497
498/*
499 * In theory you can't get an open on this inode, but /proc provides
500 * a back door. Remember to keep it shut otherwise you'll let the
501 * creepy crawlies in.
502 */
89bddce5 503
1da177e4
LT
504static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
505{
506 return -ENXIO;
507}
508
4b6f5d20 509const struct file_operations bad_sock_fops = {
1da177e4
LT
510 .owner = THIS_MODULE,
511 .open = sock_no_open,
512};
513
514/**
515 * sock_release - close a socket
516 * @sock: socket to close
517 *
518 * The socket is released from the protocol stack if it has a release
519 * callback, and the inode is then released if the socket is bound to
89bddce5 520 * an inode not a file.
1da177e4 521 */
89bddce5 522
1da177e4
LT
523void sock_release(struct socket *sock)
524{
525 if (sock->ops) {
526 struct module *owner = sock->ops->owner;
527
528 sock->ops->release(sock);
529 sock->ops = NULL;
530 module_put(owner);
531 }
532
533 if (sock->fasync_list)
534 printk(KERN_ERR "sock_release: fasync list not empty!\n");
535
4e69489a 536 percpu_sub(sockets_in_use, 1);
1da177e4
LT
537 if (!sock->file) {
538 iput(SOCK_INODE(sock));
539 return;
540 }
89bddce5 541 sock->file = NULL;
1da177e4
LT
542}
543
20d49473
PO
544int sock_tx_timestamp(struct msghdr *msg, struct sock *sk,
545 union skb_shared_tx *shtx)
546{
547 shtx->flags = 0;
548 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
549 shtx->hardware = 1;
550 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
551 shtx->software = 1;
552 return 0;
553}
554EXPORT_SYMBOL(sock_tx_timestamp);
555
89bddce5 556static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
557 struct msghdr *msg, size_t size)
558{
559 struct sock_iocb *si = kiocb_to_siocb(iocb);
560 int err;
561
562 si->sock = sock;
563 si->scm = NULL;
564 si->msg = msg;
565 si->size = size;
566
567 err = security_socket_sendmsg(sock, msg, size);
568 if (err)
569 return err;
570
571 return sock->ops->sendmsg(iocb, sock, msg, size);
572}
573
574int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
575{
576 struct kiocb iocb;
577 struct sock_iocb siocb;
578 int ret;
579
580 init_sync_kiocb(&iocb, NULL);
581 iocb.private = &siocb;
582 ret = __sock_sendmsg(&iocb, sock, msg, size);
583 if (-EIOCBQUEUED == ret)
584 ret = wait_on_sync_kiocb(&iocb);
585 return ret;
586}
587
588int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
589 struct kvec *vec, size_t num, size_t size)
590{
591 mm_segment_t oldfs = get_fs();
592 int result;
593
594 set_fs(KERNEL_DS);
595 /*
596 * the following is safe, since for compiler definitions of kvec and
597 * iovec are identical, yielding the same in-core layout and alignment
598 */
89bddce5 599 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
600 msg->msg_iovlen = num;
601 result = sock_sendmsg(sock, msg, size);
602 set_fs(oldfs);
603 return result;
604}
605
20d49473
PO
606static int ktime2ts(ktime_t kt, struct timespec *ts)
607{
608 if (kt.tv64) {
609 *ts = ktime_to_timespec(kt);
610 return 1;
611 } else {
612 return 0;
613 }
614}
615
92f37fd2
ED
616/*
617 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
618 */
619void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
620 struct sk_buff *skb)
621{
20d49473
PO
622 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
623 struct timespec ts[3];
624 int empty = 1;
625 struct skb_shared_hwtstamps *shhwtstamps =
626 skb_hwtstamps(skb);
627
628 /* Race occurred between timestamp enabling and packet
629 receiving. Fill in the current time for now. */
630 if (need_software_tstamp && skb->tstamp.tv64 == 0)
631 __net_timestamp(skb);
632
633 if (need_software_tstamp) {
634 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
635 struct timeval tv;
636 skb_get_timestamp(skb, &tv);
637 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
638 sizeof(tv), &tv);
639 } else {
640 struct timespec ts;
641 skb_get_timestampns(skb, &ts);
642 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
643 sizeof(ts), &ts);
644 }
645 }
646
647
648 memset(ts, 0, sizeof(ts));
649 if (skb->tstamp.tv64 &&
650 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
651 skb_get_timestampns(skb, ts + 0);
652 empty = 0;
653 }
654 if (shhwtstamps) {
655 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
656 ktime2ts(shhwtstamps->syststamp, ts + 1))
657 empty = 0;
658 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
659 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
660 empty = 0;
92f37fd2 661 }
20d49473
PO
662 if (!empty)
663 put_cmsg(msg, SOL_SOCKET,
664 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2
ED
665}
666
7c81fd8b
ACM
667EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
668
3b885787
NH
669inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
670{
671 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
672 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
673 sizeof(__u32), &skb->dropcount);
674}
675
/* Emit the timestamp control message(s) first, then the drop counter. */
void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			    struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);

	sock_recv_drops(msg, sk, skb);
}
682EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops);
683
a2e27255
ACM
684static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
685 struct msghdr *msg, size_t size, int flags)
1da177e4 686{
1da177e4
LT
687 struct sock_iocb *si = kiocb_to_siocb(iocb);
688
689 si->sock = sock;
690 si->scm = NULL;
691 si->msg = msg;
692 si->size = size;
693 si->flags = flags;
694
1da177e4
LT
695 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
696}
697
a2e27255
ACM
698static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
699 struct msghdr *msg, size_t size, int flags)
700{
701 int err = security_socket_recvmsg(sock, msg, size, flags);
702
703 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
704}
705
89bddce5 706int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
707 size_t size, int flags)
708{
709 struct kiocb iocb;
710 struct sock_iocb siocb;
711 int ret;
712
89bddce5 713 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
714 iocb.private = &siocb;
715 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
716 if (-EIOCBQUEUED == ret)
717 ret = wait_on_sync_kiocb(&iocb);
718 return ret;
719}
720
a2e27255
ACM
721static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
722 size_t size, int flags)
723{
724 struct kiocb iocb;
725 struct sock_iocb siocb;
726 int ret;
727
728 init_sync_kiocb(&iocb, NULL);
729 iocb.private = &siocb;
730 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
731 if (-EIOCBQUEUED == ret)
732 ret = wait_on_sync_kiocb(&iocb);
733 return ret;
734}
735
89bddce5
SH
736int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
737 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
738{
739 mm_segment_t oldfs = get_fs();
740 int result;
741
742 set_fs(KERNEL_DS);
743 /*
744 * the following is safe, since for compiler definitions of kvec and
745 * iovec are identical, yielding the same in-core layout and alignment
746 */
89bddce5 747 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
748 result = sock_recvmsg(sock, msg, size, flags);
749 set_fs(oldfs);
750 return result;
751}
752
753static void sock_aio_dtor(struct kiocb *iocb)
754{
755 kfree(iocb->private);
756}
757
ce1d4d3e
CH
758static ssize_t sock_sendpage(struct file *file, struct page *page,
759 int offset, size_t size, loff_t *ppos, int more)
1da177e4 760{
1da177e4
LT
761 struct socket *sock;
762 int flags;
763
ce1d4d3e
CH
764 sock = file->private_data;
765
766 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
767 if (more)
768 flags |= MSG_MORE;
769
e6949583 770 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 771}
1da177e4 772
9c55e01c
JA
773static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
774 struct pipe_inode_info *pipe, size_t len,
775 unsigned int flags)
776{
777 struct socket *sock = file->private_data;
778
997b37da
RDC
779 if (unlikely(!sock->ops->splice_read))
780 return -EINVAL;
781
9c55e01c
JA
782 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
783}
784
ce1d4d3e 785static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 786 struct sock_iocb *siocb)
ce1d4d3e
CH
787{
788 if (!is_sync_kiocb(iocb)) {
789 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
790 if (!siocb)
791 return NULL;
1da177e4
LT
792 iocb->ki_dtor = sock_aio_dtor;
793 }
1da177e4 794
ce1d4d3e 795 siocb->kiocb = iocb;
ce1d4d3e
CH
796 iocb->private = siocb;
797 return siocb;
1da177e4
LT
798}
799
ce1d4d3e 800static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
801 struct file *file, const struct iovec *iov,
802 unsigned long nr_segs)
ce1d4d3e
CH
803{
804 struct socket *sock = file->private_data;
805 size_t size = 0;
806 int i;
1da177e4 807
89bddce5
SH
808 for (i = 0; i < nr_segs; i++)
809 size += iov[i].iov_len;
1da177e4 810
ce1d4d3e
CH
811 msg->msg_name = NULL;
812 msg->msg_namelen = 0;
813 msg->msg_control = NULL;
814 msg->msg_controllen = 0;
89bddce5 815 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
816 msg->msg_iovlen = nr_segs;
817 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
818
819 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
820}
821
027445c3
BP
822static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
823 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
824{
825 struct sock_iocb siocb, *x;
826
1da177e4
LT
827 if (pos != 0)
828 return -ESPIPE;
027445c3
BP
829
830 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
831 return 0;
832
027445c3
BP
833
834 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
835 if (!x)
836 return -ENOMEM;
027445c3 837 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
838}
839
ce1d4d3e 840static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
841 struct file *file, const struct iovec *iov,
842 unsigned long nr_segs)
1da177e4 843{
ce1d4d3e
CH
844 struct socket *sock = file->private_data;
845 size_t size = 0;
846 int i;
1da177e4 847
89bddce5
SH
848 for (i = 0; i < nr_segs; i++)
849 size += iov[i].iov_len;
1da177e4 850
ce1d4d3e
CH
851 msg->msg_name = NULL;
852 msg->msg_namelen = 0;
853 msg->msg_control = NULL;
854 msg->msg_controllen = 0;
89bddce5 855 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
856 msg->msg_iovlen = nr_segs;
857 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
858 if (sock->type == SOCK_SEQPACKET)
859 msg->msg_flags |= MSG_EOR;
1da177e4 860
ce1d4d3e 861 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
862}
863
027445c3
BP
864static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
865 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
866{
867 struct sock_iocb siocb, *x;
1da177e4 868
ce1d4d3e
CH
869 if (pos != 0)
870 return -ESPIPE;
027445c3 871
027445c3 872 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
873 if (!x)
874 return -ENOMEM;
1da177e4 875
027445c3 876 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
877}
878
1da177e4
LT
879/*
880 * Atomic setting of ioctl hooks to avoid race
881 * with module unload.
882 */
883
4a3e2f71 884static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 885static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 886
881d966b 887void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 888{
4a3e2f71 889 mutex_lock(&br_ioctl_mutex);
1da177e4 890 br_ioctl_hook = hook;
4a3e2f71 891 mutex_unlock(&br_ioctl_mutex);
1da177e4 892}
89bddce5 893
1da177e4
LT
894EXPORT_SYMBOL(brioctl_set);
895
4a3e2f71 896static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 897static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 898
881d966b 899void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 900{
4a3e2f71 901 mutex_lock(&vlan_ioctl_mutex);
1da177e4 902 vlan_ioctl_hook = hook;
4a3e2f71 903 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 904}
89bddce5 905
1da177e4
LT
906EXPORT_SYMBOL(vlan_ioctl_set);
907
4a3e2f71 908static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 909static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 910
89bddce5 911void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 912{
4a3e2f71 913 mutex_lock(&dlci_ioctl_mutex);
1da177e4 914 dlci_ioctl_hook = hook;
4a3e2f71 915 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 916}
89bddce5 917
1da177e4
LT
918EXPORT_SYMBOL(dlci_ioctl_set);
919
6b96018b
AB
920static long sock_do_ioctl(struct net *net, struct socket *sock,
921 unsigned int cmd, unsigned long arg)
922{
923 int err;
924 void __user *argp = (void __user *)arg;
925
926 err = sock->ops->ioctl(sock, cmd, arg);
927
928 /*
929 * If this ioctl is unknown try to hand it down
930 * to the NIC driver.
931 */
932 if (err == -ENOIOCTLCMD)
933 err = dev_ioctl(net, cmd, argp);
934
935 return err;
936}
937
1da177e4
LT
938/*
939 * With an ioctl, arg may well be a user mode pointer, but we don't know
940 * what to do with it - that's up to the protocol still.
941 */
942
943static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
944{
945 struct socket *sock;
881d966b 946 struct sock *sk;
1da177e4
LT
947 void __user *argp = (void __user *)arg;
948 int pid, err;
881d966b 949 struct net *net;
1da177e4 950
b69aee04 951 sock = file->private_data;
881d966b 952 sk = sock->sk;
3b1e0a65 953 net = sock_net(sk);
1da177e4 954 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 955 err = dev_ioctl(net, cmd, argp);
1da177e4 956 } else
3d23e349 957#ifdef CONFIG_WEXT_CORE
1da177e4 958 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 959 err = dev_ioctl(net, cmd, argp);
1da177e4 960 } else
3d23e349 961#endif
89bddce5 962 switch (cmd) {
1da177e4
LT
963 case FIOSETOWN:
964 case SIOCSPGRP:
965 err = -EFAULT;
966 if (get_user(pid, (int __user *)argp))
967 break;
968 err = f_setown(sock->file, pid, 1);
969 break;
970 case FIOGETOWN:
971 case SIOCGPGRP:
609d7fa9 972 err = put_user(f_getown(sock->file),
89bddce5 973 (int __user *)argp);
1da177e4
LT
974 break;
975 case SIOCGIFBR:
976 case SIOCSIFBR:
977 case SIOCBRADDBR:
978 case SIOCBRDELBR:
979 err = -ENOPKG;
980 if (!br_ioctl_hook)
981 request_module("bridge");
982
4a3e2f71 983 mutex_lock(&br_ioctl_mutex);
89bddce5 984 if (br_ioctl_hook)
881d966b 985 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 986 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
987 break;
988 case SIOCGIFVLAN:
989 case SIOCSIFVLAN:
990 err = -ENOPKG;
991 if (!vlan_ioctl_hook)
992 request_module("8021q");
993
4a3e2f71 994 mutex_lock(&vlan_ioctl_mutex);
1da177e4 995 if (vlan_ioctl_hook)
881d966b 996 err = vlan_ioctl_hook(net, argp);
4a3e2f71 997 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 998 break;
1da177e4
LT
999 case SIOCADDDLCI:
1000 case SIOCDELDLCI:
1001 err = -ENOPKG;
1002 if (!dlci_ioctl_hook)
1003 request_module("dlci");
1004
7512cbf6
PE
1005 mutex_lock(&dlci_ioctl_mutex);
1006 if (dlci_ioctl_hook)
1da177e4 1007 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1008 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1009 break;
1010 default:
6b96018b 1011 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1012 break;
89bddce5 1013 }
1da177e4
LT
1014 return err;
1015}
1016
1017int sock_create_lite(int family, int type, int protocol, struct socket **res)
1018{
1019 int err;
1020 struct socket *sock = NULL;
89bddce5 1021
1da177e4
LT
1022 err = security_socket_create(family, type, protocol, 1);
1023 if (err)
1024 goto out;
1025
1026 sock = sock_alloc();
1027 if (!sock) {
1028 err = -ENOMEM;
1029 goto out;
1030 }
1031
1da177e4 1032 sock->type = type;
7420ed23
VY
1033 err = security_socket_post_create(sock, family, type, protocol, 1);
1034 if (err)
1035 goto out_release;
1036
1da177e4
LT
1037out:
1038 *res = sock;
1039 return err;
7420ed23
VY
1040out_release:
1041 sock_release(sock);
1042 sock = NULL;
1043 goto out;
1da177e4
LT
1044}
1045
1046/* No kernel lock held - perfect */
89bddce5 1047static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1048{
1049 struct socket *sock;
1050
1051 /*
89bddce5 1052 * We can't return errors to poll, so it's either yes or no.
1da177e4 1053 */
b69aee04 1054 sock = file->private_data;
1da177e4
LT
1055 return sock->ops->poll(file, sock, wait);
1056}
1057
89bddce5 1058static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1059{
b69aee04 1060 struct socket *sock = file->private_data;
1da177e4
LT
1061
1062 return sock->ops->mmap(file, sock, vma);
1063}
1064
20380731 1065static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1066{
1067 /*
89bddce5
SH
1068 * It was possible the inode is NULL we were
1069 * closing an unfinished socket.
1da177e4
LT
1070 */
1071
89bddce5 1072 if (!inode) {
1da177e4
LT
1073 printk(KERN_DEBUG "sock_close: NULL inode\n");
1074 return 0;
1075 }
1da177e4
LT
1076 sock_release(SOCKET_I(inode));
1077 return 0;
1078}
1079
1080/*
1081 * Update the socket async list
1082 *
1083 * Fasync_list locking strategy.
1084 *
1085 * 1. fasync_list is modified only under process context socket lock
1086 * i.e. under semaphore.
1087 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1088 * or under socket lock.
1089 * 3. fasync_list can be used from softirq context, so that
1090 * modification under socket lock have to be enhanced with
1091 * write_lock_bh(&sk->sk_callback_lock).
1092 * --ANK (990710)
1093 */
1094
1095static int sock_fasync(int fd, struct file *filp, int on)
1096{
89bddce5 1097 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1098 struct socket *sock;
1099 struct sock *sk;
1100
89bddce5 1101 if (on) {
8b3a7005 1102 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1103 if (fna == NULL)
1da177e4
LT
1104 return -ENOMEM;
1105 }
1106
b69aee04 1107 sock = filp->private_data;
1da177e4 1108
89bddce5
SH
1109 sk = sock->sk;
1110 if (sk == NULL) {
1da177e4
LT
1111 kfree(fna);
1112 return -EINVAL;
1113 }
1114
1115 lock_sock(sk);
1116
76398425
JC
1117 spin_lock(&filp->f_lock);
1118 if (on)
1119 filp->f_flags |= FASYNC;
1120 else
1121 filp->f_flags &= ~FASYNC;
1122 spin_unlock(&filp->f_lock);
1123
89bddce5 1124 prev = &(sock->fasync_list);
1da177e4 1125
89bddce5
SH
1126 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1127 if (fa->fa_file == filp)
1da177e4
LT
1128 break;
1129
89bddce5
SH
1130 if (on) {
1131 if (fa != NULL) {
1da177e4 1132 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1133 fa->fa_fd = fd;
1da177e4
LT
1134 write_unlock_bh(&sk->sk_callback_lock);
1135
1136 kfree(fna);
1137 goto out;
1138 }
89bddce5
SH
1139 fna->fa_file = filp;
1140 fna->fa_fd = fd;
1141 fna->magic = FASYNC_MAGIC;
1142 fna->fa_next = sock->fasync_list;
1da177e4 1143 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1144 sock->fasync_list = fna;
bcdce719 1145 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1146 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1147 } else {
1148 if (fa != NULL) {
1da177e4 1149 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1150 *prev = fa->fa_next;
bcdce719
ED
1151 if (!sock->fasync_list)
1152 sock_reset_flag(sk, SOCK_FASYNC);
1da177e4
LT
1153 write_unlock_bh(&sk->sk_callback_lock);
1154 kfree(fa);
1155 }
1156 }
1157
1158out:
1159 release_sock(sock->sk);
1160 return 0;
1161}
1162
1163/* This function may be called only under socket lock or callback_lock */
1164
1165int sock_wake_async(struct socket *sock, int how, int band)
1166{
1167 if (!sock || !sock->fasync_list)
1168 return -1;
89bddce5 1169 switch (how) {
8d8ad9d7 1170 case SOCK_WAKE_WAITD:
1da177e4
LT
1171 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1172 break;
1173 goto call_kill;
8d8ad9d7 1174 case SOCK_WAKE_SPACE:
1da177e4
LT
1175 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1176 break;
1177 /* fall through */
8d8ad9d7 1178 case SOCK_WAKE_IO:
89bddce5 1179call_kill:
1da177e4
LT
1180 __kill_fasync(sock->fasync_list, SIGIO, band);
1181 break;
8d8ad9d7 1182 case SOCK_WAKE_URG:
1da177e4
LT
1183 __kill_fasync(sock->fasync_list, SIGURG, band);
1184 }
1185 return 0;
1186}
1187
1b8d7ae4 1188static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1189 struct socket **res, int kern)
1da177e4
LT
1190{
1191 int err;
1192 struct socket *sock;
55737fda 1193 const struct net_proto_family *pf;
1da177e4
LT
1194
1195 /*
89bddce5 1196 * Check protocol is in range
1da177e4
LT
1197 */
1198 if (family < 0 || family >= NPROTO)
1199 return -EAFNOSUPPORT;
1200 if (type < 0 || type >= SOCK_MAX)
1201 return -EINVAL;
1202
1203 /* Compatibility.
1204
1205 This uglymoron is moved from INET layer to here to avoid
1206 deadlock in module load.
1207 */
1208 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1209 static int warned;
1da177e4
LT
1210 if (!warned) {
1211 warned = 1;
89bddce5
SH
1212 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1213 current->comm);
1da177e4
LT
1214 }
1215 family = PF_PACKET;
1216 }
1217
1218 err = security_socket_create(family, type, protocol, kern);
1219 if (err)
1220 return err;
89bddce5 1221
55737fda
SH
1222 /*
1223 * Allocate the socket and allow the family to set things up. if
1224 * the protocol is 0, the family is instructed to select an appropriate
1225 * default.
1226 */
1227 sock = sock_alloc();
1228 if (!sock) {
1229 if (net_ratelimit())
1230 printk(KERN_WARNING "socket: no more sockets\n");
1231 return -ENFILE; /* Not exactly a match, but its the
1232 closest posix thing */
1233 }
1234
1235 sock->type = type;
1236
95a5afca 1237#ifdef CONFIG_MODULES
89bddce5
SH
1238 /* Attempt to load a protocol module if the find failed.
1239 *
1240 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1241 * requested real, full-featured networking support upon configuration.
1242 * Otherwise module support will break!
1243 */
55737fda 1244 if (net_families[family] == NULL)
89bddce5 1245 request_module("net-pf-%d", family);
1da177e4
LT
1246#endif
1247
55737fda
SH
1248 rcu_read_lock();
1249 pf = rcu_dereference(net_families[family]);
1250 err = -EAFNOSUPPORT;
1251 if (!pf)
1252 goto out_release;
1da177e4
LT
1253
1254 /*
1255 * We will call the ->create function, that possibly is in a loadable
1256 * module, so we have to bump that loadable module refcnt first.
1257 */
55737fda 1258 if (!try_module_get(pf->owner))
1da177e4
LT
1259 goto out_release;
1260
55737fda
SH
1261 /* Now protected by module ref count */
1262 rcu_read_unlock();
1263
3f378b68 1264 err = pf->create(net, sock, protocol, kern);
55737fda 1265 if (err < 0)
1da177e4 1266 goto out_module_put;
a79af59e 1267
1da177e4
LT
1268 /*
1269 * Now to bump the refcnt of the [loadable] module that owns this
1270 * socket at sock_release time we decrement its refcnt.
1271 */
55737fda
SH
1272 if (!try_module_get(sock->ops->owner))
1273 goto out_module_busy;
1274
1da177e4
LT
1275 /*
1276 * Now that we're done with the ->create function, the [loadable]
1277 * module can have its refcnt decremented
1278 */
55737fda 1279 module_put(pf->owner);
7420ed23
VY
1280 err = security_socket_post_create(sock, family, type, protocol, kern);
1281 if (err)
3b185525 1282 goto out_sock_release;
55737fda 1283 *res = sock;
1da177e4 1284
55737fda
SH
1285 return 0;
1286
1287out_module_busy:
1288 err = -EAFNOSUPPORT;
1da177e4 1289out_module_put:
55737fda
SH
1290 sock->ops = NULL;
1291 module_put(pf->owner);
1292out_sock_release:
1da177e4 1293 sock_release(sock);
55737fda
SH
1294 return err;
1295
1296out_release:
1297 rcu_read_unlock();
1298 goto out_sock_release;
1da177e4
LT
1299}
1300
1301int sock_create(int family, int type, int protocol, struct socket **res)
1302{
1b8d7ae4 1303 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1304}
1305
1306int sock_create_kern(int family, int type, int protocol, struct socket **res)
1307{
1b8d7ae4 1308 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1309}
1310
3e0fa65f 1311SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1312{
1313 int retval;
1314 struct socket *sock;
a677a039
UD
1315 int flags;
1316
e38b36f3
UD
1317 /* Check the SOCK_* constants for consistency. */
1318 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1319 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1320 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1321 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1322
a677a039 1323 flags = type & ~SOCK_TYPE_MASK;
77d27200 1324 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1325 return -EINVAL;
1326 type &= SOCK_TYPE_MASK;
1da177e4 1327
aaca0bdc
UD
1328 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1329 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1330
1da177e4
LT
1331 retval = sock_create(family, type, protocol, &sock);
1332 if (retval < 0)
1333 goto out;
1334
77d27200 1335 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1336 if (retval < 0)
1337 goto out_release;
1338
1339out:
1340 /* It may be already another descriptor 8) Not kernel problem. */
1341 return retval;
1342
1343out_release:
1344 sock_release(sock);
1345 return retval;
1346}
1347
1348/*
1349 * Create a pair of connected sockets.
1350 */
1351
3e0fa65f
HC
1352SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1353 int __user *, usockvec)
1da177e4
LT
1354{
1355 struct socket *sock1, *sock2;
1356 int fd1, fd2, err;
db349509 1357 struct file *newfile1, *newfile2;
a677a039
UD
1358 int flags;
1359
1360 flags = type & ~SOCK_TYPE_MASK;
77d27200 1361 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1362 return -EINVAL;
1363 type &= SOCK_TYPE_MASK;
1da177e4 1364
aaca0bdc
UD
1365 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1366 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1367
1da177e4
LT
1368 /*
1369 * Obtain the first socket and check if the underlying protocol
1370 * supports the socketpair call.
1371 */
1372
1373 err = sock_create(family, type, protocol, &sock1);
1374 if (err < 0)
1375 goto out;
1376
1377 err = sock_create(family, type, protocol, &sock2);
1378 if (err < 0)
1379 goto out_release_1;
1380
1381 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1382 if (err < 0)
1da177e4
LT
1383 goto out_release_both;
1384
7cbe66b6 1385 fd1 = sock_alloc_file(sock1, &newfile1, flags);
bf3c23d1
DM
1386 if (unlikely(fd1 < 0)) {
1387 err = fd1;
db349509 1388 goto out_release_both;
bf3c23d1 1389 }
1da177e4 1390
7cbe66b6 1391 fd2 = sock_alloc_file(sock2, &newfile2, flags);
198de4d7
AV
1392 if (unlikely(fd2 < 0)) {
1393 err = fd2;
1394 fput(newfile1);
1395 put_unused_fd(fd1);
1396 sock_release(sock2);
1397 goto out;
db349509
AV
1398 }
1399
157cf649 1400 audit_fd_pair(fd1, fd2);
db349509
AV
1401 fd_install(fd1, newfile1);
1402 fd_install(fd2, newfile2);
1da177e4
LT
1403 /* fd1 and fd2 may be already another descriptors.
1404 * Not kernel problem.
1405 */
1406
89bddce5 1407 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1408 if (!err)
1409 err = put_user(fd2, &usockvec[1]);
1410 if (!err)
1411 return 0;
1412
1413 sys_close(fd2);
1414 sys_close(fd1);
1415 return err;
1416
1da177e4 1417out_release_both:
89bddce5 1418 sock_release(sock2);
1da177e4 1419out_release_1:
89bddce5 1420 sock_release(sock1);
1da177e4
LT
1421out:
1422 return err;
1423}
1424
1da177e4
LT
1425/*
1426 * Bind a name to a socket. Nothing much to do here since it's
1427 * the protocol's responsibility to handle the local address.
1428 *
1429 * We move the socket address to kernel space before we call
1430 * the protocol layer (having also checked the address is ok).
1431 */
1432
20f37034 1433SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1434{
1435 struct socket *sock;
230b1839 1436 struct sockaddr_storage address;
6cb153ca 1437 int err, fput_needed;
1da177e4 1438
89bddce5 1439 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1440 if (sock) {
230b1839 1441 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
89bddce5
SH
1442 if (err >= 0) {
1443 err = security_socket_bind(sock,
230b1839 1444 (struct sockaddr *)&address,
89bddce5 1445 addrlen);
6cb153ca
BL
1446 if (!err)
1447 err = sock->ops->bind(sock,
89bddce5 1448 (struct sockaddr *)
230b1839 1449 &address, addrlen);
1da177e4 1450 }
6cb153ca 1451 fput_light(sock->file, fput_needed);
89bddce5 1452 }
1da177e4
LT
1453 return err;
1454}
1455
1da177e4
LT
1456/*
1457 * Perform a listen. Basically, we allow the protocol to do anything
1458 * necessary for a listen, and if that works, we mark the socket as
1459 * ready for listening.
1460 */
1461
3e0fa65f 1462SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1463{
1464 struct socket *sock;
6cb153ca 1465 int err, fput_needed;
b8e1f9b5 1466 int somaxconn;
89bddce5
SH
1467
1468 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1469 if (sock) {
8efa6e93 1470 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
b8e1f9b5
PE
1471 if ((unsigned)backlog > somaxconn)
1472 backlog = somaxconn;
1da177e4
LT
1473
1474 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1475 if (!err)
1476 err = sock->ops->listen(sock, backlog);
1da177e4 1477
6cb153ca 1478 fput_light(sock->file, fput_needed);
1da177e4
LT
1479 }
1480 return err;
1481}
1482
1da177e4
LT
1483/*
1484 * For accept, we attempt to create a new socket, set up the link
1485 * with the client, wake up the client, then return the new
1486 * connected fd. We collect the address of the connector in kernel
1487 * space and move it to user at the very end. This is unclean because
1488 * we open the socket then return an error.
1489 *
1490 * 1003.1g adds the ability to recvmsg() to query connection pending
1491 * status to recvmsg. We need to add that support in a way that's
1492 * clean when we restructure accept also.
1493 */
1494
20f37034
HC
1495SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1496 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1497{
1498 struct socket *sock, *newsock;
39d8c1b6 1499 struct file *newfile;
6cb153ca 1500 int err, len, newfd, fput_needed;
230b1839 1501 struct sockaddr_storage address;
1da177e4 1502
77d27200 1503 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1504 return -EINVAL;
1505
1506 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1507 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1508
6cb153ca 1509 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1510 if (!sock)
1511 goto out;
1512
1513 err = -ENFILE;
89bddce5 1514 if (!(newsock = sock_alloc()))
1da177e4
LT
1515 goto out_put;
1516
1517 newsock->type = sock->type;
1518 newsock->ops = sock->ops;
1519
1da177e4
LT
1520 /*
1521 * We don't need try_module_get here, as the listening socket (sock)
1522 * has the protocol module (sock->ops->owner) held.
1523 */
1524 __module_get(newsock->ops->owner);
1525
7cbe66b6 1526 newfd = sock_alloc_file(newsock, &newfile, flags);
39d8c1b6
DM
1527 if (unlikely(newfd < 0)) {
1528 err = newfd;
9a1875e6
DM
1529 sock_release(newsock);
1530 goto out_put;
39d8c1b6
DM
1531 }
1532
a79af59e
FF
1533 err = security_socket_accept(sock, newsock);
1534 if (err)
39d8c1b6 1535 goto out_fd;
a79af59e 1536
1da177e4
LT
1537 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1538 if (err < 0)
39d8c1b6 1539 goto out_fd;
1da177e4
LT
1540
1541 if (upeer_sockaddr) {
230b1839 1542 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1543 &len, 2) < 0) {
1da177e4 1544 err = -ECONNABORTED;
39d8c1b6 1545 goto out_fd;
1da177e4 1546 }
230b1839
YH
1547 err = move_addr_to_user((struct sockaddr *)&address,
1548 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1549 if (err < 0)
39d8c1b6 1550 goto out_fd;
1da177e4
LT
1551 }
1552
1553 /* File flags are not inherited via accept() unlike another OSes. */
1554
39d8c1b6
DM
1555 fd_install(newfd, newfile);
1556 err = newfd;
1da177e4 1557
1da177e4 1558out_put:
6cb153ca 1559 fput_light(sock->file, fput_needed);
1da177e4
LT
1560out:
1561 return err;
39d8c1b6 1562out_fd:
9606a216 1563 fput(newfile);
39d8c1b6 1564 put_unused_fd(newfd);
1da177e4
LT
1565 goto out_put;
1566}
1567
20f37034
HC
1568SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1569 int __user *, upeer_addrlen)
aaca0bdc 1570{
de11defe 1571 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1572}
1573
1da177e4
LT
1574/*
1575 * Attempt to connect to a socket with the server address. The address
1576 * is in user space so we verify it is OK and move it to kernel space.
1577 *
1578 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1579 * break bindings
1580 *
1581 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1582 * other SEQPACKET protocols that take time to connect() as it doesn't
1583 * include the -EINPROGRESS status for such sockets.
1584 */
1585
20f37034
HC
1586SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1587 int, addrlen)
1da177e4
LT
1588{
1589 struct socket *sock;
230b1839 1590 struct sockaddr_storage address;
6cb153ca 1591 int err, fput_needed;
1da177e4 1592
6cb153ca 1593 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1594 if (!sock)
1595 goto out;
230b1839 1596 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
1da177e4
LT
1597 if (err < 0)
1598 goto out_put;
1599
89bddce5 1600 err =
230b1839 1601 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1602 if (err)
1603 goto out_put;
1604
230b1839 1605 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1606 sock->file->f_flags);
1607out_put:
6cb153ca 1608 fput_light(sock->file, fput_needed);
1da177e4
LT
1609out:
1610 return err;
1611}
1612
1613/*
1614 * Get the local address ('name') of a socket object. Move the obtained
1615 * name to user space.
1616 */
1617
20f37034
HC
1618SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1619 int __user *, usockaddr_len)
1da177e4
LT
1620{
1621 struct socket *sock;
230b1839 1622 struct sockaddr_storage address;
6cb153ca 1623 int len, err, fput_needed;
89bddce5 1624
6cb153ca 1625 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1626 if (!sock)
1627 goto out;
1628
1629 err = security_socket_getsockname(sock);
1630 if (err)
1631 goto out_put;
1632
230b1839 1633 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1634 if (err)
1635 goto out_put;
230b1839 1636 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1637
1638out_put:
6cb153ca 1639 fput_light(sock->file, fput_needed);
1da177e4
LT
1640out:
1641 return err;
1642}
1643
1644/*
1645 * Get the remote address ('name') of a socket object. Move the obtained
1646 * name to user space.
1647 */
1648
20f37034
HC
1649SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1650 int __user *, usockaddr_len)
1da177e4
LT
1651{
1652 struct socket *sock;
230b1839 1653 struct sockaddr_storage address;
6cb153ca 1654 int len, err, fput_needed;
1da177e4 1655
89bddce5
SH
1656 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1657 if (sock != NULL) {
1da177e4
LT
1658 err = security_socket_getpeername(sock);
1659 if (err) {
6cb153ca 1660 fput_light(sock->file, fput_needed);
1da177e4
LT
1661 return err;
1662 }
1663
89bddce5 1664 err =
230b1839 1665 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1666 1);
1da177e4 1667 if (!err)
230b1839 1668 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr,
89bddce5 1669 usockaddr_len);
6cb153ca 1670 fput_light(sock->file, fput_needed);
1da177e4
LT
1671 }
1672 return err;
1673}
1674
1675/*
1676 * Send a datagram to a given address. We move the address into kernel
1677 * space and check the user space data area is readable before invoking
1678 * the protocol.
1679 */
1680
3e0fa65f
HC
1681SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1682 unsigned, flags, struct sockaddr __user *, addr,
1683 int, addr_len)
1da177e4
LT
1684{
1685 struct socket *sock;
230b1839 1686 struct sockaddr_storage address;
1da177e4
LT
1687 int err;
1688 struct msghdr msg;
1689 struct iovec iov;
6cb153ca 1690 int fput_needed;
6cb153ca 1691
de0fa95c
PE
1692 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1693 if (!sock)
4387ff75 1694 goto out;
6cb153ca 1695
89bddce5
SH
1696 iov.iov_base = buff;
1697 iov.iov_len = len;
1698 msg.msg_name = NULL;
1699 msg.msg_iov = &iov;
1700 msg.msg_iovlen = 1;
1701 msg.msg_control = NULL;
1702 msg.msg_controllen = 0;
1703 msg.msg_namelen = 0;
6cb153ca 1704 if (addr) {
230b1839 1705 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address);
1da177e4
LT
1706 if (err < 0)
1707 goto out_put;
230b1839 1708 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1709 msg.msg_namelen = addr_len;
1da177e4
LT
1710 }
1711 if (sock->file->f_flags & O_NONBLOCK)
1712 flags |= MSG_DONTWAIT;
1713 msg.msg_flags = flags;
1714 err = sock_sendmsg(sock, &msg, len);
1715
89bddce5 1716out_put:
de0fa95c 1717 fput_light(sock->file, fput_needed);
4387ff75 1718out:
1da177e4
LT
1719 return err;
1720}
1721
1722/*
89bddce5 1723 * Send a datagram down a socket.
1da177e4
LT
1724 */
1725
3e0fa65f
HC
1726SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
1727 unsigned, flags)
1da177e4
LT
1728{
1729 return sys_sendto(fd, buff, len, flags, NULL, 0);
1730}
1731
1732/*
89bddce5 1733 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1734 * sender. We verify the buffers are writable and if needed move the
1735 * sender address from kernel to user space.
1736 */
1737
3e0fa65f
HC
1738SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1739 unsigned, flags, struct sockaddr __user *, addr,
1740 int __user *, addr_len)
1da177e4
LT
1741{
1742 struct socket *sock;
1743 struct iovec iov;
1744 struct msghdr msg;
230b1839 1745 struct sockaddr_storage address;
89bddce5 1746 int err, err2;
6cb153ca
BL
1747 int fput_needed;
1748
de0fa95c 1749 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1750 if (!sock)
de0fa95c 1751 goto out;
1da177e4 1752
89bddce5
SH
1753 msg.msg_control = NULL;
1754 msg.msg_controllen = 0;
1755 msg.msg_iovlen = 1;
1756 msg.msg_iov = &iov;
1757 iov.iov_len = size;
1758 iov.iov_base = ubuf;
230b1839
YH
1759 msg.msg_name = (struct sockaddr *)&address;
1760 msg.msg_namelen = sizeof(address);
1da177e4
LT
1761 if (sock->file->f_flags & O_NONBLOCK)
1762 flags |= MSG_DONTWAIT;
89bddce5 1763 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1764
89bddce5 1765 if (err >= 0 && addr != NULL) {
230b1839
YH
1766 err2 = move_addr_to_user((struct sockaddr *)&address,
1767 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1768 if (err2 < 0)
1769 err = err2;
1da177e4 1770 }
de0fa95c
PE
1771
1772 fput_light(sock->file, fput_needed);
4387ff75 1773out:
1da177e4
LT
1774 return err;
1775}
1776
1777/*
89bddce5 1778 * Receive a datagram from a socket.
1da177e4
LT
1779 */
1780
89bddce5
SH
1781asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1782 unsigned flags)
1da177e4
LT
1783{
1784 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1785}
1786
1787/*
1788 * Set a socket option. Because we don't know the option lengths we have
1789 * to pass the user mode parameter for the protocols to sort out.
1790 */
1791
20f37034
HC
1792SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1793 char __user *, optval, int, optlen)
1da177e4 1794{
6cb153ca 1795 int err, fput_needed;
1da177e4
LT
1796 struct socket *sock;
1797
1798 if (optlen < 0)
1799 return -EINVAL;
89bddce5
SH
1800
1801 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1802 if (sock != NULL) {
1803 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1804 if (err)
1805 goto out_put;
1da177e4
LT
1806
1807 if (level == SOL_SOCKET)
89bddce5
SH
1808 err =
1809 sock_setsockopt(sock, level, optname, optval,
1810 optlen);
1da177e4 1811 else
89bddce5
SH
1812 err =
1813 sock->ops->setsockopt(sock, level, optname, optval,
1814 optlen);
6cb153ca
BL
1815out_put:
1816 fput_light(sock->file, fput_needed);
1da177e4
LT
1817 }
1818 return err;
1819}
1820
1821/*
1822 * Get a socket option. Because we don't know the option lengths we have
1823 * to pass a user mode parameter for the protocols to sort out.
1824 */
1825
20f37034
HC
1826SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1827 char __user *, optval, int __user *, optlen)
1da177e4 1828{
6cb153ca 1829 int err, fput_needed;
1da177e4
LT
1830 struct socket *sock;
1831
89bddce5
SH
1832 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1833 if (sock != NULL) {
6cb153ca
BL
1834 err = security_socket_getsockopt(sock, level, optname);
1835 if (err)
1836 goto out_put;
1da177e4
LT
1837
1838 if (level == SOL_SOCKET)
89bddce5
SH
1839 err =
1840 sock_getsockopt(sock, level, optname, optval,
1841 optlen);
1da177e4 1842 else
89bddce5
SH
1843 err =
1844 sock->ops->getsockopt(sock, level, optname, optval,
1845 optlen);
6cb153ca
BL
1846out_put:
1847 fput_light(sock->file, fput_needed);
1da177e4
LT
1848 }
1849 return err;
1850}
1851
1da177e4
LT
1852/*
1853 * Shutdown a socket.
1854 */
1855
754fe8d2 1856SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1857{
6cb153ca 1858 int err, fput_needed;
1da177e4
LT
1859 struct socket *sock;
1860
89bddce5
SH
1861 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1862 if (sock != NULL) {
1da177e4 1863 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1864 if (!err)
1865 err = sock->ops->shutdown(sock, how);
1866 fput_light(sock->file, fput_needed);
1da177e4
LT
1867 }
1868 return err;
1869}
1870
/*
 * Helpers that yield the address of a msghdr field in either the native
 * or the compat layout, for fields that have the same type
 * (int / unsigned) in both on our platforms.
 *
 * They expand to code that reads a variable named `flags` and a pointer
 * named `<msg>_compat`, so both must be in scope at the point of use.
 */
#define COMPAT_MSG(msg, member)	\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1877
1da177e4
LT
1878/*
1879 * BSD sendmsg interface
1880 */
1881
3e0fa65f 1882SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1da177e4 1883{
89bddce5
SH
1884 struct compat_msghdr __user *msg_compat =
1885 (struct compat_msghdr __user *)msg;
1da177e4 1886 struct socket *sock;
230b1839 1887 struct sockaddr_storage address;
1da177e4 1888 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1889 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1890 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1891 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1892 unsigned char *ctl_buf = ctl;
1893 struct msghdr msg_sys;
1894 int err, ctl_len, iov_size, total_len;
6cb153ca 1895 int fput_needed;
89bddce5 1896
1da177e4
LT
1897 err = -EFAULT;
1898 if (MSG_CMSG_COMPAT & flags) {
1899 if (get_compat_msghdr(&msg_sys, msg_compat))
1900 return -EFAULT;
89bddce5
SH
1901 }
1902 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1903 return -EFAULT;
1904
6cb153ca 1905 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1906 if (!sock)
1da177e4
LT
1907 goto out;
1908
1909 /* do not move before msg_sys is valid */
1910 err = -EMSGSIZE;
1911 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1912 goto out_put;
1913
89bddce5 1914 /* Check whether to allocate the iovec area */
1da177e4
LT
1915 err = -ENOMEM;
1916 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1917 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1918 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1919 if (!iov)
1920 goto out_put;
1921 }
1922
1923 /* This will also move the address data into kernel space */
1924 if (MSG_CMSG_COMPAT & flags) {
230b1839
YH
1925 err = verify_compat_iovec(&msg_sys, iov,
1926 (struct sockaddr *)&address,
1927 VERIFY_READ);
1da177e4 1928 } else
230b1839
YH
1929 err = verify_iovec(&msg_sys, iov,
1930 (struct sockaddr *)&address,
1931 VERIFY_READ);
89bddce5 1932 if (err < 0)
1da177e4
LT
1933 goto out_freeiov;
1934 total_len = err;
1935
1936 err = -ENOBUFS;
1937
1938 if (msg_sys.msg_controllen > INT_MAX)
1939 goto out_freeiov;
89bddce5 1940 ctl_len = msg_sys.msg_controllen;
1da177e4 1941 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1942 err =
1943 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1944 sizeof(ctl));
1da177e4
LT
1945 if (err)
1946 goto out_freeiov;
1947 ctl_buf = msg_sys.msg_control;
8920e8f9 1948 ctl_len = msg_sys.msg_controllen;
1da177e4 1949 } else if (ctl_len) {
89bddce5 1950 if (ctl_len > sizeof(ctl)) {
1da177e4 1951 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1952 if (ctl_buf == NULL)
1da177e4
LT
1953 goto out_freeiov;
1954 }
1955 err = -EFAULT;
1956 /*
1957 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1958 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1959 * checking falls down on this.
1960 */
89bddce5
SH
1961 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1962 ctl_len))
1da177e4
LT
1963 goto out_freectl;
1964 msg_sys.msg_control = ctl_buf;
1965 }
1966 msg_sys.msg_flags = flags;
1967
1968 if (sock->file->f_flags & O_NONBLOCK)
1969 msg_sys.msg_flags |= MSG_DONTWAIT;
1970 err = sock_sendmsg(sock, &msg_sys, total_len);
1971
1972out_freectl:
89bddce5 1973 if (ctl_buf != ctl)
1da177e4
LT
1974 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1975out_freeiov:
1976 if (iov != iovstack)
1977 sock_kfree_s(sock->sk, iov, iov_size);
1978out_put:
6cb153ca 1979 fput_light(sock->file, fput_needed);
89bddce5 1980out:
1da177e4
LT
1981 return err;
1982}
1983
a2e27255
ACM
1984static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
1985 struct msghdr *msg_sys, unsigned flags, int nosec)
1da177e4 1986{
89bddce5
SH
1987 struct compat_msghdr __user *msg_compat =
1988 (struct compat_msghdr __user *)msg;
1da177e4 1989 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1990 struct iovec *iov = iovstack;
1da177e4
LT
1991 unsigned long cmsg_ptr;
1992 int err, iov_size, total_len, len;
1993
1994 /* kernel mode address */
230b1839 1995 struct sockaddr_storage addr;
1da177e4
LT
1996
1997 /* user mode address pointers */
1998 struct sockaddr __user *uaddr;
1999 int __user *uaddr_len;
89bddce5 2000
1da177e4 2001 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2002 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2003 return -EFAULT;
89bddce5 2004 }
a2e27255 2005 else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
89bddce5 2006 return -EFAULT;
1da177e4 2007
1da177e4 2008 err = -EMSGSIZE;
a2e27255
ACM
2009 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2010 goto out;
89bddce5
SH
2011
2012 /* Check whether to allocate the iovec area */
1da177e4 2013 err = -ENOMEM;
a2e27255
ACM
2014 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
2015 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1da177e4
LT
2016 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
2017 if (!iov)
a2e27255 2018 goto out;
1da177e4
LT
2019 }
2020
2021 /*
89bddce5
SH
2022 * Save the user-mode address (verify_iovec will change the
2023 * kernel msghdr to use the kernel address space)
1da177e4 2024 */
89bddce5 2025
a2e27255 2026 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4
LT
2027 uaddr_len = COMPAT_NAMELEN(msg);
2028 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2029 err = verify_compat_iovec(msg_sys, iov,
230b1839
YH
2030 (struct sockaddr *)&addr,
2031 VERIFY_WRITE);
1da177e4 2032 } else
a2e27255 2033 err = verify_iovec(msg_sys, iov,
230b1839
YH
2034 (struct sockaddr *)&addr,
2035 VERIFY_WRITE);
1da177e4
LT
2036 if (err < 0)
2037 goto out_freeiov;
89bddce5 2038 total_len = err;
1da177e4 2039
a2e27255
ACM
2040 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2041 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2042
1da177e4
LT
2043 if (sock->file->f_flags & O_NONBLOCK)
2044 flags |= MSG_DONTWAIT;
a2e27255
ACM
2045 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2046 total_len, flags);
1da177e4
LT
2047 if (err < 0)
2048 goto out_freeiov;
2049 len = err;
2050
2051 if (uaddr != NULL) {
230b1839 2052 err = move_addr_to_user((struct sockaddr *)&addr,
a2e27255 2053 msg_sys->msg_namelen, uaddr,
89bddce5 2054 uaddr_len);
1da177e4
LT
2055 if (err < 0)
2056 goto out_freeiov;
2057 }
a2e27255 2058 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2059 COMPAT_FLAGS(msg));
1da177e4
LT
2060 if (err)
2061 goto out_freeiov;
2062 if (MSG_CMSG_COMPAT & flags)
a2e27255 2063 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2064 &msg_compat->msg_controllen);
2065 else
a2e27255 2066 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2067 &msg->msg_controllen);
2068 if (err)
2069 goto out_freeiov;
2070 err = len;
2071
2072out_freeiov:
2073 if (iov != iovstack)
2074 sock_kfree_s(sock->sk, iov, iov_size);
a2e27255
ACM
2075out:
2076 return err;
2077}
2078
2079/*
2080 * BSD recvmsg interface
2081 */
2082
2083SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2084 unsigned int, flags)
2085{
2086 int fput_needed, err;
2087 struct msghdr msg_sys;
2088 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2089
2090 if (!sock)
2091 goto out;
2092
2093 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2094
6cb153ca 2095 fput_light(sock->file, fput_needed);
1da177e4
LT
2096out:
2097 return err;
2098}
2099
a2e27255
ACM
2100/*
2101 * Linux recvmmsg interface
2102 */
2103
2104int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2105 unsigned int flags, struct timespec *timeout)
2106{
2107 int fput_needed, err, datagrams;
2108 struct socket *sock;
2109 struct mmsghdr __user *entry;
d7256d0e 2110 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2111 struct msghdr msg_sys;
2112 struct timespec end_time;
2113
2114 if (timeout &&
2115 poll_select_set_timeout(&end_time, timeout->tv_sec,
2116 timeout->tv_nsec))
2117 return -EINVAL;
2118
2119 datagrams = 0;
2120
2121 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2122 if (!sock)
2123 return err;
2124
2125 err = sock_error(sock->sk);
2126 if (err)
2127 goto out_put;
2128
2129 entry = mmsg;
d7256d0e 2130 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2131
2132 while (datagrams < vlen) {
2133 /*
2134 * No need to ask LSM for more than the first datagram.
2135 */
d7256d0e
JMG
2136 if (MSG_CMSG_COMPAT & flags) {
2137 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
2138 &msg_sys, flags, datagrams);
2139 if (err < 0)
2140 break;
2141 err = __put_user(err, &compat_entry->msg_len);
2142 ++compat_entry;
2143 } else {
2144 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
2145 &msg_sys, flags, datagrams);
2146 if (err < 0)
2147 break;
2148 err = put_user(err, &entry->msg_len);
2149 ++entry;
2150 }
2151
a2e27255
ACM
2152 if (err)
2153 break;
a2e27255
ACM
2154 ++datagrams;
2155
2156 if (timeout) {
2157 ktime_get_ts(timeout);
2158 *timeout = timespec_sub(end_time, *timeout);
2159 if (timeout->tv_sec < 0) {
2160 timeout->tv_sec = timeout->tv_nsec = 0;
2161 break;
2162 }
2163
2164 /* Timeout, return less than vlen datagrams */
2165 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2166 break;
2167 }
2168
2169 /* Out of band data, return right away */
2170 if (msg_sys.msg_flags & MSG_OOB)
2171 break;
2172 }
2173
2174out_put:
2175 fput_light(sock->file, fput_needed);
1da177e4 2176
a2e27255
ACM
2177 if (err == 0)
2178 return datagrams;
2179
2180 if (datagrams != 0) {
2181 /*
2182 * We may return less entries than requested (vlen) if the
2183 * sock is non block and there aren't enough datagrams...
2184 */
2185 if (err != -EAGAIN) {
2186 /*
2187 * ... or if recvmsg returns an error after we
2188 * received some datagrams, where we record the
2189 * error to return on the next call or if the
2190 * app asks about it using getsockopt(SO_ERROR).
2191 */
2192 sock->sk->sk_err = -err;
2193 }
2194
2195 return datagrams;
2196 }
2197
2198 return err;
2199}
2200
2201SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2202 unsigned int, vlen, unsigned int, flags,
2203 struct timespec __user *, timeout)
2204{
2205 int datagrams;
2206 struct timespec timeout_sys;
2207
2208 if (!timeout)
2209 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2210
2211 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2212 return -EFAULT;
2213
2214 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2215
2216 if (datagrams > 0 &&
2217 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2218 datagrams = -EFAULT;
2219
2220 return datagrams;
2221}
2222
2223#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call
 * number. AL(n) is the size of n unsigned long arguments.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[20] = {
	AL(0), AL(3), AL(3), AL(3), AL(2),	/* calls  0 ..  4 */
	AL(3), AL(3), AL(3), AL(4), AL(4),	/* calls  5 ..  9 */
	AL(4), AL(6), AL(6), AL(2), AL(5),	/* calls 10 .. 14 */
	AL(5), AL(3), AL(3), AL(4), AL(5)	/* calls 15 .. 19 */
};

#undef AL
2234
2235/*
89bddce5 2236 * System call vectors.
1da177e4
LT
2237 *
2238 * Argument checking cleaned up. Saved 20% in size.
2239 * This function doesn't need to set the kernel lock because
89bddce5 2240 * it is set by the callees.
1da177e4
LT
2241 */
2242
3e0fa65f 2243SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4
LT
2244{
2245 unsigned long a[6];
89bddce5 2246 unsigned long a0, a1;
1da177e4 2247 int err;
47379052 2248 unsigned int len;
1da177e4 2249
a2e27255 2250 if (call < 1 || call > SYS_RECVMMSG)
1da177e4
LT
2251 return -EINVAL;
2252
47379052
AV
2253 len = nargs[call];
2254 if (len > sizeof(a))
2255 return -EINVAL;
2256
1da177e4 2257 /* copy_from_user should be SMP safe. */
47379052 2258 if (copy_from_user(a, args, len))
1da177e4 2259 return -EFAULT;
3ec3b2fb 2260
f3298dc4 2261 audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb 2262
89bddce5
SH
2263 a0 = a[0];
2264 a1 = a[1];
2265
2266 switch (call) {
2267 case SYS_SOCKET:
2268 err = sys_socket(a0, a1, a[2]);
2269 break;
2270 case SYS_BIND:
2271 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2272 break;
2273 case SYS_CONNECT:
2274 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2275 break;
2276 case SYS_LISTEN:
2277 err = sys_listen(a0, a1);
2278 break;
2279 case SYS_ACCEPT:
de11defe
UD
2280 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2281 (int __user *)a[2], 0);
89bddce5
SH
2282 break;
2283 case SYS_GETSOCKNAME:
2284 err =
2285 sys_getsockname(a0, (struct sockaddr __user *)a1,
2286 (int __user *)a[2]);
2287 break;
2288 case SYS_GETPEERNAME:
2289 err =
2290 sys_getpeername(a0, (struct sockaddr __user *)a1,
2291 (int __user *)a[2]);
2292 break;
2293 case SYS_SOCKETPAIR:
2294 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2295 break;
2296 case SYS_SEND:
2297 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2298 break;
2299 case SYS_SENDTO:
2300 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2301 (struct sockaddr __user *)a[4], a[5]);
2302 break;
2303 case SYS_RECV:
2304 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2305 break;
2306 case SYS_RECVFROM:
2307 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2308 (struct sockaddr __user *)a[4],
2309 (int __user *)a[5]);
2310 break;
2311 case SYS_SHUTDOWN:
2312 err = sys_shutdown(a0, a1);
2313 break;
2314 case SYS_SETSOCKOPT:
2315 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2316 break;
2317 case SYS_GETSOCKOPT:
2318 err =
2319 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2320 (int __user *)a[4]);
2321 break;
2322 case SYS_SENDMSG:
2323 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2324 break;
2325 case SYS_RECVMSG:
2326 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2327 break;
a2e27255
ACM
2328 case SYS_RECVMMSG:
2329 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2330 (struct timespec __user *)a[4]);
2331 break;
de11defe
UD
2332 case SYS_ACCEPT4:
2333 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2334 (int __user *)a[2], a[3]);
aaca0bdc 2335 break;
89bddce5
SH
2336 default:
2337 err = -EINVAL;
2338 break;
1da177e4
LT
2339 }
2340 return err;
2341}
2342
89bddce5 2343#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2344
55737fda
SH
2345/**
2346 * sock_register - add a socket protocol handler
2347 * @ops: description of protocol
2348 *
1da177e4
LT
2349 * This function is called by a protocol handler that wants to
2350 * advertise its address family, and have it linked into the
55737fda
SH
2351 * socket interface. The value ops->family coresponds to the
2352 * socket system call protocol family.
1da177e4 2353 */
f0fd27d4 2354int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2355{
2356 int err;
2357
2358 if (ops->family >= NPROTO) {
89bddce5
SH
2359 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2360 NPROTO);
1da177e4
LT
2361 return -ENOBUFS;
2362 }
55737fda
SH
2363
2364 spin_lock(&net_family_lock);
2365 if (net_families[ops->family])
2366 err = -EEXIST;
2367 else {
89bddce5 2368 net_families[ops->family] = ops;
1da177e4
LT
2369 err = 0;
2370 }
55737fda
SH
2371 spin_unlock(&net_family_lock);
2372
89bddce5 2373 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2374 return err;
2375}
2376
55737fda
SH
2377/**
2378 * sock_unregister - remove a protocol handler
2379 * @family: protocol family to remove
2380 *
1da177e4
LT
2381 * This function is called by a protocol handler that wants to
2382 * remove its address family, and have it unlinked from the
55737fda
SH
2383 * new socket creation.
2384 *
2385 * If protocol handler is a module, then it can use module reference
2386 * counts to protect against new references. If protocol handler is not
2387 * a module then it needs to provide its own protection in
2388 * the ops->create routine.
1da177e4 2389 */
f0fd27d4 2390void sock_unregister(int family)
1da177e4 2391{
f0fd27d4 2392 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2393
55737fda 2394 spin_lock(&net_family_lock);
89bddce5 2395 net_families[family] = NULL;
55737fda
SH
2396 spin_unlock(&net_family_lock);
2397
2398 synchronize_rcu();
2399
89bddce5 2400 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2401}
2402
77d76ea3 2403static int __init sock_init(void)
1da177e4
LT
2404{
2405 /*
89bddce5 2406 * Initialize sock SLAB cache.
1da177e4 2407 */
89bddce5 2408
1da177e4
LT
2409 sk_init();
2410
1da177e4 2411 /*
89bddce5 2412 * Initialize skbuff SLAB cache
1da177e4
LT
2413 */
2414 skb_init();
1da177e4
LT
2415
2416 /*
89bddce5 2417 * Initialize the protocols module.
1da177e4
LT
2418 */
2419
2420 init_inodecache();
2421 register_filesystem(&sock_fs_type);
2422 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2423
2424 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2425 */
2426
2427#ifdef CONFIG_NETFILTER
2428 netfilter_init();
2429#endif
cbeb321a
DM
2430
2431 return 0;
1da177e4
LT
2432}
2433
77d76ea3
AK
2434core_initcall(sock_init); /* early initcall */
2435
1da177e4
LT
2436#ifdef CONFIG_PROC_FS
2437void socket_seq_show(struct seq_file *seq)
2438{
2439 int cpu;
2440 int counter = 0;
2441
6f912042 2442 for_each_possible_cpu(cpu)
89bddce5 2443 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2444
2445 /* It can be negative, by the way. 8) */
2446 if (counter < 0)
2447 counter = 0;
2448
2449 seq_printf(seq, "sockets: used %d\n", counter);
2450}
89bddce5 2451#endif /* CONFIG_PROC_FS */
1da177e4 2452
89bbfc95 2453#ifdef CONFIG_COMPAT
6b96018b
AB
2454static int do_siocgstamp(struct net *net, struct socket *sock,
2455 unsigned int cmd, struct compat_timeval __user *up)
7a229387 2456{
7a229387
AB
2457 mm_segment_t old_fs = get_fs();
2458 struct timeval ktv;
2459 int err;
2460
2461 set_fs(KERNEL_DS);
6b96018b 2462 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387
AB
2463 set_fs(old_fs);
2464 if (!err) {
2465 err = put_user(ktv.tv_sec, &up->tv_sec);
2466 err |= __put_user(ktv.tv_usec, &up->tv_usec);
2467 }
2468 return err;
2469}
2470
6b96018b
AB
2471static int do_siocgstampns(struct net *net, struct socket *sock,
2472 unsigned int cmd, struct compat_timespec __user *up)
7a229387 2473{
7a229387
AB
2474 mm_segment_t old_fs = get_fs();
2475 struct timespec kts;
2476 int err;
2477
2478 set_fs(KERNEL_DS);
6b96018b 2479 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387
AB
2480 set_fs(old_fs);
2481 if (!err) {
2482 err = put_user(kts.tv_sec, &up->tv_sec);
2483 err |= __put_user(kts.tv_nsec, &up->tv_nsec);
2484 }
2485 return err;
2486}
2487
6b96018b 2488static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2489{
2490 struct ifreq __user *uifr;
2491 int err;
2492
2493 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2494 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2495 return -EFAULT;
2496
6b96018b 2497 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2498 if (err)
2499 return err;
2500
6b96018b 2501 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2502 return -EFAULT;
2503
2504 return 0;
2505}
2506
6b96018b 2507static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2508{
6b96018b 2509 struct compat_ifconf ifc32;
7a229387
AB
2510 struct ifconf ifc;
2511 struct ifconf __user *uifc;
6b96018b 2512 struct compat_ifreq __user *ifr32;
7a229387
AB
2513 struct ifreq __user *ifr;
2514 unsigned int i, j;
2515 int err;
2516
6b96018b 2517 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2518 return -EFAULT;
2519
2520 if (ifc32.ifcbuf == 0) {
2521 ifc32.ifc_len = 0;
2522 ifc.ifc_len = 0;
2523 ifc.ifc_req = NULL;
2524 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2525 } else {
6b96018b 2526 size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) *
7a229387
AB
2527 sizeof (struct ifreq);
2528 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2529 ifc.ifc_len = len;
2530 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2531 ifr32 = compat_ptr(ifc32.ifcbuf);
6b96018b
AB
2532 for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) {
2533 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2534 return -EFAULT;
2535 ifr++;
2536 ifr32++;
2537 }
2538 }
2539 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2540 return -EFAULT;
2541
6b96018b 2542 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2543 if (err)
2544 return err;
2545
2546 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2547 return -EFAULT;
2548
2549 ifr = ifc.ifc_req;
2550 ifr32 = compat_ptr(ifc32.ifcbuf);
2551 for (i = 0, j = 0;
6b96018b
AB
2552 i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2553 i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) {
2554 if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq)))
7a229387
AB
2555 return -EFAULT;
2556 ifr32++;
2557 ifr++;
2558 }
2559
2560 if (ifc32.ifcbuf == 0) {
2561 /* Translate from 64-bit structure multiple to
2562 * a 32-bit one.
2563 */
2564 i = ifc.ifc_len;
6b96018b 2565 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2566 ifc32.ifc_len = i;
2567 } else {
2568 ifc32.ifc_len = i;
2569 }
6b96018b 2570 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2571 return -EFAULT;
2572
2573 return 0;
2574}
2575
6b96018b 2576static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387
AB
2577{
2578 struct ifreq __user *ifr;
7a229387
AB
2579 u32 data;
2580 void __user *datap;
2581
2582 ifr = compat_alloc_user_space(sizeof(*ifr));
7a229387
AB
2583
2584 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2585 return -EFAULT;
2586
2587 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2588 return -EFAULT;
2589
2590 datap = compat_ptr(data);
2591 if (put_user(datap, &ifr->ifr_ifru.ifru_data))
2592 return -EFAULT;
2593
6b96018b 2594 return dev_ioctl(net, SIOCETHTOOL, ifr);
7a229387
AB
2595}
2596
7a50a240
AB
2597static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2598{
2599 void __user *uptr;
2600 compat_uptr_t uptr32;
2601 struct ifreq __user *uifr;
2602
2603 uifr = compat_alloc_user_space(sizeof (*uifr));
2604 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2605 return -EFAULT;
2606
2607 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2608 return -EFAULT;
2609
2610 uptr = compat_ptr(uptr32);
2611
2612 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2613 return -EFAULT;
2614
2615 return dev_ioctl(net, SIOCWANDEV, uifr);
2616}
2617
6b96018b
AB
2618static int bond_ioctl(struct net *net, unsigned int cmd,
2619 struct compat_ifreq __user *ifr32)
7a229387
AB
2620{
2621 struct ifreq kifr;
2622 struct ifreq __user *uifr;
7a229387
AB
2623 mm_segment_t old_fs;
2624 int err;
2625 u32 data;
2626 void __user *datap;
2627
2628 switch (cmd) {
2629 case SIOCBONDENSLAVE:
2630 case SIOCBONDRELEASE:
2631 case SIOCBONDSETHWADDR:
2632 case SIOCBONDCHANGEACTIVE:
6b96018b 2633 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2634 return -EFAULT;
2635
2636 old_fs = get_fs();
2637 set_fs (KERNEL_DS);
6b96018b 2638 err = dev_ioctl(net, cmd, &kifr);
7a229387
AB
2639 set_fs (old_fs);
2640
2641 return err;
2642 case SIOCBONDSLAVEINFOQUERY:
2643 case SIOCBONDINFOQUERY:
2644 uifr = compat_alloc_user_space(sizeof(*uifr));
2645 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2646 return -EFAULT;
2647
2648 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2649 return -EFAULT;
2650
2651 datap = compat_ptr(data);
2652 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2653 return -EFAULT;
2654
6b96018b 2655 return dev_ioctl(net, cmd, uifr);
7a229387
AB
2656 default:
2657 return -EINVAL;
2658 };
2659}
2660
6b96018b
AB
2661static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2662 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2663{
2664 struct ifreq __user *u_ifreq64;
7a229387
AB
2665 char tmp_buf[IFNAMSIZ];
2666 void __user *data64;
2667 u32 data32;
2668
2669 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2670 IFNAMSIZ))
2671 return -EFAULT;
2672 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2673 return -EFAULT;
2674 data64 = compat_ptr(data32);
2675
2676 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2677
2678 /* Don't check these user accesses, just let that get trapped
2679 * in the ioctl handler instead.
2680 */
2681 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2682 IFNAMSIZ))
2683 return -EFAULT;
2684 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
2685 return -EFAULT;
2686
6b96018b 2687 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2688}
2689
6b96018b
AB
2690static int dev_ifsioc(struct net *net, struct socket *sock,
2691 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2692{
a2116ed2 2693 struct ifreq __user *uifr;
7a229387
AB
2694 int err;
2695
a2116ed2
AB
2696 uifr = compat_alloc_user_space(sizeof(*uifr));
2697 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2698 return -EFAULT;
2699
2700 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2701
7a229387
AB
2702 if (!err) {
2703 switch (cmd) {
2704 case SIOCGIFFLAGS:
2705 case SIOCGIFMETRIC:
2706 case SIOCGIFMTU:
2707 case SIOCGIFMEM:
2708 case SIOCGIFHWADDR:
2709 case SIOCGIFINDEX:
2710 case SIOCGIFADDR:
2711 case SIOCGIFBRDADDR:
2712 case SIOCGIFDSTADDR:
2713 case SIOCGIFNETMASK:
fab2532b 2714 case SIOCGIFPFLAGS:
7a229387 2715 case SIOCGIFTXQLEN:
fab2532b
AB
2716 case SIOCGMIIPHY:
2717 case SIOCGMIIREG:
a2116ed2 2718 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2719 err = -EFAULT;
2720 break;
2721 }
2722 }
2723 return err;
2724}
2725
a2116ed2
AB
2726static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2727 struct compat_ifreq __user *uifr32)
2728{
2729 struct ifreq ifr;
2730 struct compat_ifmap __user *uifmap32;
2731 mm_segment_t old_fs;
2732 int err;
2733
2734 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2735 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
2736 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2737 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2738 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2739 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
2740 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
2741 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
2742 if (err)
2743 return -EFAULT;
2744
2745 old_fs = get_fs();
2746 set_fs (KERNEL_DS);
2747 err = dev_ioctl(net, cmd, (void __user *)&ifr);
2748 set_fs (old_fs);
2749
2750 if (cmd == SIOCGIFMAP && !err) {
2751 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
2752 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2753 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2754 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2755 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
2756 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
2757 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
2758 if (err)
2759 err = -EFAULT;
2760 }
2761 return err;
2762}
2763
2764static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
2765{
2766 void __user *uptr;
2767 compat_uptr_t uptr32;
2768 struct ifreq __user *uifr;
2769
2770 uifr = compat_alloc_user_space(sizeof (*uifr));
2771 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2772 return -EFAULT;
2773
2774 if (get_user(uptr32, &uifr32->ifr_data))
2775 return -EFAULT;
2776
2777 uptr = compat_ptr(uptr32);
2778
2779 if (put_user(uptr, &uifr->ifr_data))
2780 return -EFAULT;
2781
2782 return dev_ioctl(net, SIOCSHWTSTAMP, uifr);
2783}
2784
7a229387
AB
2785struct rtentry32 {
2786 u32 rt_pad1;
2787 struct sockaddr rt_dst; /* target address */
2788 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2789 struct sockaddr rt_genmask; /* target network mask (IP) */
2790 unsigned short rt_flags;
2791 short rt_pad2;
2792 u32 rt_pad3;
2793 unsigned char rt_tos;
2794 unsigned char rt_class;
2795 short rt_pad4;
2796 short rt_metric; /* +1 for binary compatibility! */
2797 /* char * */ u32 rt_dev; /* forcing the device at add */
2798 u32 rt_mtu; /* per route MTU/Window */
2799 u32 rt_window; /* Window clamping */
2800 unsigned short rt_irtt; /* Initial RTT */
2801};
2802
2803struct in6_rtmsg32 {
2804 struct in6_addr rtmsg_dst;
2805 struct in6_addr rtmsg_src;
2806 struct in6_addr rtmsg_gateway;
2807 u32 rtmsg_type;
2808 u16 rtmsg_dst_len;
2809 u16 rtmsg_src_len;
2810 u32 rtmsg_metric;
2811 u32 rtmsg_info;
2812 u32 rtmsg_flags;
2813 s32 rtmsg_ifindex;
2814};
2815
6b96018b
AB
2816static int routing_ioctl(struct net *net, struct socket *sock,
2817 unsigned int cmd, void __user *argp)
7a229387
AB
2818{
2819 int ret;
2820 void *r = NULL;
2821 struct in6_rtmsg r6;
2822 struct rtentry r4;
2823 char devname[16];
2824 u32 rtdev;
2825 mm_segment_t old_fs = get_fs();
2826
6b96018b
AB
2827 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2828 struct in6_rtmsg32 __user *ur6 = argp;
7a229387
AB
2829 ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst),
2830 3 * sizeof(struct in6_addr));
2831 ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type));
2832 ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
2833 ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
2834 ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric));
2835 ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info));
2836 ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags));
2837 ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
2838
2839 r = (void *) &r6;
2840 } else { /* ipv4 */
6b96018b 2841 struct rtentry32 __user *ur4 = argp;
7a229387
AB
2842 ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst),
2843 3 * sizeof(struct sockaddr));
2844 ret |= __get_user (r4.rt_flags, &(ur4->rt_flags));
2845 ret |= __get_user (r4.rt_metric, &(ur4->rt_metric));
2846 ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu));
2847 ret |= __get_user (r4.rt_window, &(ur4->rt_window));
2848 ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt));
2849 ret |= __get_user (rtdev, &(ur4->rt_dev));
2850 if (rtdev) {
2851 ret |= copy_from_user (devname, compat_ptr(rtdev), 15);
2852 r4.rt_dev = devname; devname[15] = 0;
2853 } else
2854 r4.rt_dev = NULL;
2855
2856 r = (void *) &r4;
2857 }
2858
2859 if (ret) {
2860 ret = -EFAULT;
2861 goto out;
2862 }
2863
2864 set_fs (KERNEL_DS);
6b96018b 2865 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
7a229387
AB
2866 set_fs (old_fs);
2867
2868out:
7a229387
AB
2869 return ret;
2870}
2871
2872/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
2873 * for some operations; this forces use of the newer bridge-utils that
2874 * use compatiable ioctls
2875 */
6b96018b 2876static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 2877{
6b96018b 2878 compat_ulong_t tmp;
7a229387 2879
6b96018b 2880 if (get_user(tmp, argp))
7a229387
AB
2881 return -EFAULT;
2882 if (tmp == BRCTL_GET_VERSION)
2883 return BRCTL_VERSION + 1;
2884 return -EINVAL;
2885}
2886
6b96018b
AB
2887static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
2888 unsigned int cmd, unsigned long arg)
2889{
2890 void __user *argp = compat_ptr(arg);
2891 struct sock *sk = sock->sk;
2892 struct net *net = sock_net(sk);
7a229387 2893
6b96018b
AB
2894 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
2895 return siocdevprivate_ioctl(net, cmd, argp);
2896
2897 switch (cmd) {
2898 case SIOCSIFBR:
2899 case SIOCGIFBR:
2900 return old_bridge_ioctl(argp);
2901 case SIOCGIFNAME:
2902 return dev_ifname32(net, argp);
2903 case SIOCGIFCONF:
2904 return dev_ifconf(net, argp);
2905 case SIOCETHTOOL:
2906 return ethtool_ioctl(net, argp);
7a50a240
AB
2907 case SIOCWANDEV:
2908 return compat_siocwandev(net, argp);
a2116ed2
AB
2909 case SIOCGIFMAP:
2910 case SIOCSIFMAP:
2911 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
2912 case SIOCBONDENSLAVE:
2913 case SIOCBONDRELEASE:
2914 case SIOCBONDSETHWADDR:
2915 case SIOCBONDSLAVEINFOQUERY:
2916 case SIOCBONDINFOQUERY:
2917 case SIOCBONDCHANGEACTIVE:
2918 return bond_ioctl(net, cmd, argp);
2919 case SIOCADDRT:
2920 case SIOCDELRT:
2921 return routing_ioctl(net, sock, cmd, argp);
2922 case SIOCGSTAMP:
2923 return do_siocgstamp(net, sock, cmd, argp);
2924 case SIOCGSTAMPNS:
2925 return do_siocgstampns(net, sock, cmd, argp);
a2116ed2
AB
2926 case SIOCSHWTSTAMP:
2927 return compat_siocshwtstamp(net, argp);
6b96018b
AB
2928
2929 case FIOSETOWN:
2930 case SIOCSPGRP:
2931 case FIOGETOWN:
2932 case SIOCGPGRP:
2933 case SIOCBRADDBR:
2934 case SIOCBRDELBR:
2935 case SIOCGIFVLAN:
2936 case SIOCSIFVLAN:
2937 case SIOCADDDLCI:
2938 case SIOCDELDLCI:
2939 return sock_ioctl(file, cmd, arg);
2940
2941 case SIOCGIFFLAGS:
2942 case SIOCSIFFLAGS:
2943 case SIOCGIFMETRIC:
2944 case SIOCSIFMETRIC:
2945 case SIOCGIFMTU:
2946 case SIOCSIFMTU:
2947 case SIOCGIFMEM:
2948 case SIOCSIFMEM:
2949 case SIOCGIFHWADDR:
2950 case SIOCSIFHWADDR:
2951 case SIOCADDMULTI:
2952 case SIOCDELMULTI:
2953 case SIOCGIFINDEX:
6b96018b
AB
2954 case SIOCGIFADDR:
2955 case SIOCSIFADDR:
2956 case SIOCSIFHWBROADCAST:
6b96018b 2957 case SIOCDIFADDR:
6b96018b
AB
2958 case SIOCGIFBRDADDR:
2959 case SIOCSIFBRDADDR:
2960 case SIOCGIFDSTADDR:
2961 case SIOCSIFDSTADDR:
2962 case SIOCGIFNETMASK:
2963 case SIOCSIFNETMASK:
2964 case SIOCSIFPFLAGS:
2965 case SIOCGIFPFLAGS:
2966 case SIOCGIFTXQLEN:
2967 case SIOCSIFTXQLEN:
2968 case SIOCBRADDIF:
2969 case SIOCBRDELIF:
9177efd3
AB
2970 case SIOCSIFNAME:
2971 case SIOCGMIIPHY:
2972 case SIOCGMIIREG:
2973 case SIOCSMIIREG:
6b96018b 2974 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 2975
6b96018b
AB
2976 case SIOCSARP:
2977 case SIOCGARP:
2978 case SIOCDARP:
6b96018b 2979 case SIOCATMARK:
9177efd3
AB
2980 return sock_do_ioctl(net, sock, cmd, arg);
2981 }
2982
2983 /* Prevent warning from compat_sys_ioctl, these always
2984 * result in -EINVAL in the native case anyway. */
2985 switch (cmd) {
2986 case SIOCRTMSG:
2987 case SIOCGIFCOUNT:
6b96018b
AB
2988 case SIOCSRARP:
2989 case SIOCGRARP:
2990 case SIOCDRARP:
9177efd3
AB
2991 case SIOCSIFLINK:
2992 case SIOCGIFSLAVE:
2993 case SIOCSIFSLAVE:
2994 return -EINVAL;
6b96018b
AB
2995 }
2996
2997 return -ENOIOCTLCMD;
2998}
7a229387 2999
89bbfc95 3000static long compat_sock_ioctl(struct file *file, unsigned cmd,
89bddce5 3001 unsigned long arg)
89bbfc95
SP
3002{
3003 struct socket *sock = file->private_data;
3004 int ret = -ENOIOCTLCMD;
87de87d5
DM
3005 struct sock *sk;
3006 struct net *net;
3007
3008 sk = sock->sk;
3009 net = sock_net(sk);
89bbfc95
SP
3010
3011 if (sock->ops->compat_ioctl)
3012 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3013
87de87d5
DM
3014 if (ret == -ENOIOCTLCMD &&
3015 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3016 ret = compat_wext_handle_ioctl(net, cmd, arg);
3017
6b96018b
AB
3018 if (ret == -ENOIOCTLCMD)
3019 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3020
89bbfc95
SP
3021 return ret;
3022}
3023#endif
3024
ac5a488e
SS
3025int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3026{
3027 return sock->ops->bind(sock, addr, addrlen);
3028}
3029
3030int kernel_listen(struct socket *sock, int backlog)
3031{
3032 return sock->ops->listen(sock, backlog);
3033}
3034
3035int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3036{
3037 struct sock *sk = sock->sk;
3038 int err;
3039
3040 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3041 newsock);
3042 if (err < 0)
3043 goto done;
3044
3045 err = sock->ops->accept(sock, *newsock, flags);
3046 if (err < 0) {
3047 sock_release(*newsock);
fa8705b0 3048 *newsock = NULL;
ac5a488e
SS
3049 goto done;
3050 }
3051
3052 (*newsock)->ops = sock->ops;
1b08534e 3053 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3054
3055done:
3056 return err;
3057}
3058
3059int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3060 int flags)
ac5a488e
SS
3061{
3062 return sock->ops->connect(sock, addr, addrlen, flags);
3063}
3064
3065int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3066 int *addrlen)
3067{
3068 return sock->ops->getname(sock, addr, addrlen, 0);
3069}
3070
3071int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3072 int *addrlen)
3073{
3074 return sock->ops->getname(sock, addr, addrlen, 1);
3075}
3076
3077int kernel_getsockopt(struct socket *sock, int level, int optname,
3078 char *optval, int *optlen)
3079{
3080 mm_segment_t oldfs = get_fs();
3081 int err;
3082
3083 set_fs(KERNEL_DS);
3084 if (level == SOL_SOCKET)
3085 err = sock_getsockopt(sock, level, optname, optval, optlen);
3086 else
3087 err = sock->ops->getsockopt(sock, level, optname, optval,
3088 optlen);
3089 set_fs(oldfs);
3090 return err;
3091}
3092
3093int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3094 char *optval, unsigned int optlen)
ac5a488e
SS
3095{
3096 mm_segment_t oldfs = get_fs();
3097 int err;
3098
3099 set_fs(KERNEL_DS);
3100 if (level == SOL_SOCKET)
3101 err = sock_setsockopt(sock, level, optname, optval, optlen);
3102 else
3103 err = sock->ops->setsockopt(sock, level, optname, optval,
3104 optlen);
3105 set_fs(oldfs);
3106 return err;
3107}
3108
3109int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3110 size_t size, int flags)
3111{
3112 if (sock->ops->sendpage)
3113 return sock->ops->sendpage(sock, page, offset, size, flags);
3114
3115 return sock_no_sendpage(sock, page, offset, size, flags);
3116}
3117
3118int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3119{
3120 mm_segment_t oldfs = get_fs();
3121 int err;
3122
3123 set_fs(KERNEL_DS);
3124 err = sock->ops->ioctl(sock, cmd, arg);
3125 set_fs(oldfs);
3126
3127 return err;
3128}
3129
91cf45f0
TM
3130int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3131{
3132 return sock->ops->shutdown(sock, how);
3133}
3134
1da177e4
LT
/* Socket creation, lookup and teardown. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sockfd_lookup);
EXPORT_SYMBOL(sock_release);

/* Protocol family registration. */
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_unregister);

/* Message transfer and async notification. */
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_wake_async);

/* In-kernel socket API. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);