sh_eth: use ETH_ZLEN instead of home-grown #define
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1fd7317d 88#include <linux/magic.h>
5a0e3ad6 89#include <linux/slab.h>
600e1779 90#include <linux/xattr.h>
1da177e4
LT
91
92#include <asm/uaccess.h>
93#include <asm/unistd.h>
94
95#include <net/compat.h>
87de87d5 96#include <net/wext.h>
f8451725 97#include <net/cls_cgroup.h>
1da177e4
LT
98
99#include <net/sock.h>
100#include <linux/netfilter.h>
101
6b96018b
AB
102#include <linux/if_tun.h>
103#include <linux/ipv6_route.h>
104#include <linux/route.h>
6b96018b
AB
105#include <linux/sockios.h>
106#include <linux/atalk.h>
076bb0c8 107#include <net/busy_poll.h>
06021292 108
e0d1095a 109#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
110unsigned int sysctl_net_busy_read __read_mostly;
111unsigned int sysctl_net_busy_poll __read_mostly;
06021292 112#endif
6b96018b 113
1da177e4 114static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
115static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
116 unsigned long nr_segs, loff_t pos);
117static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
118 unsigned long nr_segs, loff_t pos);
89bddce5 119static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
120
121static int sock_close(struct inode *inode, struct file *file);
122static unsigned int sock_poll(struct file *file,
123 struct poll_table_struct *wait);
89bddce5 124static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
125#ifdef CONFIG_COMPAT
126static long compat_sock_ioctl(struct file *file,
89bddce5 127 unsigned int cmd, unsigned long arg);
89bbfc95 128#endif
1da177e4 129static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
130static ssize_t sock_sendpage(struct file *file, struct page *page,
131 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 132static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 133 struct pipe_inode_info *pipe, size_t len,
9c55e01c 134 unsigned int flags);
1da177e4 135
1da177e4
LT
136/*
137 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
138 * in the operation structures but are done directly via the socketcall() multiplexor.
139 */
140
da7071d7 141static const struct file_operations socket_file_ops = {
1da177e4
LT
142 .owner = THIS_MODULE,
143 .llseek = no_llseek,
144 .aio_read = sock_aio_read,
145 .aio_write = sock_aio_write,
146 .poll = sock_poll,
147 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
148#ifdef CONFIG_COMPAT
149 .compat_ioctl = compat_sock_ioctl,
150#endif
1da177e4
LT
151 .mmap = sock_mmap,
152 .open = sock_no_open, /* special open code to disallow open via /proc */
153 .release = sock_close,
154 .fasync = sock_fasync,
5274f052
JA
155 .sendpage = sock_sendpage,
156 .splice_write = generic_splice_sendpage,
9c55e01c 157 .splice_read = sock_splice_read,
1da177e4
LT
158};
159
160/*
161 * The protocol list. Each protocol is registered in here.
162 */
163
1da177e4 164static DEFINE_SPINLOCK(net_family_lock);
190683a9 165static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 166
1da177e4
LT
167/*
168 * Statistics counters of the socket lists
169 */
170
c6d409cf 171static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
172
173/*
89bddce5
SH
174 * Support routines.
175 * Move socket addresses back and forth across the kernel/user
176 * divide and look after the messy bits.
1da177e4
LT
177 */
178
1da177e4
LT
179/**
180 * move_addr_to_kernel - copy a socket address into kernel space
181 * @uaddr: Address in user space
182 * @kaddr: Address in kernel space
183 * @ulen: Length in user space
184 *
185 * The address is copied into kernel space. If the provided address is
186 * too long an error code of -EINVAL is returned. If the copy gives
187 * invalid addresses -EFAULT is returned. On a success 0 is returned.
188 */
189
43db362d 190int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 191{
230b1839 192 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 193 return -EINVAL;
89bddce5 194 if (ulen == 0)
1da177e4 195 return 0;
89bddce5 196 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 197 return -EFAULT;
3ec3b2fb 198 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
199}
200
201/**
202 * move_addr_to_user - copy an address to user space
203 * @kaddr: kernel space address
204 * @klen: length of address in kernel
205 * @uaddr: user space address
206 * @ulen: pointer to user length field
207 *
208 * The value pointed to by ulen on entry is the buffer length available.
209 * This is overwritten with the buffer space used. -EINVAL is returned
210 * if an overlong buffer is specified or a negative buffer size. -EFAULT
211 * is returned if either the buffer or the length field are not
212 * accessible.
213 * After copying the data up to the limit the user specifies, the true
214 * length of the data is written over the length limit the user
215 * specified. Zero is returned for a success.
216 */
89bddce5 217
43db362d 218static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 219 void __user *uaddr, int __user *ulen)
1da177e4
LT
220{
221 int err;
222 int len;
223
68c6beb3 224 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
225 err = get_user(len, ulen);
226 if (err)
1da177e4 227 return err;
89bddce5
SH
228 if (len > klen)
229 len = klen;
68c6beb3 230 if (len < 0)
1da177e4 231 return -EINVAL;
89bddce5 232 if (len) {
d6fe3945
SG
233 if (audit_sockaddr(klen, kaddr))
234 return -ENOMEM;
89bddce5 235 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
236 return -EFAULT;
237 }
238 /*
89bddce5
SH
239 * "fromlen shall refer to the value before truncation.."
240 * 1003.1g
1da177e4
LT
241 */
242 return __put_user(klen, ulen);
243}
244
e18b890b 245static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
246
247static struct inode *sock_alloc_inode(struct super_block *sb)
248{
249 struct socket_alloc *ei;
eaefd110 250 struct socket_wq *wq;
89bddce5 251
e94b1766 252 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
253 if (!ei)
254 return NULL;
eaefd110
ED
255 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
256 if (!wq) {
43815482
ED
257 kmem_cache_free(sock_inode_cachep, ei);
258 return NULL;
259 }
eaefd110
ED
260 init_waitqueue_head(&wq->wait);
261 wq->fasync_list = NULL;
262 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 263
1da177e4
LT
264 ei->socket.state = SS_UNCONNECTED;
265 ei->socket.flags = 0;
266 ei->socket.ops = NULL;
267 ei->socket.sk = NULL;
268 ei->socket.file = NULL;
1da177e4
LT
269
270 return &ei->vfs_inode;
271}
272
273static void sock_destroy_inode(struct inode *inode)
274{
43815482 275 struct socket_alloc *ei;
eaefd110 276 struct socket_wq *wq;
43815482
ED
277
278 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 279 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 280 kfree_rcu(wq, rcu);
43815482 281 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
282}
283
51cc5068 284static void init_once(void *foo)
1da177e4 285{
89bddce5 286 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 287
a35afb83 288 inode_init_once(&ei->vfs_inode);
1da177e4 289}
89bddce5 290
1da177e4
LT
291static int init_inodecache(void)
292{
293 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
294 sizeof(struct socket_alloc),
295 0,
296 (SLAB_HWCACHE_ALIGN |
297 SLAB_RECLAIM_ACCOUNT |
298 SLAB_MEM_SPREAD),
20c2df83 299 init_once);
1da177e4
LT
300 if (sock_inode_cachep == NULL)
301 return -ENOMEM;
302 return 0;
303}
304
b87221de 305static const struct super_operations sockfs_ops = {
c6d409cf
ED
306 .alloc_inode = sock_alloc_inode,
307 .destroy_inode = sock_destroy_inode,
308 .statfs = simple_statfs,
1da177e4
LT
309};
310
c23fbb6b
ED
311/*
312 * sockfs_dname() is called from d_path().
313 */
314static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
315{
316 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
317 dentry->d_inode->i_ino);
318}
319
3ba13d17 320static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 321 .d_dname = sockfs_dname,
1da177e4
LT
322};
323
c74a1cbb
AV
324static struct dentry *sockfs_mount(struct file_system_type *fs_type,
325 int flags, const char *dev_name, void *data)
326{
327 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
328 &sockfs_dentry_operations, SOCKFS_MAGIC);
329}
330
331static struct vfsmount *sock_mnt __read_mostly;
332
333static struct file_system_type sock_fs_type = {
334 .name = "sockfs",
335 .mount = sockfs_mount,
336 .kill_sb = kill_anon_super,
337};
338
1da177e4
LT
339/*
340 * Obtains the first available file descriptor and sets it up for use.
341 *
39d8c1b6
DM
342 * These functions create file structures and maps them to fd space
343 * of the current process. On success it returns file descriptor
1da177e4
LT
344 * and file struct implicitly stored in sock->file.
345 * Note that another thread may close file descriptor before we return
346 * from this function. We use the fact that now we do not refer
347 * to socket after mapping. If one day we will need it, this
348 * function will increment ref. count on file by 1.
349 *
350 * In any case returned fd MAY BE not valid!
351 * This race condition is unavoidable
352 * with shared fd spaces, we cannot solve it inside kernel,
353 * but we take care of internal coherence yet.
354 */
355
aab174f0 356struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 357{
7cbe66b6 358 struct qstr name = { .name = "" };
2c48b9c4 359 struct path path;
7cbe66b6 360 struct file *file;
1da177e4 361
600e1779
MY
362 if (dname) {
363 name.name = dname;
364 name.len = strlen(name.name);
365 } else if (sock->sk) {
366 name.name = sock->sk->sk_prot_creator->name;
367 name.len = strlen(name.name);
368 }
4b936885 369 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
370 if (unlikely(!path.dentry))
371 return ERR_PTR(-ENOMEM);
2c48b9c4 372 path.mnt = mntget(sock_mnt);
39d8c1b6 373
2c48b9c4 374 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 375 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 376
2c48b9c4 377 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 378 &socket_file_ops);
39b65252 379 if (unlikely(IS_ERR(file))) {
cc3808f8 380 /* drop dentry, keep inode */
7de9c6ee 381 ihold(path.dentry->d_inode);
2c48b9c4 382 path_put(&path);
39b65252 383 return file;
cc3808f8
AV
384 }
385
386 sock->file = file;
77d27200 387 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 388 file->private_data = sock;
28407630 389 return file;
39d8c1b6 390}
56b31d1c 391EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 392
56b31d1c 393static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
394{
395 struct file *newfile;
28407630
AV
396 int fd = get_unused_fd_flags(flags);
397 if (unlikely(fd < 0))
398 return fd;
39d8c1b6 399
aab174f0 400 newfile = sock_alloc_file(sock, flags, NULL);
28407630 401 if (likely(!IS_ERR(newfile))) {
39d8c1b6 402 fd_install(fd, newfile);
28407630
AV
403 return fd;
404 }
7cbe66b6 405
28407630
AV
406 put_unused_fd(fd);
407 return PTR_ERR(newfile);
1da177e4
LT
408}
409
406a3c63 410struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 411{
6cb153ca
BL
412 if (file->f_op == &socket_file_ops)
413 return file->private_data; /* set in sock_map_fd */
414
23bb80d2
ED
415 *err = -ENOTSOCK;
416 return NULL;
6cb153ca 417}
406a3c63 418EXPORT_SYMBOL(sock_from_file);
6cb153ca 419
1da177e4 420/**
c6d409cf 421 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
422 * @fd: file handle
423 * @err: pointer to an error code return
424 *
425 * The file handle passed in is locked and the socket it is bound
426 * too is returned. If an error occurs the err pointer is overwritten
427 * with a negative errno code and NULL is returned. The function checks
428 * for both invalid handles and passing a handle which is not a socket.
429 *
430 * On a success the socket object pointer is returned.
431 */
432
433struct socket *sockfd_lookup(int fd, int *err)
434{
435 struct file *file;
1da177e4
LT
436 struct socket *sock;
437
89bddce5
SH
438 file = fget(fd);
439 if (!file) {
1da177e4
LT
440 *err = -EBADF;
441 return NULL;
442 }
89bddce5 443
6cb153ca
BL
444 sock = sock_from_file(file, err);
445 if (!sock)
1da177e4 446 fput(file);
6cb153ca
BL
447 return sock;
448}
c6d409cf 449EXPORT_SYMBOL(sockfd_lookup);
1da177e4 450
6cb153ca
BL
451static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
452{
453 struct file *file;
454 struct socket *sock;
455
3672558c 456 *err = -EBADF;
6cb153ca
BL
457 file = fget_light(fd, fput_needed);
458 if (file) {
459 sock = sock_from_file(file, err);
460 if (sock)
461 return sock;
462 fput_light(file, *fput_needed);
1da177e4 463 }
6cb153ca 464 return NULL;
1da177e4
LT
465}
466
600e1779
MY
467#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
468#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
469#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
470static ssize_t sockfs_getxattr(struct dentry *dentry,
471 const char *name, void *value, size_t size)
472{
473 const char *proto_name;
474 size_t proto_size;
475 int error;
476
477 error = -ENODATA;
478 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
479 proto_name = dentry->d_name.name;
480 proto_size = strlen(proto_name);
481
482 if (value) {
483 error = -ERANGE;
484 if (proto_size + 1 > size)
485 goto out;
486
487 strncpy(value, proto_name, proto_size + 1);
488 }
489 error = proto_size + 1;
490 }
491
492out:
493 return error;
494}
495
496static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
497 size_t size)
498{
499 ssize_t len;
500 ssize_t used = 0;
501
502 len = security_inode_listsecurity(dentry->d_inode, buffer, size);
503 if (len < 0)
504 return len;
505 used += len;
506 if (buffer) {
507 if (size < used)
508 return -ERANGE;
509 buffer += len;
510 }
511
512 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
513 used += len;
514 if (buffer) {
515 if (size < used)
516 return -ERANGE;
517 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
518 buffer += len;
519 }
520
521 return used;
522}
523
524static const struct inode_operations sockfs_inode_ops = {
525 .getxattr = sockfs_getxattr,
526 .listxattr = sockfs_listxattr,
527};
528
1da177e4
LT
529/**
530 * sock_alloc - allocate a socket
89bddce5 531 *
1da177e4
LT
532 * Allocate a new inode and socket object. The two are bound together
533 * and initialised. The socket is then returned. If we are out of inodes
534 * NULL is returned.
535 */
536
537static struct socket *sock_alloc(void)
538{
89bddce5
SH
539 struct inode *inode;
540 struct socket *sock;
1da177e4 541
a209dfc7 542 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
543 if (!inode)
544 return NULL;
545
546 sock = SOCKET_I(inode);
547
29a020d3 548 kmemcheck_annotate_bitfield(sock, type);
85fe4025 549 inode->i_ino = get_next_ino();
89bddce5 550 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
551 inode->i_uid = current_fsuid();
552 inode->i_gid = current_fsgid();
600e1779 553 inode->i_op = &sockfs_inode_ops;
1da177e4 554
19e8d69c 555 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
556 return sock;
557}
558
/*
 *	In theory you can't get an open on this inode, but /proc provides
 *	a back door. Remember to keep it shut otherwise you'll let the
 *	creepy crawlies in.
 *
 *	Both arguments are ignored; the open is always refused with -ENXIO.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	const int refused = -ENXIO;

	return refused;
}
569
4b6f5d20 570const struct file_operations bad_sock_fops = {
1da177e4
LT
571 .owner = THIS_MODULE,
572 .open = sock_no_open,
6038f373 573 .llseek = noop_llseek,
1da177e4
LT
574};
575
576/**
577 * sock_release - close a socket
578 * @sock: socket to close
579 *
580 * The socket is released from the protocol stack if it has a release
581 * callback, and the inode is then released if the socket is bound to
89bddce5 582 * an inode not a file.
1da177e4 583 */
89bddce5 584
1da177e4
LT
585void sock_release(struct socket *sock)
586{
587 if (sock->ops) {
588 struct module *owner = sock->ops->owner;
589
590 sock->ops->release(sock);
591 sock->ops = NULL;
592 module_put(owner);
593 }
594
eaefd110 595 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
1da177e4
LT
596 printk(KERN_ERR "sock_release: fasync list not empty!\n");
597
b09e786b
MP
598 if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
599 return;
600
19e8d69c 601 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
602 if (!sock->file) {
603 iput(SOCK_INODE(sock));
604 return;
605 }
89bddce5 606 sock->file = NULL;
1da177e4 607}
c6d409cf 608EXPORT_SYMBOL(sock_release);
1da177e4 609
bf84a010 610void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
20d49473 611{
2244d07b 612 *tx_flags = 0;
20d49473 613 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
2244d07b 614 *tx_flags |= SKBTX_HW_TSTAMP;
20d49473 615 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
2244d07b 616 *tx_flags |= SKBTX_SW_TSTAMP;
6e3e939f
JB
617 if (sock_flag(sk, SOCK_WIFI_STATUS))
618 *tx_flags |= SKBTX_WIFI_STATUS;
20d49473
PO
619}
620EXPORT_SYMBOL(sock_tx_timestamp);
621
228e548e
AB
622static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
623 struct msghdr *msg, size_t size)
1da177e4
LT
624{
625 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4
LT
626
627 si->sock = sock;
628 si->scm = NULL;
629 si->msg = msg;
630 si->size = size;
631
1da177e4
LT
632 return sock->ops->sendmsg(iocb, sock, msg, size);
633}
634
228e548e
AB
635static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
636 struct msghdr *msg, size_t size)
637{
638 int err = security_socket_sendmsg(sock, msg, size);
639
640 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
641}
642
1da177e4
LT
643int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
644{
645 struct kiocb iocb;
646 struct sock_iocb siocb;
647 int ret;
648
649 init_sync_kiocb(&iocb, NULL);
650 iocb.private = &siocb;
651 ret = __sock_sendmsg(&iocb, sock, msg, size);
652 if (-EIOCBQUEUED == ret)
653 ret = wait_on_sync_kiocb(&iocb);
654 return ret;
655}
c6d409cf 656EXPORT_SYMBOL(sock_sendmsg);
1da177e4 657
894dc24c 658static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e
AB
659{
660 struct kiocb iocb;
661 struct sock_iocb siocb;
662 int ret;
663
664 init_sync_kiocb(&iocb, NULL);
665 iocb.private = &siocb;
666 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
667 if (-EIOCBQUEUED == ret)
668 ret = wait_on_sync_kiocb(&iocb);
669 return ret;
670}
671
1da177e4
LT
672int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
673 struct kvec *vec, size_t num, size_t size)
674{
675 mm_segment_t oldfs = get_fs();
676 int result;
677
678 set_fs(KERNEL_DS);
679 /*
680 * the following is safe, since for compiler definitions of kvec and
681 * iovec are identical, yielding the same in-core layout and alignment
682 */
89bddce5 683 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
684 msg->msg_iovlen = num;
685 result = sock_sendmsg(sock, msg, size);
686 set_fs(oldfs);
687 return result;
688}
c6d409cf 689EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 690
92f37fd2
ED
691/*
692 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
693 */
694void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
695 struct sk_buff *skb)
696{
20d49473
PO
697 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
698 struct timespec ts[3];
699 int empty = 1;
700 struct skb_shared_hwtstamps *shhwtstamps =
701 skb_hwtstamps(skb);
702
703 /* Race occurred between timestamp enabling and packet
704 receiving. Fill in the current time for now. */
705 if (need_software_tstamp && skb->tstamp.tv64 == 0)
706 __net_timestamp(skb);
707
708 if (need_software_tstamp) {
709 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
710 struct timeval tv;
711 skb_get_timestamp(skb, &tv);
712 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
713 sizeof(tv), &tv);
714 } else {
842509b8 715 skb_get_timestampns(skb, &ts[0]);
20d49473 716 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 717 sizeof(ts[0]), &ts[0]);
20d49473
PO
718 }
719 }
720
721
722 memset(ts, 0, sizeof(ts));
6e94d1ef
DB
723 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) &&
724 ktime_to_timespec_cond(skb->tstamp, ts + 0))
20d49473 725 empty = 0;
20d49473
PO
726 if (shhwtstamps) {
727 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
6e94d1ef 728 ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1))
20d49473
PO
729 empty = 0;
730 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
6e94d1ef 731 ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2))
20d49473 732 empty = 0;
92f37fd2 733 }
20d49473
PO
734 if (!empty)
735 put_cmsg(msg, SOL_SOCKET,
736 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 737}
7c81fd8b
ACM
738EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
739
6e3e939f
JB
740void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
741 struct sk_buff *skb)
742{
743 int ack;
744
745 if (!sock_flag(sk, SOCK_WIFI_STATUS))
746 return;
747 if (!skb->wifi_acked_valid)
748 return;
749
750 ack = skb->wifi_acked;
751
752 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
753}
754EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
755
11165f14 756static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
757 struct sk_buff *skb)
3b885787
NH
758{
759 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
760 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
761 sizeof(__u32), &skb->dropcount);
762}
763
/* Emit the receive timestamp and then the drop-count control messages. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 771
a2e27255
ACM
772static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
773 struct msghdr *msg, size_t size, int flags)
1da177e4 774{
1da177e4
LT
775 struct sock_iocb *si = kiocb_to_siocb(iocb);
776
777 si->sock = sock;
778 si->scm = NULL;
779 si->msg = msg;
780 si->size = size;
781 si->flags = flags;
782
1da177e4
LT
783 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
784}
785
a2e27255
ACM
786static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
787 struct msghdr *msg, size_t size, int flags)
788{
789 int err = security_socket_recvmsg(sock, msg, size, flags);
790
791 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
792}
793
89bddce5 794int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
795 size_t size, int flags)
796{
797 struct kiocb iocb;
798 struct sock_iocb siocb;
799 int ret;
800
89bddce5 801 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
802 iocb.private = &siocb;
803 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
804 if (-EIOCBQUEUED == ret)
805 ret = wait_on_sync_kiocb(&iocb);
806 return ret;
807}
c6d409cf 808EXPORT_SYMBOL(sock_recvmsg);
1da177e4 809
a2e27255
ACM
810static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
811 size_t size, int flags)
812{
813 struct kiocb iocb;
814 struct sock_iocb siocb;
815 int ret;
816
817 init_sync_kiocb(&iocb, NULL);
818 iocb.private = &siocb;
819 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
820 if (-EIOCBQUEUED == ret)
821 ret = wait_on_sync_kiocb(&iocb);
822 return ret;
823}
824
c1249c0a
ML
825/**
826 * kernel_recvmsg - Receive a message from a socket (kernel space)
827 * @sock: The socket to receive the message from
828 * @msg: Received message
829 * @vec: Input s/g array for message data
830 * @num: Size of input s/g array
831 * @size: Number of bytes to read
832 * @flags: Message flags (MSG_DONTWAIT, etc...)
833 *
834 * On return the msg structure contains the scatter/gather array passed in the
835 * vec argument. The array is modified so that it consists of the unfilled
836 * portion of the original array.
837 *
838 * The returned value is the total number of bytes received, or an error.
839 */
89bddce5
SH
840int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
841 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
842{
843 mm_segment_t oldfs = get_fs();
844 int result;
845
846 set_fs(KERNEL_DS);
847 /*
848 * the following is safe, since for compiler definitions of kvec and
849 * iovec are identical, yielding the same in-core layout and alignment
850 */
89bddce5 851 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
852 result = sock_recvmsg(sock, msg, size, flags);
853 set_fs(oldfs);
854 return result;
855}
c6d409cf 856EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 857
ce1d4d3e
CH
858static ssize_t sock_sendpage(struct file *file, struct page *page,
859 int offset, size_t size, loff_t *ppos, int more)
1da177e4 860{
1da177e4
LT
861 struct socket *sock;
862 int flags;
863
ce1d4d3e
CH
864 sock = file->private_data;
865
35f9c09f
ED
866 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
867 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
868 flags |= more;
ce1d4d3e 869
e6949583 870 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 871}
1da177e4 872
9c55e01c 873static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 874 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
875 unsigned int flags)
876{
877 struct socket *sock = file->private_data;
878
997b37da
RDC
879 if (unlikely(!sock->ops->splice_read))
880 return -EINVAL;
881
9c55e01c
JA
882 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
883}
884
ce1d4d3e 885static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 886 struct sock_iocb *siocb)
ce1d4d3e 887{
d29c445b
KO
888 if (!is_sync_kiocb(iocb))
889 BUG();
1da177e4 890
ce1d4d3e 891 siocb->kiocb = iocb;
ce1d4d3e
CH
892 iocb->private = siocb;
893 return siocb;
1da177e4
LT
894}
895
ce1d4d3e 896static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
897 struct file *file, const struct iovec *iov,
898 unsigned long nr_segs)
ce1d4d3e
CH
899{
900 struct socket *sock = file->private_data;
901 size_t size = 0;
902 int i;
1da177e4 903
89bddce5
SH
904 for (i = 0; i < nr_segs; i++)
905 size += iov[i].iov_len;
1da177e4 906
ce1d4d3e
CH
907 msg->msg_name = NULL;
908 msg->msg_namelen = 0;
909 msg->msg_control = NULL;
910 msg->msg_controllen = 0;
89bddce5 911 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
912 msg->msg_iovlen = nr_segs;
913 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
914
915 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
916}
917
027445c3
BP
918static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
919 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
920{
921 struct sock_iocb siocb, *x;
922
1da177e4
LT
923 if (pos != 0)
924 return -ESPIPE;
027445c3 925
73a7075e 926 if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
1da177e4
LT
927 return 0;
928
027445c3
BP
929
930 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
931 if (!x)
932 return -ENOMEM;
027445c3 933 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
934}
935
ce1d4d3e 936static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
937 struct file *file, const struct iovec *iov,
938 unsigned long nr_segs)
1da177e4 939{
ce1d4d3e
CH
940 struct socket *sock = file->private_data;
941 size_t size = 0;
942 int i;
1da177e4 943
89bddce5
SH
944 for (i = 0; i < nr_segs; i++)
945 size += iov[i].iov_len;
1da177e4 946
ce1d4d3e
CH
947 msg->msg_name = NULL;
948 msg->msg_namelen = 0;
949 msg->msg_control = NULL;
950 msg->msg_controllen = 0;
89bddce5 951 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
952 msg->msg_iovlen = nr_segs;
953 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
954 if (sock->type == SOCK_SEQPACKET)
955 msg->msg_flags |= MSG_EOR;
1da177e4 956
ce1d4d3e 957 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
958}
959
027445c3
BP
960static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
961 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
962{
963 struct sock_iocb siocb, *x;
1da177e4 964
ce1d4d3e
CH
965 if (pos != 0)
966 return -ESPIPE;
027445c3 967
027445c3 968 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
969 if (!x)
970 return -ENOMEM;
1da177e4 971
027445c3 972 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
973}
974
1da177e4
LT
975/*
976 * Atomic setting of ioctl hooks to avoid race
977 * with module unload.
978 */
979
4a3e2f71 980static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 981static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 982
881d966b 983void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 984{
4a3e2f71 985 mutex_lock(&br_ioctl_mutex);
1da177e4 986 br_ioctl_hook = hook;
4a3e2f71 987 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
988}
989EXPORT_SYMBOL(brioctl_set);
990
4a3e2f71 991static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 992static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 993
881d966b 994void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 995{
4a3e2f71 996 mutex_lock(&vlan_ioctl_mutex);
1da177e4 997 vlan_ioctl_hook = hook;
4a3e2f71 998 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
999}
1000EXPORT_SYMBOL(vlan_ioctl_set);
1001
4a3e2f71 1002static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1003static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1004
89bddce5 1005void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1006{
4a3e2f71 1007 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1008 dlci_ioctl_hook = hook;
4a3e2f71 1009 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1010}
1011EXPORT_SYMBOL(dlci_ioctl_set);
1012
6b96018b
AB
1013static long sock_do_ioctl(struct net *net, struct socket *sock,
1014 unsigned int cmd, unsigned long arg)
1015{
1016 int err;
1017 void __user *argp = (void __user *)arg;
1018
1019 err = sock->ops->ioctl(sock, cmd, arg);
1020
1021 /*
1022 * If this ioctl is unknown try to hand it down
1023 * to the NIC driver.
1024 */
1025 if (err == -ENOIOCTLCMD)
1026 err = dev_ioctl(net, cmd, argp);
1027
1028 return err;
1029}
1030
1da177e4
LT
1031/*
1032 * With an ioctl, arg may well be a user mode pointer, but we don't know
1033 * what to do with it - that's up to the protocol still.
1034 */
1035
1036static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1037{
1038 struct socket *sock;
881d966b 1039 struct sock *sk;
1da177e4
LT
1040 void __user *argp = (void __user *)arg;
1041 int pid, err;
881d966b 1042 struct net *net;
1da177e4 1043
b69aee04 1044 sock = file->private_data;
881d966b 1045 sk = sock->sk;
3b1e0a65 1046 net = sock_net(sk);
1da177e4 1047 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1048 err = dev_ioctl(net, cmd, argp);
1da177e4 1049 } else
3d23e349 1050#ifdef CONFIG_WEXT_CORE
1da177e4 1051 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1052 err = dev_ioctl(net, cmd, argp);
1da177e4 1053 } else
3d23e349 1054#endif
89bddce5 1055 switch (cmd) {
1da177e4
LT
1056 case FIOSETOWN:
1057 case SIOCSPGRP:
1058 err = -EFAULT;
1059 if (get_user(pid, (int __user *)argp))
1060 break;
1061 err = f_setown(sock->file, pid, 1);
1062 break;
1063 case FIOGETOWN:
1064 case SIOCGPGRP:
609d7fa9 1065 err = put_user(f_getown(sock->file),
89bddce5 1066 (int __user *)argp);
1da177e4
LT
1067 break;
1068 case SIOCGIFBR:
1069 case SIOCSIFBR:
1070 case SIOCBRADDBR:
1071 case SIOCBRDELBR:
1072 err = -ENOPKG;
1073 if (!br_ioctl_hook)
1074 request_module("bridge");
1075
4a3e2f71 1076 mutex_lock(&br_ioctl_mutex);
89bddce5 1077 if (br_ioctl_hook)
881d966b 1078 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1079 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1080 break;
1081 case SIOCGIFVLAN:
1082 case SIOCSIFVLAN:
1083 err = -ENOPKG;
1084 if (!vlan_ioctl_hook)
1085 request_module("8021q");
1086
4a3e2f71 1087 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1088 if (vlan_ioctl_hook)
881d966b 1089 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1090 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1091 break;
1da177e4
LT
1092 case SIOCADDDLCI:
1093 case SIOCDELDLCI:
1094 err = -ENOPKG;
1095 if (!dlci_ioctl_hook)
1096 request_module("dlci");
1097
7512cbf6
PE
1098 mutex_lock(&dlci_ioctl_mutex);
1099 if (dlci_ioctl_hook)
1da177e4 1100 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1101 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1102 break;
1103 default:
6b96018b 1104 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1105 break;
89bddce5 1106 }
1da177e4
LT
1107 return err;
1108}
1109
1110int sock_create_lite(int family, int type, int protocol, struct socket **res)
1111{
1112 int err;
1113 struct socket *sock = NULL;
89bddce5 1114
1da177e4
LT
1115 err = security_socket_create(family, type, protocol, 1);
1116 if (err)
1117 goto out;
1118
1119 sock = sock_alloc();
1120 if (!sock) {
1121 err = -ENOMEM;
1122 goto out;
1123 }
1124
1da177e4 1125 sock->type = type;
7420ed23
VY
1126 err = security_socket_post_create(sock, family, type, protocol, 1);
1127 if (err)
1128 goto out_release;
1129
1da177e4
LT
1130out:
1131 *res = sock;
1132 return err;
7420ed23
VY
1133out_release:
1134 sock_release(sock);
1135 sock = NULL;
1136 goto out;
1da177e4 1137}
c6d409cf 1138EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1139
1140/* No kernel lock held - perfect */
89bddce5 1141static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1142{
cbf55001 1143 unsigned int busy_flag = 0;
1da177e4
LT
1144 struct socket *sock;
1145
1146 /*
89bddce5 1147 * We can't return errors to poll, so it's either yes or no.
1da177e4 1148 */
b69aee04 1149 sock = file->private_data;
2d48d67f 1150
cbf55001 1151 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1152 /* this socket can poll_ll so tell the system call */
cbf55001 1153 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1154
1155 /* once, only if requested by syscall */
cbf55001
ET
1156 if (wait && (wait->_key & POLL_BUSY_LOOP))
1157 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1158 }
1159
cbf55001 1160 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1161}
1162
89bddce5 1163static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1164{
b69aee04 1165 struct socket *sock = file->private_data;
1da177e4
LT
1166
1167 return sock->ops->mmap(file, sock, vma);
1168}
1169
/*
 *	Release the socket that lives in @inode.  @filp is not used and the
 *	function always reports success.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1175
1176/*
1177 * Update the socket async list
1178 *
1179 * Fasync_list locking strategy.
1180 *
1181 * 1. fasync_list is modified only under process context socket lock
1182 * i.e. under semaphore.
1183 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1184 * or under socket lock
1da177e4
LT
1185 */
1186
1187static int sock_fasync(int fd, struct file *filp, int on)
1188{
989a2979
ED
1189 struct socket *sock = filp->private_data;
1190 struct sock *sk = sock->sk;
eaefd110 1191 struct socket_wq *wq;
1da177e4 1192
989a2979 1193 if (sk == NULL)
1da177e4 1194 return -EINVAL;
1da177e4
LT
1195
1196 lock_sock(sk);
eaefd110
ED
1197 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1198 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1199
eaefd110 1200 if (!wq->fasync_list)
989a2979
ED
1201 sock_reset_flag(sk, SOCK_FASYNC);
1202 else
bcdce719 1203 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1204
989a2979 1205 release_sock(sk);
1da177e4
LT
1206 return 0;
1207}
1208
43815482 1209/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1210
1211int sock_wake_async(struct socket *sock, int how, int band)
1212{
43815482
ED
1213 struct socket_wq *wq;
1214
1215 if (!sock)
1216 return -1;
1217 rcu_read_lock();
1218 wq = rcu_dereference(sock->wq);
1219 if (!wq || !wq->fasync_list) {
1220 rcu_read_unlock();
1da177e4 1221 return -1;
43815482 1222 }
89bddce5 1223 switch (how) {
8d8ad9d7 1224 case SOCK_WAKE_WAITD:
1da177e4
LT
1225 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1226 break;
1227 goto call_kill;
8d8ad9d7 1228 case SOCK_WAKE_SPACE:
1da177e4
LT
1229 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1230 break;
1231 /* fall through */
8d8ad9d7 1232 case SOCK_WAKE_IO:
89bddce5 1233call_kill:
43815482 1234 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1235 break;
8d8ad9d7 1236 case SOCK_WAKE_URG:
43815482 1237 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1238 }
43815482 1239 rcu_read_unlock();
1da177e4
LT
1240 return 0;
1241}
c6d409cf 1242EXPORT_SYMBOL(sock_wake_async);
1da177e4 1243
721db93a 1244int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1245 struct socket **res, int kern)
1da177e4
LT
1246{
1247 int err;
1248 struct socket *sock;
55737fda 1249 const struct net_proto_family *pf;
1da177e4
LT
1250
1251 /*
89bddce5 1252 * Check protocol is in range
1da177e4
LT
1253 */
1254 if (family < 0 || family >= NPROTO)
1255 return -EAFNOSUPPORT;
1256 if (type < 0 || type >= SOCK_MAX)
1257 return -EINVAL;
1258
1259 /* Compatibility.
1260
1261 This uglymoron is moved from INET layer to here to avoid
1262 deadlock in module load.
1263 */
1264 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1265 static int warned;
1da177e4
LT
1266 if (!warned) {
1267 warned = 1;
89bddce5
SH
1268 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1269 current->comm);
1da177e4
LT
1270 }
1271 family = PF_PACKET;
1272 }
1273
1274 err = security_socket_create(family, type, protocol, kern);
1275 if (err)
1276 return err;
89bddce5 1277
55737fda
SH
1278 /*
1279 * Allocate the socket and allow the family to set things up. if
1280 * the protocol is 0, the family is instructed to select an appropriate
1281 * default.
1282 */
1283 sock = sock_alloc();
1284 if (!sock) {
e87cc472 1285 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1286 return -ENFILE; /* Not exactly a match, but its the
1287 closest posix thing */
1288 }
1289
1290 sock->type = type;
1291
95a5afca 1292#ifdef CONFIG_MODULES
89bddce5
SH
1293 /* Attempt to load a protocol module if the find failed.
1294 *
1295 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1296 * requested real, full-featured networking support upon configuration.
1297 * Otherwise module support will break!
1298 */
190683a9 1299 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1300 request_module("net-pf-%d", family);
1da177e4
LT
1301#endif
1302
55737fda
SH
1303 rcu_read_lock();
1304 pf = rcu_dereference(net_families[family]);
1305 err = -EAFNOSUPPORT;
1306 if (!pf)
1307 goto out_release;
1da177e4
LT
1308
1309 /*
1310 * We will call the ->create function, that possibly is in a loadable
1311 * module, so we have to bump that loadable module refcnt first.
1312 */
55737fda 1313 if (!try_module_get(pf->owner))
1da177e4
LT
1314 goto out_release;
1315
55737fda
SH
1316 /* Now protected by module ref count */
1317 rcu_read_unlock();
1318
3f378b68 1319 err = pf->create(net, sock, protocol, kern);
55737fda 1320 if (err < 0)
1da177e4 1321 goto out_module_put;
a79af59e 1322
1da177e4
LT
1323 /*
1324 * Now to bump the refcnt of the [loadable] module that owns this
1325 * socket at sock_release time we decrement its refcnt.
1326 */
55737fda
SH
1327 if (!try_module_get(sock->ops->owner))
1328 goto out_module_busy;
1329
1da177e4
LT
1330 /*
1331 * Now that we're done with the ->create function, the [loadable]
1332 * module can have its refcnt decremented
1333 */
55737fda 1334 module_put(pf->owner);
7420ed23
VY
1335 err = security_socket_post_create(sock, family, type, protocol, kern);
1336 if (err)
3b185525 1337 goto out_sock_release;
55737fda 1338 *res = sock;
1da177e4 1339
55737fda
SH
1340 return 0;
1341
1342out_module_busy:
1343 err = -EAFNOSUPPORT;
1da177e4 1344out_module_put:
55737fda
SH
1345 sock->ops = NULL;
1346 module_put(pf->owner);
1347out_sock_release:
1da177e4 1348 sock_release(sock);
55737fda
SH
1349 return err;
1350
1351out_release:
1352 rcu_read_unlock();
1353 goto out_sock_release;
1da177e4 1354}
721db93a 1355EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1356
1357int sock_create(int family, int type, int protocol, struct socket **res)
1358{
1b8d7ae4 1359 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1360}
c6d409cf 1361EXPORT_SYMBOL(sock_create);
1da177e4
LT
1362
1363int sock_create_kern(int family, int type, int protocol, struct socket **res)
1364{
1b8d7ae4 1365 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1366}
c6d409cf 1367EXPORT_SYMBOL(sock_create_kern);
1da177e4 1368
3e0fa65f 1369SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1370{
1371 int retval;
1372 struct socket *sock;
a677a039
UD
1373 int flags;
1374
e38b36f3
UD
1375 /* Check the SOCK_* constants for consistency. */
1376 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1377 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1378 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1379 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1380
a677a039 1381 flags = type & ~SOCK_TYPE_MASK;
77d27200 1382 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1383 return -EINVAL;
1384 type &= SOCK_TYPE_MASK;
1da177e4 1385
aaca0bdc
UD
1386 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1387 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1388
1da177e4
LT
1389 retval = sock_create(family, type, protocol, &sock);
1390 if (retval < 0)
1391 goto out;
1392
77d27200 1393 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1394 if (retval < 0)
1395 goto out_release;
1396
1397out:
1398 /* It may be already another descriptor 8) Not kernel problem. */
1399 return retval;
1400
1401out_release:
1402 sock_release(sock);
1403 return retval;
1404}
1405
1406/*
1407 * Create a pair of connected sockets.
1408 */
1409
3e0fa65f
HC
1410SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1411 int __user *, usockvec)
1da177e4
LT
1412{
1413 struct socket *sock1, *sock2;
1414 int fd1, fd2, err;
db349509 1415 struct file *newfile1, *newfile2;
a677a039
UD
1416 int flags;
1417
1418 flags = type & ~SOCK_TYPE_MASK;
77d27200 1419 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1420 return -EINVAL;
1421 type &= SOCK_TYPE_MASK;
1da177e4 1422
aaca0bdc
UD
1423 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1424 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1425
1da177e4
LT
1426 /*
1427 * Obtain the first socket and check if the underlying protocol
1428 * supports the socketpair call.
1429 */
1430
1431 err = sock_create(family, type, protocol, &sock1);
1432 if (err < 0)
1433 goto out;
1434
1435 err = sock_create(family, type, protocol, &sock2);
1436 if (err < 0)
1437 goto out_release_1;
1438
1439 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1440 if (err < 0)
1da177e4
LT
1441 goto out_release_both;
1442
28407630 1443 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1444 if (unlikely(fd1 < 0)) {
1445 err = fd1;
db349509 1446 goto out_release_both;
bf3c23d1 1447 }
d73aa286 1448
28407630 1449 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1450 if (unlikely(fd2 < 0)) {
1451 err = fd2;
d73aa286 1452 goto out_put_unused_1;
28407630
AV
1453 }
1454
aab174f0 1455 newfile1 = sock_alloc_file(sock1, flags, NULL);
28407630
AV
1456 if (unlikely(IS_ERR(newfile1))) {
1457 err = PTR_ERR(newfile1);
d73aa286 1458 goto out_put_unused_both;
28407630
AV
1459 }
1460
aab174f0 1461 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1462 if (IS_ERR(newfile2)) {
1463 err = PTR_ERR(newfile2);
d73aa286 1464 goto out_fput_1;
db349509
AV
1465 }
1466
d73aa286
YD
1467 err = put_user(fd1, &usockvec[0]);
1468 if (err)
1469 goto out_fput_both;
1470
1471 err = put_user(fd2, &usockvec[1]);
1472 if (err)
1473 goto out_fput_both;
1474
157cf649 1475 audit_fd_pair(fd1, fd2);
d73aa286 1476
db349509
AV
1477 fd_install(fd1, newfile1);
1478 fd_install(fd2, newfile2);
1da177e4
LT
1479 /* fd1 and fd2 may be already another descriptors.
1480 * Not kernel problem.
1481 */
1482
d73aa286 1483 return 0;
1da177e4 1484
d73aa286
YD
1485out_fput_both:
1486 fput(newfile2);
1487 fput(newfile1);
1488 put_unused_fd(fd2);
1489 put_unused_fd(fd1);
1490 goto out;
1491
1492out_fput_1:
1493 fput(newfile1);
1494 put_unused_fd(fd2);
1495 put_unused_fd(fd1);
1496 sock_release(sock2);
1497 goto out;
1da177e4 1498
d73aa286
YD
1499out_put_unused_both:
1500 put_unused_fd(fd2);
1501out_put_unused_1:
1502 put_unused_fd(fd1);
1da177e4 1503out_release_both:
89bddce5 1504 sock_release(sock2);
1da177e4 1505out_release_1:
89bddce5 1506 sock_release(sock1);
1da177e4
LT
1507out:
1508 return err;
1509}
1510
1da177e4
LT
1511/*
1512 * Bind a name to a socket. Nothing much to do here since it's
1513 * the protocol's responsibility to handle the local address.
1514 *
1515 * We move the socket address to kernel space before we call
1516 * the protocol layer (having also checked the address is ok).
1517 */
1518
20f37034 1519SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1520{
1521 struct socket *sock;
230b1839 1522 struct sockaddr_storage address;
6cb153ca 1523 int err, fput_needed;
1da177e4 1524
89bddce5 1525 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1526 if (sock) {
43db362d 1527 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1528 if (err >= 0) {
1529 err = security_socket_bind(sock,
230b1839 1530 (struct sockaddr *)&address,
89bddce5 1531 addrlen);
6cb153ca
BL
1532 if (!err)
1533 err = sock->ops->bind(sock,
89bddce5 1534 (struct sockaddr *)
230b1839 1535 &address, addrlen);
1da177e4 1536 }
6cb153ca 1537 fput_light(sock->file, fput_needed);
89bddce5 1538 }
1da177e4
LT
1539 return err;
1540}
1541
1da177e4
LT
1542/*
1543 * Perform a listen. Basically, we allow the protocol to do anything
1544 * necessary for a listen, and if that works, we mark the socket as
1545 * ready for listening.
1546 */
1547
3e0fa65f 1548SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1549{
1550 struct socket *sock;
6cb153ca 1551 int err, fput_needed;
b8e1f9b5 1552 int somaxconn;
89bddce5
SH
1553
1554 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1555 if (sock) {
8efa6e93 1556 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1557 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1558 backlog = somaxconn;
1da177e4
LT
1559
1560 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1561 if (!err)
1562 err = sock->ops->listen(sock, backlog);
1da177e4 1563
6cb153ca 1564 fput_light(sock->file, fput_needed);
1da177e4
LT
1565 }
1566 return err;
1567}
1568
1da177e4
LT
1569/*
1570 * For accept, we attempt to create a new socket, set up the link
1571 * with the client, wake up the client, then return the new
1572 * connected fd. We collect the address of the connector in kernel
1573 * space and move it to user at the very end. This is unclean because
1574 * we open the socket then return an error.
1575 *
1576 * 1003.1g adds the ability to recvmsg() to query connection pending
1577 * status to recvmsg. We need to add that support in a way thats
1578 * clean when we restucture accept also.
1579 */
1580
20f37034
HC
1581SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1582 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1583{
1584 struct socket *sock, *newsock;
39d8c1b6 1585 struct file *newfile;
6cb153ca 1586 int err, len, newfd, fput_needed;
230b1839 1587 struct sockaddr_storage address;
1da177e4 1588
77d27200 1589 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1590 return -EINVAL;
1591
1592 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1593 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1594
6cb153ca 1595 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1596 if (!sock)
1597 goto out;
1598
1599 err = -ENFILE;
c6d409cf
ED
1600 newsock = sock_alloc();
1601 if (!newsock)
1da177e4
LT
1602 goto out_put;
1603
1604 newsock->type = sock->type;
1605 newsock->ops = sock->ops;
1606
1da177e4
LT
1607 /*
1608 * We don't need try_module_get here, as the listening socket (sock)
1609 * has the protocol module (sock->ops->owner) held.
1610 */
1611 __module_get(newsock->ops->owner);
1612
28407630 1613 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1614 if (unlikely(newfd < 0)) {
1615 err = newfd;
9a1875e6
DM
1616 sock_release(newsock);
1617 goto out_put;
39d8c1b6 1618 }
aab174f0 1619 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
28407630
AV
1620 if (unlikely(IS_ERR(newfile))) {
1621 err = PTR_ERR(newfile);
1622 put_unused_fd(newfd);
1623 sock_release(newsock);
1624 goto out_put;
1625 }
39d8c1b6 1626
a79af59e
FF
1627 err = security_socket_accept(sock, newsock);
1628 if (err)
39d8c1b6 1629 goto out_fd;
a79af59e 1630
1da177e4
LT
1631 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1632 if (err < 0)
39d8c1b6 1633 goto out_fd;
1da177e4
LT
1634
1635 if (upeer_sockaddr) {
230b1839 1636 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1637 &len, 2) < 0) {
1da177e4 1638 err = -ECONNABORTED;
39d8c1b6 1639 goto out_fd;
1da177e4 1640 }
43db362d 1641 err = move_addr_to_user(&address,
230b1839 1642 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1643 if (err < 0)
39d8c1b6 1644 goto out_fd;
1da177e4
LT
1645 }
1646
1647 /* File flags are not inherited via accept() unlike another OSes. */
1648
39d8c1b6
DM
1649 fd_install(newfd, newfile);
1650 err = newfd;
1da177e4 1651
1da177e4 1652out_put:
6cb153ca 1653 fput_light(sock->file, fput_needed);
1da177e4
LT
1654out:
1655 return err;
39d8c1b6 1656out_fd:
9606a216 1657 fput(newfile);
39d8c1b6 1658 put_unused_fd(newfd);
1da177e4
LT
1659 goto out_put;
1660}
1661
20f37034
HC
1662SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1663 int __user *, upeer_addrlen)
aaca0bdc 1664{
de11defe 1665 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1666}
1667
1da177e4
LT
1668/*
1669 * Attempt to connect to a socket with the server address. The address
1670 * is in user space so we verify it is OK and move it to kernel space.
1671 *
1672 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1673 * break bindings
1674 *
1675 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1676 * other SEQPACKET protocols that take time to connect() as it doesn't
1677 * include the -EINPROGRESS status for such sockets.
1678 */
1679
20f37034
HC
1680SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1681 int, addrlen)
1da177e4
LT
1682{
1683 struct socket *sock;
230b1839 1684 struct sockaddr_storage address;
6cb153ca 1685 int err, fput_needed;
1da177e4 1686
6cb153ca 1687 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1688 if (!sock)
1689 goto out;
43db362d 1690 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1691 if (err < 0)
1692 goto out_put;
1693
89bddce5 1694 err =
230b1839 1695 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1696 if (err)
1697 goto out_put;
1698
230b1839 1699 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1700 sock->file->f_flags);
1701out_put:
6cb153ca 1702 fput_light(sock->file, fput_needed);
1da177e4
LT
1703out:
1704 return err;
1705}
1706
1707/*
1708 * Get the local address ('name') of a socket object. Move the obtained
1709 * name to user space.
1710 */
1711
20f37034
HC
1712SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1713 int __user *, usockaddr_len)
1da177e4
LT
1714{
1715 struct socket *sock;
230b1839 1716 struct sockaddr_storage address;
6cb153ca 1717 int len, err, fput_needed;
89bddce5 1718
6cb153ca 1719 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1720 if (!sock)
1721 goto out;
1722
1723 err = security_socket_getsockname(sock);
1724 if (err)
1725 goto out_put;
1726
230b1839 1727 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1728 if (err)
1729 goto out_put;
43db362d 1730 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1731
1732out_put:
6cb153ca 1733 fput_light(sock->file, fput_needed);
1da177e4
LT
1734out:
1735 return err;
1736}
1737
1738/*
1739 * Get the remote address ('name') of a socket object. Move the obtained
1740 * name to user space.
1741 */
1742
20f37034
HC
1743SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1744 int __user *, usockaddr_len)
1da177e4
LT
1745{
1746 struct socket *sock;
230b1839 1747 struct sockaddr_storage address;
6cb153ca 1748 int len, err, fput_needed;
1da177e4 1749
89bddce5
SH
1750 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1751 if (sock != NULL) {
1da177e4
LT
1752 err = security_socket_getpeername(sock);
1753 if (err) {
6cb153ca 1754 fput_light(sock->file, fput_needed);
1da177e4
LT
1755 return err;
1756 }
1757
89bddce5 1758 err =
230b1839 1759 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1760 1);
1da177e4 1761 if (!err)
43db362d 1762 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1763 usockaddr_len);
6cb153ca 1764 fput_light(sock->file, fput_needed);
1da177e4
LT
1765 }
1766 return err;
1767}
1768
1769/*
1770 * Send a datagram to a given address. We move the address into kernel
1771 * space and check the user space data area is readable before invoking
1772 * the protocol.
1773 */
1774
3e0fa65f 1775SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1776 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1777 int, addr_len)
1da177e4
LT
1778{
1779 struct socket *sock;
230b1839 1780 struct sockaddr_storage address;
1da177e4
LT
1781 int err;
1782 struct msghdr msg;
1783 struct iovec iov;
6cb153ca 1784 int fput_needed;
6cb153ca 1785
253eacc0
LT
1786 if (len > INT_MAX)
1787 len = INT_MAX;
de0fa95c
PE
1788 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1789 if (!sock)
4387ff75 1790 goto out;
6cb153ca 1791
89bddce5
SH
1792 iov.iov_base = buff;
1793 iov.iov_len = len;
1794 msg.msg_name = NULL;
1795 msg.msg_iov = &iov;
1796 msg.msg_iovlen = 1;
1797 msg.msg_control = NULL;
1798 msg.msg_controllen = 0;
1799 msg.msg_namelen = 0;
6cb153ca 1800 if (addr) {
43db362d 1801 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1802 if (err < 0)
1803 goto out_put;
230b1839 1804 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1805 msg.msg_namelen = addr_len;
1da177e4
LT
1806 }
1807 if (sock->file->f_flags & O_NONBLOCK)
1808 flags |= MSG_DONTWAIT;
1809 msg.msg_flags = flags;
1810 err = sock_sendmsg(sock, &msg, len);
1811
89bddce5 1812out_put:
de0fa95c 1813 fput_light(sock->file, fput_needed);
4387ff75 1814out:
1da177e4
LT
1815 return err;
1816}
1817
1818/*
89bddce5 1819 * Send a datagram down a socket.
1da177e4
LT
1820 */
1821
3e0fa65f 1822SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1823 unsigned int, flags)
1da177e4
LT
1824{
1825 return sys_sendto(fd, buff, len, flags, NULL, 0);
1826}
1827
1828/*
89bddce5 1829 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1830 * sender. We verify the buffers are writable and if needed move the
1831 * sender address from kernel to user space.
1832 */
1833
3e0fa65f 1834SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1835 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1836 int __user *, addr_len)
1da177e4
LT
1837{
1838 struct socket *sock;
1839 struct iovec iov;
1840 struct msghdr msg;
230b1839 1841 struct sockaddr_storage address;
89bddce5 1842 int err, err2;
6cb153ca
BL
1843 int fput_needed;
1844
253eacc0
LT
1845 if (size > INT_MAX)
1846 size = INT_MAX;
de0fa95c 1847 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1848 if (!sock)
de0fa95c 1849 goto out;
1da177e4 1850
89bddce5
SH
1851 msg.msg_control = NULL;
1852 msg.msg_controllen = 0;
1853 msg.msg_iovlen = 1;
1854 msg.msg_iov = &iov;
1855 iov.iov_len = size;
1856 iov.iov_base = ubuf;
f3d33426
HFS
1857 /* Save some cycles and don't copy the address if not needed */
1858 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1859 /* We assume all kernel code knows the size of sockaddr_storage */
1860 msg.msg_namelen = 0;
1da177e4
LT
1861 if (sock->file->f_flags & O_NONBLOCK)
1862 flags |= MSG_DONTWAIT;
89bddce5 1863 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1864
89bddce5 1865 if (err >= 0 && addr != NULL) {
43db362d 1866 err2 = move_addr_to_user(&address,
230b1839 1867 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1868 if (err2 < 0)
1869 err = err2;
1da177e4 1870 }
de0fa95c
PE
1871
1872 fput_light(sock->file, fput_needed);
4387ff75 1873out:
1da177e4
LT
1874 return err;
1875}
1876
1877/*
89bddce5 1878 * Receive a datagram from a socket.
1da177e4
LT
1879 */
1880
89bddce5 1881asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
95c96174 1882 unsigned int flags)
1da177e4
LT
1883{
1884 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1885}
1886
1887/*
1888 * Set a socket option. Because we don't know the option lengths we have
1889 * to pass the user mode parameter for the protocols to sort out.
1890 */
1891
20f37034
HC
1892SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1893 char __user *, optval, int, optlen)
1da177e4 1894{
6cb153ca 1895 int err, fput_needed;
1da177e4
LT
1896 struct socket *sock;
1897
1898 if (optlen < 0)
1899 return -EINVAL;
89bddce5
SH
1900
1901 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1902 if (sock != NULL) {
1903 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1904 if (err)
1905 goto out_put;
1da177e4
LT
1906
1907 if (level == SOL_SOCKET)
89bddce5
SH
1908 err =
1909 sock_setsockopt(sock, level, optname, optval,
1910 optlen);
1da177e4 1911 else
89bddce5
SH
1912 err =
1913 sock->ops->setsockopt(sock, level, optname, optval,
1914 optlen);
6cb153ca
BL
1915out_put:
1916 fput_light(sock->file, fput_needed);
1da177e4
LT
1917 }
1918 return err;
1919}
1920
1921/*
1922 * Get a socket option. Because we don't know the option lengths we have
1923 * to pass a user mode parameter for the protocols to sort out.
1924 */
1925
20f37034
HC
1926SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1927 char __user *, optval, int __user *, optlen)
1da177e4 1928{
6cb153ca 1929 int err, fput_needed;
1da177e4
LT
1930 struct socket *sock;
1931
89bddce5
SH
1932 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1933 if (sock != NULL) {
6cb153ca
BL
1934 err = security_socket_getsockopt(sock, level, optname);
1935 if (err)
1936 goto out_put;
1da177e4
LT
1937
1938 if (level == SOL_SOCKET)
89bddce5
SH
1939 err =
1940 sock_getsockopt(sock, level, optname, optval,
1941 optlen);
1da177e4 1942 else
89bddce5
SH
1943 err =
1944 sock->ops->getsockopt(sock, level, optname, optval,
1945 optlen);
6cb153ca
BL
1946out_put:
1947 fput_light(sock->file, fput_needed);
1da177e4
LT
1948 }
1949 return err;
1950}
1951
1da177e4
LT
1952/*
1953 * Shutdown a socket.
1954 */
1955
754fe8d2 1956SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1957{
6cb153ca 1958 int err, fput_needed;
1da177e4
LT
1959 struct socket *sock;
1960
89bddce5
SH
1961 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1962 if (sock != NULL) {
1da177e4 1963 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1964 if (!err)
1965 err = sock->ops->shutdown(sock, how);
1966 fput_light(sock->file, fput_needed);
1da177e4
LT
1967 }
1968 return err;
1969}
1970
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * They pick the field from <msg>_compat when MSG_CMSG_COMPAT is set in
 * `flags` and from <msg> otherwise, so both `flags` and the <msg>_compat
 * pointer must be in scope where they are used.
 */
#define COMPAT_MSG(msg, member)					\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1977
c71d8ebe
TH
1978struct used_address {
1979 struct sockaddr_storage name;
1980 unsigned int name_len;
1981};
1982
1661bf36
DC
1983static int copy_msghdr_from_user(struct msghdr *kmsg,
1984 struct msghdr __user *umsg)
1985{
1986 if (copy_from_user(kmsg, umsg, sizeof(struct msghdr)))
1987 return -EFAULT;
1988 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1989 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
1661bf36
DC
1990 return 0;
1991}
1992
a7526eb5 1993static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 1994 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1995 struct used_address *used_address)
1da177e4 1996{
89bddce5
SH
1997 struct compat_msghdr __user *msg_compat =
1998 (struct compat_msghdr __user *)msg;
230b1839 1999 struct sockaddr_storage address;
1da177e4 2000 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2001 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
2002 __attribute__ ((aligned(sizeof(__kernel_size_t))));
2003 /* 20 is size of ipv6_pktinfo */
1da177e4 2004 unsigned char *ctl_buf = ctl;
a74e9106 2005 int err, ctl_len, total_len;
89bddce5 2006
1da177e4
LT
2007 err = -EFAULT;
2008 if (MSG_CMSG_COMPAT & flags) {
228e548e 2009 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2010 return -EFAULT;
1661bf36
DC
2011 } else {
2012 err = copy_msghdr_from_user(msg_sys, msg);
2013 if (err)
2014 return err;
2015 }
1da177e4 2016
228e548e 2017 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2018 err = -EMSGSIZE;
2019 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2020 goto out;
2021 err = -ENOMEM;
2022 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2023 GFP_KERNEL);
1da177e4 2024 if (!iov)
228e548e 2025 goto out;
1da177e4
LT
2026 }
2027
2028 /* This will also move the address data into kernel space */
2029 if (MSG_CMSG_COMPAT & flags) {
43db362d 2030 err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
1da177e4 2031 } else
43db362d 2032 err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
89bddce5 2033 if (err < 0)
1da177e4
LT
2034 goto out_freeiov;
2035 total_len = err;
2036
2037 err = -ENOBUFS;
2038
228e548e 2039 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2040 goto out_freeiov;
228e548e 2041 ctl_len = msg_sys->msg_controllen;
1da177e4 2042 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2043 err =
228e548e 2044 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2045 sizeof(ctl));
1da177e4
LT
2046 if (err)
2047 goto out_freeiov;
228e548e
AB
2048 ctl_buf = msg_sys->msg_control;
2049 ctl_len = msg_sys->msg_controllen;
1da177e4 2050 } else if (ctl_len) {
89bddce5 2051 if (ctl_len > sizeof(ctl)) {
1da177e4 2052 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2053 if (ctl_buf == NULL)
1da177e4
LT
2054 goto out_freeiov;
2055 }
2056 err = -EFAULT;
2057 /*
228e548e 2058 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2059 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2060 * checking falls down on this.
2061 */
fb8621bb 2062 if (copy_from_user(ctl_buf,
228e548e 2063 (void __user __force *)msg_sys->msg_control,
89bddce5 2064 ctl_len))
1da177e4 2065 goto out_freectl;
228e548e 2066 msg_sys->msg_control = ctl_buf;
1da177e4 2067 }
228e548e 2068 msg_sys->msg_flags = flags;
1da177e4
LT
2069
2070 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2071 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2072 /*
2073 * If this is sendmmsg() and current destination address is same as
2074 * previously succeeded address, omit asking LSM's decision.
2075 * used_address->name_len is initialized to UINT_MAX so that the first
2076 * destination address never matches.
2077 */
bc909d9d
MD
2078 if (used_address && msg_sys->msg_name &&
2079 used_address->name_len == msg_sys->msg_namelen &&
2080 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
2081 used_address->name_len)) {
2082 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
2083 goto out_freectl;
2084 }
2085 err = sock_sendmsg(sock, msg_sys, total_len);
2086 /*
2087 * If this is sendmmsg() and sending to current destination address was
2088 * successful, remember it.
2089 */
2090 if (used_address && err >= 0) {
2091 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2092 if (msg_sys->msg_name)
2093 memcpy(&used_address->name, msg_sys->msg_name,
2094 used_address->name_len);
c71d8ebe 2095 }
1da177e4
LT
2096
2097out_freectl:
89bddce5 2098 if (ctl_buf != ctl)
1da177e4
LT
2099 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2100out_freeiov:
2101 if (iov != iovstack)
a74e9106 2102 kfree(iov);
228e548e
AB
2103out:
2104 return err;
2105}
2106
2107/*
2108 * BSD sendmsg interface
2109 */
2110
a7526eb5 2111long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
228e548e
AB
2112{
2113 int fput_needed, err;
2114 struct msghdr msg_sys;
1be374a0
AL
2115 struct socket *sock;
2116
1be374a0 2117 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2118 if (!sock)
2119 goto out;
2120
a7526eb5 2121 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2122
6cb153ca 2123 fput_light(sock->file, fput_needed);
89bddce5 2124out:
1da177e4
LT
2125 return err;
2126}
2127
a7526eb5
AL
2128SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
2129{
2130 if (flags & MSG_CMSG_COMPAT)
2131 return -EINVAL;
2132 return __sys_sendmsg(fd, msg, flags);
2133}
2134
228e548e
AB
2135/*
2136 * Linux sendmmsg interface
2137 */
2138
2139int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2140 unsigned int flags)
2141{
2142 int fput_needed, err, datagrams;
2143 struct socket *sock;
2144 struct mmsghdr __user *entry;
2145 struct compat_mmsghdr __user *compat_entry;
2146 struct msghdr msg_sys;
c71d8ebe 2147 struct used_address used_address;
228e548e 2148
98382f41
AB
2149 if (vlen > UIO_MAXIOV)
2150 vlen = UIO_MAXIOV;
228e548e
AB
2151
2152 datagrams = 0;
2153
2154 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2155 if (!sock)
2156 return err;
2157
c71d8ebe 2158 used_address.name_len = UINT_MAX;
228e548e
AB
2159 entry = mmsg;
2160 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2161 err = 0;
228e548e
AB
2162
2163 while (datagrams < vlen) {
228e548e 2164 if (MSG_CMSG_COMPAT & flags) {
a7526eb5
AL
2165 err = ___sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
2166 &msg_sys, flags, &used_address);
228e548e
AB
2167 if (err < 0)
2168 break;
2169 err = __put_user(err, &compat_entry->msg_len);
2170 ++compat_entry;
2171 } else {
a7526eb5
AL
2172 err = ___sys_sendmsg(sock,
2173 (struct msghdr __user *)entry,
2174 &msg_sys, flags, &used_address);
228e548e
AB
2175 if (err < 0)
2176 break;
2177 err = put_user(err, &entry->msg_len);
2178 ++entry;
2179 }
2180
2181 if (err)
2182 break;
2183 ++datagrams;
2184 }
2185
228e548e
AB
2186 fput_light(sock->file, fput_needed);
2187
728ffb86
AB
2188 /* We only return an error if no datagrams were able to be sent */
2189 if (datagrams != 0)
228e548e
AB
2190 return datagrams;
2191
228e548e
AB
2192 return err;
2193}
2194
2195SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2196 unsigned int, vlen, unsigned int, flags)
2197{
1be374a0
AL
2198 if (flags & MSG_CMSG_COMPAT)
2199 return -EINVAL;
228e548e
AB
2200 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2201}
2202
a7526eb5 2203static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 2204 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2205{
89bddce5
SH
2206 struct compat_msghdr __user *msg_compat =
2207 (struct compat_msghdr __user *)msg;
1da177e4 2208 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2209 struct iovec *iov = iovstack;
1da177e4 2210 unsigned long cmsg_ptr;
a74e9106 2211 int err, total_len, len;
1da177e4
LT
2212
2213 /* kernel mode address */
230b1839 2214 struct sockaddr_storage addr;
1da177e4
LT
2215
2216 /* user mode address pointers */
2217 struct sockaddr __user *uaddr;
2218 int __user *uaddr_len;
89bddce5 2219
1da177e4 2220 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2221 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2222 return -EFAULT;
1661bf36
DC
2223 } else {
2224 err = copy_msghdr_from_user(msg_sys, msg);
2225 if (err)
2226 return err;
2227 }
1da177e4 2228
a2e27255 2229 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2230 err = -EMSGSIZE;
2231 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2232 goto out;
2233 err = -ENOMEM;
2234 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2235 GFP_KERNEL);
1da177e4 2236 if (!iov)
a2e27255 2237 goto out;
1da177e4
LT
2238 }
2239
f3d33426
HFS
2240 /* Save the user-mode address (verify_iovec will change the
2241 * kernel msghdr to use the kernel address space)
1da177e4 2242 */
a2e27255 2243 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4 2244 uaddr_len = COMPAT_NAMELEN(msg);
f3d33426 2245 if (MSG_CMSG_COMPAT & flags)
43db362d 2246 err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
f3d33426 2247 else
43db362d 2248 err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4
LT
2249 if (err < 0)
2250 goto out_freeiov;
89bddce5 2251 total_len = err;
1da177e4 2252
a2e27255
ACM
2253 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2254 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2255
f3d33426
HFS
2256 /* We assume all kernel code knows the size of sockaddr_storage */
2257 msg_sys->msg_namelen = 0;
2258
1da177e4
LT
2259 if (sock->file->f_flags & O_NONBLOCK)
2260 flags |= MSG_DONTWAIT;
a2e27255
ACM
2261 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2262 total_len, flags);
1da177e4
LT
2263 if (err < 0)
2264 goto out_freeiov;
2265 len = err;
2266
2267 if (uaddr != NULL) {
43db362d 2268 err = move_addr_to_user(&addr,
a2e27255 2269 msg_sys->msg_namelen, uaddr,
89bddce5 2270 uaddr_len);
1da177e4
LT
2271 if (err < 0)
2272 goto out_freeiov;
2273 }
a2e27255 2274 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2275 COMPAT_FLAGS(msg));
1da177e4
LT
2276 if (err)
2277 goto out_freeiov;
2278 if (MSG_CMSG_COMPAT & flags)
a2e27255 2279 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2280 &msg_compat->msg_controllen);
2281 else
a2e27255 2282 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2283 &msg->msg_controllen);
2284 if (err)
2285 goto out_freeiov;
2286 err = len;
2287
2288out_freeiov:
2289 if (iov != iovstack)
a74e9106 2290 kfree(iov);
a2e27255
ACM
2291out:
2292 return err;
2293}
2294
2295/*
2296 * BSD recvmsg interface
2297 */
2298
a7526eb5 2299long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags)
a2e27255
ACM
2300{
2301 int fput_needed, err;
2302 struct msghdr msg_sys;
1be374a0
AL
2303 struct socket *sock;
2304
1be374a0 2305 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2306 if (!sock)
2307 goto out;
2308
a7526eb5 2309 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2310
6cb153ca 2311 fput_light(sock->file, fput_needed);
1da177e4
LT
2312out:
2313 return err;
2314}
2315
a7526eb5
AL
2316SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2317 unsigned int, flags)
2318{
2319 if (flags & MSG_CMSG_COMPAT)
2320 return -EINVAL;
2321 return __sys_recvmsg(fd, msg, flags);
2322}
2323
a2e27255
ACM
2324/*
2325 * Linux recvmmsg interface
2326 */
2327
2328int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2329 unsigned int flags, struct timespec *timeout)
2330{
2331 int fput_needed, err, datagrams;
2332 struct socket *sock;
2333 struct mmsghdr __user *entry;
d7256d0e 2334 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2335 struct msghdr msg_sys;
2336 struct timespec end_time;
2337
2338 if (timeout &&
2339 poll_select_set_timeout(&end_time, timeout->tv_sec,
2340 timeout->tv_nsec))
2341 return -EINVAL;
2342
2343 datagrams = 0;
2344
2345 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2346 if (!sock)
2347 return err;
2348
2349 err = sock_error(sock->sk);
2350 if (err)
2351 goto out_put;
2352
2353 entry = mmsg;
d7256d0e 2354 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2355
2356 while (datagrams < vlen) {
2357 /*
2358 * No need to ask LSM for more than the first datagram.
2359 */
d7256d0e 2360 if (MSG_CMSG_COMPAT & flags) {
a7526eb5
AL
2361 err = ___sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
2362 &msg_sys, flags & ~MSG_WAITFORONE,
2363 datagrams);
d7256d0e
JMG
2364 if (err < 0)
2365 break;
2366 err = __put_user(err, &compat_entry->msg_len);
2367 ++compat_entry;
2368 } else {
a7526eb5
AL
2369 err = ___sys_recvmsg(sock,
2370 (struct msghdr __user *)entry,
2371 &msg_sys, flags & ~MSG_WAITFORONE,
2372 datagrams);
d7256d0e
JMG
2373 if (err < 0)
2374 break;
2375 err = put_user(err, &entry->msg_len);
2376 ++entry;
2377 }
2378
a2e27255
ACM
2379 if (err)
2380 break;
a2e27255
ACM
2381 ++datagrams;
2382
71c5c159
BB
2383 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2384 if (flags & MSG_WAITFORONE)
2385 flags |= MSG_DONTWAIT;
2386
a2e27255
ACM
2387 if (timeout) {
2388 ktime_get_ts(timeout);
2389 *timeout = timespec_sub(end_time, *timeout);
2390 if (timeout->tv_sec < 0) {
2391 timeout->tv_sec = timeout->tv_nsec = 0;
2392 break;
2393 }
2394
2395 /* Timeout, return less than vlen datagrams */
2396 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2397 break;
2398 }
2399
2400 /* Out of band data, return right away */
2401 if (msg_sys.msg_flags & MSG_OOB)
2402 break;
2403 }
2404
2405out_put:
2406 fput_light(sock->file, fput_needed);
1da177e4 2407
a2e27255
ACM
2408 if (err == 0)
2409 return datagrams;
2410
2411 if (datagrams != 0) {
2412 /*
2413 * We may return less entries than requested (vlen) if the
2414 * sock is non block and there aren't enough datagrams...
2415 */
2416 if (err != -EAGAIN) {
2417 /*
2418 * ... or if recvmsg returns an error after we
2419 * received some datagrams, where we record the
2420 * error to return on the next call or if the
2421 * app asks about it using getsockopt(SO_ERROR).
2422 */
2423 sock->sk->sk_err = -err;
2424 }
2425
2426 return datagrams;
2427 }
2428
2429 return err;
2430}
2431
2432SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2433 unsigned int, vlen, unsigned int, flags,
2434 struct timespec __user *, timeout)
2435{
2436 int datagrams;
2437 struct timespec timeout_sys;
2438
1be374a0
AL
2439 if (flags & MSG_CMSG_COMPAT)
2440 return -EINVAL;
2441
a2e27255
ACM
2442 if (!timeout)
2443 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2444
2445 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2446 return -EFAULT;
2447
2448 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2449
2450 if (datagrams > 0 &&
2451 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2452 datagrams = -EFAULT;
2453
2454 return datagrams;
2455}
2456
2457#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
2458/*
 * Argument list sizes for sys_socketcall, indexed by the call number.
 *
 * Each entry is the number of bytes of unsigned long arguments that
 * sys_socketcall copies from user space for that call; AL(n) converts an
 * argument count into that byte size.  Slot 0 is AL(0) because call
 * numbers below 1 are rejected before the table is consulted.
 */
2459#define AL(x) ((x) * sizeof(unsigned long))
228e548e 2460static const unsigned char nargs[21] = {
c6d409cf
ED
2461 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
2462 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
2463 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
228e548e 2464 AL(4), AL(5), AL(4)
89bddce5
SH
2465};
2466
1da177e4
LT
/* AL() is only meant for building the table above. */
2467#undef AL
2468
2469/*
89bddce5 2470 * System call vectors.
1da177e4
LT
2471 *
2472 * Argument checking cleaned up. Saved 20% in size.
2473 * This function doesn't need to set the kernel lock because
89bddce5 2474 * it is set by the callees.
1da177e4
LT
2475 */
2476
3e0fa65f 2477SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2478{
2950fa9d 2479 unsigned long a[AUDITSC_ARGS];
89bddce5 2480 unsigned long a0, a1;
1da177e4 2481 int err;
47379052 2482 unsigned int len;
1da177e4 2483
228e548e 2484 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2485 return -EINVAL;
2486
47379052
AV
2487 len = nargs[call];
2488 if (len > sizeof(a))
2489 return -EINVAL;
2490
1da177e4 2491 /* copy_from_user should be SMP safe. */
47379052 2492 if (copy_from_user(a, args, len))
1da177e4 2493 return -EFAULT;
3ec3b2fb 2494
2950fa9d
CG
2495 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2496 if (err)
2497 return err;
3ec3b2fb 2498
89bddce5
SH
2499 a0 = a[0];
2500 a1 = a[1];
2501
2502 switch (call) {
2503 case SYS_SOCKET:
2504 err = sys_socket(a0, a1, a[2]);
2505 break;
2506 case SYS_BIND:
2507 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2508 break;
2509 case SYS_CONNECT:
2510 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2511 break;
2512 case SYS_LISTEN:
2513 err = sys_listen(a0, a1);
2514 break;
2515 case SYS_ACCEPT:
de11defe
UD
2516 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2517 (int __user *)a[2], 0);
89bddce5
SH
2518 break;
2519 case SYS_GETSOCKNAME:
2520 err =
2521 sys_getsockname(a0, (struct sockaddr __user *)a1,
2522 (int __user *)a[2]);
2523 break;
2524 case SYS_GETPEERNAME:
2525 err =
2526 sys_getpeername(a0, (struct sockaddr __user *)a1,
2527 (int __user *)a[2]);
2528 break;
2529 case SYS_SOCKETPAIR:
2530 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2531 break;
2532 case SYS_SEND:
2533 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2534 break;
2535 case SYS_SENDTO:
2536 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2537 (struct sockaddr __user *)a[4], a[5]);
2538 break;
2539 case SYS_RECV:
2540 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2541 break;
2542 case SYS_RECVFROM:
2543 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2544 (struct sockaddr __user *)a[4],
2545 (int __user *)a[5]);
2546 break;
2547 case SYS_SHUTDOWN:
2548 err = sys_shutdown(a0, a1);
2549 break;
2550 case SYS_SETSOCKOPT:
2551 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2552 break;
2553 case SYS_GETSOCKOPT:
2554 err =
2555 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2556 (int __user *)a[4]);
2557 break;
2558 case SYS_SENDMSG:
2559 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2560 break;
228e548e
AB
2561 case SYS_SENDMMSG:
2562 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2563 break;
89bddce5
SH
2564 case SYS_RECVMSG:
2565 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2566 break;
a2e27255
ACM
2567 case SYS_RECVMMSG:
2568 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2569 (struct timespec __user *)a[4]);
2570 break;
de11defe
UD
2571 case SYS_ACCEPT4:
2572 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2573 (int __user *)a[2], a[3]);
aaca0bdc 2574 break;
89bddce5
SH
2575 default:
2576 err = -EINVAL;
2577 break;
1da177e4
LT
2578 }
2579 return err;
2580}
2581
89bddce5 2582#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2583
55737fda
SH
2584/**
2585 * sock_register - add a socket protocol handler
2586 * @ops: description of protocol
2587 *
1da177e4
LT
2588 * This function is called by a protocol handler that wants to
2589 * advertise its address family, and have it linked into the
55737fda
SH
2590 * socket interface. The value ops->family coresponds to the
2591 * socket system call protocol family.
1da177e4 2592 */
f0fd27d4 2593int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2594{
2595 int err;
2596
2597 if (ops->family >= NPROTO) {
89bddce5
SH
2598 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2599 NPROTO);
1da177e4
LT
2600 return -ENOBUFS;
2601 }
55737fda
SH
2602
2603 spin_lock(&net_family_lock);
190683a9
ED
2604 if (rcu_dereference_protected(net_families[ops->family],
2605 lockdep_is_held(&net_family_lock)))
55737fda
SH
2606 err = -EEXIST;
2607 else {
cf778b00 2608 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2609 err = 0;
2610 }
55737fda
SH
2611 spin_unlock(&net_family_lock);
2612
89bddce5 2613 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2614 return err;
2615}
c6d409cf 2616EXPORT_SYMBOL(sock_register);
1da177e4 2617
55737fda
SH
2618/**
 *	sock_unregister - remove a protocol handler
 *	@family: protocol family to remove
 *
 *	This function is called by a protocol handler that wants to
 *	remove its address family, and have it unlinked from the
 *	new socket creation.
 *
 *	If protocol handler is a module, then it can use module reference
 *	counts to protect against new references. If protocol handler is not
 *	a module then it needs to provide its own protection in
 *	the ops->create routine.
 *
 *	A @family outside 0..NPROTO-1 triggers BUG_ON().  The function calls
 *	synchronize_rcu() after clearing the slot, before it returns.
 */
f0fd27d4 2631void sock_unregister(int family)
1da177e4 2632{
f0fd27d4 2633 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2634
/* Clear the slot under the same lock sock_register() takes. */
55737fda 2635 spin_lock(&net_family_lock);
a9b3cd7f 2636 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2637 spin_unlock(&net_family_lock);
2638
/* Wait for an RCU grace period before reporting the removal. */
2639 synchronize_rcu();
2640
89bddce5 2641 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4 2642}
c6d409cf 2643EXPORT_SYMBOL(sock_unregister);
1da177e4 2644
77d76ea3 2645static int __init sock_init(void)
1da177e4 2646{
b3e19d92 2647 int err;
2ca794e5
EB
2648 /*
2649 * Initialize the network sysctl infrastructure.
2650 */
2651 err = net_sysctl_init();
2652 if (err)
2653 goto out;
b3e19d92 2654
1da177e4 2655 /*
89bddce5 2656 * Initialize skbuff SLAB cache
1da177e4
LT
2657 */
2658 skb_init();
1da177e4
LT
2659
2660 /*
89bddce5 2661 * Initialize the protocols module.
1da177e4
LT
2662 */
2663
2664 init_inodecache();
b3e19d92
NP
2665
2666 err = register_filesystem(&sock_fs_type);
2667 if (err)
2668 goto out_fs;
1da177e4 2669 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2670 if (IS_ERR(sock_mnt)) {
2671 err = PTR_ERR(sock_mnt);
2672 goto out_mount;
2673 }
77d76ea3
AK
2674
2675 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2676 */
2677
2678#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2679 err = netfilter_init();
2680 if (err)
2681 goto out;
1da177e4 2682#endif
cbeb321a 2683
c1f19b51
RC
2684#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2685 skb_timestamping_init();
2686#endif
2687
b3e19d92
NP
2688out:
2689 return err;
2690
2691out_mount:
2692 unregister_filesystem(&sock_fs_type);
2693out_fs:
2694 goto out;
1da177e4
LT
2695}
2696
77d76ea3
AK
2697core_initcall(sock_init); /* early initcall */
2698
1da177e4
LT
2699#ifdef CONFIG_PROC_FS
2700void socket_seq_show(struct seq_file *seq)
2701{
2702 int cpu;
2703 int counter = 0;
2704
6f912042 2705 for_each_possible_cpu(cpu)
89bddce5 2706 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2707
2708 /* It can be negative, by the way. 8) */
2709 if (counter < 0)
2710 counter = 0;
2711
2712 seq_printf(seq, "sockets: used %d\n", counter);
2713}
89bddce5 2714#endif /* CONFIG_PROC_FS */
1da177e4 2715
89bbfc95 2716#ifdef CONFIG_COMPAT
6b96018b 2717static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2718 unsigned int cmd, void __user *up)
7a229387 2719{
7a229387
AB
2720 mm_segment_t old_fs = get_fs();
2721 struct timeval ktv;
2722 int err;
2723
2724 set_fs(KERNEL_DS);
6b96018b 2725 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2726 set_fs(old_fs);
644595f8 2727 if (!err)
ed6fe9d6 2728 err = compat_put_timeval(&ktv, up);
644595f8 2729
7a229387
AB
2730 return err;
2731}
2732
6b96018b 2733static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2734 unsigned int cmd, void __user *up)
7a229387 2735{
7a229387
AB
2736 mm_segment_t old_fs = get_fs();
2737 struct timespec kts;
2738 int err;
2739
2740 set_fs(KERNEL_DS);
6b96018b 2741 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2742 set_fs(old_fs);
644595f8 2743 if (!err)
ed6fe9d6 2744 err = compat_put_timespec(&kts, up);
644595f8 2745
7a229387
AB
2746 return err;
2747}
2748
6b96018b 2749static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2750{
2751 struct ifreq __user *uifr;
2752 int err;
2753
2754 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2755 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2756 return -EFAULT;
2757
6b96018b 2758 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2759 if (err)
2760 return err;
2761
6b96018b 2762 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2763 return -EFAULT;
2764
2765 return 0;
2766}
2767
6b96018b 2768static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2769{
6b96018b 2770 struct compat_ifconf ifc32;
7a229387
AB
2771 struct ifconf ifc;
2772 struct ifconf __user *uifc;
6b96018b 2773 struct compat_ifreq __user *ifr32;
7a229387
AB
2774 struct ifreq __user *ifr;
2775 unsigned int i, j;
2776 int err;
2777
6b96018b 2778 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2779 return -EFAULT;
2780
43da5f2e 2781 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2782 if (ifc32.ifcbuf == 0) {
2783 ifc32.ifc_len = 0;
2784 ifc.ifc_len = 0;
2785 ifc.ifc_req = NULL;
2786 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2787 } else {
c6d409cf
ED
2788 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2789 sizeof(struct ifreq);
7a229387
AB
2790 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2791 ifc.ifc_len = len;
2792 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2793 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2794 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2795 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2796 return -EFAULT;
2797 ifr++;
2798 ifr32++;
2799 }
2800 }
2801 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2802 return -EFAULT;
2803
6b96018b 2804 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2805 if (err)
2806 return err;
2807
2808 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2809 return -EFAULT;
2810
2811 ifr = ifc.ifc_req;
2812 ifr32 = compat_ptr(ifc32.ifcbuf);
2813 for (i = 0, j = 0;
c6d409cf
ED
2814 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2815 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2816 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2817 return -EFAULT;
2818 ifr32++;
2819 ifr++;
2820 }
2821
2822 if (ifc32.ifcbuf == 0) {
2823 /* Translate from 64-bit structure multiple to
2824 * a 32-bit one.
2825 */
2826 i = ifc.ifc_len;
6b96018b 2827 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2828 ifc32.ifc_len = i;
2829 } else {
2830 ifc32.ifc_len = i;
2831 }
6b96018b 2832 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2833 return -EFAULT;
2834
2835 return 0;
2836}
2837
6b96018b 2838static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2839{
3a7da39d
BH
2840 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2841 bool convert_in = false, convert_out = false;
2842 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2843 struct ethtool_rxnfc __user *rxnfc;
7a229387 2844 struct ifreq __user *ifr;
3a7da39d
BH
2845 u32 rule_cnt = 0, actual_rule_cnt;
2846 u32 ethcmd;
7a229387 2847 u32 data;
3a7da39d 2848 int ret;
7a229387 2849
3a7da39d
BH
2850 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2851 return -EFAULT;
7a229387 2852
3a7da39d
BH
2853 compat_rxnfc = compat_ptr(data);
2854
2855 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2856 return -EFAULT;
2857
3a7da39d
BH
2858 /* Most ethtool structures are defined without padding.
2859 * Unfortunately struct ethtool_rxnfc is an exception.
2860 */
2861 switch (ethcmd) {
2862 default:
2863 break;
2864 case ETHTOOL_GRXCLSRLALL:
2865 /* Buffer size is variable */
2866 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2867 return -EFAULT;
2868 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2869 return -ENOMEM;
2870 buf_size += rule_cnt * sizeof(u32);
2871 /* fall through */
2872 case ETHTOOL_GRXRINGS:
2873 case ETHTOOL_GRXCLSRLCNT:
2874 case ETHTOOL_GRXCLSRULE:
55664f32 2875 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2876 convert_out = true;
2877 /* fall through */
2878 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2879 buf_size += sizeof(struct ethtool_rxnfc);
2880 convert_in = true;
2881 break;
2882 }
2883
2884 ifr = compat_alloc_user_space(buf_size);
954b1244 2885 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2886
2887 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2888 return -EFAULT;
2889
3a7da39d
BH
2890 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2891 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2892 return -EFAULT;
2893
3a7da39d 2894 if (convert_in) {
127fe533 2895 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2896 * fs.ring_cookie and at the end of fs, but nowhere else.
2897 */
127fe533
AD
2898 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2899 sizeof(compat_rxnfc->fs.m_ext) !=
2900 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2901 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2902 BUILD_BUG_ON(
2903 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2904 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2905 offsetof(struct ethtool_rxnfc, fs.location) -
2906 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2907
2908 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2909 (void __user *)(&rxnfc->fs.m_ext + 1) -
2910 (void __user *)rxnfc) ||
3a7da39d
BH
2911 copy_in_user(&rxnfc->fs.ring_cookie,
2912 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2913 (void __user *)(&rxnfc->fs.location + 1) -
2914 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2915 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2916 sizeof(rxnfc->rule_cnt)))
2917 return -EFAULT;
2918 }
2919
2920 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2921 if (ret)
2922 return ret;
2923
2924 if (convert_out) {
2925 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2926 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2927 (const void __user *)rxnfc) ||
3a7da39d
BH
2928 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2929 &rxnfc->fs.ring_cookie,
954b1244
SH
2930 (const void __user *)(&rxnfc->fs.location + 1) -
2931 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2932 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2933 sizeof(rxnfc->rule_cnt)))
2934 return -EFAULT;
2935
2936 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2937 /* As an optimisation, we only copy the actual
2938 * number of rules that the underlying
2939 * function returned. Since Mallory might
2940 * change the rule count in user memory, we
2941 * check that it is less than the rule count
2942 * originally given (as the user buffer size),
2943 * which has been range-checked.
2944 */
2945 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2946 return -EFAULT;
2947 if (actual_rule_cnt < rule_cnt)
2948 rule_cnt = actual_rule_cnt;
2949 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2950 &rxnfc->rule_locs[0],
2951 rule_cnt * sizeof(u32)))
2952 return -EFAULT;
2953 }
2954 }
2955
2956 return 0;
7a229387
AB
2957}
2958
7a50a240
AB
2959static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2960{
2961 void __user *uptr;
2962 compat_uptr_t uptr32;
2963 struct ifreq __user *uifr;
2964
c6d409cf 2965 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2966 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2967 return -EFAULT;
2968
2969 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2970 return -EFAULT;
2971
2972 uptr = compat_ptr(uptr32);
2973
2974 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2975 return -EFAULT;
2976
2977 return dev_ioctl(net, SIOCWANDEV, uifr);
2978}
2979
6b96018b
AB
2980static int bond_ioctl(struct net *net, unsigned int cmd,
2981 struct compat_ifreq __user *ifr32)
7a229387
AB
2982{
2983 struct ifreq kifr;
7a229387
AB
2984 mm_segment_t old_fs;
2985 int err;
7a229387
AB
2986
2987 switch (cmd) {
2988 case SIOCBONDENSLAVE:
2989 case SIOCBONDRELEASE:
2990 case SIOCBONDSETHWADDR:
2991 case SIOCBONDCHANGEACTIVE:
6b96018b 2992 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2993 return -EFAULT;
2994
2995 old_fs = get_fs();
c6d409cf 2996 set_fs(KERNEL_DS);
c3f52ae6 2997 err = dev_ioctl(net, cmd,
2998 (struct ifreq __user __force *) &kifr);
c6d409cf 2999 set_fs(old_fs);
7a229387
AB
3000
3001 return err;
7a229387 3002 default:
07d106d0 3003 return -ENOIOCTLCMD;
ccbd6a5a 3004 }
7a229387
AB
3005}
3006
590d4693
BH
3007/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3008static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3009 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
3010{
3011 struct ifreq __user *u_ifreq64;
7a229387
AB
3012 char tmp_buf[IFNAMSIZ];
3013 void __user *data64;
3014 u32 data32;
3015
3016 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
3017 IFNAMSIZ))
3018 return -EFAULT;
417c3522 3019 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
3020 return -EFAULT;
3021 data64 = compat_ptr(data32);
3022
3023 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
3024
7a229387
AB
3025 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
3026 IFNAMSIZ))
3027 return -EFAULT;
417c3522 3028 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
3029 return -EFAULT;
3030
6b96018b 3031 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
3032}
3033
6b96018b
AB
3034static int dev_ifsioc(struct net *net, struct socket *sock,
3035 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 3036{
a2116ed2 3037 struct ifreq __user *uifr;
7a229387
AB
3038 int err;
3039
a2116ed2
AB
3040 uifr = compat_alloc_user_space(sizeof(*uifr));
3041 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3042 return -EFAULT;
3043
3044 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3045
7a229387
AB
3046 if (!err) {
3047 switch (cmd) {
3048 case SIOCGIFFLAGS:
3049 case SIOCGIFMETRIC:
3050 case SIOCGIFMTU:
3051 case SIOCGIFMEM:
3052 case SIOCGIFHWADDR:
3053 case SIOCGIFINDEX:
3054 case SIOCGIFADDR:
3055 case SIOCGIFBRDADDR:
3056 case SIOCGIFDSTADDR:
3057 case SIOCGIFNETMASK:
fab2532b 3058 case SIOCGIFPFLAGS:
7a229387 3059 case SIOCGIFTXQLEN:
fab2532b
AB
3060 case SIOCGMIIPHY:
3061 case SIOCGMIIREG:
a2116ed2 3062 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
3063 err = -EFAULT;
3064 break;
3065 }
3066 }
3067 return err;
3068}
3069
a2116ed2
AB
3070static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3071 struct compat_ifreq __user *uifr32)
3072{
3073 struct ifreq ifr;
3074 struct compat_ifmap __user *uifmap32;
3075 mm_segment_t old_fs;
3076 int err;
3077
3078 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3079 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3080 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3081 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3082 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3083 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3084 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3085 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3086 if (err)
3087 return -EFAULT;
3088
3089 old_fs = get_fs();
c6d409cf 3090 set_fs(KERNEL_DS);
c3f52ae6 3091 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 3092 set_fs(old_fs);
a2116ed2
AB
3093
3094 if (cmd == SIOCGIFMAP && !err) {
3095 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3096 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3097 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3098 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3099 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3100 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3101 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3102 if (err)
3103 err = -EFAULT;
3104 }
3105 return err;
3106}
3107
7a229387 3108struct rtentry32 {
c6d409cf 3109 u32 rt_pad1;
7a229387
AB
3110 struct sockaddr rt_dst; /* target address */
3111 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3112 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3113 unsigned short rt_flags;
3114 short rt_pad2;
3115 u32 rt_pad3;
3116 unsigned char rt_tos;
3117 unsigned char rt_class;
3118 short rt_pad4;
3119 short rt_metric; /* +1 for binary compatibility! */
7a229387 3120 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3121 u32 rt_mtu; /* per route MTU/Window */
3122 u32 rt_window; /* Window clamping */
7a229387
AB
3123 unsigned short rt_irtt; /* Initial RTT */
3124};
3125
3126struct in6_rtmsg32 {
3127 struct in6_addr rtmsg_dst;
3128 struct in6_addr rtmsg_src;
3129 struct in6_addr rtmsg_gateway;
3130 u32 rtmsg_type;
3131 u16 rtmsg_dst_len;
3132 u16 rtmsg_src_len;
3133 u32 rtmsg_metric;
3134 u32 rtmsg_info;
3135 u32 rtmsg_flags;
3136 s32 rtmsg_ifindex;
3137};
3138
6b96018b
AB
3139static int routing_ioctl(struct net *net, struct socket *sock,
3140 unsigned int cmd, void __user *argp)
7a229387
AB
3141{
3142 int ret;
3143 void *r = NULL;
3144 struct in6_rtmsg r6;
3145 struct rtentry r4;
3146 char devname[16];
3147 u32 rtdev;
3148 mm_segment_t old_fs = get_fs();
3149
6b96018b
AB
3150 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3151 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3152 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3153 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3154 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3155 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3156 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3157 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3158 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3159 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3160 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3161
3162 r = (void *) &r6;
3163 } else { /* ipv4 */
6b96018b 3164 struct rtentry32 __user *ur4 = argp;
c6d409cf 3165 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3166 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3167 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3168 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3169 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3170 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3171 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3172 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3173 if (rtdev) {
c6d409cf 3174 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3175 r4.rt_dev = (char __user __force *)devname;
3176 devname[15] = 0;
7a229387
AB
3177 } else
3178 r4.rt_dev = NULL;
3179
3180 r = (void *) &r4;
3181 }
3182
3183 if (ret) {
3184 ret = -EFAULT;
3185 goto out;
3186 }
3187
c6d409cf 3188 set_fs(KERNEL_DS);
6b96018b 3189 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3190 set_fs(old_fs);
7a229387
AB
3191
3192out:
7a229387
AB
3193 return ret;
3194}
3195
3196/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3197 * for some operations; this forces use of the newer bridge-utils that
25985edc 3198 * use compatible ioctls
7a229387 3199 */
6b96018b 3200static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3201{
6b96018b 3202 compat_ulong_t tmp;
7a229387 3203
6b96018b 3204 if (get_user(tmp, argp))
7a229387
AB
3205 return -EFAULT;
3206 if (tmp == BRCTL_GET_VERSION)
3207 return BRCTL_VERSION + 1;
3208 return -EINVAL;
3209}
3210
6b96018b
AB
3211static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3212 unsigned int cmd, unsigned long arg)
3213{
3214 void __user *argp = compat_ptr(arg);
3215 struct sock *sk = sock->sk;
3216 struct net *net = sock_net(sk);
7a229387 3217
6b96018b 3218 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3219 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3220
3221 switch (cmd) {
3222 case SIOCSIFBR:
3223 case SIOCGIFBR:
3224 return old_bridge_ioctl(argp);
3225 case SIOCGIFNAME:
3226 return dev_ifname32(net, argp);
3227 case SIOCGIFCONF:
3228 return dev_ifconf(net, argp);
3229 case SIOCETHTOOL:
3230 return ethtool_ioctl(net, argp);
7a50a240
AB
3231 case SIOCWANDEV:
3232 return compat_siocwandev(net, argp);
a2116ed2
AB
3233 case SIOCGIFMAP:
3234 case SIOCSIFMAP:
3235 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3236 case SIOCBONDENSLAVE:
3237 case SIOCBONDRELEASE:
3238 case SIOCBONDSETHWADDR:
6b96018b
AB
3239 case SIOCBONDCHANGEACTIVE:
3240 return bond_ioctl(net, cmd, argp);
3241 case SIOCADDRT:
3242 case SIOCDELRT:
3243 return routing_ioctl(net, sock, cmd, argp);
3244 case SIOCGSTAMP:
3245 return do_siocgstamp(net, sock, cmd, argp);
3246 case SIOCGSTAMPNS:
3247 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3248 case SIOCBONDSLAVEINFOQUERY:
3249 case SIOCBONDINFOQUERY:
a2116ed2 3250 case SIOCSHWTSTAMP:
fd468c74 3251 case SIOCGHWTSTAMP:
590d4693 3252 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3253
3254 case FIOSETOWN:
3255 case SIOCSPGRP:
3256 case FIOGETOWN:
3257 case SIOCGPGRP:
3258 case SIOCBRADDBR:
3259 case SIOCBRDELBR:
3260 case SIOCGIFVLAN:
3261 case SIOCSIFVLAN:
3262 case SIOCADDDLCI:
3263 case SIOCDELDLCI:
3264 return sock_ioctl(file, cmd, arg);
3265
3266 case SIOCGIFFLAGS:
3267 case SIOCSIFFLAGS:
3268 case SIOCGIFMETRIC:
3269 case SIOCSIFMETRIC:
3270 case SIOCGIFMTU:
3271 case SIOCSIFMTU:
3272 case SIOCGIFMEM:
3273 case SIOCSIFMEM:
3274 case SIOCGIFHWADDR:
3275 case SIOCSIFHWADDR:
3276 case SIOCADDMULTI:
3277 case SIOCDELMULTI:
3278 case SIOCGIFINDEX:
6b96018b
AB
3279 case SIOCGIFADDR:
3280 case SIOCSIFADDR:
3281 case SIOCSIFHWBROADCAST:
6b96018b 3282 case SIOCDIFADDR:
6b96018b
AB
3283 case SIOCGIFBRDADDR:
3284 case SIOCSIFBRDADDR:
3285 case SIOCGIFDSTADDR:
3286 case SIOCSIFDSTADDR:
3287 case SIOCGIFNETMASK:
3288 case SIOCSIFNETMASK:
3289 case SIOCSIFPFLAGS:
3290 case SIOCGIFPFLAGS:
3291 case SIOCGIFTXQLEN:
3292 case SIOCSIFTXQLEN:
3293 case SIOCBRADDIF:
3294 case SIOCBRDELIF:
9177efd3
AB
3295 case SIOCSIFNAME:
3296 case SIOCGMIIPHY:
3297 case SIOCGMIIREG:
3298 case SIOCSMIIREG:
6b96018b 3299 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3300
6b96018b
AB
3301 case SIOCSARP:
3302 case SIOCGARP:
3303 case SIOCDARP:
6b96018b 3304 case SIOCATMARK:
9177efd3
AB
3305 return sock_do_ioctl(net, sock, cmd, arg);
3306 }
3307
6b96018b
AB
3308 return -ENOIOCTLCMD;
3309}
7a229387 3310
95c96174 3311static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3312 unsigned long arg)
89bbfc95
SP
3313{
3314 struct socket *sock = file->private_data;
3315 int ret = -ENOIOCTLCMD;
87de87d5
DM
3316 struct sock *sk;
3317 struct net *net;
3318
3319 sk = sock->sk;
3320 net = sock_net(sk);
89bbfc95
SP
3321
3322 if (sock->ops->compat_ioctl)
3323 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3324
87de87d5
DM
3325 if (ret == -ENOIOCTLCMD &&
3326 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3327 ret = compat_wext_handle_ioctl(net, cmd, arg);
3328
6b96018b
AB
3329 if (ret == -ENOIOCTLCMD)
3330 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3331
89bbfc95
SP
3332 return ret;
3333}
3334#endif
3335
ac5a488e
SS
3336int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3337{
3338 return sock->ops->bind(sock, addr, addrlen);
3339}
c6d409cf 3340EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3341
3342int kernel_listen(struct socket *sock, int backlog)
3343{
3344 return sock->ops->listen(sock, backlog);
3345}
c6d409cf 3346EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3347
3348int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3349{
3350 struct sock *sk = sock->sk;
3351 int err;
3352
3353 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3354 newsock);
3355 if (err < 0)
3356 goto done;
3357
3358 err = sock->ops->accept(sock, *newsock, flags);
3359 if (err < 0) {
3360 sock_release(*newsock);
fa8705b0 3361 *newsock = NULL;
ac5a488e
SS
3362 goto done;
3363 }
3364
3365 (*newsock)->ops = sock->ops;
1b08534e 3366 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3367
3368done:
3369 return err;
3370}
c6d409cf 3371EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3372
3373int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3374 int flags)
ac5a488e
SS
3375{
3376 return sock->ops->connect(sock, addr, addrlen, flags);
3377}
c6d409cf 3378EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3379
3380int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3381 int *addrlen)
3382{
3383 return sock->ops->getname(sock, addr, addrlen, 0);
3384}
c6d409cf 3385EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3386
3387int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3388 int *addrlen)
3389{
3390 return sock->ops->getname(sock, addr, addrlen, 1);
3391}
c6d409cf 3392EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3393
3394int kernel_getsockopt(struct socket *sock, int level, int optname,
3395 char *optval, int *optlen)
3396{
3397 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3398 char __user *uoptval;
3399 int __user *uoptlen;
ac5a488e
SS
3400 int err;
3401
fb8621bb
NK
3402 uoptval = (char __user __force *) optval;
3403 uoptlen = (int __user __force *) optlen;
3404
ac5a488e
SS
3405 set_fs(KERNEL_DS);
3406 if (level == SOL_SOCKET)
fb8621bb 3407 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3408 else
fb8621bb
NK
3409 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3410 uoptlen);
ac5a488e
SS
3411 set_fs(oldfs);
3412 return err;
3413}
c6d409cf 3414EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3415
3416int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3417 char *optval, unsigned int optlen)
ac5a488e
SS
3418{
3419 mm_segment_t oldfs = get_fs();
fb8621bb 3420 char __user *uoptval;
ac5a488e
SS
3421 int err;
3422
fb8621bb
NK
3423 uoptval = (char __user __force *) optval;
3424
ac5a488e
SS
3425 set_fs(KERNEL_DS);
3426 if (level == SOL_SOCKET)
fb8621bb 3427 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3428 else
fb8621bb 3429 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3430 optlen);
3431 set_fs(oldfs);
3432 return err;
3433}
c6d409cf 3434EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3435
3436int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3437 size_t size, int flags)
3438{
3439 if (sock->ops->sendpage)
3440 return sock->ops->sendpage(sock, page, offset, size, flags);
3441
3442 return sock_no_sendpage(sock, page, offset, size, flags);
3443}
c6d409cf 3444EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3445
3446int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3447{
3448 mm_segment_t oldfs = get_fs();
3449 int err;
3450
3451 set_fs(KERNEL_DS);
3452 err = sock->ops->ioctl(sock, cmd, arg);
3453 set_fs(oldfs);
3454
3455 return err;
3456}
c6d409cf 3457EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3458
91cf45f0
TM
3459int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3460{
3461 return sock->ops->shutdown(sock, how);
3462}
91cf45f0 3463EXPORT_SYMBOL(kernel_sock_shutdown);