cxgb4i : Move stray CPL definitions to cxgb4 driver
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
06021292 109
#ifdef CONFIG_NET_RX_BUSY_POLL
/*
 * Busy-poll settings, only built when CONFIG_NET_RX_BUSY_POLL is set.
 * NOTE(review): the sysctl_ prefix suggests these are exposed as sysctls;
 * the registration is not visible here - confirm.
 */
unsigned int sysctl_net_busy_poll __read_mostly;
unsigned int sysctl_net_busy_read __read_mostly;
#endif
6b96018b 114
1da177e4 115static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
116static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
117 unsigned long nr_segs, loff_t pos);
118static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
119 unsigned long nr_segs, loff_t pos);
89bddce5 120static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
121
122static int sock_close(struct inode *inode, struct file *file);
123static unsigned int sock_poll(struct file *file,
124 struct poll_table_struct *wait);
89bddce5 125static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
126#ifdef CONFIG_COMPAT
127static long compat_sock_ioctl(struct file *file,
89bddce5 128 unsigned int cmd, unsigned long arg);
89bbfc95 129#endif
1da177e4 130static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
131static ssize_t sock_sendpage(struct file *file, struct page *page,
132 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 133static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 134 struct pipe_inode_info *pipe, size_t len,
9c55e01c 135 unsigned int flags);
1da177e4 136
1da177e4
LT
137/*
138 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
139 * in the operation structures but are done directly via the socketcall() multiplexor.
140 */
141
da7071d7 142static const struct file_operations socket_file_ops = {
1da177e4
LT
143 .owner = THIS_MODULE,
144 .llseek = no_llseek,
145 .aio_read = sock_aio_read,
146 .aio_write = sock_aio_write,
147 .poll = sock_poll,
148 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
149#ifdef CONFIG_COMPAT
150 .compat_ioctl = compat_sock_ioctl,
151#endif
1da177e4
LT
152 .mmap = sock_mmap,
153 .open = sock_no_open, /* special open code to disallow open via /proc */
154 .release = sock_close,
155 .fasync = sock_fasync,
5274f052
JA
156 .sendpage = sock_sendpage,
157 .splice_write = generic_splice_sendpage,
9c55e01c 158 .splice_read = sock_splice_read,
1da177e4
LT
159};
160
161/*
162 * The protocol list. Each protocol is registered in here.
163 */
164
1da177e4 165static DEFINE_SPINLOCK(net_family_lock);
190683a9 166static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 167
1da177e4
LT
168/*
169 * Statistics counters of the socket lists
170 */
171
c6d409cf 172static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
173
/*
 * Support routines.
 * Move socket addresses back and forth across the kernel/user
 * divide and look after the messy bits.
 */
179
1da177e4
LT
180/**
181 * move_addr_to_kernel - copy a socket address into kernel space
182 * @uaddr: Address in user space
183 * @kaddr: Address in kernel space
184 * @ulen: Length in user space
185 *
186 * The address is copied into kernel space. If the provided address is
187 * too long an error code of -EINVAL is returned. If the copy gives
188 * invalid addresses -EFAULT is returned. On a success 0 is returned.
189 */
190
43db362d 191int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 192{
230b1839 193 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 194 return -EINVAL;
89bddce5 195 if (ulen == 0)
1da177e4 196 return 0;
89bddce5 197 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 198 return -EFAULT;
3ec3b2fb 199 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
200}
201
202/**
203 * move_addr_to_user - copy an address to user space
204 * @kaddr: kernel space address
205 * @klen: length of address in kernel
206 * @uaddr: user space address
207 * @ulen: pointer to user length field
208 *
209 * The value pointed to by ulen on entry is the buffer length available.
210 * This is overwritten with the buffer space used. -EINVAL is returned
211 * if an overlong buffer is specified or a negative buffer size. -EFAULT
212 * is returned if either the buffer or the length field are not
213 * accessible.
214 * After copying the data up to the limit the user specifies, the true
215 * length of the data is written over the length limit the user
216 * specified. Zero is returned for a success.
217 */
89bddce5 218
43db362d 219static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 220 void __user *uaddr, int __user *ulen)
1da177e4
LT
221{
222 int err;
223 int len;
224
68c6beb3 225 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
226 err = get_user(len, ulen);
227 if (err)
1da177e4 228 return err;
89bddce5
SH
229 if (len > klen)
230 len = klen;
68c6beb3 231 if (len < 0)
1da177e4 232 return -EINVAL;
89bddce5 233 if (len) {
d6fe3945
SG
234 if (audit_sockaddr(klen, kaddr))
235 return -ENOMEM;
89bddce5 236 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
237 return -EFAULT;
238 }
239 /*
89bddce5
SH
240 * "fromlen shall refer to the value before truncation.."
241 * 1003.1g
1da177e4
LT
242 */
243 return __put_user(klen, ulen);
244}
245
e18b890b 246static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
247
248static struct inode *sock_alloc_inode(struct super_block *sb)
249{
250 struct socket_alloc *ei;
eaefd110 251 struct socket_wq *wq;
89bddce5 252
e94b1766 253 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
254 if (!ei)
255 return NULL;
eaefd110
ED
256 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
257 if (!wq) {
43815482
ED
258 kmem_cache_free(sock_inode_cachep, ei);
259 return NULL;
260 }
eaefd110
ED
261 init_waitqueue_head(&wq->wait);
262 wq->fasync_list = NULL;
263 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 264
1da177e4
LT
265 ei->socket.state = SS_UNCONNECTED;
266 ei->socket.flags = 0;
267 ei->socket.ops = NULL;
268 ei->socket.sk = NULL;
269 ei->socket.file = NULL;
1da177e4
LT
270
271 return &ei->vfs_inode;
272}
273
274static void sock_destroy_inode(struct inode *inode)
275{
43815482 276 struct socket_alloc *ei;
eaefd110 277 struct socket_wq *wq;
43815482
ED
278
279 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 280 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 281 kfree_rcu(wq, rcu);
43815482 282 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
283}
284
51cc5068 285static void init_once(void *foo)
1da177e4 286{
89bddce5 287 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 288
a35afb83 289 inode_init_once(&ei->vfs_inode);
1da177e4 290}
89bddce5 291
1da177e4
LT
292static int init_inodecache(void)
293{
294 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
295 sizeof(struct socket_alloc),
296 0,
297 (SLAB_HWCACHE_ALIGN |
298 SLAB_RECLAIM_ACCOUNT |
299 SLAB_MEM_SPREAD),
20c2df83 300 init_once);
1da177e4
LT
301 if (sock_inode_cachep == NULL)
302 return -ENOMEM;
303 return 0;
304}
305
b87221de 306static const struct super_operations sockfs_ops = {
c6d409cf
ED
307 .alloc_inode = sock_alloc_inode,
308 .destroy_inode = sock_destroy_inode,
309 .statfs = simple_statfs,
1da177e4
LT
310};
311
c23fbb6b
ED
312/*
313 * sockfs_dname() is called from d_path().
314 */
315static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
316{
317 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
318 dentry->d_inode->i_ino);
319}
320
3ba13d17 321static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 322 .d_dname = sockfs_dname,
1da177e4
LT
323};
324
c74a1cbb
AV
325static struct dentry *sockfs_mount(struct file_system_type *fs_type,
326 int flags, const char *dev_name, void *data)
327{
328 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
329 &sockfs_dentry_operations, SOCKFS_MAGIC);
330}
331
332static struct vfsmount *sock_mnt __read_mostly;
333
334static struct file_system_type sock_fs_type = {
335 .name = "sockfs",
336 .mount = sockfs_mount,
337 .kill_sb = kill_anon_super,
338};
339
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them into the fd space
 *	of the current process. On success the file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we do not refer
 *	to the socket after mapping. If one day we need to, this function
 *	will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable with shared fd spaces; we cannot
 *	solve it inside the kernel, but we still take care of internal
 *	coherence.
 */
356
aab174f0 357struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 358{
7cbe66b6 359 struct qstr name = { .name = "" };
2c48b9c4 360 struct path path;
7cbe66b6 361 struct file *file;
1da177e4 362
600e1779
MY
363 if (dname) {
364 name.name = dname;
365 name.len = strlen(name.name);
366 } else if (sock->sk) {
367 name.name = sock->sk->sk_prot_creator->name;
368 name.len = strlen(name.name);
369 }
4b936885 370 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
371 if (unlikely(!path.dentry))
372 return ERR_PTR(-ENOMEM);
2c48b9c4 373 path.mnt = mntget(sock_mnt);
39d8c1b6 374
2c48b9c4 375 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 376 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 377
2c48b9c4 378 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 379 &socket_file_ops);
39b65252 380 if (unlikely(IS_ERR(file))) {
cc3808f8 381 /* drop dentry, keep inode */
7de9c6ee 382 ihold(path.dentry->d_inode);
2c48b9c4 383 path_put(&path);
39b65252 384 return file;
cc3808f8
AV
385 }
386
387 sock->file = file;
77d27200 388 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 389 file->private_data = sock;
28407630 390 return file;
39d8c1b6 391}
56b31d1c 392EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 393
56b31d1c 394static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
395{
396 struct file *newfile;
28407630
AV
397 int fd = get_unused_fd_flags(flags);
398 if (unlikely(fd < 0))
399 return fd;
39d8c1b6 400
aab174f0 401 newfile = sock_alloc_file(sock, flags, NULL);
28407630 402 if (likely(!IS_ERR(newfile))) {
39d8c1b6 403 fd_install(fd, newfile);
28407630
AV
404 return fd;
405 }
7cbe66b6 406
28407630
AV
407 put_unused_fd(fd);
408 return PTR_ERR(newfile);
1da177e4
LT
409}
410
406a3c63 411struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 412{
6cb153ca
BL
413 if (file->f_op == &socket_file_ops)
414 return file->private_data; /* set in sock_map_fd */
415
23bb80d2
ED
416 *err = -ENOTSOCK;
417 return NULL;
6cb153ca 418}
406a3c63 419EXPORT_SYMBOL(sock_from_file);
6cb153ca 420
1da177e4 421/**
c6d409cf 422 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
423 * @fd: file handle
424 * @err: pointer to an error code return
425 *
426 * The file handle passed in is locked and the socket it is bound
427 * too is returned. If an error occurs the err pointer is overwritten
428 * with a negative errno code and NULL is returned. The function checks
429 * for both invalid handles and passing a handle which is not a socket.
430 *
431 * On a success the socket object pointer is returned.
432 */
433
434struct socket *sockfd_lookup(int fd, int *err)
435{
436 struct file *file;
1da177e4
LT
437 struct socket *sock;
438
89bddce5
SH
439 file = fget(fd);
440 if (!file) {
1da177e4
LT
441 *err = -EBADF;
442 return NULL;
443 }
89bddce5 444
6cb153ca
BL
445 sock = sock_from_file(file, err);
446 if (!sock)
1da177e4 447 fput(file);
6cb153ca
BL
448 return sock;
449}
c6d409cf 450EXPORT_SYMBOL(sockfd_lookup);
1da177e4 451
6cb153ca
BL
452static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
453{
00e188ef 454 struct fd f = fdget(fd);
6cb153ca
BL
455 struct socket *sock;
456
3672558c 457 *err = -EBADF;
00e188ef
AV
458 if (f.file) {
459 sock = sock_from_file(f.file, err);
460 if (likely(sock)) {
461 *fput_needed = f.flags;
6cb153ca 462 return sock;
00e188ef
AV
463 }
464 fdput(f);
1da177e4 465 }
6cb153ca 466 return NULL;
1da177e4
LT
467}
468
600e1779
MY
469#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
470#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
471#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
472static ssize_t sockfs_getxattr(struct dentry *dentry,
473 const char *name, void *value, size_t size)
474{
475 const char *proto_name;
476 size_t proto_size;
477 int error;
478
479 error = -ENODATA;
480 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
481 proto_name = dentry->d_name.name;
482 proto_size = strlen(proto_name);
483
484 if (value) {
485 error = -ERANGE;
486 if (proto_size + 1 > size)
487 goto out;
488
489 strncpy(value, proto_name, proto_size + 1);
490 }
491 error = proto_size + 1;
492 }
493
494out:
495 return error;
496}
497
498static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
499 size_t size)
500{
501 ssize_t len;
502 ssize_t used = 0;
503
504 len = security_inode_listsecurity(dentry->d_inode, buffer, size);
505 if (len < 0)
506 return len;
507 used += len;
508 if (buffer) {
509 if (size < used)
510 return -ERANGE;
511 buffer += len;
512 }
513
514 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
515 used += len;
516 if (buffer) {
517 if (size < used)
518 return -ERANGE;
519 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
520 buffer += len;
521 }
522
523 return used;
524}
525
526static const struct inode_operations sockfs_inode_ops = {
527 .getxattr = sockfs_getxattr,
528 .listxattr = sockfs_listxattr,
529};
530
1da177e4
LT
531/**
532 * sock_alloc - allocate a socket
89bddce5 533 *
1da177e4
LT
534 * Allocate a new inode and socket object. The two are bound together
535 * and initialised. The socket is then returned. If we are out of inodes
536 * NULL is returned.
537 */
538
539static struct socket *sock_alloc(void)
540{
89bddce5
SH
541 struct inode *inode;
542 struct socket *sock;
1da177e4 543
a209dfc7 544 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
545 if (!inode)
546 return NULL;
547
548 sock = SOCKET_I(inode);
549
29a020d3 550 kmemcheck_annotate_bitfield(sock, type);
85fe4025 551 inode->i_ino = get_next_ino();
89bddce5 552 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
553 inode->i_uid = current_fsuid();
554 inode->i_gid = current_fsgid();
600e1779 555 inode->i_op = &sockfs_inode_ops;
1da177e4 556
19e8d69c 557 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
558 return sock;
559}
560
/*
 *	In theory you can't get an open on this inode, but /proc provides
 *	a back door. Remember to keep it shut otherwise you'll let the
 *	creepy crawlies in.
 *
 *	Every open attempt is refused with -ENXIO; both arguments are
 *	ignored.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	const int refused = -ENXIO;

	return refused;
}
571
4b6f5d20 572const struct file_operations bad_sock_fops = {
1da177e4
LT
573 .owner = THIS_MODULE,
574 .open = sock_no_open,
6038f373 575 .llseek = noop_llseek,
1da177e4
LT
576};
577
578/**
579 * sock_release - close a socket
580 * @sock: socket to close
581 *
582 * The socket is released from the protocol stack if it has a release
583 * callback, and the inode is then released if the socket is bound to
89bddce5 584 * an inode not a file.
1da177e4 585 */
89bddce5 586
1da177e4
LT
587void sock_release(struct socket *sock)
588{
589 if (sock->ops) {
590 struct module *owner = sock->ops->owner;
591
592 sock->ops->release(sock);
593 sock->ops = NULL;
594 module_put(owner);
595 }
596
eaefd110 597 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 598 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 599
b09e786b
MP
600 if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
601 return;
602
19e8d69c 603 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
604 if (!sock->file) {
605 iput(SOCK_INODE(sock));
606 return;
607 }
89bddce5 608 sock->file = NULL;
1da177e4 609}
c6d409cf 610EXPORT_SYMBOL(sock_release);
1da177e4 611
bf84a010 612void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
20d49473 613{
2244d07b 614 *tx_flags = 0;
20d49473 615 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
2244d07b 616 *tx_flags |= SKBTX_HW_TSTAMP;
20d49473 617 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
2244d07b 618 *tx_flags |= SKBTX_SW_TSTAMP;
6e3e939f
JB
619 if (sock_flag(sk, SOCK_WIFI_STATUS))
620 *tx_flags |= SKBTX_WIFI_STATUS;
20d49473
PO
621}
622EXPORT_SYMBOL(sock_tx_timestamp);
623
228e548e
AB
624static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
625 struct msghdr *msg, size_t size)
1da177e4
LT
626{
627 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4
LT
628
629 si->sock = sock;
630 si->scm = NULL;
631 si->msg = msg;
632 si->size = size;
633
1da177e4
LT
634 return sock->ops->sendmsg(iocb, sock, msg, size);
635}
636
228e548e
AB
637static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
638 struct msghdr *msg, size_t size)
639{
640 int err = security_socket_sendmsg(sock, msg, size);
641
642 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
643}
644
1da177e4
LT
645int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
646{
647 struct kiocb iocb;
648 struct sock_iocb siocb;
649 int ret;
650
651 init_sync_kiocb(&iocb, NULL);
652 iocb.private = &siocb;
653 ret = __sock_sendmsg(&iocb, sock, msg, size);
654 if (-EIOCBQUEUED == ret)
655 ret = wait_on_sync_kiocb(&iocb);
656 return ret;
657}
c6d409cf 658EXPORT_SYMBOL(sock_sendmsg);
1da177e4 659
894dc24c 660static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e
AB
661{
662 struct kiocb iocb;
663 struct sock_iocb siocb;
664 int ret;
665
666 init_sync_kiocb(&iocb, NULL);
667 iocb.private = &siocb;
668 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
669 if (-EIOCBQUEUED == ret)
670 ret = wait_on_sync_kiocb(&iocb);
671 return ret;
672}
673
1da177e4
LT
674int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
675 struct kvec *vec, size_t num, size_t size)
676{
677 mm_segment_t oldfs = get_fs();
678 int result;
679
680 set_fs(KERNEL_DS);
681 /*
682 * the following is safe, since for compiler definitions of kvec and
683 * iovec are identical, yielding the same in-core layout and alignment
684 */
89bddce5 685 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
686 msg->msg_iovlen = num;
687 result = sock_sendmsg(sock, msg, size);
688 set_fs(oldfs);
689 return result;
690}
c6d409cf 691EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 692
92f37fd2
ED
693/*
694 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
695 */
696void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
697 struct sk_buff *skb)
698{
20d49473
PO
699 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
700 struct timespec ts[3];
701 int empty = 1;
702 struct skb_shared_hwtstamps *shhwtstamps =
703 skb_hwtstamps(skb);
704
705 /* Race occurred between timestamp enabling and packet
706 receiving. Fill in the current time for now. */
707 if (need_software_tstamp && skb->tstamp.tv64 == 0)
708 __net_timestamp(skb);
709
710 if (need_software_tstamp) {
711 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
712 struct timeval tv;
713 skb_get_timestamp(skb, &tv);
714 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
715 sizeof(tv), &tv);
716 } else {
842509b8 717 skb_get_timestampns(skb, &ts[0]);
20d49473 718 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 719 sizeof(ts[0]), &ts[0]);
20d49473
PO
720 }
721 }
722
723
724 memset(ts, 0, sizeof(ts));
6e94d1ef
DB
725 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) &&
726 ktime_to_timespec_cond(skb->tstamp, ts + 0))
20d49473 727 empty = 0;
4d276eb6
WB
728 if (shhwtstamps &&
729 sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
730 ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2))
731 empty = 0;
20d49473
PO
732 if (!empty)
733 put_cmsg(msg, SOL_SOCKET,
734 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 735}
7c81fd8b
ACM
736EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
737
6e3e939f
JB
738void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
739 struct sk_buff *skb)
740{
741 int ack;
742
743 if (!sock_flag(sk, SOCK_WIFI_STATUS))
744 return;
745 if (!skb->wifi_acked_valid)
746 return;
747
748 ack = skb->wifi_acked;
749
750 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
751}
752EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
753
11165f14 754static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
755 struct sk_buff *skb)
3b885787
NH
756{
757 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
758 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
759 sizeof(__u32), &skb->dropcount);
760}
761
/* Attach timestamp cmsgs first, then the drop-count cmsg, to @msg. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);

	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 769
a2e27255
ACM
770static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
771 struct msghdr *msg, size_t size, int flags)
1da177e4 772{
1da177e4
LT
773 struct sock_iocb *si = kiocb_to_siocb(iocb);
774
775 si->sock = sock;
776 si->scm = NULL;
777 si->msg = msg;
778 si->size = size;
779 si->flags = flags;
780
1da177e4
LT
781 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
782}
783
a2e27255
ACM
784static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
785 struct msghdr *msg, size_t size, int flags)
786{
787 int err = security_socket_recvmsg(sock, msg, size, flags);
788
789 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
790}
791
89bddce5 792int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
793 size_t size, int flags)
794{
795 struct kiocb iocb;
796 struct sock_iocb siocb;
797 int ret;
798
89bddce5 799 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
800 iocb.private = &siocb;
801 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
802 if (-EIOCBQUEUED == ret)
803 ret = wait_on_sync_kiocb(&iocb);
804 return ret;
805}
c6d409cf 806EXPORT_SYMBOL(sock_recvmsg);
1da177e4 807
a2e27255
ACM
808static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
809 size_t size, int flags)
810{
811 struct kiocb iocb;
812 struct sock_iocb siocb;
813 int ret;
814
815 init_sync_kiocb(&iocb, NULL);
816 iocb.private = &siocb;
817 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
818 if (-EIOCBQUEUED == ret)
819 ret = wait_on_sync_kiocb(&iocb);
820 return ret;
821}
822
c1249c0a
ML
823/**
824 * kernel_recvmsg - Receive a message from a socket (kernel space)
825 * @sock: The socket to receive the message from
826 * @msg: Received message
827 * @vec: Input s/g array for message data
828 * @num: Size of input s/g array
829 * @size: Number of bytes to read
830 * @flags: Message flags (MSG_DONTWAIT, etc...)
831 *
832 * On return the msg structure contains the scatter/gather array passed in the
833 * vec argument. The array is modified so that it consists of the unfilled
834 * portion of the original array.
835 *
836 * The returned value is the total number of bytes received, or an error.
837 */
89bddce5
SH
838int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
839 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
840{
841 mm_segment_t oldfs = get_fs();
842 int result;
843
844 set_fs(KERNEL_DS);
845 /*
846 * the following is safe, since for compiler definitions of kvec and
847 * iovec are identical, yielding the same in-core layout and alignment
848 */
89bddce5 849 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
850 result = sock_recvmsg(sock, msg, size, flags);
851 set_fs(oldfs);
852 return result;
853}
c6d409cf 854EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 855
ce1d4d3e
CH
856static ssize_t sock_sendpage(struct file *file, struct page *page,
857 int offset, size_t size, loff_t *ppos, int more)
1da177e4 858{
1da177e4
LT
859 struct socket *sock;
860 int flags;
861
ce1d4d3e
CH
862 sock = file->private_data;
863
35f9c09f
ED
864 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
865 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
866 flags |= more;
ce1d4d3e 867
e6949583 868 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 869}
1da177e4 870
9c55e01c 871static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 872 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
873 unsigned int flags)
874{
875 struct socket *sock = file->private_data;
876
997b37da
RDC
877 if (unlikely(!sock->ops->splice_read))
878 return -EINVAL;
879
9c55e01c
JA
880 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
881}
882
ce1d4d3e 883static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 884 struct sock_iocb *siocb)
ce1d4d3e 885{
d29c445b
KO
886 if (!is_sync_kiocb(iocb))
887 BUG();
1da177e4 888
ce1d4d3e 889 siocb->kiocb = iocb;
ce1d4d3e
CH
890 iocb->private = siocb;
891 return siocb;
1da177e4
LT
892}
893
ce1d4d3e 894static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
895 struct file *file, const struct iovec *iov,
896 unsigned long nr_segs)
ce1d4d3e
CH
897{
898 struct socket *sock = file->private_data;
899 size_t size = 0;
900 int i;
1da177e4 901
89bddce5
SH
902 for (i = 0; i < nr_segs; i++)
903 size += iov[i].iov_len;
1da177e4 904
ce1d4d3e
CH
905 msg->msg_name = NULL;
906 msg->msg_namelen = 0;
907 msg->msg_control = NULL;
908 msg->msg_controllen = 0;
89bddce5 909 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
910 msg->msg_iovlen = nr_segs;
911 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
912
913 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
914}
915
027445c3
BP
916static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
917 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
918{
919 struct sock_iocb siocb, *x;
920
1da177e4
LT
921 if (pos != 0)
922 return -ESPIPE;
027445c3 923
73a7075e 924 if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
1da177e4
LT
925 return 0;
926
027445c3
BP
927
928 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
929 if (!x)
930 return -ENOMEM;
027445c3 931 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
932}
933
ce1d4d3e 934static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
935 struct file *file, const struct iovec *iov,
936 unsigned long nr_segs)
1da177e4 937{
ce1d4d3e
CH
938 struct socket *sock = file->private_data;
939 size_t size = 0;
940 int i;
1da177e4 941
89bddce5
SH
942 for (i = 0; i < nr_segs; i++)
943 size += iov[i].iov_len;
1da177e4 944
ce1d4d3e
CH
945 msg->msg_name = NULL;
946 msg->msg_namelen = 0;
947 msg->msg_control = NULL;
948 msg->msg_controllen = 0;
89bddce5 949 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
950 msg->msg_iovlen = nr_segs;
951 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
952 if (sock->type == SOCK_SEQPACKET)
953 msg->msg_flags |= MSG_EOR;
1da177e4 954
ce1d4d3e 955 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
956}
957
027445c3
BP
958static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
959 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
960{
961 struct sock_iocb siocb, *x;
1da177e4 962
ce1d4d3e
CH
963 if (pos != 0)
964 return -ESPIPE;
027445c3 965
027445c3 966 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
967 if (!x)
968 return -ENOMEM;
1da177e4 969
027445c3 970 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
971}
972
1da177e4
LT
973/*
974 * Atomic setting of ioctl hooks to avoid race
975 * with module unload.
976 */
977
4a3e2f71 978static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 979static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 980
881d966b 981void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 982{
4a3e2f71 983 mutex_lock(&br_ioctl_mutex);
1da177e4 984 br_ioctl_hook = hook;
4a3e2f71 985 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
986}
987EXPORT_SYMBOL(brioctl_set);
988
4a3e2f71 989static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 990static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 991
881d966b 992void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 993{
4a3e2f71 994 mutex_lock(&vlan_ioctl_mutex);
1da177e4 995 vlan_ioctl_hook = hook;
4a3e2f71 996 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
997}
998EXPORT_SYMBOL(vlan_ioctl_set);
999
4a3e2f71 1000static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1001static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1002
89bddce5 1003void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1004{
4a3e2f71 1005 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1006 dlci_ioctl_hook = hook;
4a3e2f71 1007 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1008}
1009EXPORT_SYMBOL(dlci_ioctl_set);
1010
6b96018b
AB
1011static long sock_do_ioctl(struct net *net, struct socket *sock,
1012 unsigned int cmd, unsigned long arg)
1013{
1014 int err;
1015 void __user *argp = (void __user *)arg;
1016
1017 err = sock->ops->ioctl(sock, cmd, arg);
1018
1019 /*
1020 * If this ioctl is unknown try to hand it down
1021 * to the NIC driver.
1022 */
1023 if (err == -ENOIOCTLCMD)
1024 err = dev_ioctl(net, cmd, argp);
1025
1026 return err;
1027}
1028
1da177e4
LT
1029/*
1030 * With an ioctl, arg may well be a user mode pointer, but we don't know
1031 * what to do with it - that's up to the protocol still.
1032 */
1033
1034static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1035{
1036 struct socket *sock;
881d966b 1037 struct sock *sk;
1da177e4
LT
1038 void __user *argp = (void __user *)arg;
1039 int pid, err;
881d966b 1040 struct net *net;
1da177e4 1041
b69aee04 1042 sock = file->private_data;
881d966b 1043 sk = sock->sk;
3b1e0a65 1044 net = sock_net(sk);
1da177e4 1045 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1046 err = dev_ioctl(net, cmd, argp);
1da177e4 1047 } else
3d23e349 1048#ifdef CONFIG_WEXT_CORE
1da177e4 1049 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1050 err = dev_ioctl(net, cmd, argp);
1da177e4 1051 } else
3d23e349 1052#endif
89bddce5 1053 switch (cmd) {
1da177e4
LT
1054 case FIOSETOWN:
1055 case SIOCSPGRP:
1056 err = -EFAULT;
1057 if (get_user(pid, (int __user *)argp))
1058 break;
1059 err = f_setown(sock->file, pid, 1);
1060 break;
1061 case FIOGETOWN:
1062 case SIOCGPGRP:
609d7fa9 1063 err = put_user(f_getown(sock->file),
89bddce5 1064 (int __user *)argp);
1da177e4
LT
1065 break;
1066 case SIOCGIFBR:
1067 case SIOCSIFBR:
1068 case SIOCBRADDBR:
1069 case SIOCBRDELBR:
1070 err = -ENOPKG;
1071 if (!br_ioctl_hook)
1072 request_module("bridge");
1073
4a3e2f71 1074 mutex_lock(&br_ioctl_mutex);
89bddce5 1075 if (br_ioctl_hook)
881d966b 1076 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1077 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1078 break;
1079 case SIOCGIFVLAN:
1080 case SIOCSIFVLAN:
1081 err = -ENOPKG;
1082 if (!vlan_ioctl_hook)
1083 request_module("8021q");
1084
4a3e2f71 1085 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1086 if (vlan_ioctl_hook)
881d966b 1087 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1088 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1089 break;
1da177e4
LT
1090 case SIOCADDDLCI:
1091 case SIOCDELDLCI:
1092 err = -ENOPKG;
1093 if (!dlci_ioctl_hook)
1094 request_module("dlci");
1095
7512cbf6
PE
1096 mutex_lock(&dlci_ioctl_mutex);
1097 if (dlci_ioctl_hook)
1da177e4 1098 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1099 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1100 break;
1101 default:
6b96018b 1102 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1103 break;
89bddce5 1104 }
1da177e4
LT
1105 return err;
1106}
1107
1108int sock_create_lite(int family, int type, int protocol, struct socket **res)
1109{
1110 int err;
1111 struct socket *sock = NULL;
89bddce5 1112
1da177e4
LT
1113 err = security_socket_create(family, type, protocol, 1);
1114 if (err)
1115 goto out;
1116
1117 sock = sock_alloc();
1118 if (!sock) {
1119 err = -ENOMEM;
1120 goto out;
1121 }
1122
1da177e4 1123 sock->type = type;
7420ed23
VY
1124 err = security_socket_post_create(sock, family, type, protocol, 1);
1125 if (err)
1126 goto out_release;
1127
1da177e4
LT
1128out:
1129 *res = sock;
1130 return err;
7420ed23
VY
1131out_release:
1132 sock_release(sock);
1133 sock = NULL;
1134 goto out;
1da177e4 1135}
c6d409cf 1136EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1137
1138/* No kernel lock held - perfect */
89bddce5 1139static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1140{
cbf55001 1141 unsigned int busy_flag = 0;
1da177e4
LT
1142 struct socket *sock;
1143
1144 /*
89bddce5 1145 * We can't return errors to poll, so it's either yes or no.
1da177e4 1146 */
b69aee04 1147 sock = file->private_data;
2d48d67f 1148
cbf55001 1149 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1150 /* this socket can poll_ll so tell the system call */
cbf55001 1151 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1152
1153 /* once, only if requested by syscall */
cbf55001
ET
1154 if (wait && (wait->_key & POLL_BUSY_LOOP))
1155 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1156 }
1157
cbf55001 1158 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1159}
1160
89bddce5 1161static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1162{
b69aee04 1163 struct socket *sock = file->private_data;
1da177e4
LT
1164
1165 return sock->ops->mmap(file, sock, vma);
1166}
1167
/* Releases the socket embedded in @inode; always returns 0. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1173
1174/*
1175 * Update the socket async list
1176 *
1177 * Fasync_list locking strategy.
1178 *
1179 * 1. fasync_list is modified only under process context socket lock
1180 * i.e. under semaphore.
1181 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1182 * or under socket lock
1da177e4
LT
1183 */
1184
1185static int sock_fasync(int fd, struct file *filp, int on)
1186{
989a2979
ED
1187 struct socket *sock = filp->private_data;
1188 struct sock *sk = sock->sk;
eaefd110 1189 struct socket_wq *wq;
1da177e4 1190
989a2979 1191 if (sk == NULL)
1da177e4 1192 return -EINVAL;
1da177e4
LT
1193
1194 lock_sock(sk);
eaefd110
ED
1195 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1196 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1197
eaefd110 1198 if (!wq->fasync_list)
989a2979
ED
1199 sock_reset_flag(sk, SOCK_FASYNC);
1200 else
bcdce719 1201 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1202
989a2979 1203 release_sock(sk);
1da177e4
LT
1204 return 0;
1205}
1206
43815482 1207/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1208
1209int sock_wake_async(struct socket *sock, int how, int band)
1210{
43815482
ED
1211 struct socket_wq *wq;
1212
1213 if (!sock)
1214 return -1;
1215 rcu_read_lock();
1216 wq = rcu_dereference(sock->wq);
1217 if (!wq || !wq->fasync_list) {
1218 rcu_read_unlock();
1da177e4 1219 return -1;
43815482 1220 }
89bddce5 1221 switch (how) {
8d8ad9d7 1222 case SOCK_WAKE_WAITD:
1da177e4
LT
1223 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1224 break;
1225 goto call_kill;
8d8ad9d7 1226 case SOCK_WAKE_SPACE:
1da177e4
LT
1227 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1228 break;
1229 /* fall through */
8d8ad9d7 1230 case SOCK_WAKE_IO:
89bddce5 1231call_kill:
43815482 1232 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1233 break;
8d8ad9d7 1234 case SOCK_WAKE_URG:
43815482 1235 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1236 }
43815482 1237 rcu_read_unlock();
1da177e4
LT
1238 return 0;
1239}
c6d409cf 1240EXPORT_SYMBOL(sock_wake_async);
1da177e4 1241
721db93a 1242int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1243 struct socket **res, int kern)
1da177e4
LT
1244{
1245 int err;
1246 struct socket *sock;
55737fda 1247 const struct net_proto_family *pf;
1da177e4
LT
1248
1249 /*
89bddce5 1250 * Check protocol is in range
1da177e4
LT
1251 */
1252 if (family < 0 || family >= NPROTO)
1253 return -EAFNOSUPPORT;
1254 if (type < 0 || type >= SOCK_MAX)
1255 return -EINVAL;
1256
1257 /* Compatibility.
1258
1259 This uglymoron is moved from INET layer to here to avoid
1260 deadlock in module load.
1261 */
1262 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1263 static int warned;
1da177e4
LT
1264 if (!warned) {
1265 warned = 1;
3410f22e
YY
1266 pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1267 current->comm);
1da177e4
LT
1268 }
1269 family = PF_PACKET;
1270 }
1271
1272 err = security_socket_create(family, type, protocol, kern);
1273 if (err)
1274 return err;
89bddce5 1275
55737fda
SH
1276 /*
1277 * Allocate the socket and allow the family to set things up. if
1278 * the protocol is 0, the family is instructed to select an appropriate
1279 * default.
1280 */
1281 sock = sock_alloc();
1282 if (!sock) {
e87cc472 1283 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1284 return -ENFILE; /* Not exactly a match, but its the
1285 closest posix thing */
1286 }
1287
1288 sock->type = type;
1289
95a5afca 1290#ifdef CONFIG_MODULES
89bddce5
SH
1291 /* Attempt to load a protocol module if the find failed.
1292 *
1293 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1294 * requested real, full-featured networking support upon configuration.
1295 * Otherwise module support will break!
1296 */
190683a9 1297 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1298 request_module("net-pf-%d", family);
1da177e4
LT
1299#endif
1300
55737fda
SH
1301 rcu_read_lock();
1302 pf = rcu_dereference(net_families[family]);
1303 err = -EAFNOSUPPORT;
1304 if (!pf)
1305 goto out_release;
1da177e4
LT
1306
1307 /*
1308 * We will call the ->create function, that possibly is in a loadable
1309 * module, so we have to bump that loadable module refcnt first.
1310 */
55737fda 1311 if (!try_module_get(pf->owner))
1da177e4
LT
1312 goto out_release;
1313
55737fda
SH
1314 /* Now protected by module ref count */
1315 rcu_read_unlock();
1316
3f378b68 1317 err = pf->create(net, sock, protocol, kern);
55737fda 1318 if (err < 0)
1da177e4 1319 goto out_module_put;
a79af59e 1320
1da177e4
LT
1321 /*
1322 * Now to bump the refcnt of the [loadable] module that owns this
1323 * socket at sock_release time we decrement its refcnt.
1324 */
55737fda
SH
1325 if (!try_module_get(sock->ops->owner))
1326 goto out_module_busy;
1327
1da177e4
LT
1328 /*
1329 * Now that we're done with the ->create function, the [loadable]
1330 * module can have its refcnt decremented
1331 */
55737fda 1332 module_put(pf->owner);
7420ed23
VY
1333 err = security_socket_post_create(sock, family, type, protocol, kern);
1334 if (err)
3b185525 1335 goto out_sock_release;
55737fda 1336 *res = sock;
1da177e4 1337
55737fda
SH
1338 return 0;
1339
1340out_module_busy:
1341 err = -EAFNOSUPPORT;
1da177e4 1342out_module_put:
55737fda
SH
1343 sock->ops = NULL;
1344 module_put(pf->owner);
1345out_sock_release:
1da177e4 1346 sock_release(sock);
55737fda
SH
1347 return err;
1348
1349out_release:
1350 rcu_read_unlock();
1351 goto out_sock_release;
1da177e4 1352}
721db93a 1353EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1354
1355int sock_create(int family, int type, int protocol, struct socket **res)
1356{
1b8d7ae4 1357 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1358}
c6d409cf 1359EXPORT_SYMBOL(sock_create);
1da177e4
LT
1360
1361int sock_create_kern(int family, int type, int protocol, struct socket **res)
1362{
1b8d7ae4 1363 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1364}
c6d409cf 1365EXPORT_SYMBOL(sock_create_kern);
1da177e4 1366
3e0fa65f 1367SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1368{
1369 int retval;
1370 struct socket *sock;
a677a039
UD
1371 int flags;
1372
e38b36f3
UD
1373 /* Check the SOCK_* constants for consistency. */
1374 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1375 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1376 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1377 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1378
a677a039 1379 flags = type & ~SOCK_TYPE_MASK;
77d27200 1380 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1381 return -EINVAL;
1382 type &= SOCK_TYPE_MASK;
1da177e4 1383
aaca0bdc
UD
1384 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1385 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1386
1da177e4
LT
1387 retval = sock_create(family, type, protocol, &sock);
1388 if (retval < 0)
1389 goto out;
1390
77d27200 1391 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1392 if (retval < 0)
1393 goto out_release;
1394
1395out:
1396 /* It may be already another descriptor 8) Not kernel problem. */
1397 return retval;
1398
1399out_release:
1400 sock_release(sock);
1401 return retval;
1402}
1403
1404/*
1405 * Create a pair of connected sockets.
1406 */
1407
3e0fa65f
HC
1408SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1409 int __user *, usockvec)
1da177e4
LT
1410{
1411 struct socket *sock1, *sock2;
1412 int fd1, fd2, err;
db349509 1413 struct file *newfile1, *newfile2;
a677a039
UD
1414 int flags;
1415
1416 flags = type & ~SOCK_TYPE_MASK;
77d27200 1417 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1418 return -EINVAL;
1419 type &= SOCK_TYPE_MASK;
1da177e4 1420
aaca0bdc
UD
1421 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1422 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1423
1da177e4
LT
1424 /*
1425 * Obtain the first socket and check if the underlying protocol
1426 * supports the socketpair call.
1427 */
1428
1429 err = sock_create(family, type, protocol, &sock1);
1430 if (err < 0)
1431 goto out;
1432
1433 err = sock_create(family, type, protocol, &sock2);
1434 if (err < 0)
1435 goto out_release_1;
1436
1437 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1438 if (err < 0)
1da177e4
LT
1439 goto out_release_both;
1440
28407630 1441 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1442 if (unlikely(fd1 < 0)) {
1443 err = fd1;
db349509 1444 goto out_release_both;
bf3c23d1 1445 }
d73aa286 1446
28407630 1447 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1448 if (unlikely(fd2 < 0)) {
1449 err = fd2;
d73aa286 1450 goto out_put_unused_1;
28407630
AV
1451 }
1452
aab174f0 1453 newfile1 = sock_alloc_file(sock1, flags, NULL);
28407630
AV
1454 if (unlikely(IS_ERR(newfile1))) {
1455 err = PTR_ERR(newfile1);
d73aa286 1456 goto out_put_unused_both;
28407630
AV
1457 }
1458
aab174f0 1459 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1460 if (IS_ERR(newfile2)) {
1461 err = PTR_ERR(newfile2);
d73aa286 1462 goto out_fput_1;
db349509
AV
1463 }
1464
d73aa286
YD
1465 err = put_user(fd1, &usockvec[0]);
1466 if (err)
1467 goto out_fput_both;
1468
1469 err = put_user(fd2, &usockvec[1]);
1470 if (err)
1471 goto out_fput_both;
1472
157cf649 1473 audit_fd_pair(fd1, fd2);
d73aa286 1474
db349509
AV
1475 fd_install(fd1, newfile1);
1476 fd_install(fd2, newfile2);
1da177e4
LT
1477 /* fd1 and fd2 may be already another descriptors.
1478 * Not kernel problem.
1479 */
1480
d73aa286 1481 return 0;
1da177e4 1482
d73aa286
YD
1483out_fput_both:
1484 fput(newfile2);
1485 fput(newfile1);
1486 put_unused_fd(fd2);
1487 put_unused_fd(fd1);
1488 goto out;
1489
1490out_fput_1:
1491 fput(newfile1);
1492 put_unused_fd(fd2);
1493 put_unused_fd(fd1);
1494 sock_release(sock2);
1495 goto out;
1da177e4 1496
d73aa286
YD
1497out_put_unused_both:
1498 put_unused_fd(fd2);
1499out_put_unused_1:
1500 put_unused_fd(fd1);
1da177e4 1501out_release_both:
89bddce5 1502 sock_release(sock2);
1da177e4 1503out_release_1:
89bddce5 1504 sock_release(sock1);
1da177e4
LT
1505out:
1506 return err;
1507}
1508
1da177e4
LT
1509/*
1510 * Bind a name to a socket. Nothing much to do here since it's
1511 * the protocol's responsibility to handle the local address.
1512 *
1513 * We move the socket address to kernel space before we call
1514 * the protocol layer (having also checked the address is ok).
1515 */
1516
20f37034 1517SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1518{
1519 struct socket *sock;
230b1839 1520 struct sockaddr_storage address;
6cb153ca 1521 int err, fput_needed;
1da177e4 1522
89bddce5 1523 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1524 if (sock) {
43db362d 1525 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1526 if (err >= 0) {
1527 err = security_socket_bind(sock,
230b1839 1528 (struct sockaddr *)&address,
89bddce5 1529 addrlen);
6cb153ca
BL
1530 if (!err)
1531 err = sock->ops->bind(sock,
89bddce5 1532 (struct sockaddr *)
230b1839 1533 &address, addrlen);
1da177e4 1534 }
6cb153ca 1535 fput_light(sock->file, fput_needed);
89bddce5 1536 }
1da177e4
LT
1537 return err;
1538}
1539
1da177e4
LT
1540/*
1541 * Perform a listen. Basically, we allow the protocol to do anything
1542 * necessary for a listen, and if that works, we mark the socket as
1543 * ready for listening.
1544 */
1545
3e0fa65f 1546SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1547{
1548 struct socket *sock;
6cb153ca 1549 int err, fput_needed;
b8e1f9b5 1550 int somaxconn;
89bddce5
SH
1551
1552 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1553 if (sock) {
8efa6e93 1554 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1555 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1556 backlog = somaxconn;
1da177e4
LT
1557
1558 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1559 if (!err)
1560 err = sock->ops->listen(sock, backlog);
1da177e4 1561
6cb153ca 1562 fput_light(sock->file, fput_needed);
1da177e4
LT
1563 }
1564 return err;
1565}
1566
1da177e4
LT
1567/*
1568 * For accept, we attempt to create a new socket, set up the link
1569 * with the client, wake up the client, then return the new
1570 * connected fd. We collect the address of the connector in kernel
1571 * space and move it to user at the very end. This is unclean because
1572 * we open the socket then return an error.
1573 *
1574 * 1003.1g adds the ability to recvmsg() to query connection pending
1575 * status to recvmsg. We need to add that support in a way that's
1576 * clean when we restructure accept also.
1577 */
1578
20f37034
HC
1579SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1580 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1581{
1582 struct socket *sock, *newsock;
39d8c1b6 1583 struct file *newfile;
6cb153ca 1584 int err, len, newfd, fput_needed;
230b1839 1585 struct sockaddr_storage address;
1da177e4 1586
77d27200 1587 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1588 return -EINVAL;
1589
1590 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1591 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1592
6cb153ca 1593 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1594 if (!sock)
1595 goto out;
1596
1597 err = -ENFILE;
c6d409cf
ED
1598 newsock = sock_alloc();
1599 if (!newsock)
1da177e4
LT
1600 goto out_put;
1601
1602 newsock->type = sock->type;
1603 newsock->ops = sock->ops;
1604
1da177e4
LT
1605 /*
1606 * We don't need try_module_get here, as the listening socket (sock)
1607 * has the protocol module (sock->ops->owner) held.
1608 */
1609 __module_get(newsock->ops->owner);
1610
28407630 1611 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1612 if (unlikely(newfd < 0)) {
1613 err = newfd;
9a1875e6
DM
1614 sock_release(newsock);
1615 goto out_put;
39d8c1b6 1616 }
aab174f0 1617 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
28407630
AV
1618 if (unlikely(IS_ERR(newfile))) {
1619 err = PTR_ERR(newfile);
1620 put_unused_fd(newfd);
1621 sock_release(newsock);
1622 goto out_put;
1623 }
39d8c1b6 1624
a79af59e
FF
1625 err = security_socket_accept(sock, newsock);
1626 if (err)
39d8c1b6 1627 goto out_fd;
a79af59e 1628
1da177e4
LT
1629 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1630 if (err < 0)
39d8c1b6 1631 goto out_fd;
1da177e4
LT
1632
1633 if (upeer_sockaddr) {
230b1839 1634 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1635 &len, 2) < 0) {
1da177e4 1636 err = -ECONNABORTED;
39d8c1b6 1637 goto out_fd;
1da177e4 1638 }
43db362d 1639 err = move_addr_to_user(&address,
230b1839 1640 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1641 if (err < 0)
39d8c1b6 1642 goto out_fd;
1da177e4
LT
1643 }
1644
1645 /* File flags are not inherited via accept() unlike another OSes. */
1646
39d8c1b6
DM
1647 fd_install(newfd, newfile);
1648 err = newfd;
1da177e4 1649
1da177e4 1650out_put:
6cb153ca 1651 fput_light(sock->file, fput_needed);
1da177e4
LT
1652out:
1653 return err;
39d8c1b6 1654out_fd:
9606a216 1655 fput(newfile);
39d8c1b6 1656 put_unused_fd(newfd);
1da177e4
LT
1657 goto out_put;
1658}
1659
20f37034
HC
1660SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1661 int __user *, upeer_addrlen)
aaca0bdc 1662{
de11defe 1663 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1664}
1665
1da177e4
LT
1666/*
1667 * Attempt to connect to a socket with the server address. The address
1668 * is in user space so we verify it is OK and move it to kernel space.
1669 *
1670 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1671 * break bindings
1672 *
1673 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1674 * other SEQPACKET protocols that take time to connect() as it doesn't
1675 * include the -EINPROGRESS status for such sockets.
1676 */
1677
20f37034
HC
1678SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1679 int, addrlen)
1da177e4
LT
1680{
1681 struct socket *sock;
230b1839 1682 struct sockaddr_storage address;
6cb153ca 1683 int err, fput_needed;
1da177e4 1684
6cb153ca 1685 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1686 if (!sock)
1687 goto out;
43db362d 1688 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1689 if (err < 0)
1690 goto out_put;
1691
89bddce5 1692 err =
230b1839 1693 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1694 if (err)
1695 goto out_put;
1696
230b1839 1697 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1698 sock->file->f_flags);
1699out_put:
6cb153ca 1700 fput_light(sock->file, fput_needed);
1da177e4
LT
1701out:
1702 return err;
1703}
1704
1705/*
1706 * Get the local address ('name') of a socket object. Move the obtained
1707 * name to user space.
1708 */
1709
20f37034
HC
1710SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1711 int __user *, usockaddr_len)
1da177e4
LT
1712{
1713 struct socket *sock;
230b1839 1714 struct sockaddr_storage address;
6cb153ca 1715 int len, err, fput_needed;
89bddce5 1716
6cb153ca 1717 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1718 if (!sock)
1719 goto out;
1720
1721 err = security_socket_getsockname(sock);
1722 if (err)
1723 goto out_put;
1724
230b1839 1725 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1726 if (err)
1727 goto out_put;
43db362d 1728 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1729
1730out_put:
6cb153ca 1731 fput_light(sock->file, fput_needed);
1da177e4
LT
1732out:
1733 return err;
1734}
1735
1736/*
1737 * Get the remote address ('name') of a socket object. Move the obtained
1738 * name to user space.
1739 */
1740
20f37034
HC
1741SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1742 int __user *, usockaddr_len)
1da177e4
LT
1743{
1744 struct socket *sock;
230b1839 1745 struct sockaddr_storage address;
6cb153ca 1746 int len, err, fput_needed;
1da177e4 1747
89bddce5
SH
1748 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1749 if (sock != NULL) {
1da177e4
LT
1750 err = security_socket_getpeername(sock);
1751 if (err) {
6cb153ca 1752 fput_light(sock->file, fput_needed);
1da177e4
LT
1753 return err;
1754 }
1755
89bddce5 1756 err =
230b1839 1757 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1758 1);
1da177e4 1759 if (!err)
43db362d 1760 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1761 usockaddr_len);
6cb153ca 1762 fput_light(sock->file, fput_needed);
1da177e4
LT
1763 }
1764 return err;
1765}
1766
1767/*
1768 * Send a datagram to a given address. We move the address into kernel
1769 * space and check the user space data area is readable before invoking
1770 * the protocol.
1771 */
1772
3e0fa65f 1773SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1774 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1775 int, addr_len)
1da177e4
LT
1776{
1777 struct socket *sock;
230b1839 1778 struct sockaddr_storage address;
1da177e4
LT
1779 int err;
1780 struct msghdr msg;
1781 struct iovec iov;
6cb153ca 1782 int fput_needed;
6cb153ca 1783
253eacc0
LT
1784 if (len > INT_MAX)
1785 len = INT_MAX;
de0fa95c
PE
1786 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1787 if (!sock)
4387ff75 1788 goto out;
6cb153ca 1789
89bddce5
SH
1790 iov.iov_base = buff;
1791 iov.iov_len = len;
1792 msg.msg_name = NULL;
1793 msg.msg_iov = &iov;
1794 msg.msg_iovlen = 1;
1795 msg.msg_control = NULL;
1796 msg.msg_controllen = 0;
1797 msg.msg_namelen = 0;
6cb153ca 1798 if (addr) {
43db362d 1799 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1800 if (err < 0)
1801 goto out_put;
230b1839 1802 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1803 msg.msg_namelen = addr_len;
1da177e4
LT
1804 }
1805 if (sock->file->f_flags & O_NONBLOCK)
1806 flags |= MSG_DONTWAIT;
1807 msg.msg_flags = flags;
1808 err = sock_sendmsg(sock, &msg, len);
1809
89bddce5 1810out_put:
de0fa95c 1811 fput_light(sock->file, fput_needed);
4387ff75 1812out:
1da177e4
LT
1813 return err;
1814}
1815
1816/*
89bddce5 1817 * Send a datagram down a socket.
1da177e4
LT
1818 */
1819
3e0fa65f 1820SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1821 unsigned int, flags)
1da177e4
LT
1822{
1823 return sys_sendto(fd, buff, len, flags, NULL, 0);
1824}
1825
1826/*
89bddce5 1827 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1828 * sender. We verify the buffers are writable and if needed move the
1829 * sender address from kernel to user space.
1830 */
1831
3e0fa65f 1832SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1833 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1834 int __user *, addr_len)
1da177e4
LT
1835{
1836 struct socket *sock;
1837 struct iovec iov;
1838 struct msghdr msg;
230b1839 1839 struct sockaddr_storage address;
89bddce5 1840 int err, err2;
6cb153ca
BL
1841 int fput_needed;
1842
253eacc0
LT
1843 if (size > INT_MAX)
1844 size = INT_MAX;
de0fa95c 1845 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1846 if (!sock)
de0fa95c 1847 goto out;
1da177e4 1848
89bddce5
SH
1849 msg.msg_control = NULL;
1850 msg.msg_controllen = 0;
1851 msg.msg_iovlen = 1;
1852 msg.msg_iov = &iov;
1853 iov.iov_len = size;
1854 iov.iov_base = ubuf;
f3d33426
HFS
1855 /* Save some cycles and don't copy the address if not needed */
1856 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1857 /* We assume all kernel code knows the size of sockaddr_storage */
1858 msg.msg_namelen = 0;
1da177e4
LT
1859 if (sock->file->f_flags & O_NONBLOCK)
1860 flags |= MSG_DONTWAIT;
89bddce5 1861 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1862
89bddce5 1863 if (err >= 0 && addr != NULL) {
43db362d 1864 err2 = move_addr_to_user(&address,
230b1839 1865 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1866 if (err2 < 0)
1867 err = err2;
1da177e4 1868 }
de0fa95c
PE
1869
1870 fput_light(sock->file, fput_needed);
4387ff75 1871out:
1da177e4
LT
1872 return err;
1873}
1874
1875/*
89bddce5 1876 * Receive a datagram from a socket.
1da177e4
LT
1877 */
1878
b7c0ddf5
JG
1879SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1880 unsigned int, flags)
1da177e4
LT
1881{
1882 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1883}
1884
1885/*
1886 * Set a socket option. Because we don't know the option lengths we have
1887 * to pass the user mode parameter for the protocols to sort out.
1888 */
1889
20f37034
HC
1890SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1891 char __user *, optval, int, optlen)
1da177e4 1892{
6cb153ca 1893 int err, fput_needed;
1da177e4
LT
1894 struct socket *sock;
1895
1896 if (optlen < 0)
1897 return -EINVAL;
89bddce5
SH
1898
1899 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1900 if (sock != NULL) {
1901 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1902 if (err)
1903 goto out_put;
1da177e4
LT
1904
1905 if (level == SOL_SOCKET)
89bddce5
SH
1906 err =
1907 sock_setsockopt(sock, level, optname, optval,
1908 optlen);
1da177e4 1909 else
89bddce5
SH
1910 err =
1911 sock->ops->setsockopt(sock, level, optname, optval,
1912 optlen);
6cb153ca
BL
1913out_put:
1914 fput_light(sock->file, fput_needed);
1da177e4
LT
1915 }
1916 return err;
1917}
1918
1919/*
1920 * Get a socket option. Because we don't know the option lengths we have
1921 * to pass a user mode parameter for the protocols to sort out.
1922 */
1923
20f37034
HC
1924SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1925 char __user *, optval, int __user *, optlen)
1da177e4 1926{
6cb153ca 1927 int err, fput_needed;
1da177e4
LT
1928 struct socket *sock;
1929
89bddce5
SH
1930 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1931 if (sock != NULL) {
6cb153ca
BL
1932 err = security_socket_getsockopt(sock, level, optname);
1933 if (err)
1934 goto out_put;
1da177e4
LT
1935
1936 if (level == SOL_SOCKET)
89bddce5
SH
1937 err =
1938 sock_getsockopt(sock, level, optname, optval,
1939 optlen);
1da177e4 1940 else
89bddce5
SH
1941 err =
1942 sock->ops->getsockopt(sock, level, optname, optval,
1943 optlen);
6cb153ca
BL
1944out_put:
1945 fput_light(sock->file, fput_needed);
1da177e4
LT
1946 }
1947 return err;
1948}
1949
1da177e4
LT
1950/*
1951 * Shutdown a socket.
1952 */
1953
754fe8d2 1954SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1955{
6cb153ca 1956 int err, fput_needed;
1da177e4
LT
1957 struct socket *sock;
1958
89bddce5
SH
1959 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1960 if (sock != NULL) {
1da177e4 1961 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1962 if (!err)
1963 err = sock->ops->shutdown(sock, how);
1964 fput_light(sock->file, fput_needed);
1da177e4
LT
1965 }
1966 return err;
1967}
1968
89bddce5 1969/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1970 * fields which are the same type (int / unsigned) on our platforms.
1971 */
/*
 * These expand in the caller's scope and expect both a `flags` variable
 * and, next to `msg`, a `msg##_compat` pointer to be visible there.
 */
#define COMPAT_MSG(msg, member)	\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1975
c71d8ebe
TH
1976struct used_address {
1977 struct sockaddr_storage name;
1978 unsigned int name_len;
1979};
1980
1661bf36
DC
1981static int copy_msghdr_from_user(struct msghdr *kmsg,
1982 struct msghdr __user *umsg)
1983{
1984 if (copy_from_user(kmsg, umsg, sizeof(struct msghdr)))
1985 return -EFAULT;
dbb490b9
ML
1986
1987 if (kmsg->msg_namelen < 0)
1988 return -EINVAL;
1989
1661bf36 1990 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1991 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
1661bf36
DC
1992 return 0;
1993}
1994
a7526eb5 1995static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 1996 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1997 struct used_address *used_address)
1da177e4 1998{
89bddce5
SH
1999 struct compat_msghdr __user *msg_compat =
2000 (struct compat_msghdr __user *)msg;
230b1839 2001 struct sockaddr_storage address;
1da177e4 2002 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2003 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
2004 __attribute__ ((aligned(sizeof(__kernel_size_t))));
2005 /* 20 is size of ipv6_pktinfo */
1da177e4 2006 unsigned char *ctl_buf = ctl;
a74e9106 2007 int err, ctl_len, total_len;
89bddce5 2008
1da177e4
LT
2009 err = -EFAULT;
2010 if (MSG_CMSG_COMPAT & flags) {
228e548e 2011 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2012 return -EFAULT;
1661bf36
DC
2013 } else {
2014 err = copy_msghdr_from_user(msg_sys, msg);
2015 if (err)
2016 return err;
2017 }
1da177e4 2018
228e548e 2019 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2020 err = -EMSGSIZE;
2021 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2022 goto out;
2023 err = -ENOMEM;
2024 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2025 GFP_KERNEL);
1da177e4 2026 if (!iov)
228e548e 2027 goto out;
1da177e4
LT
2028 }
2029
2030 /* This will also move the address data into kernel space */
2031 if (MSG_CMSG_COMPAT & flags) {
43db362d 2032 err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
1da177e4 2033 } else
43db362d 2034 err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
89bddce5 2035 if (err < 0)
1da177e4
LT
2036 goto out_freeiov;
2037 total_len = err;
2038
2039 err = -ENOBUFS;
2040
228e548e 2041 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2042 goto out_freeiov;
228e548e 2043 ctl_len = msg_sys->msg_controllen;
1da177e4 2044 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2045 err =
228e548e 2046 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2047 sizeof(ctl));
1da177e4
LT
2048 if (err)
2049 goto out_freeiov;
228e548e
AB
2050 ctl_buf = msg_sys->msg_control;
2051 ctl_len = msg_sys->msg_controllen;
1da177e4 2052 } else if (ctl_len) {
89bddce5 2053 if (ctl_len > sizeof(ctl)) {
1da177e4 2054 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2055 if (ctl_buf == NULL)
1da177e4
LT
2056 goto out_freeiov;
2057 }
2058 err = -EFAULT;
2059 /*
228e548e 2060 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2061 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2062 * checking falls down on this.
2063 */
fb8621bb 2064 if (copy_from_user(ctl_buf,
228e548e 2065 (void __user __force *)msg_sys->msg_control,
89bddce5 2066 ctl_len))
1da177e4 2067 goto out_freectl;
228e548e 2068 msg_sys->msg_control = ctl_buf;
1da177e4 2069 }
228e548e 2070 msg_sys->msg_flags = flags;
1da177e4
LT
2071
2072 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2073 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2074 /*
2075 * If this is sendmmsg() and current destination address is same as
2076 * previously succeeded address, omit asking LSM's decision.
2077 * used_address->name_len is initialized to UINT_MAX so that the first
2078 * destination address never matches.
2079 */
bc909d9d
MD
2080 if (used_address && msg_sys->msg_name &&
2081 used_address->name_len == msg_sys->msg_namelen &&
2082 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
2083 used_address->name_len)) {
2084 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
2085 goto out_freectl;
2086 }
2087 err = sock_sendmsg(sock, msg_sys, total_len);
2088 /*
2089 * If this is sendmmsg() and sending to current destination address was
2090 * successful, remember it.
2091 */
2092 if (used_address && err >= 0) {
2093 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2094 if (msg_sys->msg_name)
2095 memcpy(&used_address->name, msg_sys->msg_name,
2096 used_address->name_len);
c71d8ebe 2097 }
1da177e4
LT
2098
2099out_freectl:
89bddce5 2100 if (ctl_buf != ctl)
1da177e4
LT
2101 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2102out_freeiov:
2103 if (iov != iovstack)
a74e9106 2104 kfree(iov);
228e548e
AB
2105out:
2106 return err;
2107}
2108
2109/*
2110 * BSD sendmsg interface
2111 */
2112
a7526eb5 2113long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
228e548e
AB
2114{
2115 int fput_needed, err;
2116 struct msghdr msg_sys;
1be374a0
AL
2117 struct socket *sock;
2118
1be374a0 2119 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2120 if (!sock)
2121 goto out;
2122
a7526eb5 2123 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2124
6cb153ca 2125 fput_light(sock->file, fput_needed);
89bddce5 2126out:
1da177e4
LT
2127 return err;
2128}
2129
a7526eb5
AL
2130SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
2131{
2132 if (flags & MSG_CMSG_COMPAT)
2133 return -EINVAL;
2134 return __sys_sendmsg(fd, msg, flags);
2135}
2136
228e548e
AB
2137/*
2138 * Linux sendmmsg interface
2139 */
2140
2141int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2142 unsigned int flags)
2143{
2144 int fput_needed, err, datagrams;
2145 struct socket *sock;
2146 struct mmsghdr __user *entry;
2147 struct compat_mmsghdr __user *compat_entry;
2148 struct msghdr msg_sys;
c71d8ebe 2149 struct used_address used_address;
228e548e 2150
98382f41
AB
2151 if (vlen > UIO_MAXIOV)
2152 vlen = UIO_MAXIOV;
228e548e
AB
2153
2154 datagrams = 0;
2155
2156 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2157 if (!sock)
2158 return err;
2159
c71d8ebe 2160 used_address.name_len = UINT_MAX;
228e548e
AB
2161 entry = mmsg;
2162 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2163 err = 0;
228e548e
AB
2164
2165 while (datagrams < vlen) {
228e548e 2166 if (MSG_CMSG_COMPAT & flags) {
a7526eb5
AL
2167 err = ___sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
2168 &msg_sys, flags, &used_address);
228e548e
AB
2169 if (err < 0)
2170 break;
2171 err = __put_user(err, &compat_entry->msg_len);
2172 ++compat_entry;
2173 } else {
a7526eb5
AL
2174 err = ___sys_sendmsg(sock,
2175 (struct msghdr __user *)entry,
2176 &msg_sys, flags, &used_address);
228e548e
AB
2177 if (err < 0)
2178 break;
2179 err = put_user(err, &entry->msg_len);
2180 ++entry;
2181 }
2182
2183 if (err)
2184 break;
2185 ++datagrams;
2186 }
2187
228e548e
AB
2188 fput_light(sock->file, fput_needed);
2189
728ffb86
AB
2190 /* We only return an error if no datagrams were able to be sent */
2191 if (datagrams != 0)
228e548e
AB
2192 return datagrams;
2193
228e548e
AB
2194 return err;
2195}
2196
2197SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2198 unsigned int, vlen, unsigned int, flags)
2199{
1be374a0
AL
2200 if (flags & MSG_CMSG_COMPAT)
2201 return -EINVAL;
228e548e
AB
2202 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2203}
2204
a7526eb5 2205static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 2206 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2207{
89bddce5
SH
2208 struct compat_msghdr __user *msg_compat =
2209 (struct compat_msghdr __user *)msg;
1da177e4 2210 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2211 struct iovec *iov = iovstack;
1da177e4 2212 unsigned long cmsg_ptr;
a74e9106 2213 int err, total_len, len;
1da177e4
LT
2214
2215 /* kernel mode address */
230b1839 2216 struct sockaddr_storage addr;
1da177e4
LT
2217
2218 /* user mode address pointers */
2219 struct sockaddr __user *uaddr;
2220 int __user *uaddr_len;
89bddce5 2221
1da177e4 2222 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2223 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2224 return -EFAULT;
1661bf36
DC
2225 } else {
2226 err = copy_msghdr_from_user(msg_sys, msg);
2227 if (err)
2228 return err;
2229 }
1da177e4 2230
a2e27255 2231 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2232 err = -EMSGSIZE;
2233 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2234 goto out;
2235 err = -ENOMEM;
2236 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2237 GFP_KERNEL);
1da177e4 2238 if (!iov)
a2e27255 2239 goto out;
1da177e4
LT
2240 }
2241
f3d33426
HFS
2242 /* Save the user-mode address (verify_iovec will change the
2243 * kernel msghdr to use the kernel address space)
1da177e4 2244 */
a2e27255 2245 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4 2246 uaddr_len = COMPAT_NAMELEN(msg);
f3d33426 2247 if (MSG_CMSG_COMPAT & flags)
43db362d 2248 err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
f3d33426 2249 else
43db362d 2250 err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4
LT
2251 if (err < 0)
2252 goto out_freeiov;
89bddce5 2253 total_len = err;
1da177e4 2254
a2e27255
ACM
2255 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2256 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2257
f3d33426
HFS
2258 /* We assume all kernel code knows the size of sockaddr_storage */
2259 msg_sys->msg_namelen = 0;
2260
1da177e4
LT
2261 if (sock->file->f_flags & O_NONBLOCK)
2262 flags |= MSG_DONTWAIT;
a2e27255
ACM
2263 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2264 total_len, flags);
1da177e4
LT
2265 if (err < 0)
2266 goto out_freeiov;
2267 len = err;
2268
2269 if (uaddr != NULL) {
43db362d 2270 err = move_addr_to_user(&addr,
a2e27255 2271 msg_sys->msg_namelen, uaddr,
89bddce5 2272 uaddr_len);
1da177e4
LT
2273 if (err < 0)
2274 goto out_freeiov;
2275 }
a2e27255 2276 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2277 COMPAT_FLAGS(msg));
1da177e4
LT
2278 if (err)
2279 goto out_freeiov;
2280 if (MSG_CMSG_COMPAT & flags)
a2e27255 2281 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2282 &msg_compat->msg_controllen);
2283 else
a2e27255 2284 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2285 &msg->msg_controllen);
2286 if (err)
2287 goto out_freeiov;
2288 err = len;
2289
2290out_freeiov:
2291 if (iov != iovstack)
a74e9106 2292 kfree(iov);
a2e27255
ACM
2293out:
2294 return err;
2295}
2296
2297/*
2298 * BSD recvmsg interface
2299 */
2300
a7526eb5 2301long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags)
a2e27255
ACM
2302{
2303 int fput_needed, err;
2304 struct msghdr msg_sys;
1be374a0
AL
2305 struct socket *sock;
2306
1be374a0 2307 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2308 if (!sock)
2309 goto out;
2310
a7526eb5 2311 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2312
6cb153ca 2313 fput_light(sock->file, fput_needed);
1da177e4
LT
2314out:
2315 return err;
2316}
2317
a7526eb5
AL
2318SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2319 unsigned int, flags)
2320{
2321 if (flags & MSG_CMSG_COMPAT)
2322 return -EINVAL;
2323 return __sys_recvmsg(fd, msg, flags);
2324}
2325
a2e27255
ACM
2326/*
2327 * Linux recvmmsg interface
2328 */
2329
2330int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2331 unsigned int flags, struct timespec *timeout)
2332{
2333 int fput_needed, err, datagrams;
2334 struct socket *sock;
2335 struct mmsghdr __user *entry;
d7256d0e 2336 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2337 struct msghdr msg_sys;
2338 struct timespec end_time;
2339
2340 if (timeout &&
2341 poll_select_set_timeout(&end_time, timeout->tv_sec,
2342 timeout->tv_nsec))
2343 return -EINVAL;
2344
2345 datagrams = 0;
2346
2347 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2348 if (!sock)
2349 return err;
2350
2351 err = sock_error(sock->sk);
2352 if (err)
2353 goto out_put;
2354
2355 entry = mmsg;
d7256d0e 2356 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2357
2358 while (datagrams < vlen) {
2359 /*
2360 * No need to ask LSM for more than the first datagram.
2361 */
d7256d0e 2362 if (MSG_CMSG_COMPAT & flags) {
a7526eb5
AL
2363 err = ___sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
2364 &msg_sys, flags & ~MSG_WAITFORONE,
2365 datagrams);
d7256d0e
JMG
2366 if (err < 0)
2367 break;
2368 err = __put_user(err, &compat_entry->msg_len);
2369 ++compat_entry;
2370 } else {
a7526eb5
AL
2371 err = ___sys_recvmsg(sock,
2372 (struct msghdr __user *)entry,
2373 &msg_sys, flags & ~MSG_WAITFORONE,
2374 datagrams);
d7256d0e
JMG
2375 if (err < 0)
2376 break;
2377 err = put_user(err, &entry->msg_len);
2378 ++entry;
2379 }
2380
a2e27255
ACM
2381 if (err)
2382 break;
a2e27255
ACM
2383 ++datagrams;
2384
71c5c159
BB
2385 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2386 if (flags & MSG_WAITFORONE)
2387 flags |= MSG_DONTWAIT;
2388
a2e27255
ACM
2389 if (timeout) {
2390 ktime_get_ts(timeout);
2391 *timeout = timespec_sub(end_time, *timeout);
2392 if (timeout->tv_sec < 0) {
2393 timeout->tv_sec = timeout->tv_nsec = 0;
2394 break;
2395 }
2396
2397 /* Timeout, return less than vlen datagrams */
2398 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2399 break;
2400 }
2401
2402 /* Out of band data, return right away */
2403 if (msg_sys.msg_flags & MSG_OOB)
2404 break;
2405 }
2406
2407out_put:
2408 fput_light(sock->file, fput_needed);
1da177e4 2409
a2e27255
ACM
2410 if (err == 0)
2411 return datagrams;
2412
2413 if (datagrams != 0) {
2414 /*
2415 * We may return less entries than requested (vlen) if the
2416 * sock is non block and there aren't enough datagrams...
2417 */
2418 if (err != -EAGAIN) {
2419 /*
2420 * ... or if recvmsg returns an error after we
2421 * received some datagrams, where we record the
2422 * error to return on the next call or if the
2423 * app asks about it using getsockopt(SO_ERROR).
2424 */
2425 sock->sk->sk_err = -err;
2426 }
2427
2428 return datagrams;
2429 }
2430
2431 return err;
2432}
2433
2434SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2435 unsigned int, vlen, unsigned int, flags,
2436 struct timespec __user *, timeout)
2437{
2438 int datagrams;
2439 struct timespec timeout_sys;
2440
1be374a0
AL
2441 if (flags & MSG_CMSG_COMPAT)
2442 return -EINVAL;
2443
a2e27255
ACM
2444 if (!timeout)
2445 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2446
2447 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2448 return -EFAULT;
2449
2450 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2451
2452 if (datagrams > 0 &&
2453 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2454 datagrams = -EFAULT;
2455
2456 return datagrams;
2457}
2458
2459#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/* Argument list sizes for sys_socketcall: bytes to copy in, indexed by call */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3),
	AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2),
	AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2470
2471/*
89bddce5 2472 * System call vectors.
1da177e4
LT
2473 *
2474 * Argument checking cleaned up. Saved 20% in size.
2475 * This function doesn't need to set the kernel lock because
89bddce5 2476 * it is set by the callees.
1da177e4
LT
2477 */
2478
3e0fa65f 2479SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2480{
2950fa9d 2481 unsigned long a[AUDITSC_ARGS];
89bddce5 2482 unsigned long a0, a1;
1da177e4 2483 int err;
47379052 2484 unsigned int len;
1da177e4 2485
228e548e 2486 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2487 return -EINVAL;
2488
47379052
AV
2489 len = nargs[call];
2490 if (len > sizeof(a))
2491 return -EINVAL;
2492
1da177e4 2493 /* copy_from_user should be SMP safe. */
47379052 2494 if (copy_from_user(a, args, len))
1da177e4 2495 return -EFAULT;
3ec3b2fb 2496
2950fa9d
CG
2497 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2498 if (err)
2499 return err;
3ec3b2fb 2500
89bddce5
SH
2501 a0 = a[0];
2502 a1 = a[1];
2503
2504 switch (call) {
2505 case SYS_SOCKET:
2506 err = sys_socket(a0, a1, a[2]);
2507 break;
2508 case SYS_BIND:
2509 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2510 break;
2511 case SYS_CONNECT:
2512 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2513 break;
2514 case SYS_LISTEN:
2515 err = sys_listen(a0, a1);
2516 break;
2517 case SYS_ACCEPT:
de11defe
UD
2518 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2519 (int __user *)a[2], 0);
89bddce5
SH
2520 break;
2521 case SYS_GETSOCKNAME:
2522 err =
2523 sys_getsockname(a0, (struct sockaddr __user *)a1,
2524 (int __user *)a[2]);
2525 break;
2526 case SYS_GETPEERNAME:
2527 err =
2528 sys_getpeername(a0, (struct sockaddr __user *)a1,
2529 (int __user *)a[2]);
2530 break;
2531 case SYS_SOCKETPAIR:
2532 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2533 break;
2534 case SYS_SEND:
2535 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2536 break;
2537 case SYS_SENDTO:
2538 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2539 (struct sockaddr __user *)a[4], a[5]);
2540 break;
2541 case SYS_RECV:
2542 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2543 break;
2544 case SYS_RECVFROM:
2545 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2546 (struct sockaddr __user *)a[4],
2547 (int __user *)a[5]);
2548 break;
2549 case SYS_SHUTDOWN:
2550 err = sys_shutdown(a0, a1);
2551 break;
2552 case SYS_SETSOCKOPT:
2553 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2554 break;
2555 case SYS_GETSOCKOPT:
2556 err =
2557 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2558 (int __user *)a[4]);
2559 break;
2560 case SYS_SENDMSG:
2561 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2562 break;
228e548e
AB
2563 case SYS_SENDMMSG:
2564 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2565 break;
89bddce5
SH
2566 case SYS_RECVMSG:
2567 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2568 break;
a2e27255
ACM
2569 case SYS_RECVMMSG:
2570 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2571 (struct timespec __user *)a[4]);
2572 break;
de11defe
UD
2573 case SYS_ACCEPT4:
2574 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2575 (int __user *)a[2], a[3]);
aaca0bdc 2576 break;
89bddce5
SH
2577 default:
2578 err = -EINVAL;
2579 break;
1da177e4
LT
2580 }
2581 return err;
2582}
2583
89bddce5 2584#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2585
55737fda
SH
2586/**
2587 * sock_register - add a socket protocol handler
2588 * @ops: description of protocol
2589 *
1da177e4
LT
2590 * This function is called by a protocol handler that wants to
2591 * advertise its address family, and have it linked into the
55737fda
SH
2592 * socket interface. The value ops->family coresponds to the
2593 * socket system call protocol family.
1da177e4 2594 */
f0fd27d4 2595int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2596{
2597 int err;
2598
2599 if (ops->family >= NPROTO) {
3410f22e 2600 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2601 return -ENOBUFS;
2602 }
55737fda
SH
2603
2604 spin_lock(&net_family_lock);
190683a9
ED
2605 if (rcu_dereference_protected(net_families[ops->family],
2606 lockdep_is_held(&net_family_lock)))
55737fda
SH
2607 err = -EEXIST;
2608 else {
cf778b00 2609 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2610 err = 0;
2611 }
55737fda
SH
2612 spin_unlock(&net_family_lock);
2613
3410f22e 2614 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2615 return err;
2616}
c6d409cf 2617EXPORT_SYMBOL(sock_register);
1da177e4 2618
55737fda
SH
2619/**
2620 * sock_unregister - remove a protocol handler
2621 * @family: protocol family to remove
2622 *
1da177e4
LT
2623 * This function is called by a protocol handler that wants to
2624 * remove its address family, and have it unlinked from the
55737fda
SH
2625 * new socket creation.
2626 *
2627 * If protocol handler is a module, then it can use module reference
2628 * counts to protect against new references. If protocol handler is not
2629 * a module then it needs to provide its own protection in
2630 * the ops->create routine.
1da177e4 2631 */
f0fd27d4 2632void sock_unregister(int family)
1da177e4 2633{
f0fd27d4 2634 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2635
55737fda 2636 spin_lock(&net_family_lock);
a9b3cd7f 2637 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2638 spin_unlock(&net_family_lock);
2639
2640 synchronize_rcu();
2641
3410f22e 2642 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2643}
c6d409cf 2644EXPORT_SYMBOL(sock_unregister);
1da177e4 2645
77d76ea3 2646static int __init sock_init(void)
1da177e4 2647{
b3e19d92 2648 int err;
2ca794e5
EB
2649 /*
2650 * Initialize the network sysctl infrastructure.
2651 */
2652 err = net_sysctl_init();
2653 if (err)
2654 goto out;
b3e19d92 2655
1da177e4 2656 /*
89bddce5 2657 * Initialize skbuff SLAB cache
1da177e4
LT
2658 */
2659 skb_init();
1da177e4
LT
2660
2661 /*
89bddce5 2662 * Initialize the protocols module.
1da177e4
LT
2663 */
2664
2665 init_inodecache();
b3e19d92
NP
2666
2667 err = register_filesystem(&sock_fs_type);
2668 if (err)
2669 goto out_fs;
1da177e4 2670 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2671 if (IS_ERR(sock_mnt)) {
2672 err = PTR_ERR(sock_mnt);
2673 goto out_mount;
2674 }
77d76ea3
AK
2675
2676 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2677 */
2678
2679#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2680 err = netfilter_init();
2681 if (err)
2682 goto out;
1da177e4 2683#endif
cbeb321a 2684
408eccce 2685 ptp_classifier_init();
c1f19b51 2686
b3e19d92
NP
2687out:
2688 return err;
2689
2690out_mount:
2691 unregister_filesystem(&sock_fs_type);
2692out_fs:
2693 goto out;
1da177e4
LT
2694}
2695
77d76ea3
AK
2696core_initcall(sock_init); /* early initcall */
2697
1da177e4
LT
2698#ifdef CONFIG_PROC_FS
2699void socket_seq_show(struct seq_file *seq)
2700{
2701 int cpu;
2702 int counter = 0;
2703
6f912042 2704 for_each_possible_cpu(cpu)
89bddce5 2705 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2706
2707 /* It can be negative, by the way. 8) */
2708 if (counter < 0)
2709 counter = 0;
2710
2711 seq_printf(seq, "sockets: used %d\n", counter);
2712}
89bddce5 2713#endif /* CONFIG_PROC_FS */
1da177e4 2714
89bbfc95 2715#ifdef CONFIG_COMPAT
6b96018b 2716static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2717 unsigned int cmd, void __user *up)
7a229387 2718{
7a229387
AB
2719 mm_segment_t old_fs = get_fs();
2720 struct timeval ktv;
2721 int err;
2722
2723 set_fs(KERNEL_DS);
6b96018b 2724 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2725 set_fs(old_fs);
644595f8 2726 if (!err)
ed6fe9d6 2727 err = compat_put_timeval(&ktv, up);
644595f8 2728
7a229387
AB
2729 return err;
2730}
2731
6b96018b 2732static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2733 unsigned int cmd, void __user *up)
7a229387 2734{
7a229387
AB
2735 mm_segment_t old_fs = get_fs();
2736 struct timespec kts;
2737 int err;
2738
2739 set_fs(KERNEL_DS);
6b96018b 2740 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2741 set_fs(old_fs);
644595f8 2742 if (!err)
ed6fe9d6 2743 err = compat_put_timespec(&kts, up);
644595f8 2744
7a229387
AB
2745 return err;
2746}
2747
6b96018b 2748static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2749{
2750 struct ifreq __user *uifr;
2751 int err;
2752
2753 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2754 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2755 return -EFAULT;
2756
6b96018b 2757 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2758 if (err)
2759 return err;
2760
6b96018b 2761 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2762 return -EFAULT;
2763
2764 return 0;
2765}
2766
6b96018b 2767static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2768{
6b96018b 2769 struct compat_ifconf ifc32;
7a229387
AB
2770 struct ifconf ifc;
2771 struct ifconf __user *uifc;
6b96018b 2772 struct compat_ifreq __user *ifr32;
7a229387
AB
2773 struct ifreq __user *ifr;
2774 unsigned int i, j;
2775 int err;
2776
6b96018b 2777 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2778 return -EFAULT;
2779
43da5f2e 2780 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2781 if (ifc32.ifcbuf == 0) {
2782 ifc32.ifc_len = 0;
2783 ifc.ifc_len = 0;
2784 ifc.ifc_req = NULL;
2785 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2786 } else {
c6d409cf
ED
2787 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2788 sizeof(struct ifreq);
7a229387
AB
2789 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2790 ifc.ifc_len = len;
2791 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2792 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2793 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2794 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2795 return -EFAULT;
2796 ifr++;
2797 ifr32++;
2798 }
2799 }
2800 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2801 return -EFAULT;
2802
6b96018b 2803 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2804 if (err)
2805 return err;
2806
2807 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2808 return -EFAULT;
2809
2810 ifr = ifc.ifc_req;
2811 ifr32 = compat_ptr(ifc32.ifcbuf);
2812 for (i = 0, j = 0;
c6d409cf
ED
2813 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2814 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2815 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2816 return -EFAULT;
2817 ifr32++;
2818 ifr++;
2819 }
2820
2821 if (ifc32.ifcbuf == 0) {
2822 /* Translate from 64-bit structure multiple to
2823 * a 32-bit one.
2824 */
2825 i = ifc.ifc_len;
6b96018b 2826 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2827 ifc32.ifc_len = i;
2828 } else {
2829 ifc32.ifc_len = i;
2830 }
6b96018b 2831 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2832 return -EFAULT;
2833
2834 return 0;
2835}
2836
6b96018b 2837static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2838{
3a7da39d
BH
2839 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2840 bool convert_in = false, convert_out = false;
2841 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2842 struct ethtool_rxnfc __user *rxnfc;
7a229387 2843 struct ifreq __user *ifr;
3a7da39d
BH
2844 u32 rule_cnt = 0, actual_rule_cnt;
2845 u32 ethcmd;
7a229387 2846 u32 data;
3a7da39d 2847 int ret;
7a229387 2848
3a7da39d
BH
2849 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2850 return -EFAULT;
7a229387 2851
3a7da39d
BH
2852 compat_rxnfc = compat_ptr(data);
2853
2854 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2855 return -EFAULT;
2856
3a7da39d
BH
2857 /* Most ethtool structures are defined without padding.
2858 * Unfortunately struct ethtool_rxnfc is an exception.
2859 */
2860 switch (ethcmd) {
2861 default:
2862 break;
2863 case ETHTOOL_GRXCLSRLALL:
2864 /* Buffer size is variable */
2865 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2866 return -EFAULT;
2867 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2868 return -ENOMEM;
2869 buf_size += rule_cnt * sizeof(u32);
2870 /* fall through */
2871 case ETHTOOL_GRXRINGS:
2872 case ETHTOOL_GRXCLSRLCNT:
2873 case ETHTOOL_GRXCLSRULE:
55664f32 2874 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2875 convert_out = true;
2876 /* fall through */
2877 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2878 buf_size += sizeof(struct ethtool_rxnfc);
2879 convert_in = true;
2880 break;
2881 }
2882
2883 ifr = compat_alloc_user_space(buf_size);
954b1244 2884 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2885
2886 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2887 return -EFAULT;
2888
3a7da39d
BH
2889 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2890 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2891 return -EFAULT;
2892
3a7da39d 2893 if (convert_in) {
127fe533 2894 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2895 * fs.ring_cookie and at the end of fs, but nowhere else.
2896 */
127fe533
AD
2897 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2898 sizeof(compat_rxnfc->fs.m_ext) !=
2899 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2900 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2901 BUILD_BUG_ON(
2902 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2903 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2904 offsetof(struct ethtool_rxnfc, fs.location) -
2905 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2906
2907 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2908 (void __user *)(&rxnfc->fs.m_ext + 1) -
2909 (void __user *)rxnfc) ||
3a7da39d
BH
2910 copy_in_user(&rxnfc->fs.ring_cookie,
2911 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2912 (void __user *)(&rxnfc->fs.location + 1) -
2913 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2914 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2915 sizeof(rxnfc->rule_cnt)))
2916 return -EFAULT;
2917 }
2918
2919 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2920 if (ret)
2921 return ret;
2922
2923 if (convert_out) {
2924 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2925 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2926 (const void __user *)rxnfc) ||
3a7da39d
BH
2927 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2928 &rxnfc->fs.ring_cookie,
954b1244
SH
2929 (const void __user *)(&rxnfc->fs.location + 1) -
2930 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2931 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2932 sizeof(rxnfc->rule_cnt)))
2933 return -EFAULT;
2934
2935 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2936 /* As an optimisation, we only copy the actual
2937 * number of rules that the underlying
2938 * function returned. Since Mallory might
2939 * change the rule count in user memory, we
2940 * check that it is less than the rule count
2941 * originally given (as the user buffer size),
2942 * which has been range-checked.
2943 */
2944 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2945 return -EFAULT;
2946 if (actual_rule_cnt < rule_cnt)
2947 rule_cnt = actual_rule_cnt;
2948 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2949 &rxnfc->rule_locs[0],
2950 rule_cnt * sizeof(u32)))
2951 return -EFAULT;
2952 }
2953 }
2954
2955 return 0;
7a229387
AB
2956}
2957
7a50a240
AB
2958static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2959{
2960 void __user *uptr;
2961 compat_uptr_t uptr32;
2962 struct ifreq __user *uifr;
2963
c6d409cf 2964 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2965 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2966 return -EFAULT;
2967
2968 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2969 return -EFAULT;
2970
2971 uptr = compat_ptr(uptr32);
2972
2973 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2974 return -EFAULT;
2975
2976 return dev_ioctl(net, SIOCWANDEV, uifr);
2977}
2978
6b96018b
AB
2979static int bond_ioctl(struct net *net, unsigned int cmd,
2980 struct compat_ifreq __user *ifr32)
7a229387
AB
2981{
2982 struct ifreq kifr;
7a229387
AB
2983 mm_segment_t old_fs;
2984 int err;
7a229387
AB
2985
2986 switch (cmd) {
2987 case SIOCBONDENSLAVE:
2988 case SIOCBONDRELEASE:
2989 case SIOCBONDSETHWADDR:
2990 case SIOCBONDCHANGEACTIVE:
6b96018b 2991 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2992 return -EFAULT;
2993
2994 old_fs = get_fs();
c6d409cf 2995 set_fs(KERNEL_DS);
c3f52ae6 2996 err = dev_ioctl(net, cmd,
2997 (struct ifreq __user __force *) &kifr);
c6d409cf 2998 set_fs(old_fs);
7a229387
AB
2999
3000 return err;
7a229387 3001 default:
07d106d0 3002 return -ENOIOCTLCMD;
ccbd6a5a 3003 }
7a229387
AB
3004}
3005
590d4693
BH
3006/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3007static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3008 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
3009{
3010 struct ifreq __user *u_ifreq64;
7a229387
AB
3011 char tmp_buf[IFNAMSIZ];
3012 void __user *data64;
3013 u32 data32;
3014
3015 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
3016 IFNAMSIZ))
3017 return -EFAULT;
417c3522 3018 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
3019 return -EFAULT;
3020 data64 = compat_ptr(data32);
3021
3022 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
3023
7a229387
AB
3024 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
3025 IFNAMSIZ))
3026 return -EFAULT;
417c3522 3027 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
3028 return -EFAULT;
3029
6b96018b 3030 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
3031}
3032
6b96018b
AB
3033static int dev_ifsioc(struct net *net, struct socket *sock,
3034 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 3035{
a2116ed2 3036 struct ifreq __user *uifr;
7a229387
AB
3037 int err;
3038
a2116ed2
AB
3039 uifr = compat_alloc_user_space(sizeof(*uifr));
3040 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3041 return -EFAULT;
3042
3043 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3044
7a229387
AB
3045 if (!err) {
3046 switch (cmd) {
3047 case SIOCGIFFLAGS:
3048 case SIOCGIFMETRIC:
3049 case SIOCGIFMTU:
3050 case SIOCGIFMEM:
3051 case SIOCGIFHWADDR:
3052 case SIOCGIFINDEX:
3053 case SIOCGIFADDR:
3054 case SIOCGIFBRDADDR:
3055 case SIOCGIFDSTADDR:
3056 case SIOCGIFNETMASK:
fab2532b 3057 case SIOCGIFPFLAGS:
7a229387 3058 case SIOCGIFTXQLEN:
fab2532b
AB
3059 case SIOCGMIIPHY:
3060 case SIOCGMIIREG:
a2116ed2 3061 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
3062 err = -EFAULT;
3063 break;
3064 }
3065 }
3066 return err;
3067}
3068
a2116ed2
AB
3069static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3070 struct compat_ifreq __user *uifr32)
3071{
3072 struct ifreq ifr;
3073 struct compat_ifmap __user *uifmap32;
3074 mm_segment_t old_fs;
3075 int err;
3076
3077 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3078 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3079 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3080 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3081 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3082 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3083 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3084 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3085 if (err)
3086 return -EFAULT;
3087
3088 old_fs = get_fs();
c6d409cf 3089 set_fs(KERNEL_DS);
c3f52ae6 3090 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 3091 set_fs(old_fs);
a2116ed2
AB
3092
3093 if (cmd == SIOCGIFMAP && !err) {
3094 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3095 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3096 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3097 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3098 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3099 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3100 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3101 if (err)
3102 err = -EFAULT;
3103 }
3104 return err;
3105}
3106
7a229387 3107struct rtentry32 {
c6d409cf 3108 u32 rt_pad1;
7a229387
AB
3109 struct sockaddr rt_dst; /* target address */
3110 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3111 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3112 unsigned short rt_flags;
3113 short rt_pad2;
3114 u32 rt_pad3;
3115 unsigned char rt_tos;
3116 unsigned char rt_class;
3117 short rt_pad4;
3118 short rt_metric; /* +1 for binary compatibility! */
7a229387 3119 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3120 u32 rt_mtu; /* per route MTU/Window */
3121 u32 rt_window; /* Window clamping */
7a229387
AB
3122 unsigned short rt_irtt; /* Initial RTT */
3123};
3124
3125struct in6_rtmsg32 {
3126 struct in6_addr rtmsg_dst;
3127 struct in6_addr rtmsg_src;
3128 struct in6_addr rtmsg_gateway;
3129 u32 rtmsg_type;
3130 u16 rtmsg_dst_len;
3131 u16 rtmsg_src_len;
3132 u32 rtmsg_metric;
3133 u32 rtmsg_info;
3134 u32 rtmsg_flags;
3135 s32 rtmsg_ifindex;
3136};
3137
6b96018b
AB
3138static int routing_ioctl(struct net *net, struct socket *sock,
3139 unsigned int cmd, void __user *argp)
7a229387
AB
3140{
3141 int ret;
3142 void *r = NULL;
3143 struct in6_rtmsg r6;
3144 struct rtentry r4;
3145 char devname[16];
3146 u32 rtdev;
3147 mm_segment_t old_fs = get_fs();
3148
6b96018b
AB
3149 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3150 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3151 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3152 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3153 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3154 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3155 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3156 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3157 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3158 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3159 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3160
3161 r = (void *) &r6;
3162 } else { /* ipv4 */
6b96018b 3163 struct rtentry32 __user *ur4 = argp;
c6d409cf 3164 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3165 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3166 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3167 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3168 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3169 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3170 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3171 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3172 if (rtdev) {
c6d409cf 3173 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3174 r4.rt_dev = (char __user __force *)devname;
3175 devname[15] = 0;
7a229387
AB
3176 } else
3177 r4.rt_dev = NULL;
3178
3179 r = (void *) &r4;
3180 }
3181
3182 if (ret) {
3183 ret = -EFAULT;
3184 goto out;
3185 }
3186
c6d409cf 3187 set_fs(KERNEL_DS);
6b96018b 3188 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3189 set_fs(old_fs);
7a229387
AB
3190
3191out:
7a229387
AB
3192 return ret;
3193}
3194
3195/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3196 * for some operations; this forces use of the newer bridge-utils that
25985edc 3197 * use compatible ioctls
7a229387 3198 */
6b96018b 3199static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3200{
6b96018b 3201 compat_ulong_t tmp;
7a229387 3202
6b96018b 3203 if (get_user(tmp, argp))
7a229387
AB
3204 return -EFAULT;
3205 if (tmp == BRCTL_GET_VERSION)
3206 return BRCTL_VERSION + 1;
3207 return -EINVAL;
3208}
3209
6b96018b
AB
3210static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3211 unsigned int cmd, unsigned long arg)
3212{
3213 void __user *argp = compat_ptr(arg);
3214 struct sock *sk = sock->sk;
3215 struct net *net = sock_net(sk);
7a229387 3216
6b96018b 3217 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3218 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3219
3220 switch (cmd) {
3221 case SIOCSIFBR:
3222 case SIOCGIFBR:
3223 return old_bridge_ioctl(argp);
3224 case SIOCGIFNAME:
3225 return dev_ifname32(net, argp);
3226 case SIOCGIFCONF:
3227 return dev_ifconf(net, argp);
3228 case SIOCETHTOOL:
3229 return ethtool_ioctl(net, argp);
7a50a240
AB
3230 case SIOCWANDEV:
3231 return compat_siocwandev(net, argp);
a2116ed2
AB
3232 case SIOCGIFMAP:
3233 case SIOCSIFMAP:
3234 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3235 case SIOCBONDENSLAVE:
3236 case SIOCBONDRELEASE:
3237 case SIOCBONDSETHWADDR:
6b96018b
AB
3238 case SIOCBONDCHANGEACTIVE:
3239 return bond_ioctl(net, cmd, argp);
3240 case SIOCADDRT:
3241 case SIOCDELRT:
3242 return routing_ioctl(net, sock, cmd, argp);
3243 case SIOCGSTAMP:
3244 return do_siocgstamp(net, sock, cmd, argp);
3245 case SIOCGSTAMPNS:
3246 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3247 case SIOCBONDSLAVEINFOQUERY:
3248 case SIOCBONDINFOQUERY:
a2116ed2 3249 case SIOCSHWTSTAMP:
fd468c74 3250 case SIOCGHWTSTAMP:
590d4693 3251 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3252
3253 case FIOSETOWN:
3254 case SIOCSPGRP:
3255 case FIOGETOWN:
3256 case SIOCGPGRP:
3257 case SIOCBRADDBR:
3258 case SIOCBRDELBR:
3259 case SIOCGIFVLAN:
3260 case SIOCSIFVLAN:
3261 case SIOCADDDLCI:
3262 case SIOCDELDLCI:
3263 return sock_ioctl(file, cmd, arg);
3264
3265 case SIOCGIFFLAGS:
3266 case SIOCSIFFLAGS:
3267 case SIOCGIFMETRIC:
3268 case SIOCSIFMETRIC:
3269 case SIOCGIFMTU:
3270 case SIOCSIFMTU:
3271 case SIOCGIFMEM:
3272 case SIOCSIFMEM:
3273 case SIOCGIFHWADDR:
3274 case SIOCSIFHWADDR:
3275 case SIOCADDMULTI:
3276 case SIOCDELMULTI:
3277 case SIOCGIFINDEX:
6b96018b
AB
3278 case SIOCGIFADDR:
3279 case SIOCSIFADDR:
3280 case SIOCSIFHWBROADCAST:
6b96018b 3281 case SIOCDIFADDR:
6b96018b
AB
3282 case SIOCGIFBRDADDR:
3283 case SIOCSIFBRDADDR:
3284 case SIOCGIFDSTADDR:
3285 case SIOCSIFDSTADDR:
3286 case SIOCGIFNETMASK:
3287 case SIOCSIFNETMASK:
3288 case SIOCSIFPFLAGS:
3289 case SIOCGIFPFLAGS:
3290 case SIOCGIFTXQLEN:
3291 case SIOCSIFTXQLEN:
3292 case SIOCBRADDIF:
3293 case SIOCBRDELIF:
9177efd3
AB
3294 case SIOCSIFNAME:
3295 case SIOCGMIIPHY:
3296 case SIOCGMIIREG:
3297 case SIOCSMIIREG:
6b96018b 3298 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3299
6b96018b
AB
3300 case SIOCSARP:
3301 case SIOCGARP:
3302 case SIOCDARP:
6b96018b 3303 case SIOCATMARK:
9177efd3
AB
3304 return sock_do_ioctl(net, sock, cmd, arg);
3305 }
3306
6b96018b
AB
3307 return -ENOIOCTLCMD;
3308}
7a229387 3309
95c96174 3310static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3311 unsigned long arg)
89bbfc95
SP
3312{
3313 struct socket *sock = file->private_data;
3314 int ret = -ENOIOCTLCMD;
87de87d5
DM
3315 struct sock *sk;
3316 struct net *net;
3317
3318 sk = sock->sk;
3319 net = sock_net(sk);
89bbfc95
SP
3320
3321 if (sock->ops->compat_ioctl)
3322 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3323
87de87d5
DM
3324 if (ret == -ENOIOCTLCMD &&
3325 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3326 ret = compat_wext_handle_ioctl(net, cmd, arg);
3327
6b96018b
AB
3328 if (ret == -ENOIOCTLCMD)
3329 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3330
89bbfc95
SP
3331 return ret;
3332}
3333#endif
3334
ac5a488e
SS
3335int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3336{
3337 return sock->ops->bind(sock, addr, addrlen);
3338}
c6d409cf 3339EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3340
3341int kernel_listen(struct socket *sock, int backlog)
3342{
3343 return sock->ops->listen(sock, backlog);
3344}
c6d409cf 3345EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3346
3347int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3348{
3349 struct sock *sk = sock->sk;
3350 int err;
3351
3352 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3353 newsock);
3354 if (err < 0)
3355 goto done;
3356
3357 err = sock->ops->accept(sock, *newsock, flags);
3358 if (err < 0) {
3359 sock_release(*newsock);
fa8705b0 3360 *newsock = NULL;
ac5a488e
SS
3361 goto done;
3362 }
3363
3364 (*newsock)->ops = sock->ops;
1b08534e 3365 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3366
3367done:
3368 return err;
3369}
c6d409cf 3370EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3371
3372int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3373 int flags)
ac5a488e
SS
3374{
3375 return sock->ops->connect(sock, addr, addrlen, flags);
3376}
c6d409cf 3377EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3378
3379int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3380 int *addrlen)
3381{
3382 return sock->ops->getname(sock, addr, addrlen, 0);
3383}
c6d409cf 3384EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3385
3386int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3387 int *addrlen)
3388{
3389 return sock->ops->getname(sock, addr, addrlen, 1);
3390}
c6d409cf 3391EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3392
3393int kernel_getsockopt(struct socket *sock, int level, int optname,
3394 char *optval, int *optlen)
3395{
3396 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3397 char __user *uoptval;
3398 int __user *uoptlen;
ac5a488e
SS
3399 int err;
3400
fb8621bb
NK
3401 uoptval = (char __user __force *) optval;
3402 uoptlen = (int __user __force *) optlen;
3403
ac5a488e
SS
3404 set_fs(KERNEL_DS);
3405 if (level == SOL_SOCKET)
fb8621bb 3406 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3407 else
fb8621bb
NK
3408 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3409 uoptlen);
ac5a488e
SS
3410 set_fs(oldfs);
3411 return err;
3412}
c6d409cf 3413EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3414
3415int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3416 char *optval, unsigned int optlen)
ac5a488e
SS
3417{
3418 mm_segment_t oldfs = get_fs();
fb8621bb 3419 char __user *uoptval;
ac5a488e
SS
3420 int err;
3421
fb8621bb
NK
3422 uoptval = (char __user __force *) optval;
3423
ac5a488e
SS
3424 set_fs(KERNEL_DS);
3425 if (level == SOL_SOCKET)
fb8621bb 3426 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3427 else
fb8621bb 3428 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3429 optlen);
3430 set_fs(oldfs);
3431 return err;
3432}
c6d409cf 3433EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3434
3435int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3436 size_t size, int flags)
3437{
3438 if (sock->ops->sendpage)
3439 return sock->ops->sendpage(sock, page, offset, size, flags);
3440
3441 return sock_no_sendpage(sock, page, offset, size, flags);
3442}
c6d409cf 3443EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3444
3445int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3446{
3447 mm_segment_t oldfs = get_fs();
3448 int err;
3449
3450 set_fs(KERNEL_DS);
3451 err = sock->ops->ioctl(sock, cmd, arg);
3452 set_fs(oldfs);
3453
3454 return err;
3455}
c6d409cf 3456EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3457
91cf45f0
TM
3458int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3459{
3460 return sock->ops->shutdown(sock, how);
3461}
91cf45f0 3462EXPORT_SYMBOL(kernel_sock_shutdown);