net/socket.c: fold do_sock_{read,write} into callers
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
027445c3
BP
116static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
117 unsigned long nr_segs, loff_t pos);
118static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
119 unsigned long nr_segs, loff_t pos);
89bddce5 120static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
121
122static int sock_close(struct inode *inode, struct file *file);
123static unsigned int sock_poll(struct file *file,
124 struct poll_table_struct *wait);
89bddce5 125static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
126#ifdef CONFIG_COMPAT
127static long compat_sock_ioctl(struct file *file,
89bddce5 128 unsigned int cmd, unsigned long arg);
89bbfc95 129#endif
1da177e4 130static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
131static ssize_t sock_sendpage(struct file *file, struct page *page,
132 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 133static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 134 struct pipe_inode_info *pipe, size_t len,
9c55e01c 135 unsigned int flags);
1da177e4 136
1da177e4
LT
137/*
138 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
139 * in the operation structures but are done directly via the socketcall() multiplexor.
140 */
141
da7071d7 142static const struct file_operations socket_file_ops = {
1da177e4
LT
143 .owner = THIS_MODULE,
144 .llseek = no_llseek,
145 .aio_read = sock_aio_read,
146 .aio_write = sock_aio_write,
147 .poll = sock_poll,
148 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
149#ifdef CONFIG_COMPAT
150 .compat_ioctl = compat_sock_ioctl,
151#endif
1da177e4 152 .mmap = sock_mmap,
1da177e4
LT
153 .release = sock_close,
154 .fasync = sock_fasync,
5274f052
JA
155 .sendpage = sock_sendpage,
156 .splice_write = generic_splice_sendpage,
9c55e01c 157 .splice_read = sock_splice_read,
1da177e4
LT
158};
159
160/*
161 * The protocol list. Each protocol is registered in here.
162 */
163
1da177e4 164static DEFINE_SPINLOCK(net_family_lock);
190683a9 165static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 166
1da177e4
LT
167/*
168 * Statistics counters of the socket lists
169 */
170
c6d409cf 171static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
172
173/*
89bddce5
SH
174 * Support routines.
175 * Move socket addresses back and forth across the kernel/user
176 * divide and look after the messy bits.
1da177e4
LT
177 */
178
1da177e4
LT
179/**
180 * move_addr_to_kernel - copy a socket address into kernel space
181 * @uaddr: Address in user space
182 * @kaddr: Address in kernel space
183 * @ulen: Length in user space
184 *
185 * The address is copied into kernel space. If the provided address is
186 * too long an error code of -EINVAL is returned. If the copy gives
187 * invalid addresses -EFAULT is returned. On a success 0 is returned.
188 */
189
43db362d 190int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 191{
230b1839 192 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 193 return -EINVAL;
89bddce5 194 if (ulen == 0)
1da177e4 195 return 0;
89bddce5 196 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 197 return -EFAULT;
3ec3b2fb 198 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
199}
200
201/**
202 * move_addr_to_user - copy an address to user space
203 * @kaddr: kernel space address
204 * @klen: length of address in kernel
205 * @uaddr: user space address
206 * @ulen: pointer to user length field
207 *
208 * The value pointed to by ulen on entry is the buffer length available.
209 * This is overwritten with the buffer space used. -EINVAL is returned
210 * if an overlong buffer is specified or a negative buffer size. -EFAULT
211 * is returned if either the buffer or the length field are not
212 * accessible.
213 * After copying the data up to the limit the user specifies, the true
214 * length of the data is written over the length limit the user
215 * specified. Zero is returned for a success.
216 */
89bddce5 217
43db362d 218static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 219 void __user *uaddr, int __user *ulen)
1da177e4
LT
220{
221 int err;
222 int len;
223
68c6beb3 224 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
225 err = get_user(len, ulen);
226 if (err)
1da177e4 227 return err;
89bddce5
SH
228 if (len > klen)
229 len = klen;
68c6beb3 230 if (len < 0)
1da177e4 231 return -EINVAL;
89bddce5 232 if (len) {
d6fe3945
SG
233 if (audit_sockaddr(klen, kaddr))
234 return -ENOMEM;
89bddce5 235 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
236 return -EFAULT;
237 }
238 /*
89bddce5
SH
239 * "fromlen shall refer to the value before truncation.."
240 * 1003.1g
1da177e4
LT
241 */
242 return __put_user(klen, ulen);
243}
244
e18b890b 245static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
246
247static struct inode *sock_alloc_inode(struct super_block *sb)
248{
249 struct socket_alloc *ei;
eaefd110 250 struct socket_wq *wq;
89bddce5 251
e94b1766 252 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
253 if (!ei)
254 return NULL;
eaefd110
ED
255 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
256 if (!wq) {
43815482
ED
257 kmem_cache_free(sock_inode_cachep, ei);
258 return NULL;
259 }
eaefd110
ED
260 init_waitqueue_head(&wq->wait);
261 wq->fasync_list = NULL;
262 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 263
1da177e4
LT
264 ei->socket.state = SS_UNCONNECTED;
265 ei->socket.flags = 0;
266 ei->socket.ops = NULL;
267 ei->socket.sk = NULL;
268 ei->socket.file = NULL;
1da177e4
LT
269
270 return &ei->vfs_inode;
271}
272
273static void sock_destroy_inode(struct inode *inode)
274{
43815482 275 struct socket_alloc *ei;
eaefd110 276 struct socket_wq *wq;
43815482
ED
277
278 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 279 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 280 kfree_rcu(wq, rcu);
43815482 281 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
282}
283
51cc5068 284static void init_once(void *foo)
1da177e4 285{
89bddce5 286 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 287
a35afb83 288 inode_init_once(&ei->vfs_inode);
1da177e4 289}
89bddce5 290
1da177e4
LT
291static int init_inodecache(void)
292{
293 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
294 sizeof(struct socket_alloc),
295 0,
296 (SLAB_HWCACHE_ALIGN |
297 SLAB_RECLAIM_ACCOUNT |
298 SLAB_MEM_SPREAD),
20c2df83 299 init_once);
1da177e4
LT
300 if (sock_inode_cachep == NULL)
301 return -ENOMEM;
302 return 0;
303}
304
b87221de 305static const struct super_operations sockfs_ops = {
c6d409cf
ED
306 .alloc_inode = sock_alloc_inode,
307 .destroy_inode = sock_destroy_inode,
308 .statfs = simple_statfs,
1da177e4
LT
309};
310
c23fbb6b
ED
311/*
312 * sockfs_dname() is called from d_path().
313 */
314static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
315{
316 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
317 dentry->d_inode->i_ino);
318}
319
3ba13d17 320static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 321 .d_dname = sockfs_dname,
1da177e4
LT
322};
323
c74a1cbb
AV
324static struct dentry *sockfs_mount(struct file_system_type *fs_type,
325 int flags, const char *dev_name, void *data)
326{
327 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
328 &sockfs_dentry_operations, SOCKFS_MAGIC);
329}
330
331static struct vfsmount *sock_mnt __read_mostly;
332
333static struct file_system_type sock_fs_type = {
334 .name = "sockfs",
335 .mount = sockfs_mount,
336 .kill_sb = kill_anon_super,
337};
338
1da177e4
LT
339/*
340 * Obtains the first available file descriptor and sets it up for use.
341 *
39d8c1b6
DM
342 * These functions create file structures and maps them to fd space
343 * of the current process. On success it returns file descriptor
1da177e4
LT
344 * and file struct implicitly stored in sock->file.
345 * Note that another thread may close file descriptor before we return
346 * from this function. We use the fact that now we do not refer
347 * to socket after mapping. If one day we will need it, this
348 * function will increment ref. count on file by 1.
349 *
350 * In any case returned fd MAY BE not valid!
351 * This race condition is unavoidable
352 * with shared fd spaces, we cannot solve it inside kernel,
353 * but we take care of internal coherence yet.
354 */
355
aab174f0 356struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 357{
7cbe66b6 358 struct qstr name = { .name = "" };
2c48b9c4 359 struct path path;
7cbe66b6 360 struct file *file;
1da177e4 361
600e1779
MY
362 if (dname) {
363 name.name = dname;
364 name.len = strlen(name.name);
365 } else if (sock->sk) {
366 name.name = sock->sk->sk_prot_creator->name;
367 name.len = strlen(name.name);
368 }
4b936885 369 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
370 if (unlikely(!path.dentry))
371 return ERR_PTR(-ENOMEM);
2c48b9c4 372 path.mnt = mntget(sock_mnt);
39d8c1b6 373
2c48b9c4 374 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 375
2c48b9c4 376 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 377 &socket_file_ops);
39b65252 378 if (unlikely(IS_ERR(file))) {
cc3808f8 379 /* drop dentry, keep inode */
7de9c6ee 380 ihold(path.dentry->d_inode);
2c48b9c4 381 path_put(&path);
39b65252 382 return file;
cc3808f8
AV
383 }
384
385 sock->file = file;
77d27200 386 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 387 file->private_data = sock;
28407630 388 return file;
39d8c1b6 389}
56b31d1c 390EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 391
56b31d1c 392static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
393{
394 struct file *newfile;
28407630
AV
395 int fd = get_unused_fd_flags(flags);
396 if (unlikely(fd < 0))
397 return fd;
39d8c1b6 398
aab174f0 399 newfile = sock_alloc_file(sock, flags, NULL);
28407630 400 if (likely(!IS_ERR(newfile))) {
39d8c1b6 401 fd_install(fd, newfile);
28407630
AV
402 return fd;
403 }
7cbe66b6 404
28407630
AV
405 put_unused_fd(fd);
406 return PTR_ERR(newfile);
1da177e4
LT
407}
408
406a3c63 409struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 410{
6cb153ca
BL
411 if (file->f_op == &socket_file_ops)
412 return file->private_data; /* set in sock_map_fd */
413
23bb80d2
ED
414 *err = -ENOTSOCK;
415 return NULL;
6cb153ca 416}
406a3c63 417EXPORT_SYMBOL(sock_from_file);
6cb153ca 418
1da177e4 419/**
c6d409cf 420 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
421 * @fd: file handle
422 * @err: pointer to an error code return
423 *
424 * The file handle passed in is locked and the socket it is bound
425 * too is returned. If an error occurs the err pointer is overwritten
426 * with a negative errno code and NULL is returned. The function checks
427 * for both invalid handles and passing a handle which is not a socket.
428 *
429 * On a success the socket object pointer is returned.
430 */
431
432struct socket *sockfd_lookup(int fd, int *err)
433{
434 struct file *file;
1da177e4
LT
435 struct socket *sock;
436
89bddce5
SH
437 file = fget(fd);
438 if (!file) {
1da177e4
LT
439 *err = -EBADF;
440 return NULL;
441 }
89bddce5 442
6cb153ca
BL
443 sock = sock_from_file(file, err);
444 if (!sock)
1da177e4 445 fput(file);
6cb153ca
BL
446 return sock;
447}
c6d409cf 448EXPORT_SYMBOL(sockfd_lookup);
1da177e4 449
6cb153ca
BL
450static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
451{
00e188ef 452 struct fd f = fdget(fd);
6cb153ca
BL
453 struct socket *sock;
454
3672558c 455 *err = -EBADF;
00e188ef
AV
456 if (f.file) {
457 sock = sock_from_file(f.file, err);
458 if (likely(sock)) {
459 *fput_needed = f.flags;
6cb153ca 460 return sock;
00e188ef
AV
461 }
462 fdput(f);
1da177e4 463 }
6cb153ca 464 return NULL;
1da177e4
LT
465}
466
600e1779
MY
467#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
468#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
469#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
470static ssize_t sockfs_getxattr(struct dentry *dentry,
471 const char *name, void *value, size_t size)
472{
473 const char *proto_name;
474 size_t proto_size;
475 int error;
476
477 error = -ENODATA;
478 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
479 proto_name = dentry->d_name.name;
480 proto_size = strlen(proto_name);
481
482 if (value) {
483 error = -ERANGE;
484 if (proto_size + 1 > size)
485 goto out;
486
487 strncpy(value, proto_name, proto_size + 1);
488 }
489 error = proto_size + 1;
490 }
491
492out:
493 return error;
494}
495
496static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
497 size_t size)
498{
499 ssize_t len;
500 ssize_t used = 0;
501
502 len = security_inode_listsecurity(dentry->d_inode, buffer, size);
503 if (len < 0)
504 return len;
505 used += len;
506 if (buffer) {
507 if (size < used)
508 return -ERANGE;
509 buffer += len;
510 }
511
512 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
513 used += len;
514 if (buffer) {
515 if (size < used)
516 return -ERANGE;
517 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
518 buffer += len;
519 }
520
521 return used;
522}
523
524static const struct inode_operations sockfs_inode_ops = {
525 .getxattr = sockfs_getxattr,
526 .listxattr = sockfs_listxattr,
527};
528
1da177e4
LT
529/**
530 * sock_alloc - allocate a socket
89bddce5 531 *
1da177e4
LT
532 * Allocate a new inode and socket object. The two are bound together
533 * and initialised. The socket is then returned. If we are out of inodes
534 * NULL is returned.
535 */
536
537static struct socket *sock_alloc(void)
538{
89bddce5
SH
539 struct inode *inode;
540 struct socket *sock;
1da177e4 541
a209dfc7 542 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
543 if (!inode)
544 return NULL;
545
546 sock = SOCKET_I(inode);
547
29a020d3 548 kmemcheck_annotate_bitfield(sock, type);
85fe4025 549 inode->i_ino = get_next_ino();
89bddce5 550 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
551 inode->i_uid = current_fsuid();
552 inode->i_gid = current_fsgid();
600e1779 553 inode->i_op = &sockfs_inode_ops;
1da177e4 554
19e8d69c 555 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
556 return sock;
557}
558
1da177e4
LT
559/**
560 * sock_release - close a socket
561 * @sock: socket to close
562 *
563 * The socket is released from the protocol stack if it has a release
564 * callback, and the inode is then released if the socket is bound to
89bddce5 565 * an inode not a file.
1da177e4 566 */
89bddce5 567
1da177e4
LT
568void sock_release(struct socket *sock)
569{
570 if (sock->ops) {
571 struct module *owner = sock->ops->owner;
572
573 sock->ops->release(sock);
574 sock->ops = NULL;
575 module_put(owner);
576 }
577
eaefd110 578 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 579 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 580
b09e786b
MP
581 if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
582 return;
583
19e8d69c 584 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
585 if (!sock->file) {
586 iput(SOCK_INODE(sock));
587 return;
588 }
89bddce5 589 sock->file = NULL;
1da177e4 590}
c6d409cf 591EXPORT_SYMBOL(sock_release);
1da177e4 592
67cc0d40 593void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
20d49473 594{
140c55d4
ED
595 u8 flags = *tx_flags;
596
b9f40e21 597 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
598 flags |= SKBTX_HW_TSTAMP;
599
b9f40e21 600 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
601 flags |= SKBTX_SW_TSTAMP;
602
e7fd2885 603 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
604 flags |= SKBTX_SCHED_TSTAMP;
605
e1c8a607 606 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
140c55d4 607 flags |= SKBTX_ACK_TSTAMP;
e7fd2885 608
140c55d4 609 *tx_flags = flags;
20d49473 610}
67cc0d40 611EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 612
228e548e
AB
613static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
614 struct msghdr *msg, size_t size)
1da177e4 615{
1da177e4
LT
616 return sock->ops->sendmsg(iocb, sock, msg, size);
617}
618
228e548e
AB
619static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
620 struct msghdr *msg, size_t size)
621{
622 int err = security_socket_sendmsg(sock, msg, size);
623
624 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
625}
626
0cf00c6f
GZ
627static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg,
628 size_t size, bool nosec)
1da177e4
LT
629{
630 struct kiocb iocb;
1da177e4
LT
631 int ret;
632
633 init_sync_kiocb(&iocb, NULL);
0cf00c6f
GZ
634 ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) :
635 __sock_sendmsg(&iocb, sock, msg, size);
1da177e4
LT
636 if (-EIOCBQUEUED == ret)
637 ret = wait_on_sync_kiocb(&iocb);
638 return ret;
639}
0cf00c6f
GZ
640
641int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
642{
643 return do_sock_sendmsg(sock, msg, size, false);
644}
c6d409cf 645EXPORT_SYMBOL(sock_sendmsg);
1da177e4 646
894dc24c 647static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e 648{
0cf00c6f 649 return do_sock_sendmsg(sock, msg, size, true);
228e548e
AB
650}
651
1da177e4
LT
652int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
653 struct kvec *vec, size_t num, size_t size)
654{
655 mm_segment_t oldfs = get_fs();
656 int result;
657
658 set_fs(KERNEL_DS);
659 /*
660 * the following is safe, since for compiler definitions of kvec and
661 * iovec are identical, yielding the same in-core layout and alignment
662 */
c0371da6 663 iov_iter_init(&msg->msg_iter, WRITE, (struct iovec *)vec, num, size);
1da177e4
LT
664 result = sock_sendmsg(sock, msg, size);
665 set_fs(oldfs);
666 return result;
667}
c6d409cf 668EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 669
92f37fd2
ED
670/*
671 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
672 */
673void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
674 struct sk_buff *skb)
675{
20d49473 676 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 677 struct scm_timestamping tss;
20d49473
PO
678 int empty = 1;
679 struct skb_shared_hwtstamps *shhwtstamps =
680 skb_hwtstamps(skb);
681
682 /* Race occurred between timestamp enabling and packet
683 receiving. Fill in the current time for now. */
684 if (need_software_tstamp && skb->tstamp.tv64 == 0)
685 __net_timestamp(skb);
686
687 if (need_software_tstamp) {
688 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
689 struct timeval tv;
690 skb_get_timestamp(skb, &tv);
691 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
692 sizeof(tv), &tv);
693 } else {
f24b9be5
WB
694 struct timespec ts;
695 skb_get_timestampns(skb, &ts);
20d49473 696 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 697 sizeof(ts), &ts);
20d49473
PO
698 }
699 }
700
f24b9be5 701 memset(&tss, 0, sizeof(tss));
c199105d 702 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 703 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 704 empty = 0;
4d276eb6 705 if (shhwtstamps &&
b9f40e21 706 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 707 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 708 empty = 0;
20d49473
PO
709 if (!empty)
710 put_cmsg(msg, SOL_SOCKET,
f24b9be5 711 SCM_TIMESTAMPING, sizeof(tss), &tss);
92f37fd2 712}
7c81fd8b
ACM
713EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
714
6e3e939f
JB
715void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
716 struct sk_buff *skb)
717{
718 int ack;
719
720 if (!sock_flag(sk, SOCK_WIFI_STATUS))
721 return;
722 if (!skb->wifi_acked_valid)
723 return;
724
725 ack = skb->wifi_acked;
726
727 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
728}
729EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
730
11165f14 731static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
732 struct sk_buff *skb)
3b885787
NH
733{
734 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
735 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
736 sizeof(__u32), &skb->dropcount);
737}
738
/*
 * Add receive-side ancillary data for @skb to @msg: timestamp
 * information first, then the drop counter.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	/* Order is kept as-is: timestamp cmsgs precede the drop count. */
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
767dd033 745EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 746
a2e27255
ACM
747static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
748 struct msghdr *msg, size_t size, int flags)
1da177e4 749{
1da177e4
LT
750 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
751}
752
a2e27255
ACM
753static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
754 struct msghdr *msg, size_t size, int flags)
755{
756 int err = security_socket_recvmsg(sock, msg, size, flags);
757
758 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
759}
760
89bddce5 761int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
762 size_t size, int flags)
763{
764 struct kiocb iocb;
1da177e4
LT
765 int ret;
766
89bddce5 767 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
768 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
769 if (-EIOCBQUEUED == ret)
770 ret = wait_on_sync_kiocb(&iocb);
771 return ret;
772}
c6d409cf 773EXPORT_SYMBOL(sock_recvmsg);
1da177e4 774
a2e27255
ACM
775static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
776 size_t size, int flags)
777{
778 struct kiocb iocb;
a2e27255
ACM
779 int ret;
780
781 init_sync_kiocb(&iocb, NULL);
a2e27255
ACM
782 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
783 if (-EIOCBQUEUED == ret)
784 ret = wait_on_sync_kiocb(&iocb);
785 return ret;
786}
787
c1249c0a
ML
788/**
789 * kernel_recvmsg - Receive a message from a socket (kernel space)
790 * @sock: The socket to receive the message from
791 * @msg: Received message
792 * @vec: Input s/g array for message data
793 * @num: Size of input s/g array
794 * @size: Number of bytes to read
795 * @flags: Message flags (MSG_DONTWAIT, etc...)
796 *
797 * On return the msg structure contains the scatter/gather array passed in the
798 * vec argument. The array is modified so that it consists of the unfilled
799 * portion of the original array.
800 *
801 * The returned value is the total number of bytes received, or an error.
802 */
89bddce5
SH
803int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
804 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
805{
806 mm_segment_t oldfs = get_fs();
807 int result;
808
809 set_fs(KERNEL_DS);
810 /*
811 * the following is safe, since for compiler definitions of kvec and
812 * iovec are identical, yielding the same in-core layout and alignment
813 */
c0371da6 814 iov_iter_init(&msg->msg_iter, READ, (struct iovec *)vec, num, size);
1da177e4
LT
815 result = sock_recvmsg(sock, msg, size, flags);
816 set_fs(oldfs);
817 return result;
818}
c6d409cf 819EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 820
ce1d4d3e
CH
821static ssize_t sock_sendpage(struct file *file, struct page *page,
822 int offset, size_t size, loff_t *ppos, int more)
1da177e4 823{
1da177e4
LT
824 struct socket *sock;
825 int flags;
826
ce1d4d3e
CH
827 sock = file->private_data;
828
35f9c09f
ED
829 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
830 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
831 flags |= more;
ce1d4d3e 832
e6949583 833 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 834}
1da177e4 835
9c55e01c 836static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 837 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
838 unsigned int flags)
839{
840 struct socket *sock = file->private_data;
841
997b37da
RDC
842 if (unlikely(!sock->ops->splice_read))
843 return -EINVAL;
844
9c55e01c
JA
845 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
846}
847
027445c3
BP
848static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
849 unsigned long nr_segs, loff_t pos)
ce1d4d3e 850{
6d652330
AV
851 struct file *file = iocb->ki_filp;
852 struct socket *sock = file->private_data;
7cc05662 853 struct msghdr msg;
ce1d4d3e 854
1da177e4
LT
855 if (pos != 0)
856 return -ESPIPE;
027445c3 857
73a7075e 858 if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
1da177e4
LT
859 return 0;
860
6d652330
AV
861 msg.msg_name = NULL;
862 msg.msg_namelen = 0;
863 msg.msg_control = NULL;
864 msg.msg_controllen = 0;
865 iov_iter_init(&msg.msg_iter, READ, iov, nr_segs, iocb->ki_nbytes);
866 msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
1da177e4 867
6d652330 868 return __sock_recvmsg(iocb, sock, &msg, iocb->ki_nbytes, msg.msg_flags);
1da177e4
LT
869}
870
027445c3
BP
871static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
872 unsigned long nr_segs, loff_t pos)
ce1d4d3e 873{
6d652330
AV
874 struct file *file = iocb->ki_filp;
875 struct socket *sock = file->private_data;
7cc05662 876 struct msghdr msg;
1da177e4 877
ce1d4d3e
CH
878 if (pos != 0)
879 return -ESPIPE;
027445c3 880
6d652330
AV
881 msg.msg_name = NULL;
882 msg.msg_namelen = 0;
883 msg.msg_control = NULL;
884 msg.msg_controllen = 0;
885 iov_iter_init(&msg.msg_iter, WRITE, iov, nr_segs, iocb->ki_nbytes);
886 msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
887 if (sock->type == SOCK_SEQPACKET)
888 msg.msg_flags |= MSG_EOR;
889
890 return __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes);
1da177e4
LT
891}
892
1da177e4
LT
893/*
894 * Atomic setting of ioctl hooks to avoid race
895 * with module unload.
896 */
897
4a3e2f71 898static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 899static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 900
881d966b 901void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 902{
4a3e2f71 903 mutex_lock(&br_ioctl_mutex);
1da177e4 904 br_ioctl_hook = hook;
4a3e2f71 905 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
906}
907EXPORT_SYMBOL(brioctl_set);
908
4a3e2f71 909static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 910static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 911
881d966b 912void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 913{
4a3e2f71 914 mutex_lock(&vlan_ioctl_mutex);
1da177e4 915 vlan_ioctl_hook = hook;
4a3e2f71 916 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
917}
918EXPORT_SYMBOL(vlan_ioctl_set);
919
4a3e2f71 920static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 921static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 922
89bddce5 923void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 924{
4a3e2f71 925 mutex_lock(&dlci_ioctl_mutex);
1da177e4 926 dlci_ioctl_hook = hook;
4a3e2f71 927 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
928}
929EXPORT_SYMBOL(dlci_ioctl_set);
930
6b96018b
AB
931static long sock_do_ioctl(struct net *net, struct socket *sock,
932 unsigned int cmd, unsigned long arg)
933{
934 int err;
935 void __user *argp = (void __user *)arg;
936
937 err = sock->ops->ioctl(sock, cmd, arg);
938
939 /*
940 * If this ioctl is unknown try to hand it down
941 * to the NIC driver.
942 */
943 if (err == -ENOIOCTLCMD)
944 err = dev_ioctl(net, cmd, argp);
945
946 return err;
947}
948
1da177e4
LT
949/*
950 * With an ioctl, arg may well be a user mode pointer, but we don't know
951 * what to do with it - that's up to the protocol still.
952 */
953
954static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
955{
956 struct socket *sock;
881d966b 957 struct sock *sk;
1da177e4
LT
958 void __user *argp = (void __user *)arg;
959 int pid, err;
881d966b 960 struct net *net;
1da177e4 961
b69aee04 962 sock = file->private_data;
881d966b 963 sk = sock->sk;
3b1e0a65 964 net = sock_net(sk);
1da177e4 965 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 966 err = dev_ioctl(net, cmd, argp);
1da177e4 967 } else
3d23e349 968#ifdef CONFIG_WEXT_CORE
1da177e4 969 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 970 err = dev_ioctl(net, cmd, argp);
1da177e4 971 } else
3d23e349 972#endif
89bddce5 973 switch (cmd) {
1da177e4
LT
974 case FIOSETOWN:
975 case SIOCSPGRP:
976 err = -EFAULT;
977 if (get_user(pid, (int __user *)argp))
978 break;
e0b93edd
JL
979 f_setown(sock->file, pid, 1);
980 err = 0;
1da177e4
LT
981 break;
982 case FIOGETOWN:
983 case SIOCGPGRP:
609d7fa9 984 err = put_user(f_getown(sock->file),
89bddce5 985 (int __user *)argp);
1da177e4
LT
986 break;
987 case SIOCGIFBR:
988 case SIOCSIFBR:
989 case SIOCBRADDBR:
990 case SIOCBRDELBR:
991 err = -ENOPKG;
992 if (!br_ioctl_hook)
993 request_module("bridge");
994
4a3e2f71 995 mutex_lock(&br_ioctl_mutex);
89bddce5 996 if (br_ioctl_hook)
881d966b 997 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 998 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
999 break;
1000 case SIOCGIFVLAN:
1001 case SIOCSIFVLAN:
1002 err = -ENOPKG;
1003 if (!vlan_ioctl_hook)
1004 request_module("8021q");
1005
4a3e2f71 1006 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1007 if (vlan_ioctl_hook)
881d966b 1008 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1009 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1010 break;
1da177e4
LT
1011 case SIOCADDDLCI:
1012 case SIOCDELDLCI:
1013 err = -ENOPKG;
1014 if (!dlci_ioctl_hook)
1015 request_module("dlci");
1016
7512cbf6
PE
1017 mutex_lock(&dlci_ioctl_mutex);
1018 if (dlci_ioctl_hook)
1da177e4 1019 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1020 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1021 break;
1022 default:
6b96018b 1023 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1024 break;
89bddce5 1025 }
1da177e4
LT
1026 return err;
1027}
1028
1029int sock_create_lite(int family, int type, int protocol, struct socket **res)
1030{
1031 int err;
1032 struct socket *sock = NULL;
89bddce5 1033
1da177e4
LT
1034 err = security_socket_create(family, type, protocol, 1);
1035 if (err)
1036 goto out;
1037
1038 sock = sock_alloc();
1039 if (!sock) {
1040 err = -ENOMEM;
1041 goto out;
1042 }
1043
1da177e4 1044 sock->type = type;
7420ed23
VY
1045 err = security_socket_post_create(sock, family, type, protocol, 1);
1046 if (err)
1047 goto out_release;
1048
1da177e4
LT
1049out:
1050 *res = sock;
1051 return err;
7420ed23
VY
1052out_release:
1053 sock_release(sock);
1054 sock = NULL;
1055 goto out;
1da177e4 1056}
c6d409cf 1057EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1058
1059/* No kernel lock held - perfect */
89bddce5 1060static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1061{
cbf55001 1062 unsigned int busy_flag = 0;
1da177e4
LT
1063 struct socket *sock;
1064
1065 /*
89bddce5 1066 * We can't return errors to poll, so it's either yes or no.
1da177e4 1067 */
b69aee04 1068 sock = file->private_data;
2d48d67f 1069
cbf55001 1070 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1071 /* this socket can poll_ll so tell the system call */
cbf55001 1072 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1073
1074 /* once, only if requested by syscall */
cbf55001
ET
1075 if (wait && (wait->_key & POLL_BUSY_LOOP))
1076 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1077 }
1078
cbf55001 1079 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1080}
1081
89bddce5 1082static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1083{
b69aee04 1084 struct socket *sock = file->private_data;
1da177e4
LT
1085
1086 return sock->ops->mmap(file, sock, vma);
1087}
1088
/* release() entry point for socket files: releases the inode's socket. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1094
1095/*
1096 * Update the socket async list
1097 *
1098 * Fasync_list locking strategy.
1099 *
1100 * 1. fasync_list is modified only under process context socket lock
1101 * i.e. under semaphore.
1102 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1103 * or under socket lock
1da177e4
LT
1104 */
1105
1106static int sock_fasync(int fd, struct file *filp, int on)
1107{
989a2979
ED
1108 struct socket *sock = filp->private_data;
1109 struct sock *sk = sock->sk;
eaefd110 1110 struct socket_wq *wq;
1da177e4 1111
989a2979 1112 if (sk == NULL)
1da177e4 1113 return -EINVAL;
1da177e4
LT
1114
1115 lock_sock(sk);
eaefd110
ED
1116 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1117 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1118
eaefd110 1119 if (!wq->fasync_list)
989a2979
ED
1120 sock_reset_flag(sk, SOCK_FASYNC);
1121 else
bcdce719 1122 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1123
989a2979 1124 release_sock(sk);
1da177e4
LT
1125 return 0;
1126}
1127
43815482 1128/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1129
1130int sock_wake_async(struct socket *sock, int how, int band)
1131{
43815482
ED
1132 struct socket_wq *wq;
1133
1134 if (!sock)
1135 return -1;
1136 rcu_read_lock();
1137 wq = rcu_dereference(sock->wq);
1138 if (!wq || !wq->fasync_list) {
1139 rcu_read_unlock();
1da177e4 1140 return -1;
43815482 1141 }
89bddce5 1142 switch (how) {
8d8ad9d7 1143 case SOCK_WAKE_WAITD:
1da177e4
LT
1144 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1145 break;
1146 goto call_kill;
8d8ad9d7 1147 case SOCK_WAKE_SPACE:
1da177e4
LT
1148 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1149 break;
1150 /* fall through */
8d8ad9d7 1151 case SOCK_WAKE_IO:
89bddce5 1152call_kill:
43815482 1153 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1154 break;
8d8ad9d7 1155 case SOCK_WAKE_URG:
43815482 1156 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1157 }
43815482 1158 rcu_read_unlock();
1da177e4
LT
1159 return 0;
1160}
c6d409cf 1161EXPORT_SYMBOL(sock_wake_async);
1da177e4 1162
721db93a 1163int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1164 struct socket **res, int kern)
1da177e4
LT
1165{
1166 int err;
1167 struct socket *sock;
55737fda 1168 const struct net_proto_family *pf;
1da177e4
LT
1169
1170 /*
89bddce5 1171 * Check protocol is in range
1da177e4
LT
1172 */
1173 if (family < 0 || family >= NPROTO)
1174 return -EAFNOSUPPORT;
1175 if (type < 0 || type >= SOCK_MAX)
1176 return -EINVAL;
1177
1178 /* Compatibility.
1179
1180 This uglymoron is moved from INET layer to here to avoid
1181 deadlock in module load.
1182 */
1183 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1184 static int warned;
1da177e4
LT
1185 if (!warned) {
1186 warned = 1;
3410f22e
YY
1187 pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1188 current->comm);
1da177e4
LT
1189 }
1190 family = PF_PACKET;
1191 }
1192
1193 err = security_socket_create(family, type, protocol, kern);
1194 if (err)
1195 return err;
89bddce5 1196
55737fda
SH
1197 /*
1198 * Allocate the socket and allow the family to set things up. if
1199 * the protocol is 0, the family is instructed to select an appropriate
1200 * default.
1201 */
1202 sock = sock_alloc();
1203 if (!sock) {
e87cc472 1204 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1205 return -ENFILE; /* Not exactly a match, but its the
1206 closest posix thing */
1207 }
1208
1209 sock->type = type;
1210
95a5afca 1211#ifdef CONFIG_MODULES
89bddce5
SH
1212 /* Attempt to load a protocol module if the find failed.
1213 *
1214 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1215 * requested real, full-featured networking support upon configuration.
1216 * Otherwise module support will break!
1217 */
190683a9 1218 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1219 request_module("net-pf-%d", family);
1da177e4
LT
1220#endif
1221
55737fda
SH
1222 rcu_read_lock();
1223 pf = rcu_dereference(net_families[family]);
1224 err = -EAFNOSUPPORT;
1225 if (!pf)
1226 goto out_release;
1da177e4
LT
1227
1228 /*
1229 * We will call the ->create function, that possibly is in a loadable
1230 * module, so we have to bump that loadable module refcnt first.
1231 */
55737fda 1232 if (!try_module_get(pf->owner))
1da177e4
LT
1233 goto out_release;
1234
55737fda
SH
1235 /* Now protected by module ref count */
1236 rcu_read_unlock();
1237
3f378b68 1238 err = pf->create(net, sock, protocol, kern);
55737fda 1239 if (err < 0)
1da177e4 1240 goto out_module_put;
a79af59e 1241
1da177e4
LT
1242 /*
1243 * Now to bump the refcnt of the [loadable] module that owns this
1244 * socket at sock_release time we decrement its refcnt.
1245 */
55737fda
SH
1246 if (!try_module_get(sock->ops->owner))
1247 goto out_module_busy;
1248
1da177e4
LT
1249 /*
1250 * Now that we're done with the ->create function, the [loadable]
1251 * module can have its refcnt decremented
1252 */
55737fda 1253 module_put(pf->owner);
7420ed23
VY
1254 err = security_socket_post_create(sock, family, type, protocol, kern);
1255 if (err)
3b185525 1256 goto out_sock_release;
55737fda 1257 *res = sock;
1da177e4 1258
55737fda
SH
1259 return 0;
1260
1261out_module_busy:
1262 err = -EAFNOSUPPORT;
1da177e4 1263out_module_put:
55737fda
SH
1264 sock->ops = NULL;
1265 module_put(pf->owner);
1266out_sock_release:
1da177e4 1267 sock_release(sock);
55737fda
SH
1268 return err;
1269
1270out_release:
1271 rcu_read_unlock();
1272 goto out_sock_release;
1da177e4 1273}
721db93a 1274EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1275
1276int sock_create(int family, int type, int protocol, struct socket **res)
1277{
1b8d7ae4 1278 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1279}
c6d409cf 1280EXPORT_SYMBOL(sock_create);
1da177e4
LT
1281
1282int sock_create_kern(int family, int type, int protocol, struct socket **res)
1283{
1b8d7ae4 1284 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1285}
c6d409cf 1286EXPORT_SYMBOL(sock_create_kern);
1da177e4 1287
3e0fa65f 1288SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1289{
1290 int retval;
1291 struct socket *sock;
a677a039
UD
1292 int flags;
1293
e38b36f3
UD
1294 /* Check the SOCK_* constants for consistency. */
1295 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1296 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1297 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1298 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1299
a677a039 1300 flags = type & ~SOCK_TYPE_MASK;
77d27200 1301 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1302 return -EINVAL;
1303 type &= SOCK_TYPE_MASK;
1da177e4 1304
aaca0bdc
UD
1305 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1306 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1307
1da177e4
LT
1308 retval = sock_create(family, type, protocol, &sock);
1309 if (retval < 0)
1310 goto out;
1311
77d27200 1312 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1313 if (retval < 0)
1314 goto out_release;
1315
1316out:
1317 /* It may be already another descriptor 8) Not kernel problem. */
1318 return retval;
1319
1320out_release:
1321 sock_release(sock);
1322 return retval;
1323}
1324
1325/*
1326 * Create a pair of connected sockets.
1327 */
1328
3e0fa65f
HC
1329SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1330 int __user *, usockvec)
1da177e4
LT
1331{
1332 struct socket *sock1, *sock2;
1333 int fd1, fd2, err;
db349509 1334 struct file *newfile1, *newfile2;
a677a039
UD
1335 int flags;
1336
1337 flags = type & ~SOCK_TYPE_MASK;
77d27200 1338 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1339 return -EINVAL;
1340 type &= SOCK_TYPE_MASK;
1da177e4 1341
aaca0bdc
UD
1342 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1343 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1344
1da177e4
LT
1345 /*
1346 * Obtain the first socket and check if the underlying protocol
1347 * supports the socketpair call.
1348 */
1349
1350 err = sock_create(family, type, protocol, &sock1);
1351 if (err < 0)
1352 goto out;
1353
1354 err = sock_create(family, type, protocol, &sock2);
1355 if (err < 0)
1356 goto out_release_1;
1357
1358 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1359 if (err < 0)
1da177e4
LT
1360 goto out_release_both;
1361
28407630 1362 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1363 if (unlikely(fd1 < 0)) {
1364 err = fd1;
db349509 1365 goto out_release_both;
bf3c23d1 1366 }
d73aa286 1367
28407630 1368 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1369 if (unlikely(fd2 < 0)) {
1370 err = fd2;
d73aa286 1371 goto out_put_unused_1;
28407630
AV
1372 }
1373
aab174f0 1374 newfile1 = sock_alloc_file(sock1, flags, NULL);
28407630
AV
1375 if (unlikely(IS_ERR(newfile1))) {
1376 err = PTR_ERR(newfile1);
d73aa286 1377 goto out_put_unused_both;
28407630
AV
1378 }
1379
aab174f0 1380 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1381 if (IS_ERR(newfile2)) {
1382 err = PTR_ERR(newfile2);
d73aa286 1383 goto out_fput_1;
db349509
AV
1384 }
1385
d73aa286
YD
1386 err = put_user(fd1, &usockvec[0]);
1387 if (err)
1388 goto out_fput_both;
1389
1390 err = put_user(fd2, &usockvec[1]);
1391 if (err)
1392 goto out_fput_both;
1393
157cf649 1394 audit_fd_pair(fd1, fd2);
d73aa286 1395
db349509
AV
1396 fd_install(fd1, newfile1);
1397 fd_install(fd2, newfile2);
1da177e4
LT
1398 /* fd1 and fd2 may be already another descriptors.
1399 * Not kernel problem.
1400 */
1401
d73aa286 1402 return 0;
1da177e4 1403
d73aa286
YD
1404out_fput_both:
1405 fput(newfile2);
1406 fput(newfile1);
1407 put_unused_fd(fd2);
1408 put_unused_fd(fd1);
1409 goto out;
1410
1411out_fput_1:
1412 fput(newfile1);
1413 put_unused_fd(fd2);
1414 put_unused_fd(fd1);
1415 sock_release(sock2);
1416 goto out;
1da177e4 1417
d73aa286
YD
1418out_put_unused_both:
1419 put_unused_fd(fd2);
1420out_put_unused_1:
1421 put_unused_fd(fd1);
1da177e4 1422out_release_both:
89bddce5 1423 sock_release(sock2);
1da177e4 1424out_release_1:
89bddce5 1425 sock_release(sock1);
1da177e4
LT
1426out:
1427 return err;
1428}
1429
1da177e4
LT
1430/*
1431 * Bind a name to a socket. Nothing much to do here since it's
1432 * the protocol's responsibility to handle the local address.
1433 *
1434 * We move the socket address to kernel space before we call
1435 * the protocol layer (having also checked the address is ok).
1436 */
1437
20f37034 1438SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1439{
1440 struct socket *sock;
230b1839 1441 struct sockaddr_storage address;
6cb153ca 1442 int err, fput_needed;
1da177e4 1443
89bddce5 1444 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1445 if (sock) {
43db362d 1446 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1447 if (err >= 0) {
1448 err = security_socket_bind(sock,
230b1839 1449 (struct sockaddr *)&address,
89bddce5 1450 addrlen);
6cb153ca
BL
1451 if (!err)
1452 err = sock->ops->bind(sock,
89bddce5 1453 (struct sockaddr *)
230b1839 1454 &address, addrlen);
1da177e4 1455 }
6cb153ca 1456 fput_light(sock->file, fput_needed);
89bddce5 1457 }
1da177e4
LT
1458 return err;
1459}
1460
1da177e4
LT
1461/*
1462 * Perform a listen. Basically, we allow the protocol to do anything
1463 * necessary for a listen, and if that works, we mark the socket as
1464 * ready for listening.
1465 */
1466
3e0fa65f 1467SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1468{
1469 struct socket *sock;
6cb153ca 1470 int err, fput_needed;
b8e1f9b5 1471 int somaxconn;
89bddce5
SH
1472
1473 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1474 if (sock) {
8efa6e93 1475 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1476 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1477 backlog = somaxconn;
1da177e4
LT
1478
1479 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1480 if (!err)
1481 err = sock->ops->listen(sock, backlog);
1da177e4 1482
6cb153ca 1483 fput_light(sock->file, fput_needed);
1da177e4
LT
1484 }
1485 return err;
1486}
1487
1da177e4
LT
1488/*
1489 * For accept, we attempt to create a new socket, set up the link
1490 * with the client, wake up the client, then return the new
1491 * connected fd. We collect the address of the connector in kernel
1492 * space and move it to user at the very end. This is unclean because
1493 * we open the socket then return an error.
1494 *
1495 * 1003.1g adds the ability to recvmsg() to query connection pending
1496 * status to recvmsg. We need to add that support in a way that's
1497 * clean when we restructure accept also.
1498 */
1499
20f37034
HC
1500SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1501 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1502{
1503 struct socket *sock, *newsock;
39d8c1b6 1504 struct file *newfile;
6cb153ca 1505 int err, len, newfd, fput_needed;
230b1839 1506 struct sockaddr_storage address;
1da177e4 1507
77d27200 1508 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1509 return -EINVAL;
1510
1511 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1512 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1513
6cb153ca 1514 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1515 if (!sock)
1516 goto out;
1517
1518 err = -ENFILE;
c6d409cf
ED
1519 newsock = sock_alloc();
1520 if (!newsock)
1da177e4
LT
1521 goto out_put;
1522
1523 newsock->type = sock->type;
1524 newsock->ops = sock->ops;
1525
1da177e4
LT
1526 /*
1527 * We don't need try_module_get here, as the listening socket (sock)
1528 * has the protocol module (sock->ops->owner) held.
1529 */
1530 __module_get(newsock->ops->owner);
1531
28407630 1532 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1533 if (unlikely(newfd < 0)) {
1534 err = newfd;
9a1875e6
DM
1535 sock_release(newsock);
1536 goto out_put;
39d8c1b6 1537 }
aab174f0 1538 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
28407630
AV
1539 if (unlikely(IS_ERR(newfile))) {
1540 err = PTR_ERR(newfile);
1541 put_unused_fd(newfd);
1542 sock_release(newsock);
1543 goto out_put;
1544 }
39d8c1b6 1545
a79af59e
FF
1546 err = security_socket_accept(sock, newsock);
1547 if (err)
39d8c1b6 1548 goto out_fd;
a79af59e 1549
1da177e4
LT
1550 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1551 if (err < 0)
39d8c1b6 1552 goto out_fd;
1da177e4
LT
1553
1554 if (upeer_sockaddr) {
230b1839 1555 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1556 &len, 2) < 0) {
1da177e4 1557 err = -ECONNABORTED;
39d8c1b6 1558 goto out_fd;
1da177e4 1559 }
43db362d 1560 err = move_addr_to_user(&address,
230b1839 1561 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1562 if (err < 0)
39d8c1b6 1563 goto out_fd;
1da177e4
LT
1564 }
1565
1566 /* File flags are not inherited via accept() unlike another OSes. */
1567
39d8c1b6
DM
1568 fd_install(newfd, newfile);
1569 err = newfd;
1da177e4 1570
1da177e4 1571out_put:
6cb153ca 1572 fput_light(sock->file, fput_needed);
1da177e4
LT
1573out:
1574 return err;
39d8c1b6 1575out_fd:
9606a216 1576 fput(newfile);
39d8c1b6 1577 put_unused_fd(newfd);
1da177e4
LT
1578 goto out_put;
1579}
1580
20f37034
HC
1581SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1582 int __user *, upeer_addrlen)
aaca0bdc 1583{
de11defe 1584 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1585}
1586
1da177e4
LT
1587/*
1588 * Attempt to connect to a socket with the server address. The address
1589 * is in user space so we verify it is OK and move it to kernel space.
1590 *
1591 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1592 * break bindings
1593 *
1594 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1595 * other SEQPACKET protocols that take time to connect() as it doesn't
1596 * include the -EINPROGRESS status for such sockets.
1597 */
1598
20f37034
HC
1599SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1600 int, addrlen)
1da177e4
LT
1601{
1602 struct socket *sock;
230b1839 1603 struct sockaddr_storage address;
6cb153ca 1604 int err, fput_needed;
1da177e4 1605
6cb153ca 1606 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1607 if (!sock)
1608 goto out;
43db362d 1609 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1610 if (err < 0)
1611 goto out_put;
1612
89bddce5 1613 err =
230b1839 1614 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1615 if (err)
1616 goto out_put;
1617
230b1839 1618 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1619 sock->file->f_flags);
1620out_put:
6cb153ca 1621 fput_light(sock->file, fput_needed);
1da177e4
LT
1622out:
1623 return err;
1624}
1625
1626/*
1627 * Get the local address ('name') of a socket object. Move the obtained
1628 * name to user space.
1629 */
1630
20f37034
HC
1631SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1632 int __user *, usockaddr_len)
1da177e4
LT
1633{
1634 struct socket *sock;
230b1839 1635 struct sockaddr_storage address;
6cb153ca 1636 int len, err, fput_needed;
89bddce5 1637
6cb153ca 1638 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1639 if (!sock)
1640 goto out;
1641
1642 err = security_socket_getsockname(sock);
1643 if (err)
1644 goto out_put;
1645
230b1839 1646 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1647 if (err)
1648 goto out_put;
43db362d 1649 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1650
1651out_put:
6cb153ca 1652 fput_light(sock->file, fput_needed);
1da177e4
LT
1653out:
1654 return err;
1655}
1656
1657/*
1658 * Get the remote address ('name') of a socket object. Move the obtained
1659 * name to user space.
1660 */
1661
20f37034
HC
1662SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1663 int __user *, usockaddr_len)
1da177e4
LT
1664{
1665 struct socket *sock;
230b1839 1666 struct sockaddr_storage address;
6cb153ca 1667 int len, err, fput_needed;
1da177e4 1668
89bddce5
SH
1669 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1670 if (sock != NULL) {
1da177e4
LT
1671 err = security_socket_getpeername(sock);
1672 if (err) {
6cb153ca 1673 fput_light(sock->file, fput_needed);
1da177e4
LT
1674 return err;
1675 }
1676
89bddce5 1677 err =
230b1839 1678 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1679 1);
1da177e4 1680 if (!err)
43db362d 1681 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1682 usockaddr_len);
6cb153ca 1683 fput_light(sock->file, fput_needed);
1da177e4
LT
1684 }
1685 return err;
1686}
1687
1688/*
1689 * Send a datagram to a given address. We move the address into kernel
1690 * space and check the user space data area is readable before invoking
1691 * the protocol.
1692 */
1693
3e0fa65f 1694SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1695 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1696 int, addr_len)
1da177e4
LT
1697{
1698 struct socket *sock;
230b1839 1699 struct sockaddr_storage address;
1da177e4
LT
1700 int err;
1701 struct msghdr msg;
1702 struct iovec iov;
6cb153ca 1703 int fput_needed;
6cb153ca 1704
253eacc0
LT
1705 if (len > INT_MAX)
1706 len = INT_MAX;
de0fa95c
PE
1707 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1708 if (!sock)
4387ff75 1709 goto out;
6cb153ca 1710
89bddce5
SH
1711 iov.iov_base = buff;
1712 iov.iov_len = len;
1713 msg.msg_name = NULL;
c0371da6 1714 iov_iter_init(&msg.msg_iter, WRITE, &iov, 1, len);
89bddce5
SH
1715 msg.msg_control = NULL;
1716 msg.msg_controllen = 0;
1717 msg.msg_namelen = 0;
6cb153ca 1718 if (addr) {
43db362d 1719 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1720 if (err < 0)
1721 goto out_put;
230b1839 1722 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1723 msg.msg_namelen = addr_len;
1da177e4
LT
1724 }
1725 if (sock->file->f_flags & O_NONBLOCK)
1726 flags |= MSG_DONTWAIT;
1727 msg.msg_flags = flags;
1728 err = sock_sendmsg(sock, &msg, len);
1729
89bddce5 1730out_put:
de0fa95c 1731 fput_light(sock->file, fput_needed);
4387ff75 1732out:
1da177e4
LT
1733 return err;
1734}
1735
1736/*
89bddce5 1737 * Send a datagram down a socket.
1da177e4
LT
1738 */
1739
3e0fa65f 1740SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1741 unsigned int, flags)
1da177e4
LT
1742{
1743 return sys_sendto(fd, buff, len, flags, NULL, 0);
1744}
1745
1746/*
89bddce5 1747 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1748 * sender. We verify the buffers are writable and if needed move the
1749 * sender address from kernel to user space.
1750 */
1751
3e0fa65f 1752SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1753 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1754 int __user *, addr_len)
1da177e4
LT
1755{
1756 struct socket *sock;
1757 struct iovec iov;
1758 struct msghdr msg;
230b1839 1759 struct sockaddr_storage address;
89bddce5 1760 int err, err2;
6cb153ca
BL
1761 int fput_needed;
1762
253eacc0
LT
1763 if (size > INT_MAX)
1764 size = INT_MAX;
de0fa95c 1765 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1766 if (!sock)
de0fa95c 1767 goto out;
1da177e4 1768
89bddce5
SH
1769 msg.msg_control = NULL;
1770 msg.msg_controllen = 0;
89bddce5
SH
1771 iov.iov_len = size;
1772 iov.iov_base = ubuf;
c0371da6 1773 iov_iter_init(&msg.msg_iter, READ, &iov, 1, size);
f3d33426
HFS
1774 /* Save some cycles and don't copy the address if not needed */
1775 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1776 /* We assume all kernel code knows the size of sockaddr_storage */
1777 msg.msg_namelen = 0;
1da177e4
LT
1778 if (sock->file->f_flags & O_NONBLOCK)
1779 flags |= MSG_DONTWAIT;
89bddce5 1780 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1781
89bddce5 1782 if (err >= 0 && addr != NULL) {
43db362d 1783 err2 = move_addr_to_user(&address,
230b1839 1784 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1785 if (err2 < 0)
1786 err = err2;
1da177e4 1787 }
de0fa95c
PE
1788
1789 fput_light(sock->file, fput_needed);
4387ff75 1790out:
1da177e4
LT
1791 return err;
1792}
1793
1794/*
89bddce5 1795 * Receive a datagram from a socket.
1da177e4
LT
1796 */
1797
b7c0ddf5
JG
1798SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1799 unsigned int, flags)
1da177e4
LT
1800{
1801 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1802}
1803
1804/*
1805 * Set a socket option. Because we don't know the option lengths we have
1806 * to pass the user mode parameter for the protocols to sort out.
1807 */
1808
20f37034
HC
1809SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1810 char __user *, optval, int, optlen)
1da177e4 1811{
6cb153ca 1812 int err, fput_needed;
1da177e4
LT
1813 struct socket *sock;
1814
1815 if (optlen < 0)
1816 return -EINVAL;
89bddce5
SH
1817
1818 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1819 if (sock != NULL) {
1820 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1821 if (err)
1822 goto out_put;
1da177e4
LT
1823
1824 if (level == SOL_SOCKET)
89bddce5
SH
1825 err =
1826 sock_setsockopt(sock, level, optname, optval,
1827 optlen);
1da177e4 1828 else
89bddce5
SH
1829 err =
1830 sock->ops->setsockopt(sock, level, optname, optval,
1831 optlen);
6cb153ca
BL
1832out_put:
1833 fput_light(sock->file, fput_needed);
1da177e4
LT
1834 }
1835 return err;
1836}
1837
1838/*
1839 * Get a socket option. Because we don't know the option lengths we have
1840 * to pass a user mode parameter for the protocols to sort out.
1841 */
1842
20f37034
HC
1843SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1844 char __user *, optval, int __user *, optlen)
1da177e4 1845{
6cb153ca 1846 int err, fput_needed;
1da177e4
LT
1847 struct socket *sock;
1848
89bddce5
SH
1849 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1850 if (sock != NULL) {
6cb153ca
BL
1851 err = security_socket_getsockopt(sock, level, optname);
1852 if (err)
1853 goto out_put;
1da177e4
LT
1854
1855 if (level == SOL_SOCKET)
89bddce5
SH
1856 err =
1857 sock_getsockopt(sock, level, optname, optval,
1858 optlen);
1da177e4 1859 else
89bddce5
SH
1860 err =
1861 sock->ops->getsockopt(sock, level, optname, optval,
1862 optlen);
6cb153ca
BL
1863out_put:
1864 fput_light(sock->file, fput_needed);
1da177e4
LT
1865 }
1866 return err;
1867}
1868
1da177e4
LT
1869/*
1870 * Shutdown a socket.
1871 */
1872
754fe8d2 1873SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1874{
6cb153ca 1875 int err, fput_needed;
1da177e4
LT
1876 struct socket *sock;
1877
89bddce5
SH
1878 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1879 if (sock != NULL) {
1da177e4 1880 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1881 if (!err)
1882 err = sock->ops->shutdown(sock, how);
1883 fput_light(sock->file, fput_needed);
1da177e4
LT
1884 }
1885 return err;
1886}
1887
89bddce5 1888/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1889 * fields which are the same type (int / unsigned) on our platforms.
1890 */
/*
 * COMPAT_MSG(msg, member) yields the address of @member in either @msg or
 * its compat twin.  The expansion expects two names in the calling scope:
 * a variable called "flags" (tested for MSG_CMSG_COMPAT) and a pointer
 * named <msg>_compat.
 */
#define COMPAT_MSG(msg, member) \
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1894
c71d8ebe
TH
1895struct used_address {
1896 struct sockaddr_storage name;
1897 unsigned int name_len;
1898};
1899
08adb7da
AV
1900static ssize_t copy_msghdr_from_user(struct msghdr *kmsg,
1901 struct user_msghdr __user *umsg,
1902 struct sockaddr __user **save_addr,
1903 struct iovec **iov)
1661bf36 1904{
08adb7da
AV
1905 struct sockaddr __user *uaddr;
1906 struct iovec __user *uiov;
c0371da6 1907 size_t nr_segs;
08adb7da
AV
1908 ssize_t err;
1909
1910 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1911 __get_user(uaddr, &umsg->msg_name) ||
1912 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1913 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1914 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1915 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1916 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1917 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1918 return -EFAULT;
dbb490b9 1919
08adb7da 1920 if (!uaddr)
6a2a2b3a
AS
1921 kmsg->msg_namelen = 0;
1922
dbb490b9
ML
1923 if (kmsg->msg_namelen < 0)
1924 return -EINVAL;
1925
1661bf36 1926 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1927 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1928
1929 if (save_addr)
1930 *save_addr = uaddr;
1931
1932 if (uaddr && kmsg->msg_namelen) {
1933 if (!save_addr) {
1934 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1935 kmsg->msg_name);
1936 if (err < 0)
1937 return err;
1938 }
1939 } else {
1940 kmsg->msg_name = NULL;
1941 kmsg->msg_namelen = 0;
1942 }
1943
c0371da6 1944 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1945 return -EMSGSIZE;
1946
1947 err = rw_copy_check_uvector(save_addr ? READ : WRITE,
c0371da6 1948 uiov, nr_segs,
08adb7da
AV
1949 UIO_FASTIOV, *iov, iov);
1950 if (err >= 0)
c0371da6
AV
1951 iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE,
1952 *iov, nr_segs, err);
08adb7da 1953 return err;
1661bf36
DC
1954}
1955
666547ff 1956static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1957 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1958 struct used_address *used_address)
1da177e4 1959{
89bddce5
SH
1960 struct compat_msghdr __user *msg_compat =
1961 (struct compat_msghdr __user *)msg;
230b1839 1962 struct sockaddr_storage address;
1da177e4 1963 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1964 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1965 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1966 /* 20 is size of ipv6_pktinfo */
1da177e4 1967 unsigned char *ctl_buf = ctl;
08adb7da
AV
1968 int ctl_len, total_len;
1969 ssize_t err;
89bddce5 1970
08adb7da 1971 msg_sys->msg_name = &address;
1da177e4 1972
08449320 1973 if (MSG_CMSG_COMPAT & flags)
08adb7da 1974 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1975 else
08adb7da 1976 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1977 if (err < 0)
1da177e4
LT
1978 goto out_freeiov;
1979 total_len = err;
1980
1981 err = -ENOBUFS;
1982
228e548e 1983 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1984 goto out_freeiov;
228e548e 1985 ctl_len = msg_sys->msg_controllen;
1da177e4 1986 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1987 err =
228e548e 1988 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1989 sizeof(ctl));
1da177e4
LT
1990 if (err)
1991 goto out_freeiov;
228e548e
AB
1992 ctl_buf = msg_sys->msg_control;
1993 ctl_len = msg_sys->msg_controllen;
1da177e4 1994 } else if (ctl_len) {
89bddce5 1995 if (ctl_len > sizeof(ctl)) {
1da177e4 1996 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1997 if (ctl_buf == NULL)
1da177e4
LT
1998 goto out_freeiov;
1999 }
2000 err = -EFAULT;
2001 /*
228e548e 2002 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2003 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2004 * checking falls down on this.
2005 */
fb8621bb 2006 if (copy_from_user(ctl_buf,
228e548e 2007 (void __user __force *)msg_sys->msg_control,
89bddce5 2008 ctl_len))
1da177e4 2009 goto out_freectl;
228e548e 2010 msg_sys->msg_control = ctl_buf;
1da177e4 2011 }
228e548e 2012 msg_sys->msg_flags = flags;
1da177e4
LT
2013
2014 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2015 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2016 /*
2017 * If this is sendmmsg() and current destination address is same as
2018 * previously succeeded address, omit asking LSM's decision.
2019 * used_address->name_len is initialized to UINT_MAX so that the first
2020 * destination address never matches.
2021 */
bc909d9d
MD
2022 if (used_address && msg_sys->msg_name &&
2023 used_address->name_len == msg_sys->msg_namelen &&
2024 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
2025 used_address->name_len)) {
2026 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
2027 goto out_freectl;
2028 }
2029 err = sock_sendmsg(sock, msg_sys, total_len);
2030 /*
2031 * If this is sendmmsg() and sending to current destination address was
2032 * successful, remember it.
2033 */
2034 if (used_address && err >= 0) {
2035 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2036 if (msg_sys->msg_name)
2037 memcpy(&used_address->name, msg_sys->msg_name,
2038 used_address->name_len);
c71d8ebe 2039 }
1da177e4
LT
2040
2041out_freectl:
89bddce5 2042 if (ctl_buf != ctl)
1da177e4
LT
2043 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2044out_freeiov:
2045 if (iov != iovstack)
a74e9106 2046 kfree(iov);
228e548e
AB
2047 return err;
2048}
2049
2050/*
2051 * BSD sendmsg interface
2052 */
2053
666547ff 2054long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
2055{
2056 int fput_needed, err;
2057 struct msghdr msg_sys;
1be374a0
AL
2058 struct socket *sock;
2059
1be374a0 2060 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2061 if (!sock)
2062 goto out;
2063
a7526eb5 2064 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2065
6cb153ca 2066 fput_light(sock->file, fput_needed);
89bddce5 2067out:
1da177e4
LT
2068 return err;
2069}
2070
666547ff 2071SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
2072{
2073 if (flags & MSG_CMSG_COMPAT)
2074 return -EINVAL;
2075 return __sys_sendmsg(fd, msg, flags);
2076}
2077
228e548e
AB
2078/*
2079 * Linux sendmmsg interface
2080 */
2081
2082int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2083 unsigned int flags)
2084{
2085 int fput_needed, err, datagrams;
2086 struct socket *sock;
2087 struct mmsghdr __user *entry;
2088 struct compat_mmsghdr __user *compat_entry;
2089 struct msghdr msg_sys;
c71d8ebe 2090 struct used_address used_address;
228e548e 2091
98382f41
AB
2092 if (vlen > UIO_MAXIOV)
2093 vlen = UIO_MAXIOV;
228e548e
AB
2094
2095 datagrams = 0;
2096
2097 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2098 if (!sock)
2099 return err;
2100
c71d8ebe 2101 used_address.name_len = UINT_MAX;
228e548e
AB
2102 entry = mmsg;
2103 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2104 err = 0;
228e548e
AB
2105
2106 while (datagrams < vlen) {
228e548e 2107 if (MSG_CMSG_COMPAT & flags) {
666547ff 2108 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5 2109 &msg_sys, flags, &used_address);
228e548e
AB
2110 if (err < 0)
2111 break;
2112 err = __put_user(err, &compat_entry->msg_len);
2113 ++compat_entry;
2114 } else {
a7526eb5 2115 err = ___sys_sendmsg(sock,
666547ff 2116 (struct user_msghdr __user *)entry,
a7526eb5 2117 &msg_sys, flags, &used_address);
228e548e
AB
2118 if (err < 0)
2119 break;
2120 err = put_user(err, &entry->msg_len);
2121 ++entry;
2122 }
2123
2124 if (err)
2125 break;
2126 ++datagrams;
2127 }
2128
228e548e
AB
2129 fput_light(sock->file, fput_needed);
2130
728ffb86
AB
2131 /* We only return an error if no datagrams were able to be sent */
2132 if (datagrams != 0)
228e548e
AB
2133 return datagrams;
2134
228e548e
AB
2135 return err;
2136}
2137
2138SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2139 unsigned int, vlen, unsigned int, flags)
2140{
1be374a0
AL
2141 if (flags & MSG_CMSG_COMPAT)
2142 return -EINVAL;
228e548e
AB
2143 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2144}
2145
666547ff 2146static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2147 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2148{
89bddce5
SH
2149 struct compat_msghdr __user *msg_compat =
2150 (struct compat_msghdr __user *)msg;
1da177e4 2151 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2152 struct iovec *iov = iovstack;
1da177e4 2153 unsigned long cmsg_ptr;
08adb7da
AV
2154 int total_len, len;
2155 ssize_t err;
1da177e4
LT
2156
2157 /* kernel mode address */
230b1839 2158 struct sockaddr_storage addr;
1da177e4
LT
2159
2160 /* user mode address pointers */
2161 struct sockaddr __user *uaddr;
08adb7da 2162 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2163
08adb7da 2164 msg_sys->msg_name = &addr;
1da177e4 2165
f3d33426 2166 if (MSG_CMSG_COMPAT & flags)
08adb7da 2167 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2168 else
08adb7da 2169 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4
LT
2170 if (err < 0)
2171 goto out_freeiov;
89bddce5 2172 total_len = err;
1da177e4 2173
a2e27255
ACM
2174 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2175 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2176
f3d33426
HFS
2177 /* We assume all kernel code knows the size of sockaddr_storage */
2178 msg_sys->msg_namelen = 0;
2179
1da177e4
LT
2180 if (sock->file->f_flags & O_NONBLOCK)
2181 flags |= MSG_DONTWAIT;
a2e27255
ACM
2182 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2183 total_len, flags);
1da177e4
LT
2184 if (err < 0)
2185 goto out_freeiov;
2186 len = err;
2187
2188 if (uaddr != NULL) {
43db362d 2189 err = move_addr_to_user(&addr,
a2e27255 2190 msg_sys->msg_namelen, uaddr,
89bddce5 2191 uaddr_len);
1da177e4
LT
2192 if (err < 0)
2193 goto out_freeiov;
2194 }
a2e27255 2195 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2196 COMPAT_FLAGS(msg));
1da177e4
LT
2197 if (err)
2198 goto out_freeiov;
2199 if (MSG_CMSG_COMPAT & flags)
a2e27255 2200 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2201 &msg_compat->msg_controllen);
2202 else
a2e27255 2203 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2204 &msg->msg_controllen);
2205 if (err)
2206 goto out_freeiov;
2207 err = len;
2208
2209out_freeiov:
2210 if (iov != iovstack)
a74e9106 2211 kfree(iov);
a2e27255
ACM
2212 return err;
2213}
2214
2215/*
2216 * BSD recvmsg interface
2217 */
2218
666547ff 2219long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2220{
2221 int fput_needed, err;
2222 struct msghdr msg_sys;
1be374a0
AL
2223 struct socket *sock;
2224
1be374a0 2225 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2226 if (!sock)
2227 goto out;
2228
a7526eb5 2229 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2230
6cb153ca 2231 fput_light(sock->file, fput_needed);
1da177e4
LT
2232out:
2233 return err;
2234}
2235
666547ff 2236SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2237 unsigned int, flags)
2238{
2239 if (flags & MSG_CMSG_COMPAT)
2240 return -EINVAL;
2241 return __sys_recvmsg(fd, msg, flags);
2242}
2243
a2e27255
ACM
2244/*
2245 * Linux recvmmsg interface
2246 */
2247
2248int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2249 unsigned int flags, struct timespec *timeout)
2250{
2251 int fput_needed, err, datagrams;
2252 struct socket *sock;
2253 struct mmsghdr __user *entry;
d7256d0e 2254 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2255 struct msghdr msg_sys;
2256 struct timespec end_time;
2257
2258 if (timeout &&
2259 poll_select_set_timeout(&end_time, timeout->tv_sec,
2260 timeout->tv_nsec))
2261 return -EINVAL;
2262
2263 datagrams = 0;
2264
2265 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2266 if (!sock)
2267 return err;
2268
2269 err = sock_error(sock->sk);
2270 if (err)
2271 goto out_put;
2272
2273 entry = mmsg;
d7256d0e 2274 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2275
2276 while (datagrams < vlen) {
2277 /*
2278 * No need to ask LSM for more than the first datagram.
2279 */
d7256d0e 2280 if (MSG_CMSG_COMPAT & flags) {
666547ff 2281 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2282 &msg_sys, flags & ~MSG_WAITFORONE,
2283 datagrams);
d7256d0e
JMG
2284 if (err < 0)
2285 break;
2286 err = __put_user(err, &compat_entry->msg_len);
2287 ++compat_entry;
2288 } else {
a7526eb5 2289 err = ___sys_recvmsg(sock,
666547ff 2290 (struct user_msghdr __user *)entry,
a7526eb5
AL
2291 &msg_sys, flags & ~MSG_WAITFORONE,
2292 datagrams);
d7256d0e
JMG
2293 if (err < 0)
2294 break;
2295 err = put_user(err, &entry->msg_len);
2296 ++entry;
2297 }
2298
a2e27255
ACM
2299 if (err)
2300 break;
a2e27255
ACM
2301 ++datagrams;
2302
71c5c159
BB
2303 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2304 if (flags & MSG_WAITFORONE)
2305 flags |= MSG_DONTWAIT;
2306
a2e27255
ACM
2307 if (timeout) {
2308 ktime_get_ts(timeout);
2309 *timeout = timespec_sub(end_time, *timeout);
2310 if (timeout->tv_sec < 0) {
2311 timeout->tv_sec = timeout->tv_nsec = 0;
2312 break;
2313 }
2314
2315 /* Timeout, return less than vlen datagrams */
2316 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2317 break;
2318 }
2319
2320 /* Out of band data, return right away */
2321 if (msg_sys.msg_flags & MSG_OOB)
2322 break;
2323 }
2324
2325out_put:
2326 fput_light(sock->file, fput_needed);
1da177e4 2327
a2e27255
ACM
2328 if (err == 0)
2329 return datagrams;
2330
2331 if (datagrams != 0) {
2332 /*
2333 * We may return less entries than requested (vlen) if the
2334 * sock is non block and there aren't enough datagrams...
2335 */
2336 if (err != -EAGAIN) {
2337 /*
2338 * ... or if recvmsg returns an error after we
2339 * received some datagrams, where we record the
2340 * error to return on the next call or if the
2341 * app asks about it using getsockopt(SO_ERROR).
2342 */
2343 sock->sk->sk_err = -err;
2344 }
2345
2346 return datagrams;
2347 }
2348
2349 return err;
2350}
2351
2352SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2353 unsigned int, vlen, unsigned int, flags,
2354 struct timespec __user *, timeout)
2355{
2356 int datagrams;
2357 struct timespec timeout_sys;
2358
1be374a0
AL
2359 if (flags & MSG_CMSG_COMPAT)
2360 return -EINVAL;
2361
a2e27255
ACM
2362 if (!timeout)
2363 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2364
2365 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2366 return -EFAULT;
2367
2368 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2369
2370 if (datagrams > 0 &&
2371 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2372 datagrams = -EFAULT;
2373
2374 return datagrams;
2375}
2376
2377#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call number.
 *
 * NOTE(review): the call names in the per-entry comments assume the SYS_*
 * numbering from <linux/net.h>; confirm against that header.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	[0]  = AL(0),	/* unused */
	[1]  = AL(3),	/* socket */
	[2]  = AL(3),	/* bind */
	[3]  = AL(3),	/* connect */
	[4]  = AL(2),	/* listen */
	[5]  = AL(3),	/* accept */
	[6]  = AL(3),	/* getsockname */
	[7]  = AL(3),	/* getpeername */
	[8]  = AL(4),	/* socketpair */
	[9]  = AL(4),	/* send */
	[10] = AL(4),	/* recv */
	[11] = AL(6),	/* sendto */
	[12] = AL(6),	/* recvfrom */
	[13] = AL(2),	/* shutdown */
	[14] = AL(5),	/* setsockopt */
	[15] = AL(5),	/* getsockopt */
	[16] = AL(3),	/* sendmsg */
	[17] = AL(3),	/* recvmsg */
	[18] = AL(4),	/* accept4 */
	[19] = AL(5),	/* recvmmsg */
	[20] = AL(4),	/* sendmmsg */
};

#undef AL
2388
2389/*
89bddce5 2390 * System call vectors.
1da177e4
LT
2391 *
2392 * Argument checking cleaned up. Saved 20% in size.
2393 * This function doesn't need to set the kernel lock because
89bddce5 2394 * it is set by the callees.
1da177e4
LT
2395 */
2396
3e0fa65f 2397SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2398{
2950fa9d 2399 unsigned long a[AUDITSC_ARGS];
89bddce5 2400 unsigned long a0, a1;
1da177e4 2401 int err;
47379052 2402 unsigned int len;
1da177e4 2403
228e548e 2404 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2405 return -EINVAL;
2406
47379052
AV
2407 len = nargs[call];
2408 if (len > sizeof(a))
2409 return -EINVAL;
2410
1da177e4 2411 /* copy_from_user should be SMP safe. */
47379052 2412 if (copy_from_user(a, args, len))
1da177e4 2413 return -EFAULT;
3ec3b2fb 2414
2950fa9d
CG
2415 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2416 if (err)
2417 return err;
3ec3b2fb 2418
89bddce5
SH
2419 a0 = a[0];
2420 a1 = a[1];
2421
2422 switch (call) {
2423 case SYS_SOCKET:
2424 err = sys_socket(a0, a1, a[2]);
2425 break;
2426 case SYS_BIND:
2427 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2428 break;
2429 case SYS_CONNECT:
2430 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2431 break;
2432 case SYS_LISTEN:
2433 err = sys_listen(a0, a1);
2434 break;
2435 case SYS_ACCEPT:
de11defe
UD
2436 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2437 (int __user *)a[2], 0);
89bddce5
SH
2438 break;
2439 case SYS_GETSOCKNAME:
2440 err =
2441 sys_getsockname(a0, (struct sockaddr __user *)a1,
2442 (int __user *)a[2]);
2443 break;
2444 case SYS_GETPEERNAME:
2445 err =
2446 sys_getpeername(a0, (struct sockaddr __user *)a1,
2447 (int __user *)a[2]);
2448 break;
2449 case SYS_SOCKETPAIR:
2450 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2451 break;
2452 case SYS_SEND:
2453 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2454 break;
2455 case SYS_SENDTO:
2456 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2457 (struct sockaddr __user *)a[4], a[5]);
2458 break;
2459 case SYS_RECV:
2460 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2461 break;
2462 case SYS_RECVFROM:
2463 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2464 (struct sockaddr __user *)a[4],
2465 (int __user *)a[5]);
2466 break;
2467 case SYS_SHUTDOWN:
2468 err = sys_shutdown(a0, a1);
2469 break;
2470 case SYS_SETSOCKOPT:
2471 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2472 break;
2473 case SYS_GETSOCKOPT:
2474 err =
2475 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2476 (int __user *)a[4]);
2477 break;
2478 case SYS_SENDMSG:
666547ff 2479 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2480 break;
228e548e
AB
2481 case SYS_SENDMMSG:
2482 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2483 break;
89bddce5 2484 case SYS_RECVMSG:
666547ff 2485 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2486 break;
a2e27255
ACM
2487 case SYS_RECVMMSG:
2488 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2489 (struct timespec __user *)a[4]);
2490 break;
de11defe
UD
2491 case SYS_ACCEPT4:
2492 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2493 (int __user *)a[2], a[3]);
aaca0bdc 2494 break;
89bddce5
SH
2495 default:
2496 err = -EINVAL;
2497 break;
1da177e4
LT
2498 }
2499 return err;
2500}
2501
89bddce5 2502#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2503
55737fda
SH
2504/**
2505 * sock_register - add a socket protocol handler
2506 * @ops: description of protocol
2507 *
1da177e4
LT
2508 * This function is called by a protocol handler that wants to
2509 * advertise its address family, and have it linked into the
e793c0f7 2510 * socket interface. The value ops->family corresponds to the
55737fda 2511 * socket system call protocol family.
1da177e4 2512 */
f0fd27d4 2513int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2514{
2515 int err;
2516
2517 if (ops->family >= NPROTO) {
3410f22e 2518 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2519 return -ENOBUFS;
2520 }
55737fda
SH
2521
2522 spin_lock(&net_family_lock);
190683a9
ED
2523 if (rcu_dereference_protected(net_families[ops->family],
2524 lockdep_is_held(&net_family_lock)))
55737fda
SH
2525 err = -EEXIST;
2526 else {
cf778b00 2527 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2528 err = 0;
2529 }
55737fda
SH
2530 spin_unlock(&net_family_lock);
2531
3410f22e 2532 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2533 return err;
2534}
c6d409cf 2535EXPORT_SYMBOL(sock_register);
1da177e4 2536
55737fda
SH
2537/**
2538 * sock_unregister - remove a protocol handler
2539 * @family: protocol family to remove
2540 *
1da177e4
LT
2541 * This function is called by a protocol handler that wants to
2542 * remove its address family, and have it unlinked from the
55737fda
SH
2543 * new socket creation.
2544 *
2545 * If protocol handler is a module, then it can use module reference
2546 * counts to protect against new references. If protocol handler is not
2547 * a module then it needs to provide its own protection in
2548 * the ops->create routine.
1da177e4 2549 */
f0fd27d4 2550void sock_unregister(int family)
1da177e4 2551{
f0fd27d4 2552 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2553
55737fda 2554 spin_lock(&net_family_lock);
a9b3cd7f 2555 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2556 spin_unlock(&net_family_lock);
2557
2558 synchronize_rcu();
2559
3410f22e 2560 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2561}
c6d409cf 2562EXPORT_SYMBOL(sock_unregister);
1da177e4 2563
77d76ea3 2564static int __init sock_init(void)
1da177e4 2565{
b3e19d92 2566 int err;
2ca794e5
EB
2567 /*
2568 * Initialize the network sysctl infrastructure.
2569 */
2570 err = net_sysctl_init();
2571 if (err)
2572 goto out;
b3e19d92 2573
1da177e4 2574 /*
89bddce5 2575 * Initialize skbuff SLAB cache
1da177e4
LT
2576 */
2577 skb_init();
1da177e4
LT
2578
2579 /*
89bddce5 2580 * Initialize the protocols module.
1da177e4
LT
2581 */
2582
2583 init_inodecache();
b3e19d92
NP
2584
2585 err = register_filesystem(&sock_fs_type);
2586 if (err)
2587 goto out_fs;
1da177e4 2588 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2589 if (IS_ERR(sock_mnt)) {
2590 err = PTR_ERR(sock_mnt);
2591 goto out_mount;
2592 }
77d76ea3
AK
2593
2594 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2595 */
2596
2597#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2598 err = netfilter_init();
2599 if (err)
2600 goto out;
1da177e4 2601#endif
cbeb321a 2602
408eccce 2603 ptp_classifier_init();
c1f19b51 2604
b3e19d92
NP
2605out:
2606 return err;
2607
2608out_mount:
2609 unregister_filesystem(&sock_fs_type);
2610out_fs:
2611 goto out;
1da177e4
LT
2612}
2613
77d76ea3
AK
2614core_initcall(sock_init); /* early initcall */
2615
1da177e4
LT
2616#ifdef CONFIG_PROC_FS
2617void socket_seq_show(struct seq_file *seq)
2618{
2619 int cpu;
2620 int counter = 0;
2621
6f912042 2622 for_each_possible_cpu(cpu)
89bddce5 2623 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2624
2625 /* It can be negative, by the way. 8) */
2626 if (counter < 0)
2627 counter = 0;
2628
2629 seq_printf(seq, "sockets: used %d\n", counter);
2630}
89bddce5 2631#endif /* CONFIG_PROC_FS */
1da177e4 2632
89bbfc95 2633#ifdef CONFIG_COMPAT
6b96018b 2634static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2635 unsigned int cmd, void __user *up)
7a229387 2636{
7a229387
AB
2637 mm_segment_t old_fs = get_fs();
2638 struct timeval ktv;
2639 int err;
2640
2641 set_fs(KERNEL_DS);
6b96018b 2642 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2643 set_fs(old_fs);
644595f8 2644 if (!err)
ed6fe9d6 2645 err = compat_put_timeval(&ktv, up);
644595f8 2646
7a229387
AB
2647 return err;
2648}
2649
6b96018b 2650static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2651 unsigned int cmd, void __user *up)
7a229387 2652{
7a229387
AB
2653 mm_segment_t old_fs = get_fs();
2654 struct timespec kts;
2655 int err;
2656
2657 set_fs(KERNEL_DS);
6b96018b 2658 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2659 set_fs(old_fs);
644595f8 2660 if (!err)
ed6fe9d6 2661 err = compat_put_timespec(&kts, up);
644595f8 2662
7a229387
AB
2663 return err;
2664}
2665
6b96018b 2666static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2667{
2668 struct ifreq __user *uifr;
2669 int err;
2670
2671 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2672 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2673 return -EFAULT;
2674
6b96018b 2675 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2676 if (err)
2677 return err;
2678
6b96018b 2679 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2680 return -EFAULT;
2681
2682 return 0;
2683}
2684
6b96018b 2685static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2686{
6b96018b 2687 struct compat_ifconf ifc32;
7a229387
AB
2688 struct ifconf ifc;
2689 struct ifconf __user *uifc;
6b96018b 2690 struct compat_ifreq __user *ifr32;
7a229387
AB
2691 struct ifreq __user *ifr;
2692 unsigned int i, j;
2693 int err;
2694
6b96018b 2695 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2696 return -EFAULT;
2697
43da5f2e 2698 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2699 if (ifc32.ifcbuf == 0) {
2700 ifc32.ifc_len = 0;
2701 ifc.ifc_len = 0;
2702 ifc.ifc_req = NULL;
2703 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2704 } else {
c6d409cf
ED
2705 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2706 sizeof(struct ifreq);
7a229387
AB
2707 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2708 ifc.ifc_len = len;
2709 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2710 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2711 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2712 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2713 return -EFAULT;
2714 ifr++;
2715 ifr32++;
2716 }
2717 }
2718 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2719 return -EFAULT;
2720
6b96018b 2721 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2722 if (err)
2723 return err;
2724
2725 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2726 return -EFAULT;
2727
2728 ifr = ifc.ifc_req;
2729 ifr32 = compat_ptr(ifc32.ifcbuf);
2730 for (i = 0, j = 0;
c6d409cf
ED
2731 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2732 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2733 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2734 return -EFAULT;
2735 ifr32++;
2736 ifr++;
2737 }
2738
2739 if (ifc32.ifcbuf == 0) {
2740 /* Translate from 64-bit structure multiple to
2741 * a 32-bit one.
2742 */
2743 i = ifc.ifc_len;
6b96018b 2744 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2745 ifc32.ifc_len = i;
2746 } else {
2747 ifc32.ifc_len = i;
2748 }
6b96018b 2749 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2750 return -EFAULT;
2751
2752 return 0;
2753}
2754
6b96018b 2755static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2756{
3a7da39d
BH
2757 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2758 bool convert_in = false, convert_out = false;
2759 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2760 struct ethtool_rxnfc __user *rxnfc;
7a229387 2761 struct ifreq __user *ifr;
3a7da39d
BH
2762 u32 rule_cnt = 0, actual_rule_cnt;
2763 u32 ethcmd;
7a229387 2764 u32 data;
3a7da39d 2765 int ret;
7a229387 2766
3a7da39d
BH
2767 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2768 return -EFAULT;
7a229387 2769
3a7da39d
BH
2770 compat_rxnfc = compat_ptr(data);
2771
2772 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2773 return -EFAULT;
2774
3a7da39d
BH
2775 /* Most ethtool structures are defined without padding.
2776 * Unfortunately struct ethtool_rxnfc is an exception.
2777 */
2778 switch (ethcmd) {
2779 default:
2780 break;
2781 case ETHTOOL_GRXCLSRLALL:
2782 /* Buffer size is variable */
2783 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2784 return -EFAULT;
2785 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2786 return -ENOMEM;
2787 buf_size += rule_cnt * sizeof(u32);
2788 /* fall through */
2789 case ETHTOOL_GRXRINGS:
2790 case ETHTOOL_GRXCLSRLCNT:
2791 case ETHTOOL_GRXCLSRULE:
55664f32 2792 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2793 convert_out = true;
2794 /* fall through */
2795 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2796 buf_size += sizeof(struct ethtool_rxnfc);
2797 convert_in = true;
2798 break;
2799 }
2800
2801 ifr = compat_alloc_user_space(buf_size);
954b1244 2802 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2803
2804 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2805 return -EFAULT;
2806
3a7da39d
BH
2807 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2808 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2809 return -EFAULT;
2810
3a7da39d 2811 if (convert_in) {
127fe533 2812 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2813 * fs.ring_cookie and at the end of fs, but nowhere else.
2814 */
127fe533
AD
2815 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2816 sizeof(compat_rxnfc->fs.m_ext) !=
2817 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2818 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2819 BUILD_BUG_ON(
2820 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2821 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2822 offsetof(struct ethtool_rxnfc, fs.location) -
2823 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2824
2825 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2826 (void __user *)(&rxnfc->fs.m_ext + 1) -
2827 (void __user *)rxnfc) ||
3a7da39d
BH
2828 copy_in_user(&rxnfc->fs.ring_cookie,
2829 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2830 (void __user *)(&rxnfc->fs.location + 1) -
2831 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2832 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2833 sizeof(rxnfc->rule_cnt)))
2834 return -EFAULT;
2835 }
2836
2837 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2838 if (ret)
2839 return ret;
2840
2841 if (convert_out) {
2842 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2843 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2844 (const void __user *)rxnfc) ||
3a7da39d
BH
2845 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2846 &rxnfc->fs.ring_cookie,
954b1244
SH
2847 (const void __user *)(&rxnfc->fs.location + 1) -
2848 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2849 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2850 sizeof(rxnfc->rule_cnt)))
2851 return -EFAULT;
2852
2853 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2854 /* As an optimisation, we only copy the actual
2855 * number of rules that the underlying
2856 * function returned. Since Mallory might
2857 * change the rule count in user memory, we
2858 * check that it is less than the rule count
2859 * originally given (as the user buffer size),
2860 * which has been range-checked.
2861 */
2862 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2863 return -EFAULT;
2864 if (actual_rule_cnt < rule_cnt)
2865 rule_cnt = actual_rule_cnt;
2866 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2867 &rxnfc->rule_locs[0],
2868 rule_cnt * sizeof(u32)))
2869 return -EFAULT;
2870 }
2871 }
2872
2873 return 0;
7a229387
AB
2874}
2875
7a50a240
AB
2876static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2877{
2878 void __user *uptr;
2879 compat_uptr_t uptr32;
2880 struct ifreq __user *uifr;
2881
c6d409cf 2882 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2883 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2884 return -EFAULT;
2885
2886 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2887 return -EFAULT;
2888
2889 uptr = compat_ptr(uptr32);
2890
2891 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2892 return -EFAULT;
2893
2894 return dev_ioctl(net, SIOCWANDEV, uifr);
2895}
2896
6b96018b
AB
2897static int bond_ioctl(struct net *net, unsigned int cmd,
2898 struct compat_ifreq __user *ifr32)
7a229387
AB
2899{
2900 struct ifreq kifr;
7a229387
AB
2901 mm_segment_t old_fs;
2902 int err;
7a229387
AB
2903
2904 switch (cmd) {
2905 case SIOCBONDENSLAVE:
2906 case SIOCBONDRELEASE:
2907 case SIOCBONDSETHWADDR:
2908 case SIOCBONDCHANGEACTIVE:
6b96018b 2909 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2910 return -EFAULT;
2911
2912 old_fs = get_fs();
c6d409cf 2913 set_fs(KERNEL_DS);
c3f52ae6 2914 err = dev_ioctl(net, cmd,
2915 (struct ifreq __user __force *) &kifr);
c6d409cf 2916 set_fs(old_fs);
7a229387
AB
2917
2918 return err;
7a229387 2919 default:
07d106d0 2920 return -ENOIOCTLCMD;
ccbd6a5a 2921 }
7a229387
AB
2922}
2923
590d4693
BH
2924/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2925static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2926 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2927{
2928 struct ifreq __user *u_ifreq64;
7a229387
AB
2929 char tmp_buf[IFNAMSIZ];
2930 void __user *data64;
2931 u32 data32;
2932
2933 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2934 IFNAMSIZ))
2935 return -EFAULT;
417c3522 2936 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2937 return -EFAULT;
2938 data64 = compat_ptr(data32);
2939
2940 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2941
7a229387
AB
2942 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2943 IFNAMSIZ))
2944 return -EFAULT;
417c3522 2945 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2946 return -EFAULT;
2947
6b96018b 2948 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2949}
2950
6b96018b
AB
2951static int dev_ifsioc(struct net *net, struct socket *sock,
2952 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2953{
a2116ed2 2954 struct ifreq __user *uifr;
7a229387
AB
2955 int err;
2956
a2116ed2
AB
2957 uifr = compat_alloc_user_space(sizeof(*uifr));
2958 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2959 return -EFAULT;
2960
2961 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2962
7a229387
AB
2963 if (!err) {
2964 switch (cmd) {
2965 case SIOCGIFFLAGS:
2966 case SIOCGIFMETRIC:
2967 case SIOCGIFMTU:
2968 case SIOCGIFMEM:
2969 case SIOCGIFHWADDR:
2970 case SIOCGIFINDEX:
2971 case SIOCGIFADDR:
2972 case SIOCGIFBRDADDR:
2973 case SIOCGIFDSTADDR:
2974 case SIOCGIFNETMASK:
fab2532b 2975 case SIOCGIFPFLAGS:
7a229387 2976 case SIOCGIFTXQLEN:
fab2532b
AB
2977 case SIOCGMIIPHY:
2978 case SIOCGMIIREG:
a2116ed2 2979 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2980 err = -EFAULT;
2981 break;
2982 }
2983 }
2984 return err;
2985}
2986
a2116ed2
AB
2987static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2988 struct compat_ifreq __user *uifr32)
2989{
2990 struct ifreq ifr;
2991 struct compat_ifmap __user *uifmap32;
2992 mm_segment_t old_fs;
2993 int err;
2994
2995 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2996 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2997 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2998 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2999 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3000 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3001 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3002 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3003 if (err)
3004 return -EFAULT;
3005
3006 old_fs = get_fs();
c6d409cf 3007 set_fs(KERNEL_DS);
c3f52ae6 3008 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 3009 set_fs(old_fs);
a2116ed2
AB
3010
3011 if (cmd == SIOCGIFMAP && !err) {
3012 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3013 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3014 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3015 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3016 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3017 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3018 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3019 if (err)
3020 err = -EFAULT;
3021 }
3022 return err;
3023}
3024
7a229387 3025struct rtentry32 {
c6d409cf 3026 u32 rt_pad1;
7a229387
AB
3027 struct sockaddr rt_dst; /* target address */
3028 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3029 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3030 unsigned short rt_flags;
3031 short rt_pad2;
3032 u32 rt_pad3;
3033 unsigned char rt_tos;
3034 unsigned char rt_class;
3035 short rt_pad4;
3036 short rt_metric; /* +1 for binary compatibility! */
7a229387 3037 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3038 u32 rt_mtu; /* per route MTU/Window */
3039 u32 rt_window; /* Window clamping */
7a229387
AB
3040 unsigned short rt_irtt; /* Initial RTT */
3041};
3042
3043struct in6_rtmsg32 {
3044 struct in6_addr rtmsg_dst;
3045 struct in6_addr rtmsg_src;
3046 struct in6_addr rtmsg_gateway;
3047 u32 rtmsg_type;
3048 u16 rtmsg_dst_len;
3049 u16 rtmsg_src_len;
3050 u32 rtmsg_metric;
3051 u32 rtmsg_info;
3052 u32 rtmsg_flags;
3053 s32 rtmsg_ifindex;
3054};
3055
6b96018b
AB
3056static int routing_ioctl(struct net *net, struct socket *sock,
3057 unsigned int cmd, void __user *argp)
7a229387
AB
3058{
3059 int ret;
3060 void *r = NULL;
3061 struct in6_rtmsg r6;
3062 struct rtentry r4;
3063 char devname[16];
3064 u32 rtdev;
3065 mm_segment_t old_fs = get_fs();
3066
6b96018b
AB
3067 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3068 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3069 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3070 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3071 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3072 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3073 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3074 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3075 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3076 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3077 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3078
3079 r = (void *) &r6;
3080 } else { /* ipv4 */
6b96018b 3081 struct rtentry32 __user *ur4 = argp;
c6d409cf 3082 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3083 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3084 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3085 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3086 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3087 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3088 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3089 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3090 if (rtdev) {
c6d409cf 3091 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3092 r4.rt_dev = (char __user __force *)devname;
3093 devname[15] = 0;
7a229387
AB
3094 } else
3095 r4.rt_dev = NULL;
3096
3097 r = (void *) &r4;
3098 }
3099
3100 if (ret) {
3101 ret = -EFAULT;
3102 goto out;
3103 }
3104
c6d409cf 3105 set_fs(KERNEL_DS);
6b96018b 3106 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3107 set_fs(old_fs);
7a229387
AB
3108
3109out:
7a229387
AB
3110 return ret;
3111}
3112
3113/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3114 * for some operations; this forces use of the newer bridge-utils that
25985edc 3115 * use compatible ioctls
7a229387 3116 */
6b96018b 3117static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3118{
6b96018b 3119 compat_ulong_t tmp;
7a229387 3120
6b96018b 3121 if (get_user(tmp, argp))
7a229387
AB
3122 return -EFAULT;
3123 if (tmp == BRCTL_GET_VERSION)
3124 return BRCTL_VERSION + 1;
3125 return -EINVAL;
3126}
3127
6b96018b
AB
3128static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3129 unsigned int cmd, unsigned long arg)
3130{
3131 void __user *argp = compat_ptr(arg);
3132 struct sock *sk = sock->sk;
3133 struct net *net = sock_net(sk);
7a229387 3134
6b96018b 3135 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3136 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3137
3138 switch (cmd) {
3139 case SIOCSIFBR:
3140 case SIOCGIFBR:
3141 return old_bridge_ioctl(argp);
3142 case SIOCGIFNAME:
3143 return dev_ifname32(net, argp);
3144 case SIOCGIFCONF:
3145 return dev_ifconf(net, argp);
3146 case SIOCETHTOOL:
3147 return ethtool_ioctl(net, argp);
7a50a240
AB
3148 case SIOCWANDEV:
3149 return compat_siocwandev(net, argp);
a2116ed2
AB
3150 case SIOCGIFMAP:
3151 case SIOCSIFMAP:
3152 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3153 case SIOCBONDENSLAVE:
3154 case SIOCBONDRELEASE:
3155 case SIOCBONDSETHWADDR:
6b96018b
AB
3156 case SIOCBONDCHANGEACTIVE:
3157 return bond_ioctl(net, cmd, argp);
3158 case SIOCADDRT:
3159 case SIOCDELRT:
3160 return routing_ioctl(net, sock, cmd, argp);
3161 case SIOCGSTAMP:
3162 return do_siocgstamp(net, sock, cmd, argp);
3163 case SIOCGSTAMPNS:
3164 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3165 case SIOCBONDSLAVEINFOQUERY:
3166 case SIOCBONDINFOQUERY:
a2116ed2 3167 case SIOCSHWTSTAMP:
fd468c74 3168 case SIOCGHWTSTAMP:
590d4693 3169 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3170
3171 case FIOSETOWN:
3172 case SIOCSPGRP:
3173 case FIOGETOWN:
3174 case SIOCGPGRP:
3175 case SIOCBRADDBR:
3176 case SIOCBRDELBR:
3177 case SIOCGIFVLAN:
3178 case SIOCSIFVLAN:
3179 case SIOCADDDLCI:
3180 case SIOCDELDLCI:
3181 return sock_ioctl(file, cmd, arg);
3182
3183 case SIOCGIFFLAGS:
3184 case SIOCSIFFLAGS:
3185 case SIOCGIFMETRIC:
3186 case SIOCSIFMETRIC:
3187 case SIOCGIFMTU:
3188 case SIOCSIFMTU:
3189 case SIOCGIFMEM:
3190 case SIOCSIFMEM:
3191 case SIOCGIFHWADDR:
3192 case SIOCSIFHWADDR:
3193 case SIOCADDMULTI:
3194 case SIOCDELMULTI:
3195 case SIOCGIFINDEX:
6b96018b
AB
3196 case SIOCGIFADDR:
3197 case SIOCSIFADDR:
3198 case SIOCSIFHWBROADCAST:
6b96018b 3199 case SIOCDIFADDR:
6b96018b
AB
3200 case SIOCGIFBRDADDR:
3201 case SIOCSIFBRDADDR:
3202 case SIOCGIFDSTADDR:
3203 case SIOCSIFDSTADDR:
3204 case SIOCGIFNETMASK:
3205 case SIOCSIFNETMASK:
3206 case SIOCSIFPFLAGS:
3207 case SIOCGIFPFLAGS:
3208 case SIOCGIFTXQLEN:
3209 case SIOCSIFTXQLEN:
3210 case SIOCBRADDIF:
3211 case SIOCBRDELIF:
9177efd3
AB
3212 case SIOCSIFNAME:
3213 case SIOCGMIIPHY:
3214 case SIOCGMIIREG:
3215 case SIOCSMIIREG:
6b96018b 3216 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3217
6b96018b
AB
3218 case SIOCSARP:
3219 case SIOCGARP:
3220 case SIOCDARP:
6b96018b 3221 case SIOCATMARK:
9177efd3
AB
3222 return sock_do_ioctl(net, sock, cmd, arg);
3223 }
3224
6b96018b
AB
3225 return -ENOIOCTLCMD;
3226}
7a229387 3227
95c96174 3228static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3229 unsigned long arg)
89bbfc95
SP
3230{
3231 struct socket *sock = file->private_data;
3232 int ret = -ENOIOCTLCMD;
87de87d5
DM
3233 struct sock *sk;
3234 struct net *net;
3235
3236 sk = sock->sk;
3237 net = sock_net(sk);
89bbfc95
SP
3238
3239 if (sock->ops->compat_ioctl)
3240 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3241
87de87d5
DM
3242 if (ret == -ENOIOCTLCMD &&
3243 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3244 ret = compat_wext_handle_ioctl(net, cmd, arg);
3245
6b96018b
AB
3246 if (ret == -ENOIOCTLCMD)
3247 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3248
89bbfc95
SP
3249 return ret;
3250}
3251#endif
3252
ac5a488e
SS
3253int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3254{
3255 return sock->ops->bind(sock, addr, addrlen);
3256}
c6d409cf 3257EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3258
3259int kernel_listen(struct socket *sock, int backlog)
3260{
3261 return sock->ops->listen(sock, backlog);
3262}
c6d409cf 3263EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3264
3265int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3266{
3267 struct sock *sk = sock->sk;
3268 int err;
3269
3270 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3271 newsock);
3272 if (err < 0)
3273 goto done;
3274
3275 err = sock->ops->accept(sock, *newsock, flags);
3276 if (err < 0) {
3277 sock_release(*newsock);
fa8705b0 3278 *newsock = NULL;
ac5a488e
SS
3279 goto done;
3280 }
3281
3282 (*newsock)->ops = sock->ops;
1b08534e 3283 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3284
3285done:
3286 return err;
3287}
c6d409cf 3288EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3289
3290int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3291 int flags)
ac5a488e
SS
3292{
3293 return sock->ops->connect(sock, addr, addrlen, flags);
3294}
c6d409cf 3295EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3296
3297int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3298 int *addrlen)
3299{
3300 return sock->ops->getname(sock, addr, addrlen, 0);
3301}
c6d409cf 3302EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3303
3304int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3305 int *addrlen)
3306{
3307 return sock->ops->getname(sock, addr, addrlen, 1);
3308}
c6d409cf 3309EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3310
3311int kernel_getsockopt(struct socket *sock, int level, int optname,
3312 char *optval, int *optlen)
3313{
3314 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3315 char __user *uoptval;
3316 int __user *uoptlen;
ac5a488e
SS
3317 int err;
3318
fb8621bb
NK
3319 uoptval = (char __user __force *) optval;
3320 uoptlen = (int __user __force *) optlen;
3321
ac5a488e
SS
3322 set_fs(KERNEL_DS);
3323 if (level == SOL_SOCKET)
fb8621bb 3324 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3325 else
fb8621bb
NK
3326 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3327 uoptlen);
ac5a488e
SS
3328 set_fs(oldfs);
3329 return err;
3330}
c6d409cf 3331EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3332
3333int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3334 char *optval, unsigned int optlen)
ac5a488e
SS
3335{
3336 mm_segment_t oldfs = get_fs();
fb8621bb 3337 char __user *uoptval;
ac5a488e
SS
3338 int err;
3339
fb8621bb
NK
3340 uoptval = (char __user __force *) optval;
3341
ac5a488e
SS
3342 set_fs(KERNEL_DS);
3343 if (level == SOL_SOCKET)
fb8621bb 3344 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3345 else
fb8621bb 3346 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3347 optlen);
3348 set_fs(oldfs);
3349 return err;
3350}
c6d409cf 3351EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3352
3353int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3354 size_t size, int flags)
3355{
3356 if (sock->ops->sendpage)
3357 return sock->ops->sendpage(sock, page, offset, size, flags);
3358
3359 return sock_no_sendpage(sock, page, offset, size, flags);
3360}
c6d409cf 3361EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3362
3363int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3364{
3365 mm_segment_t oldfs = get_fs();
3366 int err;
3367
3368 set_fs(KERNEL_DS);
3369 err = sock->ops->ioctl(sock, cmd, arg);
3370 set_fs(oldfs);
3371
3372 return err;
3373}
c6d409cf 3374EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3375
91cf45f0
TM
3376int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3377{
3378 return sock->ops->shutdown(sock, how);
3379}
91cf45f0 3380EXPORT_SYMBOL(kernel_sock_shutdown);