mtd: fsl-quadspi: account for const type of of_device_id.data
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4 92
7c0f6ba6 93#include <linux/uaccess.h>
1da177e4
LT
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll settings; only defined when CONFIG_NET_RX_BUSY_POLL is set. */
unsigned int sysctl_net_busy_poll __read_mostly;
unsigned int sysctl_net_busy_read __read_mostly;
#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1e911632 290static void init_inodecache(void)
1da177e4
LT
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1e911632 299 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
300}
301
b87221de 302static const struct super_operations sockfs_ops = {
c6d409cf
ED
303 .alloc_inode = sock_alloc_inode,
304 .destroy_inode = sock_destroy_inode,
305 .statfs = simple_statfs,
1da177e4
LT
306};
307
c23fbb6b
ED
308/*
309 * sockfs_dname() is called from d_path().
310 */
311static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
312{
313 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 314 d_inode(dentry)->i_ino);
c23fbb6b
ED
315}
316
3ba13d17 317static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 318 .d_dname = sockfs_dname,
1da177e4
LT
319};
320
bba0bd31
AG
321static int sockfs_xattr_get(const struct xattr_handler *handler,
322 struct dentry *dentry, struct inode *inode,
323 const char *suffix, void *value, size_t size)
324{
325 if (value) {
326 if (dentry->d_name.len + 1 > size)
327 return -ERANGE;
328 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
329 }
330 return dentry->d_name.len + 1;
331}
332
333#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
334#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
335#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
336
337static const struct xattr_handler sockfs_xattr_handler = {
338 .name = XATTR_NAME_SOCKPROTONAME,
339 .get = sockfs_xattr_get,
340};
341
4a590153
AG
342static int sockfs_security_xattr_set(const struct xattr_handler *handler,
343 struct dentry *dentry, struct inode *inode,
344 const char *suffix, const void *value,
345 size_t size, int flags)
346{
347 /* Handled by LSM. */
348 return -EAGAIN;
349}
350
351static const struct xattr_handler sockfs_security_xattr_handler = {
352 .prefix = XATTR_SECURITY_PREFIX,
353 .set = sockfs_security_xattr_set,
354};
355
bba0bd31
AG
356static const struct xattr_handler *sockfs_xattr_handlers[] = {
357 &sockfs_xattr_handler,
4a590153 358 &sockfs_security_xattr_handler,
bba0bd31
AG
359 NULL
360};
361
c74a1cbb
AV
362static struct dentry *sockfs_mount(struct file_system_type *fs_type,
363 int flags, const char *dev_name, void *data)
364{
bba0bd31
AG
365 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
366 sockfs_xattr_handlers,
367 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
368}
369
370static struct vfsmount *sock_mnt __read_mostly;
371
372static struct file_system_type sock_fs_type = {
373 .name = "sockfs",
374 .mount = sockfs_mount,
375 .kill_sb = kill_anon_super,
376};
377
1da177e4
LT
378/*
379 * Obtains the first available file descriptor and sets it up for use.
380 *
39d8c1b6
DM
381 * These functions create file structures and maps them to fd space
382 * of the current process. On success it returns file descriptor
1da177e4
LT
383 * and file struct implicitly stored in sock->file.
384 * Note that another thread may close file descriptor before we return
385 * from this function. We use the fact that now we do not refer
386 * to socket after mapping. If one day we will need it, this
387 * function will increment ref. count on file by 1.
388 *
389 * In any case returned fd MAY BE not valid!
390 * This race condition is unavoidable
391 * with shared fd spaces, we cannot solve it inside kernel,
392 * but we take care of internal coherence yet.
393 */
394
aab174f0 395struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 396{
7cbe66b6 397 struct qstr name = { .name = "" };
2c48b9c4 398 struct path path;
7cbe66b6 399 struct file *file;
1da177e4 400
600e1779
MY
401 if (dname) {
402 name.name = dname;
403 name.len = strlen(name.name);
404 } else if (sock->sk) {
405 name.name = sock->sk->sk_prot_creator->name;
406 name.len = strlen(name.name);
407 }
4b936885 408 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
409 if (unlikely(!path.dentry))
410 return ERR_PTR(-ENOMEM);
2c48b9c4 411 path.mnt = mntget(sock_mnt);
39d8c1b6 412
2c48b9c4 413 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 414
2c48b9c4 415 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 416 &socket_file_ops);
b5ffe634 417 if (IS_ERR(file)) {
cc3808f8 418 /* drop dentry, keep inode */
c5ef6035 419 ihold(d_inode(path.dentry));
2c48b9c4 420 path_put(&path);
39b65252 421 return file;
cc3808f8
AV
422 }
423
424 sock->file = file;
77d27200 425 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 426 file->private_data = sock;
28407630 427 return file;
39d8c1b6 428}
56b31d1c 429EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 430
56b31d1c 431static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
432{
433 struct file *newfile;
28407630
AV
434 int fd = get_unused_fd_flags(flags);
435 if (unlikely(fd < 0))
436 return fd;
39d8c1b6 437
aab174f0 438 newfile = sock_alloc_file(sock, flags, NULL);
28407630 439 if (likely(!IS_ERR(newfile))) {
39d8c1b6 440 fd_install(fd, newfile);
28407630
AV
441 return fd;
442 }
7cbe66b6 443
28407630
AV
444 put_unused_fd(fd);
445 return PTR_ERR(newfile);
1da177e4
LT
446}
447
406a3c63 448struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 449{
6cb153ca
BL
450 if (file->f_op == &socket_file_ops)
451 return file->private_data; /* set in sock_map_fd */
452
23bb80d2
ED
453 *err = -ENOTSOCK;
454 return NULL;
6cb153ca 455}
406a3c63 456EXPORT_SYMBOL(sock_from_file);
6cb153ca 457
1da177e4 458/**
c6d409cf 459 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
460 * @fd: file handle
461 * @err: pointer to an error code return
462 *
463 * The file handle passed in is locked and the socket it is bound
241c4667 464 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
465 * with a negative errno code and NULL is returned. The function checks
466 * for both invalid handles and passing a handle which is not a socket.
467 *
468 * On a success the socket object pointer is returned.
469 */
470
471struct socket *sockfd_lookup(int fd, int *err)
472{
473 struct file *file;
1da177e4
LT
474 struct socket *sock;
475
89bddce5
SH
476 file = fget(fd);
477 if (!file) {
1da177e4
LT
478 *err = -EBADF;
479 return NULL;
480 }
89bddce5 481
6cb153ca
BL
482 sock = sock_from_file(file, err);
483 if (!sock)
1da177e4 484 fput(file);
6cb153ca
BL
485 return sock;
486}
c6d409cf 487EXPORT_SYMBOL(sockfd_lookup);
1da177e4 488
6cb153ca
BL
489static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
490{
00e188ef 491 struct fd f = fdget(fd);
6cb153ca
BL
492 struct socket *sock;
493
3672558c 494 *err = -EBADF;
00e188ef
AV
495 if (f.file) {
496 sock = sock_from_file(f.file, err);
497 if (likely(sock)) {
498 *fput_needed = f.flags;
6cb153ca 499 return sock;
00e188ef
AV
500 }
501 fdput(f);
1da177e4 502 }
6cb153ca 503 return NULL;
1da177e4
LT
504}
505
600e1779
MY
506static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
507 size_t size)
508{
509 ssize_t len;
510 ssize_t used = 0;
511
c5ef6035 512 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
513 if (len < 0)
514 return len;
515 used += len;
516 if (buffer) {
517 if (size < used)
518 return -ERANGE;
519 buffer += len;
520 }
521
522 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
523 used += len;
524 if (buffer) {
525 if (size < used)
526 return -ERANGE;
527 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
528 buffer += len;
529 }
530
531 return used;
532}
533
dc647ec8 534static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
535{
536 int err = simple_setattr(dentry, iattr);
537
e1a3a60a 538 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
539 struct socket *sock = SOCKET_I(d_inode(dentry));
540
541 sock->sk->sk_uid = iattr->ia_uid;
542 }
543
544 return err;
545}
546
600e1779 547static const struct inode_operations sockfs_inode_ops = {
600e1779 548 .listxattr = sockfs_listxattr,
86741ec2 549 .setattr = sockfs_setattr,
600e1779
MY
550};
551
1da177e4
LT
552/**
553 * sock_alloc - allocate a socket
89bddce5 554 *
1da177e4
LT
555 * Allocate a new inode and socket object. The two are bound together
556 * and initialised. The socket is then returned. If we are out of inodes
557 * NULL is returned.
558 */
559
f4a00aac 560struct socket *sock_alloc(void)
1da177e4 561{
89bddce5
SH
562 struct inode *inode;
563 struct socket *sock;
1da177e4 564
a209dfc7 565 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
566 if (!inode)
567 return NULL;
568
569 sock = SOCKET_I(inode);
570
85fe4025 571 inode->i_ino = get_next_ino();
89bddce5 572 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
573 inode->i_uid = current_fsuid();
574 inode->i_gid = current_fsgid();
600e1779 575 inode->i_op = &sockfs_inode_ops;
1da177e4 576
19e8d69c 577 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
578 return sock;
579}
f4a00aac 580EXPORT_SYMBOL(sock_alloc);
1da177e4 581
1da177e4
LT
582/**
583 * sock_release - close a socket
584 * @sock: socket to close
585 *
586 * The socket is released from the protocol stack if it has a release
587 * callback, and the inode is then released if the socket is bound to
89bddce5 588 * an inode not a file.
1da177e4 589 */
89bddce5 590
1da177e4
LT
591void sock_release(struct socket *sock)
592{
593 if (sock->ops) {
594 struct module *owner = sock->ops->owner;
595
596 sock->ops->release(sock);
597 sock->ops = NULL;
598 module_put(owner);
599 }
600
eaefd110 601 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 602 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 603
19e8d69c 604 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
605 if (!sock->file) {
606 iput(SOCK_INODE(sock));
607 return;
608 }
89bddce5 609 sock->file = NULL;
1da177e4 610}
c6d409cf 611EXPORT_SYMBOL(sock_release);
1da177e4 612
c14ac945 613void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 614{
140c55d4
ED
615 u8 flags = *tx_flags;
616
c14ac945 617 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
618 flags |= SKBTX_HW_TSTAMP;
619
c14ac945 620 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
621 flags |= SKBTX_SW_TSTAMP;
622
c14ac945 623 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
624 flags |= SKBTX_SCHED_TSTAMP;
625
140c55d4 626 *tx_flags = flags;
20d49473 627}
67cc0d40 628EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 629
d8725c86 630static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 631{
01e97e65 632 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
633 BUG_ON(ret == -EIOCBQUEUED);
634 return ret;
1da177e4
LT
635}
636
/*
 * sock_sendmsg - send a message through a socket
 * @sock: socket
 * @msg: message to send
 *
 * Runs the security_socket_sendmsg() hook first; if it returns non-zero
 * that value is returned, otherwise the message goes to
 * sock_sendmsg_nosec() and its result is returned.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
645
646int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
647 struct kvec *vec, size_t num, size_t size)
648{
6aa24814 649 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 650 return sock_sendmsg(sock, msg);
1da177e4 651}
c6d409cf 652EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 653
306b13eb
TH
654int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
655 struct kvec *vec, size_t num, size_t size)
656{
657 struct socket *sock = sk->sk_socket;
658
659 if (!sock->ops->sendmsg_locked)
db5980d8 660 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb
TH
661
662 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
663
664 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
665}
666EXPORT_SYMBOL(kernel_sendmsg_locked);
667
8605330a
SHY
668static bool skb_is_err_queue(const struct sk_buff *skb)
669{
670 /* pkt_type of skbs enqueued on the error queue are set to
671 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
672 * in recvmsg, since skbs received on a local socket will never
673 * have a pkt_type of PACKET_OUTGOING.
674 */
675 return skb->pkt_type == PACKET_OUTGOING;
676}
677
b50a5c70
ML
678/* On transmit, software and hardware timestamps are returned independently.
679 * As the two skb clones share the hardware timestamp, which may be updated
680 * before the software timestamp is received, a hardware TX timestamp may be
681 * returned only if there is no software TX timestamp. Ignore false software
682 * timestamps, which may be made in the __sock_recv_timestamp() call when the
683 * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
684 * hardware timestamp.
685 */
686static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
687{
688 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
689}
690
aad9c8c4
ML
691static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
692{
693 struct scm_ts_pktinfo ts_pktinfo;
694 struct net_device *orig_dev;
695
696 if (!skb_mac_header_was_set(skb))
697 return;
698
699 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
700
701 rcu_read_lock();
702 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
703 if (orig_dev)
704 ts_pktinfo.if_index = orig_dev->ifindex;
705 rcu_read_unlock();
706
707 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
708 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
709 sizeof(ts_pktinfo), &ts_pktinfo);
710}
711
92f37fd2
ED
712/*
713 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
714 */
/*
 * Attach receive-timestamp control messages for @skb to @msg.
 *
 * - If SOCK_RCVTSTAMP is set on @sk, one cmsg is emitted: SCM_TIMESTAMP
 *   (struct timeval), or SCM_TIMESTAMPNS (struct timespec) when
 *   SOCK_RCVTSTAMPNS is set as well.
 * - A struct scm_timestamping is then filled according to sk->sk_tsflags:
 *   slot 0 from skb->tstamp, slot 2 from the skb's hardware timestamp.
 *   It is emitted as SCM_TIMESTAMPING only if at least one slot was
 *   filled, optionally followed by SCM_TIMESTAMPING_OPT_STATS.
 */
715void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
716 struct sk_buff *skb)
717{
20d49473 718 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 719 struct scm_timestamping tss;
b50a5c70 720 int empty = 1, false_tstamp = 0;
20d49473
PO
721 struct skb_shared_hwtstamps *shhwtstamps =
722 skb_hwtstamps(skb);
723
724 /* Race occurred between timestamp enabling and packet
725 receiving. Fill in the current time for now. */
b50a5c70 726 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 727 __net_timestamp(skb);
b50a5c70
ML
/* Remember that the stamp was made up here; skb_is_swtx_tstamp() below
 * uses this to ignore it. */
728 false_tstamp = 1;
729 }
20d49473
PO
730
/* Plain SO_TIMESTAMP / SO_TIMESTAMPNS style cmsg. */
731 if (need_software_tstamp) {
732 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
733 struct timeval tv;
734 skb_get_timestamp(skb, &tv);
735 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
736 sizeof(tv), &tv);
737 } else {
f24b9be5
WB
738 struct timespec ts;
739 skb_get_timestampns(skb, &ts);
20d49473 740 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 741 sizeof(ts), &ts);
20d49473
PO
742 }
743 }
744
/* Start from all-zero slots; "empty" is cleared once any slot is filled. */
f24b9be5 745 memset(&tss, 0, sizeof(tss));
c199105d 746 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 747 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 748 empty = 0;
/* Hardware stamp goes into slot 2, but not when skb_is_swtx_tstamp()
 * reports a genuine software TX timestamp on this skb. */
4d276eb6 749 if (shhwtstamps &&
b9f40e21 750 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 751 !skb_is_swtx_tstamp(skb, false_tstamp) &&
aad9c8c4 752 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 753 empty = 0;
aad9c8c4
ML
754 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
755 !skb_is_err_queue(skb))
756 put_ts_pktinfo(msg, skb);
757 }
/* Emit SCM_TIMESTAMPING only when there is something to report.  For
 * error-queue skbs flagged opt_stats, the skb data itself is passed on
 * as the SCM_TIMESTAMPING_OPT_STATS payload. */
1c885808 758 if (!empty) {
20d49473 759 put_cmsg(msg, SOL_SOCKET,
f24b9be5 760 SCM_TIMESTAMPING, sizeof(tss), &tss);
1c885808 761
8605330a 762 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 763 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
764 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
765 skb->len, skb->data);
766 }
92f37fd2 767}
7c81fd8b
ACM
768EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
769
6e3e939f
JB
770void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
771 struct sk_buff *skb)
772{
773 int ack;
774
775 if (!sock_flag(sk, SOCK_WIFI_STATUS))
776 return;
777 if (!skb->wifi_acked_valid)
778 return;
779
780 ack = skb->wifi_acked;
781
782 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
783}
784EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
785
11165f14 786static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
787 struct sk_buff *skb)
3b885787 788{
744d5a3e 789 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 790 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 791 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
792}
793
/*
 * __sock_recv_ts_and_drops - emit timestamp and drop-count cmsgs
 * @msg: message receiving the cmsgs
 * @sk: receiving sock
 * @skb: packet being delivered
 *
 * Calls sock_recv_timestamp() and then sock_recv_drops(), in that order.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 801
1b784140 802static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 803 int flags)
1da177e4 804{
2da62906 805 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
806}
807
/*
 * sock_recvmsg - receive a message from a socket
 * @sock: socket
 * @msg: message to fill
 * @flags: MSG_* flags
 *
 * Runs the security_socket_recvmsg() hook first; if it returns non-zero
 * that value is returned, otherwise the result of sock_recvmsg_nosec().
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 815
c1249c0a
ML
816/**
817 * kernel_recvmsg - Receive a message from a socket (kernel space)
818 * @sock: The socket to receive the message from
819 * @msg: Received message
820 * @vec: Input s/g array for message data
821 * @num: Size of input s/g array
822 * @size: Number of bytes to read
823 * @flags: Message flags (MSG_DONTWAIT, etc...)
824 *
825 * On return the msg structure contains the scatter/gather array passed in the
826 * vec argument. The array is modified so that it consists of the unfilled
827 * portion of the original array.
828 *
829 * The returned value is the total number of bytes received, or an error.
830 */
89bddce5
SH
831int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
832 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
833{
834 mm_segment_t oldfs = get_fs();
835 int result;
836
6aa24814 837 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 838 set_fs(KERNEL_DS);
2da62906 839 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
840 set_fs(oldfs);
841 return result;
842}
c6d409cf 843EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 844
ce1d4d3e
CH
845static ssize_t sock_sendpage(struct file *file, struct page *page,
846 int offset, size_t size, loff_t *ppos, int more)
1da177e4 847{
1da177e4
LT
848 struct socket *sock;
849 int flags;
850
ce1d4d3e
CH
851 sock = file->private_data;
852
35f9c09f
ED
853 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
854 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
855 flags |= more;
ce1d4d3e 856
e6949583 857 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 858}
1da177e4 859
9c55e01c 860static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 861 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
862 unsigned int flags)
863{
864 struct socket *sock = file->private_data;
865
997b37da
RDC
866 if (unlikely(!sock->ops->splice_read))
867 return -EINVAL;
868
9c55e01c
JA
869 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
870}
871
8ae5e030 872static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 873{
6d652330
AV
874 struct file *file = iocb->ki_filp;
875 struct socket *sock = file->private_data;
0345f931 876 struct msghdr msg = {.msg_iter = *to,
877 .msg_iocb = iocb};
8ae5e030 878 ssize_t res;
ce1d4d3e 879
8ae5e030
AV
880 if (file->f_flags & O_NONBLOCK)
881 msg.msg_flags = MSG_DONTWAIT;
882
883 if (iocb->ki_pos != 0)
1da177e4 884 return -ESPIPE;
027445c3 885
66ee59af 886 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
887 return 0;
888
2da62906 889 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
890 *to = msg.msg_iter;
891 return res;
1da177e4
LT
892}
893
8ae5e030 894static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 895{
6d652330
AV
896 struct file *file = iocb->ki_filp;
897 struct socket *sock = file->private_data;
0345f931 898 struct msghdr msg = {.msg_iter = *from,
899 .msg_iocb = iocb};
8ae5e030 900 ssize_t res;
1da177e4 901
8ae5e030 902 if (iocb->ki_pos != 0)
ce1d4d3e 903 return -ESPIPE;
027445c3 904
8ae5e030
AV
905 if (file->f_flags & O_NONBLOCK)
906 msg.msg_flags = MSG_DONTWAIT;
907
6d652330
AV
908 if (sock->type == SOCK_SEQPACKET)
909 msg.msg_flags |= MSG_EOR;
910
d8725c86 911 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
912 *from = msg.msg_iter;
913 return res;
1da177e4
LT
914}
915
1da177e4
LT
916/*
917 * Atomic setting of ioctl hooks to avoid race
918 * with module unload.
919 */
920
4a3e2f71 921static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 922static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 923
881d966b 924void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 925{
4a3e2f71 926 mutex_lock(&br_ioctl_mutex);
1da177e4 927 br_ioctl_hook = hook;
4a3e2f71 928 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
929}
930EXPORT_SYMBOL(brioctl_set);
931
4a3e2f71 932static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 933static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 934
881d966b 935void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 936{
4a3e2f71 937 mutex_lock(&vlan_ioctl_mutex);
1da177e4 938 vlan_ioctl_hook = hook;
4a3e2f71 939 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
940}
941EXPORT_SYMBOL(vlan_ioctl_set);
942
4a3e2f71 943static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 944static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 945
89bddce5 946void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 947{
4a3e2f71 948 mutex_lock(&dlci_ioctl_mutex);
1da177e4 949 dlci_ioctl_hook = hook;
4a3e2f71 950 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
951}
952EXPORT_SYMBOL(dlci_ioctl_set);
953
6b96018b
AB
954static long sock_do_ioctl(struct net *net, struct socket *sock,
955 unsigned int cmd, unsigned long arg)
956{
957 int err;
958 void __user *argp = (void __user *)arg;
959
960 err = sock->ops->ioctl(sock, cmd, arg);
961
962 /*
963 * If this ioctl is unknown try to hand it down
964 * to the NIC driver.
965 */
966 if (err == -ENOIOCTLCMD)
967 err = dev_ioctl(net, cmd, argp);
968
969 return err;
970}
971
1da177e4
LT
972/*
973 * With an ioctl, arg may well be a user mode pointer, but we don't know
974 * what to do with it - that's up to the protocol still.
975 */
976
c62cce2c
AV
977static struct ns_common *get_net_ns(struct ns_common *ns)
978{
979 return &get_net(container_of(ns, struct net, ns))->ns;
980}
981
1da177e4
LT
982static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
983{
984 struct socket *sock;
881d966b 985 struct sock *sk;
1da177e4
LT
986 void __user *argp = (void __user *)arg;
987 int pid, err;
881d966b 988 struct net *net;
1da177e4 989
b69aee04 990 sock = file->private_data;
881d966b 991 sk = sock->sk;
3b1e0a65 992 net = sock_net(sk);
1da177e4 993 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 994 err = dev_ioctl(net, cmd, argp);
1da177e4 995 } else
3d23e349 996#ifdef CONFIG_WEXT_CORE
1da177e4 997 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 998 err = dev_ioctl(net, cmd, argp);
1da177e4 999 } else
3d23e349 1000#endif
89bddce5 1001 switch (cmd) {
1da177e4
LT
1002 case FIOSETOWN:
1003 case SIOCSPGRP:
1004 err = -EFAULT;
1005 if (get_user(pid, (int __user *)argp))
1006 break;
393cc3f5 1007 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1008 break;
1009 case FIOGETOWN:
1010 case SIOCGPGRP:
609d7fa9 1011 err = put_user(f_getown(sock->file),
89bddce5 1012 (int __user *)argp);
1da177e4
LT
1013 break;
1014 case SIOCGIFBR:
1015 case SIOCSIFBR:
1016 case SIOCBRADDBR:
1017 case SIOCBRDELBR:
1018 err = -ENOPKG;
1019 if (!br_ioctl_hook)
1020 request_module("bridge");
1021
4a3e2f71 1022 mutex_lock(&br_ioctl_mutex);
89bddce5 1023 if (br_ioctl_hook)
881d966b 1024 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1025 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1026 break;
1027 case SIOCGIFVLAN:
1028 case SIOCSIFVLAN:
1029 err = -ENOPKG;
1030 if (!vlan_ioctl_hook)
1031 request_module("8021q");
1032
4a3e2f71 1033 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1034 if (vlan_ioctl_hook)
881d966b 1035 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1036 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1037 break;
1da177e4
LT
1038 case SIOCADDDLCI:
1039 case SIOCDELDLCI:
1040 err = -ENOPKG;
1041 if (!dlci_ioctl_hook)
1042 request_module("dlci");
1043
7512cbf6
PE
1044 mutex_lock(&dlci_ioctl_mutex);
1045 if (dlci_ioctl_hook)
1da177e4 1046 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1047 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1048 break;
c62cce2c
AV
1049 case SIOCGSKNS:
1050 err = -EPERM;
1051 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1052 break;
1053
1054 err = open_related_ns(&net->ns, get_net_ns);
1055 break;
1da177e4 1056 default:
6b96018b 1057 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1058 break;
89bddce5 1059 }
1da177e4
LT
1060 return err;
1061}
1062
1063int sock_create_lite(int family, int type, int protocol, struct socket **res)
1064{
1065 int err;
1066 struct socket *sock = NULL;
89bddce5 1067
1da177e4
LT
1068 err = security_socket_create(family, type, protocol, 1);
1069 if (err)
1070 goto out;
1071
1072 sock = sock_alloc();
1073 if (!sock) {
1074 err = -ENOMEM;
1075 goto out;
1076 }
1077
1da177e4 1078 sock->type = type;
7420ed23
VY
1079 err = security_socket_post_create(sock, family, type, protocol, 1);
1080 if (err)
1081 goto out_release;
1082
1da177e4
LT
1083out:
1084 *res = sock;
1085 return err;
7420ed23
VY
1086out_release:
1087 sock_release(sock);
1088 sock = NULL;
1089 goto out;
1da177e4 1090}
c6d409cf 1091EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1092
1093/* No kernel lock held - perfect */
89bddce5 1094static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1095{
cbf55001 1096 unsigned int busy_flag = 0;
1da177e4
LT
1097 struct socket *sock;
1098
1099 /*
89bddce5 1100 * We can't return errors to poll, so it's either yes or no.
1da177e4 1101 */
b69aee04 1102 sock = file->private_data;
2d48d67f 1103
cbf55001 1104 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1105 /* this socket can poll_ll so tell the system call */
cbf55001 1106 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1107
1108 /* once, only if requested by syscall */
cbf55001
ET
1109 if (wait && (wait->_key & POLL_BUSY_LOOP))
1110 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1111 }
1112
cbf55001 1113 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1114}
1115
89bddce5 1116static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1117{
b69aee04 1118 struct socket *sock = file->private_data;
1da177e4
LT
1119
1120 return sock->ops->mmap(file, sock, vma);
1121}
1122
/* ->release() for socket files: release the socket embedded in @inode. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1128
1129/*
1130 * Update the socket async list
1131 *
1132 * Fasync_list locking strategy.
1133 *
1134 * 1. fasync_list is modified only under process context socket lock
1135 * i.e. under semaphore.
1136 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1137 * or under socket lock
1da177e4
LT
1138 */
1139
1140static int sock_fasync(int fd, struct file *filp, int on)
1141{
989a2979
ED
1142 struct socket *sock = filp->private_data;
1143 struct sock *sk = sock->sk;
eaefd110 1144 struct socket_wq *wq;
1da177e4 1145
989a2979 1146 if (sk == NULL)
1da177e4 1147 return -EINVAL;
1da177e4
LT
1148
1149 lock_sock(sk);
1e1d04e6 1150 wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
eaefd110 1151 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1152
eaefd110 1153 if (!wq->fasync_list)
989a2979
ED
1154 sock_reset_flag(sk, SOCK_FASYNC);
1155 else
bcdce719 1156 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1157
989a2979 1158 release_sock(sk);
1da177e4
LT
1159 return 0;
1160}
1161
ceb5d58b 1162/* This function may be called only under rcu_lock */
1da177e4 1163
ceb5d58b 1164int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1165{
ceb5d58b 1166 if (!wq || !wq->fasync_list)
1da177e4 1167 return -1;
ceb5d58b 1168
89bddce5 1169 switch (how) {
8d8ad9d7 1170 case SOCK_WAKE_WAITD:
ceb5d58b 1171 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1172 break;
1173 goto call_kill;
8d8ad9d7 1174 case SOCK_WAKE_SPACE:
ceb5d58b 1175 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1176 break;
1177 /* fall through */
8d8ad9d7 1178 case SOCK_WAKE_IO:
89bddce5 1179call_kill:
43815482 1180 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1181 break;
8d8ad9d7 1182 case SOCK_WAKE_URG:
43815482 1183 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1184 }
ceb5d58b 1185
1da177e4
LT
1186 return 0;
1187}
c6d409cf 1188EXPORT_SYMBOL(sock_wake_async);
1da177e4 1189
721db93a 1190int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1191 struct socket **res, int kern)
1da177e4
LT
1192{
1193 int err;
1194 struct socket *sock;
55737fda 1195 const struct net_proto_family *pf;
1da177e4
LT
1196
1197 /*
89bddce5 1198 * Check protocol is in range
1da177e4
LT
1199 */
1200 if (family < 0 || family >= NPROTO)
1201 return -EAFNOSUPPORT;
1202 if (type < 0 || type >= SOCK_MAX)
1203 return -EINVAL;
1204
1205 /* Compatibility.
1206
1207 This uglymoron is moved from INET layer to here to avoid
1208 deadlock in module load.
1209 */
1210 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1211 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1212 current->comm);
1da177e4
LT
1213 family = PF_PACKET;
1214 }
1215
1216 err = security_socket_create(family, type, protocol, kern);
1217 if (err)
1218 return err;
89bddce5 1219
55737fda
SH
1220 /*
1221 * Allocate the socket and allow the family to set things up. if
1222 * the protocol is 0, the family is instructed to select an appropriate
1223 * default.
1224 */
1225 sock = sock_alloc();
1226 if (!sock) {
e87cc472 1227 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1228 return -ENFILE; /* Not exactly a match, but its the
1229 closest posix thing */
1230 }
1231
1232 sock->type = type;
1233
95a5afca 1234#ifdef CONFIG_MODULES
89bddce5
SH
1235 /* Attempt to load a protocol module if the find failed.
1236 *
1237 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1238 * requested real, full-featured networking support upon configuration.
1239 * Otherwise module support will break!
1240 */
190683a9 1241 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1242 request_module("net-pf-%d", family);
1da177e4
LT
1243#endif
1244
55737fda
SH
1245 rcu_read_lock();
1246 pf = rcu_dereference(net_families[family]);
1247 err = -EAFNOSUPPORT;
1248 if (!pf)
1249 goto out_release;
1da177e4
LT
1250
1251 /*
1252 * We will call the ->create function, that possibly is in a loadable
1253 * module, so we have to bump that loadable module refcnt first.
1254 */
55737fda 1255 if (!try_module_get(pf->owner))
1da177e4
LT
1256 goto out_release;
1257
55737fda
SH
1258 /* Now protected by module ref count */
1259 rcu_read_unlock();
1260
3f378b68 1261 err = pf->create(net, sock, protocol, kern);
55737fda 1262 if (err < 0)
1da177e4 1263 goto out_module_put;
a79af59e 1264
1da177e4
LT
1265 /*
1266 * Now to bump the refcnt of the [loadable] module that owns this
1267 * socket at sock_release time we decrement its refcnt.
1268 */
55737fda
SH
1269 if (!try_module_get(sock->ops->owner))
1270 goto out_module_busy;
1271
1da177e4
LT
1272 /*
1273 * Now that we're done with the ->create function, the [loadable]
1274 * module can have its refcnt decremented
1275 */
55737fda 1276 module_put(pf->owner);
7420ed23
VY
1277 err = security_socket_post_create(sock, family, type, protocol, kern);
1278 if (err)
3b185525 1279 goto out_sock_release;
55737fda 1280 *res = sock;
1da177e4 1281
55737fda
SH
1282 return 0;
1283
1284out_module_busy:
1285 err = -EAFNOSUPPORT;
1da177e4 1286out_module_put:
55737fda
SH
1287 sock->ops = NULL;
1288 module_put(pf->owner);
1289out_sock_release:
1da177e4 1290 sock_release(sock);
55737fda
SH
1291 return err;
1292
1293out_release:
1294 rcu_read_unlock();
1295 goto out_sock_release;
1da177e4 1296}
721db93a 1297EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1298
1299int sock_create(int family, int type, int protocol, struct socket **res)
1300{
1b8d7ae4 1301 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1302}
c6d409cf 1303EXPORT_SYMBOL(sock_create);
1da177e4 1304
/* Create a socket in namespace @net, passing kern=1 to __sock_create(). */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1310
3e0fa65f 1311SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1312{
1313 int retval;
1314 struct socket *sock;
a677a039
UD
1315 int flags;
1316
e38b36f3
UD
1317 /* Check the SOCK_* constants for consistency. */
1318 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1319 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1320 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1321 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1322
a677a039 1323 flags = type & ~SOCK_TYPE_MASK;
77d27200 1324 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1325 return -EINVAL;
1326 type &= SOCK_TYPE_MASK;
1da177e4 1327
aaca0bdc
UD
1328 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1329 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1330
1da177e4
LT
1331 retval = sock_create(family, type, protocol, &sock);
1332 if (retval < 0)
1333 goto out;
1334
77d27200 1335 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1336 if (retval < 0)
1337 goto out_release;
1338
1339out:
1340 /* It may be already another descriptor 8) Not kernel problem. */
1341 return retval;
1342
1343out_release:
1344 sock_release(sock);
1345 return retval;
1346}
1347
1348/*
1349 * Create a pair of connected sockets.
1350 */
1351
3e0fa65f
HC
1352SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1353 int __user *, usockvec)
1da177e4
LT
1354{
1355 struct socket *sock1, *sock2;
1356 int fd1, fd2, err;
db349509 1357 struct file *newfile1, *newfile2;
a677a039
UD
1358 int flags;
1359
1360 flags = type & ~SOCK_TYPE_MASK;
77d27200 1361 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1362 return -EINVAL;
1363 type &= SOCK_TYPE_MASK;
1da177e4 1364
aaca0bdc
UD
1365 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1366 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1367
1da177e4
LT
1368 /*
1369 * Obtain the first socket and check if the underlying protocol
1370 * supports the socketpair call.
1371 */
1372
1373 err = sock_create(family, type, protocol, &sock1);
1374 if (err < 0)
1375 goto out;
1376
1377 err = sock_create(family, type, protocol, &sock2);
1378 if (err < 0)
1379 goto out_release_1;
1380
1381 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1382 if (err < 0)
1da177e4
LT
1383 goto out_release_both;
1384
28407630 1385 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1386 if (unlikely(fd1 < 0)) {
1387 err = fd1;
db349509 1388 goto out_release_both;
bf3c23d1 1389 }
d73aa286 1390
28407630 1391 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1392 if (unlikely(fd2 < 0)) {
1393 err = fd2;
d73aa286 1394 goto out_put_unused_1;
28407630
AV
1395 }
1396
aab174f0 1397 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1398 if (IS_ERR(newfile1)) {
28407630 1399 err = PTR_ERR(newfile1);
d73aa286 1400 goto out_put_unused_both;
28407630
AV
1401 }
1402
aab174f0 1403 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1404 if (IS_ERR(newfile2)) {
1405 err = PTR_ERR(newfile2);
d73aa286 1406 goto out_fput_1;
db349509
AV
1407 }
1408
d73aa286
YD
1409 err = put_user(fd1, &usockvec[0]);
1410 if (err)
1411 goto out_fput_both;
1412
1413 err = put_user(fd2, &usockvec[1]);
1414 if (err)
1415 goto out_fput_both;
1416
157cf649 1417 audit_fd_pair(fd1, fd2);
d73aa286 1418
db349509
AV
1419 fd_install(fd1, newfile1);
1420 fd_install(fd2, newfile2);
1da177e4
LT
1421 /* fd1 and fd2 may be already another descriptors.
1422 * Not kernel problem.
1423 */
1424
d73aa286 1425 return 0;
1da177e4 1426
d73aa286
YD
1427out_fput_both:
1428 fput(newfile2);
1429 fput(newfile1);
1430 put_unused_fd(fd2);
1431 put_unused_fd(fd1);
1432 goto out;
1433
1434out_fput_1:
1435 fput(newfile1);
1436 put_unused_fd(fd2);
1437 put_unused_fd(fd1);
1438 sock_release(sock2);
1439 goto out;
1da177e4 1440
d73aa286
YD
1441out_put_unused_both:
1442 put_unused_fd(fd2);
1443out_put_unused_1:
1444 put_unused_fd(fd1);
1da177e4 1445out_release_both:
89bddce5 1446 sock_release(sock2);
1da177e4 1447out_release_1:
89bddce5 1448 sock_release(sock1);
1da177e4
LT
1449out:
1450 return err;
1451}
1452
1da177e4
LT
1453/*
1454 * Bind a name to a socket. Nothing much to do here since it's
1455 * the protocol's responsibility to handle the local address.
1456 *
1457 * We move the socket address to kernel space before we call
1458 * the protocol layer (having also checked the address is ok).
1459 */
1460
20f37034 1461SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1462{
1463 struct socket *sock;
230b1839 1464 struct sockaddr_storage address;
6cb153ca 1465 int err, fput_needed;
1da177e4 1466
89bddce5 1467 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1468 if (sock) {
43db362d 1469 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1470 if (err >= 0) {
1471 err = security_socket_bind(sock,
230b1839 1472 (struct sockaddr *)&address,
89bddce5 1473 addrlen);
6cb153ca
BL
1474 if (!err)
1475 err = sock->ops->bind(sock,
89bddce5 1476 (struct sockaddr *)
230b1839 1477 &address, addrlen);
1da177e4 1478 }
6cb153ca 1479 fput_light(sock->file, fput_needed);
89bddce5 1480 }
1da177e4
LT
1481 return err;
1482}
1483
1da177e4
LT
1484/*
1485 * Perform a listen. Basically, we allow the protocol to do anything
1486 * necessary for a listen, and if that works, we mark the socket as
1487 * ready for listening.
1488 */
1489
3e0fa65f 1490SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1491{
1492 struct socket *sock;
6cb153ca 1493 int err, fput_needed;
b8e1f9b5 1494 int somaxconn;
89bddce5
SH
1495
1496 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1497 if (sock) {
8efa6e93 1498 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1499 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1500 backlog = somaxconn;
1da177e4
LT
1501
1502 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1503 if (!err)
1504 err = sock->ops->listen(sock, backlog);
1da177e4 1505
6cb153ca 1506 fput_light(sock->file, fput_needed);
1da177e4
LT
1507 }
1508 return err;
1509}
1510
1da177e4
LT
1511/*
1512 * For accept, we attempt to create a new socket, set up the link
1513 * with the client, wake up the client, then return the new
1514 * connected fd. We collect the address of the connector in kernel
1515 * space and move it to user at the very end. This is unclean because
1516 * we open the socket then return an error.
1517 *
1518 * 1003.1g adds the ability to recvmsg() to query connection pending
1519 * status to recvmsg. We need to add that support in a way thats
1520 * clean when we restucture accept also.
1521 */
1522
20f37034
HC
1523SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1524 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1525{
1526 struct socket *sock, *newsock;
39d8c1b6 1527 struct file *newfile;
6cb153ca 1528 int err, len, newfd, fput_needed;
230b1839 1529 struct sockaddr_storage address;
1da177e4 1530
77d27200 1531 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1532 return -EINVAL;
1533
1534 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1535 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1536
6cb153ca 1537 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1538 if (!sock)
1539 goto out;
1540
1541 err = -ENFILE;
c6d409cf
ED
1542 newsock = sock_alloc();
1543 if (!newsock)
1da177e4
LT
1544 goto out_put;
1545
1546 newsock->type = sock->type;
1547 newsock->ops = sock->ops;
1548
1da177e4
LT
1549 /*
1550 * We don't need try_module_get here, as the listening socket (sock)
1551 * has the protocol module (sock->ops->owner) held.
1552 */
1553 __module_get(newsock->ops->owner);
1554
28407630 1555 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1556 if (unlikely(newfd < 0)) {
1557 err = newfd;
9a1875e6
DM
1558 sock_release(newsock);
1559 goto out_put;
39d8c1b6 1560 }
aab174f0 1561 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1562 if (IS_ERR(newfile)) {
28407630
AV
1563 err = PTR_ERR(newfile);
1564 put_unused_fd(newfd);
1565 sock_release(newsock);
1566 goto out_put;
1567 }
39d8c1b6 1568
a79af59e
FF
1569 err = security_socket_accept(sock, newsock);
1570 if (err)
39d8c1b6 1571 goto out_fd;
a79af59e 1572
cdfbabfb 1573 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1574 if (err < 0)
39d8c1b6 1575 goto out_fd;
1da177e4
LT
1576
1577 if (upeer_sockaddr) {
230b1839 1578 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1579 &len, 2) < 0) {
1da177e4 1580 err = -ECONNABORTED;
39d8c1b6 1581 goto out_fd;
1da177e4 1582 }
43db362d 1583 err = move_addr_to_user(&address,
230b1839 1584 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1585 if (err < 0)
39d8c1b6 1586 goto out_fd;
1da177e4
LT
1587 }
1588
1589 /* File flags are not inherited via accept() unlike another OSes. */
1590
39d8c1b6
DM
1591 fd_install(newfd, newfile);
1592 err = newfd;
1da177e4 1593
1da177e4 1594out_put:
6cb153ca 1595 fput_light(sock->file, fput_needed);
1da177e4
LT
1596out:
1597 return err;
39d8c1b6 1598out_fd:
9606a216 1599 fput(newfile);
39d8c1b6 1600 put_unused_fd(newfd);
1da177e4
LT
1601 goto out_put;
1602}
1603
20f37034
HC
1604SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1605 int __user *, upeer_addrlen)
aaca0bdc 1606{
de11defe 1607 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1608}
1609
1da177e4
LT
1610/*
1611 * Attempt to connect to a socket with the server address. The address
1612 * is in user space so we verify it is OK and move it to kernel space.
1613 *
1614 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1615 * break bindings
1616 *
1617 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1618 * other SEQPACKET protocols that take time to connect() as it doesn't
1619 * include the -EINPROGRESS status for such sockets.
1620 */
1621
20f37034
HC
1622SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1623 int, addrlen)
1da177e4
LT
1624{
1625 struct socket *sock;
230b1839 1626 struct sockaddr_storage address;
6cb153ca 1627 int err, fput_needed;
1da177e4 1628
6cb153ca 1629 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1630 if (!sock)
1631 goto out;
43db362d 1632 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1633 if (err < 0)
1634 goto out_put;
1635
89bddce5 1636 err =
230b1839 1637 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1638 if (err)
1639 goto out_put;
1640
230b1839 1641 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1642 sock->file->f_flags);
1643out_put:
6cb153ca 1644 fput_light(sock->file, fput_needed);
1da177e4
LT
1645out:
1646 return err;
1647}
1648
1649/*
1650 * Get the local address ('name') of a socket object. Move the obtained
1651 * name to user space.
1652 */
1653
20f37034
HC
1654SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1655 int __user *, usockaddr_len)
1da177e4
LT
1656{
1657 struct socket *sock;
230b1839 1658 struct sockaddr_storage address;
6cb153ca 1659 int len, err, fput_needed;
89bddce5 1660
6cb153ca 1661 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1662 if (!sock)
1663 goto out;
1664
1665 err = security_socket_getsockname(sock);
1666 if (err)
1667 goto out_put;
1668
230b1839 1669 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1670 if (err)
1671 goto out_put;
43db362d 1672 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1673
1674out_put:
6cb153ca 1675 fput_light(sock->file, fput_needed);
1da177e4
LT
1676out:
1677 return err;
1678}
1679
1680/*
1681 * Get the remote address ('name') of a socket object. Move the obtained
1682 * name to user space.
1683 */
1684
20f37034
HC
1685SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1686 int __user *, usockaddr_len)
1da177e4
LT
1687{
1688 struct socket *sock;
230b1839 1689 struct sockaddr_storage address;
6cb153ca 1690 int len, err, fput_needed;
1da177e4 1691
89bddce5
SH
1692 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1693 if (sock != NULL) {
1da177e4
LT
1694 err = security_socket_getpeername(sock);
1695 if (err) {
6cb153ca 1696 fput_light(sock->file, fput_needed);
1da177e4
LT
1697 return err;
1698 }
1699
89bddce5 1700 err =
230b1839 1701 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1702 1);
1da177e4 1703 if (!err)
43db362d 1704 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1705 usockaddr_len);
6cb153ca 1706 fput_light(sock->file, fput_needed);
1da177e4
LT
1707 }
1708 return err;
1709}
1710
1711/*
1712 * Send a datagram to a given address. We move the address into kernel
1713 * space and check the user space data area is readable before invoking
1714 * the protocol.
1715 */
1716
3e0fa65f 1717SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1718 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1719 int, addr_len)
1da177e4
LT
1720{
1721 struct socket *sock;
230b1839 1722 struct sockaddr_storage address;
1da177e4
LT
1723 int err;
1724 struct msghdr msg;
1725 struct iovec iov;
6cb153ca 1726 int fput_needed;
6cb153ca 1727
602bd0e9
AV
1728 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1729 if (unlikely(err))
1730 return err;
de0fa95c
PE
1731 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1732 if (!sock)
4387ff75 1733 goto out;
6cb153ca 1734
89bddce5 1735 msg.msg_name = NULL;
89bddce5
SH
1736 msg.msg_control = NULL;
1737 msg.msg_controllen = 0;
1738 msg.msg_namelen = 0;
6cb153ca 1739 if (addr) {
43db362d 1740 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1741 if (err < 0)
1742 goto out_put;
230b1839 1743 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1744 msg.msg_namelen = addr_len;
1da177e4
LT
1745 }
1746 if (sock->file->f_flags & O_NONBLOCK)
1747 flags |= MSG_DONTWAIT;
1748 msg.msg_flags = flags;
d8725c86 1749 err = sock_sendmsg(sock, &msg);
1da177e4 1750
89bddce5 1751out_put:
de0fa95c 1752 fput_light(sock->file, fput_needed);
4387ff75 1753out:
1da177e4
LT
1754 return err;
1755}
1756
1757/*
89bddce5 1758 * Send a datagram down a socket.
1da177e4
LT
1759 */
1760
3e0fa65f 1761SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1762 unsigned int, flags)
1da177e4
LT
1763{
1764 return sys_sendto(fd, buff, len, flags, NULL, 0);
1765}
1766
1767/*
89bddce5 1768 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1769 * sender. We verify the buffers are writable and if needed move the
1770 * sender address from kernel to user space.
1771 */
1772
3e0fa65f 1773SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1774 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1775 int __user *, addr_len)
1da177e4
LT
1776{
1777 struct socket *sock;
1778 struct iovec iov;
1779 struct msghdr msg;
230b1839 1780 struct sockaddr_storage address;
89bddce5 1781 int err, err2;
6cb153ca
BL
1782 int fput_needed;
1783
602bd0e9
AV
1784 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1785 if (unlikely(err))
1786 return err;
de0fa95c 1787 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1788 if (!sock)
de0fa95c 1789 goto out;
1da177e4 1790
89bddce5
SH
1791 msg.msg_control = NULL;
1792 msg.msg_controllen = 0;
f3d33426
HFS
1793 /* Save some cycles and don't copy the address if not needed */
1794 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1795 /* We assume all kernel code knows the size of sockaddr_storage */
1796 msg.msg_namelen = 0;
130ed5d1 1797 msg.msg_iocb = NULL;
9f138fa6 1798 msg.msg_flags = 0;
1da177e4
LT
1799 if (sock->file->f_flags & O_NONBLOCK)
1800 flags |= MSG_DONTWAIT;
2da62906 1801 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1802
89bddce5 1803 if (err >= 0 && addr != NULL) {
43db362d 1804 err2 = move_addr_to_user(&address,
230b1839 1805 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1806 if (err2 < 0)
1807 err = err2;
1da177e4 1808 }
de0fa95c
PE
1809
1810 fput_light(sock->file, fput_needed);
4387ff75 1811out:
1da177e4
LT
1812 return err;
1813}
1814
1815/*
89bddce5 1816 * Receive a datagram from a socket.
1da177e4
LT
1817 */
1818
b7c0ddf5
JG
1819SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1820 unsigned int, flags)
1da177e4
LT
1821{
1822 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1823}
1824
1825/*
1826 * Set a socket option. Because we don't know the option lengths we have
1827 * to pass the user mode parameter for the protocols to sort out.
1828 */
1829
20f37034
HC
1830SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1831 char __user *, optval, int, optlen)
1da177e4 1832{
6cb153ca 1833 int err, fput_needed;
1da177e4
LT
1834 struct socket *sock;
1835
1836 if (optlen < 0)
1837 return -EINVAL;
89bddce5
SH
1838
1839 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1840 if (sock != NULL) {
1841 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1842 if (err)
1843 goto out_put;
1da177e4
LT
1844
1845 if (level == SOL_SOCKET)
89bddce5
SH
1846 err =
1847 sock_setsockopt(sock, level, optname, optval,
1848 optlen);
1da177e4 1849 else
89bddce5
SH
1850 err =
1851 sock->ops->setsockopt(sock, level, optname, optval,
1852 optlen);
6cb153ca
BL
1853out_put:
1854 fput_light(sock->file, fput_needed);
1da177e4
LT
1855 }
1856 return err;
1857}
1858
1859/*
1860 * Get a socket option. Because we don't know the option lengths we have
1861 * to pass a user mode parameter for the protocols to sort out.
1862 */
1863
20f37034
HC
1864SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1865 char __user *, optval, int __user *, optlen)
1da177e4 1866{
6cb153ca 1867 int err, fput_needed;
1da177e4
LT
1868 struct socket *sock;
1869
89bddce5
SH
1870 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1871 if (sock != NULL) {
6cb153ca
BL
1872 err = security_socket_getsockopt(sock, level, optname);
1873 if (err)
1874 goto out_put;
1da177e4
LT
1875
1876 if (level == SOL_SOCKET)
89bddce5
SH
1877 err =
1878 sock_getsockopt(sock, level, optname, optval,
1879 optlen);
1da177e4 1880 else
89bddce5
SH
1881 err =
1882 sock->ops->getsockopt(sock, level, optname, optval,
1883 optlen);
6cb153ca
BL
1884out_put:
1885 fput_light(sock->file, fput_needed);
1da177e4
LT
1886 }
1887 return err;
1888}
1889
1da177e4
LT
1890/*
1891 * Shutdown a socket.
1892 */
1893
754fe8d2 1894SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1895{
6cb153ca 1896 int err, fput_needed;
1da177e4
LT
1897 struct socket *sock;
1898
89bddce5
SH
1899 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1900 if (sock != NULL) {
1da177e4 1901 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1902 if (!err)
1903 err = sock->ops->shutdown(sock, how);
1904 fput_light(sock->file, fput_needed);
1da177e4
LT
1905 }
1906 return err;
1907}
1908
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * They expand in place and rely on the caller having `flags`, `<msg>` and
 * `<msg>_compat` in scope: MSG_CMSG_COMPAT in `flags` selects the compat
 * header, otherwise the native one is used.
 */
#define COMPAT_MSG(msg, member)	\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1915
c71d8ebe
TH
1916struct used_address {
1917 struct sockaddr_storage name;
1918 unsigned int name_len;
1919};
1920
da184284
AV
1921static int copy_msghdr_from_user(struct msghdr *kmsg,
1922 struct user_msghdr __user *umsg,
1923 struct sockaddr __user **save_addr,
1924 struct iovec **iov)
1661bf36 1925{
ffb07550 1926 struct user_msghdr msg;
08adb7da
AV
1927 ssize_t err;
1928
ffb07550 1929 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 1930 return -EFAULT;
dbb490b9 1931
864d9664 1932 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
1933 kmsg->msg_controllen = msg.msg_controllen;
1934 kmsg->msg_flags = msg.msg_flags;
1935
1936 kmsg->msg_namelen = msg.msg_namelen;
1937 if (!msg.msg_name)
6a2a2b3a
AS
1938 kmsg->msg_namelen = 0;
1939
dbb490b9
ML
1940 if (kmsg->msg_namelen < 0)
1941 return -EINVAL;
1942
1661bf36 1943 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1944 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1945
1946 if (save_addr)
ffb07550 1947 *save_addr = msg.msg_name;
08adb7da 1948
ffb07550 1949 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 1950 if (!save_addr) {
864d9664
PA
1951 err = move_addr_to_kernel(msg.msg_name,
1952 kmsg->msg_namelen,
08adb7da
AV
1953 kmsg->msg_name);
1954 if (err < 0)
1955 return err;
1956 }
1957 } else {
1958 kmsg->msg_name = NULL;
1959 kmsg->msg_namelen = 0;
1960 }
1961
ffb07550 1962 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
1963 return -EMSGSIZE;
1964
0345f931 1965 kmsg->msg_iocb = NULL;
1966
ffb07550
AV
1967 return import_iovec(save_addr ? READ : WRITE,
1968 msg.msg_iov, msg.msg_iovlen,
da184284 1969 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1970}
1971
666547ff 1972static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1973 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1974 struct used_address *used_address,
1975 unsigned int allowed_msghdr_flags)
1da177e4 1976{
89bddce5
SH
1977 struct compat_msghdr __user *msg_compat =
1978 (struct compat_msghdr __user *)msg;
230b1839 1979 struct sockaddr_storage address;
1da177e4 1980 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1981 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 1982 __aligned(sizeof(__kernel_size_t));
89bddce5 1983 /* 20 is size of ipv6_pktinfo */
1da177e4 1984 unsigned char *ctl_buf = ctl;
d8725c86 1985 int ctl_len;
08adb7da 1986 ssize_t err;
89bddce5 1987
08adb7da 1988 msg_sys->msg_name = &address;
1da177e4 1989
08449320 1990 if (MSG_CMSG_COMPAT & flags)
08adb7da 1991 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1992 else
08adb7da 1993 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1994 if (err < 0)
da184284 1995 return err;
1da177e4
LT
1996
1997 err = -ENOBUFS;
1998
228e548e 1999 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2000 goto out_freeiov;
28a94d8f 2001 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2002 ctl_len = msg_sys->msg_controllen;
1da177e4 2003 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2004 err =
228e548e 2005 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2006 sizeof(ctl));
1da177e4
LT
2007 if (err)
2008 goto out_freeiov;
228e548e
AB
2009 ctl_buf = msg_sys->msg_control;
2010 ctl_len = msg_sys->msg_controllen;
1da177e4 2011 } else if (ctl_len) {
ac4340fc
DM
2012 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2013 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2014 if (ctl_len > sizeof(ctl)) {
1da177e4 2015 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2016 if (ctl_buf == NULL)
1da177e4
LT
2017 goto out_freeiov;
2018 }
2019 err = -EFAULT;
2020 /*
228e548e 2021 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2022 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2023 * checking falls down on this.
2024 */
fb8621bb 2025 if (copy_from_user(ctl_buf,
228e548e 2026 (void __user __force *)msg_sys->msg_control,
89bddce5 2027 ctl_len))
1da177e4 2028 goto out_freectl;
228e548e 2029 msg_sys->msg_control = ctl_buf;
1da177e4 2030 }
228e548e 2031 msg_sys->msg_flags = flags;
1da177e4
LT
2032
2033 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2034 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2035 /*
2036 * If this is sendmmsg() and current destination address is same as
2037 * previously succeeded address, omit asking LSM's decision.
2038 * used_address->name_len is initialized to UINT_MAX so that the first
2039 * destination address never matches.
2040 */
bc909d9d
MD
2041 if (used_address && msg_sys->msg_name &&
2042 used_address->name_len == msg_sys->msg_namelen &&
2043 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2044 used_address->name_len)) {
d8725c86 2045 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2046 goto out_freectl;
2047 }
d8725c86 2048 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2049 /*
2050 * If this is sendmmsg() and sending to current destination address was
2051 * successful, remember it.
2052 */
2053 if (used_address && err >= 0) {
2054 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2055 if (msg_sys->msg_name)
2056 memcpy(&used_address->name, msg_sys->msg_name,
2057 used_address->name_len);
c71d8ebe 2058 }
1da177e4
LT
2059
2060out_freectl:
89bddce5 2061 if (ctl_buf != ctl)
1da177e4
LT
2062 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2063out_freeiov:
da184284 2064 kfree(iov);
228e548e
AB
2065 return err;
2066}
2067
2068/*
2069 * BSD sendmsg interface
2070 */
2071
666547ff 2072long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
2073{
2074 int fput_needed, err;
2075 struct msghdr msg_sys;
1be374a0
AL
2076 struct socket *sock;
2077
1be374a0 2078 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2079 if (!sock)
2080 goto out;
2081
28a94d8f 2082 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2083
6cb153ca 2084 fput_light(sock->file, fput_needed);
89bddce5 2085out:
1da177e4
LT
2086 return err;
2087}
2088
666547ff 2089SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
2090{
2091 if (flags & MSG_CMSG_COMPAT)
2092 return -EINVAL;
2093 return __sys_sendmsg(fd, msg, flags);
2094}
2095
228e548e
AB
2096/*
2097 * Linux sendmmsg interface
2098 */
2099
2100int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2101 unsigned int flags)
2102{
2103 int fput_needed, err, datagrams;
2104 struct socket *sock;
2105 struct mmsghdr __user *entry;
2106 struct compat_mmsghdr __user *compat_entry;
2107 struct msghdr msg_sys;
c71d8ebe 2108 struct used_address used_address;
f092276d 2109 unsigned int oflags = flags;
228e548e 2110
98382f41
AB
2111 if (vlen > UIO_MAXIOV)
2112 vlen = UIO_MAXIOV;
228e548e
AB
2113
2114 datagrams = 0;
2115
2116 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2117 if (!sock)
2118 return err;
2119
c71d8ebe 2120 used_address.name_len = UINT_MAX;
228e548e
AB
2121 entry = mmsg;
2122 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2123 err = 0;
f092276d 2124 flags |= MSG_BATCH;
228e548e
AB
2125
2126 while (datagrams < vlen) {
f092276d
TH
2127 if (datagrams == vlen - 1)
2128 flags = oflags;
2129
228e548e 2130 if (MSG_CMSG_COMPAT & flags) {
666547ff 2131 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2132 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2133 if (err < 0)
2134 break;
2135 err = __put_user(err, &compat_entry->msg_len);
2136 ++compat_entry;
2137 } else {
a7526eb5 2138 err = ___sys_sendmsg(sock,
666547ff 2139 (struct user_msghdr __user *)entry,
28a94d8f 2140 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2141 if (err < 0)
2142 break;
2143 err = put_user(err, &entry->msg_len);
2144 ++entry;
2145 }
2146
2147 if (err)
2148 break;
2149 ++datagrams;
3023898b
SHY
2150 if (msg_data_left(&msg_sys))
2151 break;
a78cb84c 2152 cond_resched();
228e548e
AB
2153 }
2154
228e548e
AB
2155 fput_light(sock->file, fput_needed);
2156
728ffb86
AB
2157 /* We only return an error if no datagrams were able to be sent */
2158 if (datagrams != 0)
228e548e
AB
2159 return datagrams;
2160
228e548e
AB
2161 return err;
2162}
2163
2164SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2165 unsigned int, vlen, unsigned int, flags)
2166{
1be374a0
AL
2167 if (flags & MSG_CMSG_COMPAT)
2168 return -EINVAL;
228e548e
AB
2169 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2170}
2171
666547ff 2172static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2173 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2174{
89bddce5
SH
2175 struct compat_msghdr __user *msg_compat =
2176 (struct compat_msghdr __user *)msg;
1da177e4 2177 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2178 struct iovec *iov = iovstack;
1da177e4 2179 unsigned long cmsg_ptr;
2da62906 2180 int len;
08adb7da 2181 ssize_t err;
1da177e4
LT
2182
2183 /* kernel mode address */
230b1839 2184 struct sockaddr_storage addr;
1da177e4
LT
2185
2186 /* user mode address pointers */
2187 struct sockaddr __user *uaddr;
08adb7da 2188 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2189
08adb7da 2190 msg_sys->msg_name = &addr;
1da177e4 2191
f3d33426 2192 if (MSG_CMSG_COMPAT & flags)
08adb7da 2193 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2194 else
08adb7da 2195 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2196 if (err < 0)
da184284 2197 return err;
1da177e4 2198
a2e27255
ACM
2199 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2200 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2201
f3d33426
HFS
2202 /* We assume all kernel code knows the size of sockaddr_storage */
2203 msg_sys->msg_namelen = 0;
2204
1da177e4
LT
2205 if (sock->file->f_flags & O_NONBLOCK)
2206 flags |= MSG_DONTWAIT;
2da62906 2207 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2208 if (err < 0)
2209 goto out_freeiov;
2210 len = err;
2211
2212 if (uaddr != NULL) {
43db362d 2213 err = move_addr_to_user(&addr,
a2e27255 2214 msg_sys->msg_namelen, uaddr,
89bddce5 2215 uaddr_len);
1da177e4
LT
2216 if (err < 0)
2217 goto out_freeiov;
2218 }
a2e27255 2219 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2220 COMPAT_FLAGS(msg));
1da177e4
LT
2221 if (err)
2222 goto out_freeiov;
2223 if (MSG_CMSG_COMPAT & flags)
a2e27255 2224 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2225 &msg_compat->msg_controllen);
2226 else
a2e27255 2227 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2228 &msg->msg_controllen);
2229 if (err)
2230 goto out_freeiov;
2231 err = len;
2232
2233out_freeiov:
da184284 2234 kfree(iov);
a2e27255
ACM
2235 return err;
2236}
2237
2238/*
2239 * BSD recvmsg interface
2240 */
2241
666547ff 2242long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2243{
2244 int fput_needed, err;
2245 struct msghdr msg_sys;
1be374a0
AL
2246 struct socket *sock;
2247
1be374a0 2248 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2249 if (!sock)
2250 goto out;
2251
a7526eb5 2252 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2253
6cb153ca 2254 fput_light(sock->file, fput_needed);
1da177e4
LT
2255out:
2256 return err;
2257}
2258
666547ff 2259SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2260 unsigned int, flags)
2261{
2262 if (flags & MSG_CMSG_COMPAT)
2263 return -EINVAL;
2264 return __sys_recvmsg(fd, msg, flags);
2265}
2266
a2e27255
ACM
2267/*
2268 * Linux recvmmsg interface
2269 */
2270
2271int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2272 unsigned int flags, struct timespec *timeout)
2273{
2274 int fput_needed, err, datagrams;
2275 struct socket *sock;
2276 struct mmsghdr __user *entry;
d7256d0e 2277 struct compat_mmsghdr __user *compat_entry;
a2e27255 2278 struct msghdr msg_sys;
766b9f92
DD
2279 struct timespec64 end_time;
2280 struct timespec64 timeout64;
a2e27255
ACM
2281
2282 if (timeout &&
2283 poll_select_set_timeout(&end_time, timeout->tv_sec,
2284 timeout->tv_nsec))
2285 return -EINVAL;
2286
2287 datagrams = 0;
2288
2289 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2290 if (!sock)
2291 return err;
2292
2293 err = sock_error(sock->sk);
e623a9e9
MJ
2294 if (err) {
2295 datagrams = err;
a2e27255 2296 goto out_put;
e623a9e9 2297 }
a2e27255
ACM
2298
2299 entry = mmsg;
d7256d0e 2300 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2301
2302 while (datagrams < vlen) {
2303 /*
2304 * No need to ask LSM for more than the first datagram.
2305 */
d7256d0e 2306 if (MSG_CMSG_COMPAT & flags) {
666547ff 2307 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2308 &msg_sys, flags & ~MSG_WAITFORONE,
2309 datagrams);
d7256d0e
JMG
2310 if (err < 0)
2311 break;
2312 err = __put_user(err, &compat_entry->msg_len);
2313 ++compat_entry;
2314 } else {
a7526eb5 2315 err = ___sys_recvmsg(sock,
666547ff 2316 (struct user_msghdr __user *)entry,
a7526eb5
AL
2317 &msg_sys, flags & ~MSG_WAITFORONE,
2318 datagrams);
d7256d0e
JMG
2319 if (err < 0)
2320 break;
2321 err = put_user(err, &entry->msg_len);
2322 ++entry;
2323 }
2324
a2e27255
ACM
2325 if (err)
2326 break;
a2e27255
ACM
2327 ++datagrams;
2328
71c5c159
BB
2329 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2330 if (flags & MSG_WAITFORONE)
2331 flags |= MSG_DONTWAIT;
2332
a2e27255 2333 if (timeout) {
766b9f92
DD
2334 ktime_get_ts64(&timeout64);
2335 *timeout = timespec64_to_timespec(
2336 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2337 if (timeout->tv_sec < 0) {
2338 timeout->tv_sec = timeout->tv_nsec = 0;
2339 break;
2340 }
2341
2342 /* Timeout, return less than vlen datagrams */
2343 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2344 break;
2345 }
2346
2347 /* Out of band data, return right away */
2348 if (msg_sys.msg_flags & MSG_OOB)
2349 break;
a78cb84c 2350 cond_resched();
a2e27255
ACM
2351 }
2352
a2e27255 2353 if (err == 0)
34b88a68
ACM
2354 goto out_put;
2355
2356 if (datagrams == 0) {
2357 datagrams = err;
2358 goto out_put;
2359 }
a2e27255 2360
34b88a68
ACM
2361 /*
2362 * We may return less entries than requested (vlen) if the
2363 * sock is non block and there aren't enough datagrams...
2364 */
2365 if (err != -EAGAIN) {
a2e27255 2366 /*
34b88a68
ACM
2367 * ... or if recvmsg returns an error after we
2368 * received some datagrams, where we record the
2369 * error to return on the next call or if the
2370 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2371 */
34b88a68 2372 sock->sk->sk_err = -err;
a2e27255 2373 }
34b88a68
ACM
2374out_put:
2375 fput_light(sock->file, fput_needed);
a2e27255 2376
34b88a68 2377 return datagrams;
a2e27255
ACM
2378}
2379
2380SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2381 unsigned int, vlen, unsigned int, flags,
2382 struct timespec __user *, timeout)
2383{
2384 int datagrams;
2385 struct timespec timeout_sys;
2386
1be374a0
AL
2387 if (flags & MSG_CMSG_COMPAT)
2388 return -EINVAL;
2389
a2e27255
ACM
2390 if (!timeout)
2391 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2392
2393 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2394 return -EFAULT;
2395
2396 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2397
2398 if (datagrams > 0 &&
2399 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2400 datagrams = -EFAULT;
2401
2402 return datagrams;
2403}
2404
2405#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall: nargs[call] is the number of
 * bytes of `unsigned long` arguments to copy in for that call number.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	/*  0 -  3 */ AL(0), AL(3), AL(3), AL(3),
	/*  4 -  7 */ AL(2), AL(3), AL(3), AL(3),
	/*  8 - 11 */ AL(4), AL(4), AL(4), AL(6),
	/* 12 - 15 */ AL(6), AL(2), AL(5), AL(5),
	/* 16 - 19 */ AL(3), AL(3), AL(4), AL(5),
	/* 20      */ AL(4)
};

#undef AL
2416
2417/*
89bddce5 2418 * System call vectors.
1da177e4
LT
2419 *
2420 * Argument checking cleaned up. Saved 20% in size.
2421 * This function doesn't need to set the kernel lock because
89bddce5 2422 * it is set by the callees.
1da177e4
LT
2423 */
2424
3e0fa65f 2425SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2426{
2950fa9d 2427 unsigned long a[AUDITSC_ARGS];
89bddce5 2428 unsigned long a0, a1;
1da177e4 2429 int err;
47379052 2430 unsigned int len;
1da177e4 2431
228e548e 2432 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2433 return -EINVAL;
2434
47379052
AV
2435 len = nargs[call];
2436 if (len > sizeof(a))
2437 return -EINVAL;
2438
1da177e4 2439 /* copy_from_user should be SMP safe. */
47379052 2440 if (copy_from_user(a, args, len))
1da177e4 2441 return -EFAULT;
3ec3b2fb 2442
2950fa9d
CG
2443 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2444 if (err)
2445 return err;
3ec3b2fb 2446
89bddce5
SH
2447 a0 = a[0];
2448 a1 = a[1];
2449
2450 switch (call) {
2451 case SYS_SOCKET:
2452 err = sys_socket(a0, a1, a[2]);
2453 break;
2454 case SYS_BIND:
2455 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2456 break;
2457 case SYS_CONNECT:
2458 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2459 break;
2460 case SYS_LISTEN:
2461 err = sys_listen(a0, a1);
2462 break;
2463 case SYS_ACCEPT:
de11defe
UD
2464 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2465 (int __user *)a[2], 0);
89bddce5
SH
2466 break;
2467 case SYS_GETSOCKNAME:
2468 err =
2469 sys_getsockname(a0, (struct sockaddr __user *)a1,
2470 (int __user *)a[2]);
2471 break;
2472 case SYS_GETPEERNAME:
2473 err =
2474 sys_getpeername(a0, (struct sockaddr __user *)a1,
2475 (int __user *)a[2]);
2476 break;
2477 case SYS_SOCKETPAIR:
2478 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2479 break;
2480 case SYS_SEND:
2481 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2482 break;
2483 case SYS_SENDTO:
2484 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2485 (struct sockaddr __user *)a[4], a[5]);
2486 break;
2487 case SYS_RECV:
2488 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2489 break;
2490 case SYS_RECVFROM:
2491 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2492 (struct sockaddr __user *)a[4],
2493 (int __user *)a[5]);
2494 break;
2495 case SYS_SHUTDOWN:
2496 err = sys_shutdown(a0, a1);
2497 break;
2498 case SYS_SETSOCKOPT:
2499 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2500 break;
2501 case SYS_GETSOCKOPT:
2502 err =
2503 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2504 (int __user *)a[4]);
2505 break;
2506 case SYS_SENDMSG:
666547ff 2507 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2508 break;
228e548e
AB
2509 case SYS_SENDMMSG:
2510 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2511 break;
89bddce5 2512 case SYS_RECVMSG:
666547ff 2513 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2514 break;
a2e27255
ACM
2515 case SYS_RECVMMSG:
2516 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2517 (struct timespec __user *)a[4]);
2518 break;
de11defe
UD
2519 case SYS_ACCEPT4:
2520 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2521 (int __user *)a[2], a[3]);
aaca0bdc 2522 break;
89bddce5
SH
2523 default:
2524 err = -EINVAL;
2525 break;
1da177e4
LT
2526 }
2527 return err;
2528}
2529
89bddce5 2530#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2531
55737fda
SH
2532/**
2533 * sock_register - add a socket protocol handler
2534 * @ops: description of protocol
2535 *
1da177e4
LT
2536 * This function is called by a protocol handler that wants to
2537 * advertise its address family, and have it linked into the
e793c0f7 2538 * socket interface. The value ops->family corresponds to the
55737fda 2539 * socket system call protocol family.
1da177e4 2540 */
f0fd27d4 2541int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2542{
2543 int err;
2544
2545 if (ops->family >= NPROTO) {
3410f22e 2546 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2547 return -ENOBUFS;
2548 }
55737fda
SH
2549
2550 spin_lock(&net_family_lock);
190683a9
ED
2551 if (rcu_dereference_protected(net_families[ops->family],
2552 lockdep_is_held(&net_family_lock)))
55737fda
SH
2553 err = -EEXIST;
2554 else {
cf778b00 2555 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2556 err = 0;
2557 }
55737fda
SH
2558 spin_unlock(&net_family_lock);
2559
3410f22e 2560 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2561 return err;
2562}
c6d409cf 2563EXPORT_SYMBOL(sock_register);
1da177e4 2564
55737fda
SH
2565/**
2566 * sock_unregister - remove a protocol handler
2567 * @family: protocol family to remove
2568 *
1da177e4
LT
2569 * This function is called by a protocol handler that wants to
2570 * remove its address family, and have it unlinked from the
55737fda
SH
2571 * new socket creation.
2572 *
2573 * If protocol handler is a module, then it can use module reference
2574 * counts to protect against new references. If protocol handler is not
2575 * a module then it needs to provide its own protection in
2576 * the ops->create routine.
1da177e4 2577 */
f0fd27d4 2578void sock_unregister(int family)
1da177e4 2579{
f0fd27d4 2580 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2581
55737fda 2582 spin_lock(&net_family_lock);
a9b3cd7f 2583 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2584 spin_unlock(&net_family_lock);
2585
2586 synchronize_rcu();
2587
3410f22e 2588 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2589}
c6d409cf 2590EXPORT_SYMBOL(sock_unregister);
1da177e4 2591
77d76ea3 2592static int __init sock_init(void)
1da177e4 2593{
b3e19d92 2594 int err;
2ca794e5
EB
2595 /*
2596 * Initialize the network sysctl infrastructure.
2597 */
2598 err = net_sysctl_init();
2599 if (err)
2600 goto out;
b3e19d92 2601
1da177e4 2602 /*
89bddce5 2603 * Initialize skbuff SLAB cache
1da177e4
LT
2604 */
2605 skb_init();
1da177e4
LT
2606
2607 /*
89bddce5 2608 * Initialize the protocols module.
1da177e4
LT
2609 */
2610
2611 init_inodecache();
b3e19d92
NP
2612
2613 err = register_filesystem(&sock_fs_type);
2614 if (err)
2615 goto out_fs;
1da177e4 2616 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2617 if (IS_ERR(sock_mnt)) {
2618 err = PTR_ERR(sock_mnt);
2619 goto out_mount;
2620 }
77d76ea3
AK
2621
2622 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2623 */
2624
2625#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2626 err = netfilter_init();
2627 if (err)
2628 goto out;
1da177e4 2629#endif
cbeb321a 2630
408eccce 2631 ptp_classifier_init();
c1f19b51 2632
b3e19d92
NP
2633out:
2634 return err;
2635
2636out_mount:
2637 unregister_filesystem(&sock_fs_type);
2638out_fs:
2639 goto out;
1da177e4
LT
2640}
2641
77d76ea3
AK
2642core_initcall(sock_init); /* early initcall */
2643
1da177e4
LT
2644#ifdef CONFIG_PROC_FS
2645void socket_seq_show(struct seq_file *seq)
2646{
2647 int cpu;
2648 int counter = 0;
2649
6f912042 2650 for_each_possible_cpu(cpu)
89bddce5 2651 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2652
2653 /* It can be negative, by the way. 8) */
2654 if (counter < 0)
2655 counter = 0;
2656
2657 seq_printf(seq, "sockets: used %d\n", counter);
2658}
89bddce5 2659#endif /* CONFIG_PROC_FS */
1da177e4 2660
89bbfc95 2661#ifdef CONFIG_COMPAT
6b96018b 2662static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2663 unsigned int cmd, void __user *up)
7a229387 2664{
7a229387
AB
2665 mm_segment_t old_fs = get_fs();
2666 struct timeval ktv;
2667 int err;
2668
2669 set_fs(KERNEL_DS);
6b96018b 2670 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2671 set_fs(old_fs);
644595f8 2672 if (!err)
ed6fe9d6 2673 err = compat_put_timeval(&ktv, up);
644595f8 2674
7a229387
AB
2675 return err;
2676}
2677
6b96018b 2678static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2679 unsigned int cmd, void __user *up)
7a229387 2680{
7a229387
AB
2681 mm_segment_t old_fs = get_fs();
2682 struct timespec kts;
2683 int err;
2684
2685 set_fs(KERNEL_DS);
6b96018b 2686 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2687 set_fs(old_fs);
644595f8 2688 if (!err)
ed6fe9d6 2689 err = compat_put_timespec(&kts, up);
644595f8 2690
7a229387
AB
2691 return err;
2692}
2693
6b96018b 2694static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2695{
2696 struct ifreq __user *uifr;
2697 int err;
2698
2699 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2700 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2701 return -EFAULT;
2702
6b96018b 2703 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2704 if (err)
2705 return err;
2706
6b96018b 2707 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2708 return -EFAULT;
2709
2710 return 0;
2711}
2712
6b96018b 2713static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2714{
6b96018b 2715 struct compat_ifconf ifc32;
7a229387
AB
2716 struct ifconf ifc;
2717 struct ifconf __user *uifc;
6b96018b 2718 struct compat_ifreq __user *ifr32;
7a229387
AB
2719 struct ifreq __user *ifr;
2720 unsigned int i, j;
2721 int err;
2722
6b96018b 2723 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2724 return -EFAULT;
2725
43da5f2e 2726 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2727 if (ifc32.ifcbuf == 0) {
2728 ifc32.ifc_len = 0;
2729 ifc.ifc_len = 0;
2730 ifc.ifc_req = NULL;
2731 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2732 } else {
c6d409cf
ED
2733 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2734 sizeof(struct ifreq);
7a229387
AB
2735 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2736 ifc.ifc_len = len;
2737 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2738 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2739 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2740 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2741 return -EFAULT;
2742 ifr++;
2743 ifr32++;
2744 }
2745 }
2746 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2747 return -EFAULT;
2748
6b96018b 2749 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2750 if (err)
2751 return err;
2752
2753 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2754 return -EFAULT;
2755
2756 ifr = ifc.ifc_req;
2757 ifr32 = compat_ptr(ifc32.ifcbuf);
2758 for (i = 0, j = 0;
c6d409cf
ED
2759 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2760 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2761 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2762 return -EFAULT;
2763 ifr32++;
2764 ifr++;
2765 }
2766
2767 if (ifc32.ifcbuf == 0) {
2768 /* Translate from 64-bit structure multiple to
2769 * a 32-bit one.
2770 */
2771 i = ifc.ifc_len;
6b96018b 2772 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2773 ifc32.ifc_len = i;
2774 } else {
2775 ifc32.ifc_len = i;
2776 }
6b96018b 2777 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2778 return -EFAULT;
2779
2780 return 0;
2781}
2782
6b96018b 2783static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2784{
3a7da39d
BH
2785 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2786 bool convert_in = false, convert_out = false;
2787 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2788 struct ethtool_rxnfc __user *rxnfc;
7a229387 2789 struct ifreq __user *ifr;
3a7da39d
BH
2790 u32 rule_cnt = 0, actual_rule_cnt;
2791 u32 ethcmd;
7a229387 2792 u32 data;
3a7da39d 2793 int ret;
7a229387 2794
3a7da39d
BH
2795 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2796 return -EFAULT;
7a229387 2797
3a7da39d
BH
2798 compat_rxnfc = compat_ptr(data);
2799
2800 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2801 return -EFAULT;
2802
3a7da39d
BH
2803 /* Most ethtool structures are defined without padding.
2804 * Unfortunately struct ethtool_rxnfc is an exception.
2805 */
2806 switch (ethcmd) {
2807 default:
2808 break;
2809 case ETHTOOL_GRXCLSRLALL:
2810 /* Buffer size is variable */
2811 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2812 return -EFAULT;
2813 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2814 return -ENOMEM;
2815 buf_size += rule_cnt * sizeof(u32);
2816 /* fall through */
2817 case ETHTOOL_GRXRINGS:
2818 case ETHTOOL_GRXCLSRLCNT:
2819 case ETHTOOL_GRXCLSRULE:
55664f32 2820 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2821 convert_out = true;
2822 /* fall through */
2823 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2824 buf_size += sizeof(struct ethtool_rxnfc);
2825 convert_in = true;
2826 break;
2827 }
2828
2829 ifr = compat_alloc_user_space(buf_size);
954b1244 2830 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2831
2832 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2833 return -EFAULT;
2834
3a7da39d
BH
2835 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2836 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2837 return -EFAULT;
2838
3a7da39d 2839 if (convert_in) {
127fe533 2840 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2841 * fs.ring_cookie and at the end of fs, but nowhere else.
2842 */
127fe533
AD
2843 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2844 sizeof(compat_rxnfc->fs.m_ext) !=
2845 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2846 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2847 BUILD_BUG_ON(
2848 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2849 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2850 offsetof(struct ethtool_rxnfc, fs.location) -
2851 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2852
2853 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2854 (void __user *)(&rxnfc->fs.m_ext + 1) -
2855 (void __user *)rxnfc) ||
3a7da39d
BH
2856 copy_in_user(&rxnfc->fs.ring_cookie,
2857 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2858 (void __user *)(&rxnfc->fs.location + 1) -
2859 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2860 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2861 sizeof(rxnfc->rule_cnt)))
2862 return -EFAULT;
2863 }
2864
2865 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2866 if (ret)
2867 return ret;
2868
2869 if (convert_out) {
2870 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2871 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2872 (const void __user *)rxnfc) ||
3a7da39d
BH
2873 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2874 &rxnfc->fs.ring_cookie,
954b1244
SH
2875 (const void __user *)(&rxnfc->fs.location + 1) -
2876 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2877 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2878 sizeof(rxnfc->rule_cnt)))
2879 return -EFAULT;
2880
2881 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2882 /* As an optimisation, we only copy the actual
2883 * number of rules that the underlying
2884 * function returned. Since Mallory might
2885 * change the rule count in user memory, we
2886 * check that it is less than the rule count
2887 * originally given (as the user buffer size),
2888 * which has been range-checked.
2889 */
2890 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2891 return -EFAULT;
2892 if (actual_rule_cnt < rule_cnt)
2893 rule_cnt = actual_rule_cnt;
2894 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2895 &rxnfc->rule_locs[0],
2896 rule_cnt * sizeof(u32)))
2897 return -EFAULT;
2898 }
2899 }
2900
2901 return 0;
7a229387
AB
2902}
2903
7a50a240
AB
2904static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2905{
2906 void __user *uptr;
2907 compat_uptr_t uptr32;
2908 struct ifreq __user *uifr;
2909
c6d409cf 2910 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2911 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2912 return -EFAULT;
2913
2914 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2915 return -EFAULT;
2916
2917 uptr = compat_ptr(uptr32);
2918
2919 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2920 return -EFAULT;
2921
2922 return dev_ioctl(net, SIOCWANDEV, uifr);
2923}
2924
6b96018b
AB
2925static int bond_ioctl(struct net *net, unsigned int cmd,
2926 struct compat_ifreq __user *ifr32)
7a229387
AB
2927{
2928 struct ifreq kifr;
7a229387
AB
2929 mm_segment_t old_fs;
2930 int err;
7a229387
AB
2931
2932 switch (cmd) {
2933 case SIOCBONDENSLAVE:
2934 case SIOCBONDRELEASE:
2935 case SIOCBONDSETHWADDR:
2936 case SIOCBONDCHANGEACTIVE:
6b96018b 2937 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2938 return -EFAULT;
2939
2940 old_fs = get_fs();
c6d409cf 2941 set_fs(KERNEL_DS);
c3f52ae6 2942 err = dev_ioctl(net, cmd,
2943 (struct ifreq __user __force *) &kifr);
c6d409cf 2944 set_fs(old_fs);
7a229387
AB
2945
2946 return err;
7a229387 2947 default:
07d106d0 2948 return -ENOIOCTLCMD;
ccbd6a5a 2949 }
7a229387
AB
2950}
2951
590d4693
BH
2952/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2953static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2954 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2955{
2956 struct ifreq __user *u_ifreq64;
7a229387
AB
2957 char tmp_buf[IFNAMSIZ];
2958 void __user *data64;
2959 u32 data32;
2960
2961 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2962 IFNAMSIZ))
2963 return -EFAULT;
417c3522 2964 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2965 return -EFAULT;
2966 data64 = compat_ptr(data32);
2967
2968 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2969
7a229387
AB
2970 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2971 IFNAMSIZ))
2972 return -EFAULT;
417c3522 2973 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2974 return -EFAULT;
2975
6b96018b 2976 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2977}
2978
6b96018b
AB
2979static int dev_ifsioc(struct net *net, struct socket *sock,
2980 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2981{
a2116ed2 2982 struct ifreq __user *uifr;
7a229387
AB
2983 int err;
2984
a2116ed2
AB
2985 uifr = compat_alloc_user_space(sizeof(*uifr));
2986 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2987 return -EFAULT;
2988
2989 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2990
7a229387
AB
2991 if (!err) {
2992 switch (cmd) {
2993 case SIOCGIFFLAGS:
2994 case SIOCGIFMETRIC:
2995 case SIOCGIFMTU:
2996 case SIOCGIFMEM:
2997 case SIOCGIFHWADDR:
2998 case SIOCGIFINDEX:
2999 case SIOCGIFADDR:
3000 case SIOCGIFBRDADDR:
3001 case SIOCGIFDSTADDR:
3002 case SIOCGIFNETMASK:
fab2532b 3003 case SIOCGIFPFLAGS:
7a229387 3004 case SIOCGIFTXQLEN:
fab2532b
AB
3005 case SIOCGMIIPHY:
3006 case SIOCGMIIREG:
a2116ed2 3007 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
3008 err = -EFAULT;
3009 break;
3010 }
3011 }
3012 return err;
3013}
3014
a2116ed2
AB
3015static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3016 struct compat_ifreq __user *uifr32)
3017{
3018 struct ifreq ifr;
3019 struct compat_ifmap __user *uifmap32;
3020 mm_segment_t old_fs;
3021 int err;
3022
3023 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3024 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3025 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3026 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3027 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3028 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3029 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3030 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3031 if (err)
3032 return -EFAULT;
3033
3034 old_fs = get_fs();
c6d409cf 3035 set_fs(KERNEL_DS);
c3f52ae6 3036 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 3037 set_fs(old_fs);
a2116ed2
AB
3038
3039 if (cmd == SIOCGIFMAP && !err) {
3040 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3041 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3042 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3043 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3044 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3045 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3046 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3047 if (err)
3048 err = -EFAULT;
3049 }
3050 return err;
3051}
3052
/*
 * Layout of the IPv4 route request as passed in by a 32-bit caller.
 * routing_ioctl() reads rt_dst, rt_gateway and rt_genmask with a single
 * copy of three consecutive struct sockaddr, fetches rt_flags, rt_metric,
 * rt_mtu, rt_window and rt_irtt individually, and treats rt_dev as a 32-bit
 * user pointer that it converts with compat_ptr().
 * The field order and widths define the user-visible layout; do not reorder.
 */
7a229387 3053struct rtentry32 {
c6d409cf 3054 u32 rt_pad1;
7a229387
AB
3055 struct sockaddr rt_dst; /* target address */
3056 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3057 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3058 unsigned short rt_flags;
3059 short rt_pad2;
3060 u32 rt_pad3;
3061 unsigned char rt_tos;
3062 unsigned char rt_class;
3063 short rt_pad4;
3064 short rt_metric; /* +1 for binary compatibility! */
7a229387 3065 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3066 u32 rt_mtu; /* per route MTU/Window */
3067 u32 rt_window; /* Window clamping */
7a229387
AB
3068 unsigned short rt_irtt; /* Initial RTT */
3069};
3070
/*
 * Layout of the IPv6 route request as passed in by a 32-bit caller.
 * routing_ioctl() copies the three leading addresses in one block (they must
 * stay adjacent and in this order) and then fetches each remaining member
 * individually into a struct in6_rtmsg.
 */
3071struct in6_rtmsg32 {
3072 struct in6_addr rtmsg_dst;
3073 struct in6_addr rtmsg_src;
3074 struct in6_addr rtmsg_gateway;
3075 u32 rtmsg_type;
3076 u16 rtmsg_dst_len;
3077 u16 rtmsg_src_len;
3078 u32 rtmsg_metric;
3079 u32 rtmsg_info;
3080 u32 rtmsg_flags;
3081 s32 rtmsg_ifindex;
3082};
3083
6b96018b
AB
3084static int routing_ioctl(struct net *net, struct socket *sock,
3085 unsigned int cmd, void __user *argp)
7a229387
AB
3086{
3087 int ret;
3088 void *r = NULL;
3089 struct in6_rtmsg r6;
3090 struct rtentry r4;
3091 char devname[16];
3092 u32 rtdev;
3093 mm_segment_t old_fs = get_fs();
3094
6b96018b
AB
3095 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3096 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3097 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3098 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3099 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3100 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3101 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3102 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3103 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3104 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3105 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3106
3107 r = (void *) &r6;
3108 } else { /* ipv4 */
6b96018b 3109 struct rtentry32 __user *ur4 = argp;
c6d409cf 3110 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3111 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3112 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3113 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3114 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3115 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3116 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3117 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3118 if (rtdev) {
c6d409cf 3119 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3120 r4.rt_dev = (char __user __force *)devname;
3121 devname[15] = 0;
7a229387
AB
3122 } else
3123 r4.rt_dev = NULL;
3124
3125 r = (void *) &r4;
3126 }
3127
3128 if (ret) {
3129 ret = -EFAULT;
3130 goto out;
3131 }
3132
c6d409cf 3133 set_fs(KERNEL_DS);
6b96018b 3134 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3135 set_fs(old_fs);
7a229387
AB
3136
3137out:
7a229387
AB
3138 return ret;
3139}
3140
3141/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3142 * for some operations; this forces use of the newer bridge-utils that
25985edc 3143 * use compatible ioctls
7a229387 3144 */
6b96018b 3145static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3146{
6b96018b 3147 compat_ulong_t tmp;
7a229387 3148
6b96018b 3149 if (get_user(tmp, argp))
7a229387
AB
3150 return -EFAULT;
3151 if (tmp == BRCTL_GET_VERSION)
3152 return BRCTL_VERSION + 1;
3153 return -EINVAL;
3154}
3155
6b96018b
AB
3156static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3157 unsigned int cmd, unsigned long arg)
3158{
3159 void __user *argp = compat_ptr(arg);
3160 struct sock *sk = sock->sk;
3161 struct net *net = sock_net(sk);
7a229387 3162
6b96018b 3163 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3164 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3165
3166 switch (cmd) {
3167 case SIOCSIFBR:
3168 case SIOCGIFBR:
3169 return old_bridge_ioctl(argp);
3170 case SIOCGIFNAME:
3171 return dev_ifname32(net, argp);
3172 case SIOCGIFCONF:
3173 return dev_ifconf(net, argp);
3174 case SIOCETHTOOL:
3175 return ethtool_ioctl(net, argp);
7a50a240
AB
3176 case SIOCWANDEV:
3177 return compat_siocwandev(net, argp);
a2116ed2
AB
3178 case SIOCGIFMAP:
3179 case SIOCSIFMAP:
3180 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3181 case SIOCBONDENSLAVE:
3182 case SIOCBONDRELEASE:
3183 case SIOCBONDSETHWADDR:
6b96018b
AB
3184 case SIOCBONDCHANGEACTIVE:
3185 return bond_ioctl(net, cmd, argp);
3186 case SIOCADDRT:
3187 case SIOCDELRT:
3188 return routing_ioctl(net, sock, cmd, argp);
3189 case SIOCGSTAMP:
3190 return do_siocgstamp(net, sock, cmd, argp);
3191 case SIOCGSTAMPNS:
3192 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3193 case SIOCBONDSLAVEINFOQUERY:
3194 case SIOCBONDINFOQUERY:
a2116ed2 3195 case SIOCSHWTSTAMP:
fd468c74 3196 case SIOCGHWTSTAMP:
590d4693 3197 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3198
3199 case FIOSETOWN:
3200 case SIOCSPGRP:
3201 case FIOGETOWN:
3202 case SIOCGPGRP:
3203 case SIOCBRADDBR:
3204 case SIOCBRDELBR:
3205 case SIOCGIFVLAN:
3206 case SIOCSIFVLAN:
3207 case SIOCADDDLCI:
3208 case SIOCDELDLCI:
c62cce2c 3209 case SIOCGSKNS:
6b96018b
AB
3210 return sock_ioctl(file, cmd, arg);
3211
3212 case SIOCGIFFLAGS:
3213 case SIOCSIFFLAGS:
3214 case SIOCGIFMETRIC:
3215 case SIOCSIFMETRIC:
3216 case SIOCGIFMTU:
3217 case SIOCSIFMTU:
3218 case SIOCGIFMEM:
3219 case SIOCSIFMEM:
3220 case SIOCGIFHWADDR:
3221 case SIOCSIFHWADDR:
3222 case SIOCADDMULTI:
3223 case SIOCDELMULTI:
3224 case SIOCGIFINDEX:
6b96018b
AB
3225 case SIOCGIFADDR:
3226 case SIOCSIFADDR:
3227 case SIOCSIFHWBROADCAST:
6b96018b 3228 case SIOCDIFADDR:
6b96018b
AB
3229 case SIOCGIFBRDADDR:
3230 case SIOCSIFBRDADDR:
3231 case SIOCGIFDSTADDR:
3232 case SIOCSIFDSTADDR:
3233 case SIOCGIFNETMASK:
3234 case SIOCSIFNETMASK:
3235 case SIOCSIFPFLAGS:
3236 case SIOCGIFPFLAGS:
3237 case SIOCGIFTXQLEN:
3238 case SIOCSIFTXQLEN:
3239 case SIOCBRADDIF:
3240 case SIOCBRDELIF:
9177efd3
AB
3241 case SIOCSIFNAME:
3242 case SIOCGMIIPHY:
3243 case SIOCGMIIREG:
3244 case SIOCSMIIREG:
6b96018b 3245 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3246
6b96018b
AB
3247 case SIOCSARP:
3248 case SIOCGARP:
3249 case SIOCDARP:
6b96018b 3250 case SIOCATMARK:
9177efd3
AB
3251 return sock_do_ioctl(net, sock, cmd, arg);
3252 }
3253
6b96018b
AB
3254 return -ENOIOCTLCMD;
3255}
7a229387 3256
95c96174 3257static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3258 unsigned long arg)
89bbfc95
SP
3259{
3260 struct socket *sock = file->private_data;
3261 int ret = -ENOIOCTLCMD;
87de87d5
DM
3262 struct sock *sk;
3263 struct net *net;
3264
3265 sk = sock->sk;
3266 net = sock_net(sk);
89bbfc95
SP
3267
3268 if (sock->ops->compat_ioctl)
3269 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3270
87de87d5
DM
3271 if (ret == -ENOIOCTLCMD &&
3272 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3273 ret = compat_wext_handle_ioctl(net, cmd, arg);
3274
6b96018b
AB
3275 if (ret == -ENOIOCTLCMD)
3276 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3277
89bbfc95
SP
3278 return ret;
3279}
3280#endif
3281
ac5a488e
SS
3282int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3283{
3284 return sock->ops->bind(sock, addr, addrlen);
3285}
c6d409cf 3286EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3287
3288int kernel_listen(struct socket *sock, int backlog)
3289{
3290 return sock->ops->listen(sock, backlog);
3291}
c6d409cf 3292EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3293
3294int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3295{
3296 struct sock *sk = sock->sk;
3297 int err;
3298
3299 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3300 newsock);
3301 if (err < 0)
3302 goto done;
3303
cdfbabfb 3304 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3305 if (err < 0) {
3306 sock_release(*newsock);
fa8705b0 3307 *newsock = NULL;
ac5a488e
SS
3308 goto done;
3309 }
3310
3311 (*newsock)->ops = sock->ops;
1b08534e 3312 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3313
3314done:
3315 return err;
3316}
c6d409cf 3317EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3318
3319int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3320 int flags)
ac5a488e
SS
3321{
3322 return sock->ops->connect(sock, addr, addrlen, flags);
3323}
c6d409cf 3324EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3325
3326int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3327 int *addrlen)
3328{
3329 return sock->ops->getname(sock, addr, addrlen, 0);
3330}
c6d409cf 3331EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3332
3333int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3334 int *addrlen)
3335{
3336 return sock->ops->getname(sock, addr, addrlen, 1);
3337}
c6d409cf 3338EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3339
3340int kernel_getsockopt(struct socket *sock, int level, int optname,
3341 char *optval, int *optlen)
3342{
3343 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3344 char __user *uoptval;
3345 int __user *uoptlen;
ac5a488e
SS
3346 int err;
3347
fb8621bb
NK
3348 uoptval = (char __user __force *) optval;
3349 uoptlen = (int __user __force *) optlen;
3350
ac5a488e
SS
3351 set_fs(KERNEL_DS);
3352 if (level == SOL_SOCKET)
fb8621bb 3353 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3354 else
fb8621bb
NK
3355 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3356 uoptlen);
ac5a488e
SS
3357 set_fs(oldfs);
3358 return err;
3359}
c6d409cf 3360EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3361
3362int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3363 char *optval, unsigned int optlen)
ac5a488e
SS
3364{
3365 mm_segment_t oldfs = get_fs();
fb8621bb 3366 char __user *uoptval;
ac5a488e
SS
3367 int err;
3368
fb8621bb
NK
3369 uoptval = (char __user __force *) optval;
3370
ac5a488e
SS
3371 set_fs(KERNEL_DS);
3372 if (level == SOL_SOCKET)
fb8621bb 3373 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3374 else
fb8621bb 3375 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3376 optlen);
3377 set_fs(oldfs);
3378 return err;
3379}
c6d409cf 3380EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3381
3382int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3383 size_t size, int flags)
3384{
3385 if (sock->ops->sendpage)
3386 return sock->ops->sendpage(sock, page, offset, size, flags);
3387
3388 return sock_no_sendpage(sock, page, offset, size, flags);
3389}
c6d409cf 3390EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3391
306b13eb
TH
3392int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3393 size_t size, int flags)
3394{
3395 struct socket *sock = sk->sk_socket;
3396
3397 if (sock->ops->sendpage_locked)
3398 return sock->ops->sendpage_locked(sk, page, offset, size,
3399 flags);
3400
3401 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3402}
3403EXPORT_SYMBOL(kernel_sendpage_locked);
3404
ac5a488e
SS
3405int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3406{
3407 mm_segment_t oldfs = get_fs();
3408 int err;
3409
3410 set_fs(KERNEL_DS);
3411 err = sock->ops->ioctl(sock, cmd, arg);
3412 set_fs(oldfs);
3413
3414 return err;
3415}
c6d409cf 3416EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3417
91cf45f0
TM
3418int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3419{
3420 return sock->ops->shutdown(sock, how);
3421}
91cf45f0 3422EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075
P
3423
3424/* This routine returns the IP overhead imposed by a socket i.e.
3425 * the length of the underlying IP header, depending on whether
3426 * this is an IPv4 or IPv6 socket and the length from IP options turned
57240d00 3427 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075
P
3428 */
3429u32 kernel_sock_ip_overhead(struct sock *sk)
3430{
3431 struct inet_sock *inet;
3432 struct ip_options_rcu *opt;
3433 u32 overhead = 0;
113c3075
P
3434#if IS_ENABLED(CONFIG_IPV6)
3435 struct ipv6_pinfo *np;
3436 struct ipv6_txoptions *optv6 = NULL;
3437#endif /* IS_ENABLED(CONFIG_IPV6) */
3438
3439 if (!sk)
3440 return overhead;
3441
113c3075
P
3442 switch (sk->sk_family) {
3443 case AF_INET:
3444 inet = inet_sk(sk);
3445 overhead += sizeof(struct iphdr);
3446 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3447 sock_owned_by_user(sk));
113c3075
P
3448 if (opt)
3449 overhead += opt->opt.optlen;
3450 return overhead;
3451#if IS_ENABLED(CONFIG_IPV6)
3452 case AF_INET6:
3453 np = inet6_sk(sk);
3454 overhead += sizeof(struct ipv6hdr);
3455 if (np)
3456 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3457 sock_owned_by_user(sk));
113c3075
P
3458 if (optv6)
3459 overhead += (optv6->opt_flen + optv6->opt_nflen);
3460 return overhead;
3461#endif /* IS_ENABLED(CONFIG_IPV6) */
3462 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3463 return overhead;
3464 }
3465}
3466EXPORT_SYMBOL(kernel_sock_ip_overhead);