Merge tag 'hyperv-next-signed-20201214' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-block.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
cc69837f 55#include <linux/ethtool.h>
1da177e4 56#include <linux/mm.h>
1da177e4
LT
57#include <linux/socket.h>
58#include <linux/file.h>
59#include <linux/net.h>
60#include <linux/interrupt.h>
aaca0bdc 61#include <linux/thread_info.h>
55737fda 62#include <linux/rcupdate.h>
1da177e4
LT
63#include <linux/netdevice.h>
64#include <linux/proc_fs.h>
65#include <linux/seq_file.h>
4a3e2f71 66#include <linux/mutex.h>
1da177e4 67#include <linux/if_bridge.h>
20380731 68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
c7dc504e 103#include <linux/termios.h>
6b96018b 104#include <linux/sockios.h>
076bb0c8 105#include <net/busy_poll.h>
f24b9be5 106#include <linux/errqueue.h>
06021292 107
/* Busy-poll tunables; only built when CONFIG_NET_RX_BUSY_POLL is enabled. */
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
6b96018b 112
8ae5e030
AV
113static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
114static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 115static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
116
117static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
118static __poll_t sock_poll(struct file *file,
119 struct poll_table_struct *wait);
89bddce5 120static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
121#ifdef CONFIG_COMPAT
122static long compat_sock_ioctl(struct file *file,
89bddce5 123 unsigned int cmd, unsigned long arg);
89bbfc95 124#endif
1da177e4 125static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
126static ssize_t sock_sendpage(struct file *file, struct page *page,
127 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 128static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 129 struct pipe_inode_info *pipe, size_t len,
9c55e01c 130 unsigned int flags);
542d3065
AB
131
#ifdef CONFIG_PROC_FS
/*
 * fdinfo hook for socket files: hand over to the protocol's own
 * show_fdinfo callback when one is provided, otherwise emit nothing.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;

	if (!sock->ops->show_fdinfo)
		return;

	sock->ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
#endif
1da177e4 143
1da177e4
LT
144/*
145 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
146 * in the operation structures but are done directly via the socketcall() multiplexor.
147 */
148
da7071d7 149static const struct file_operations socket_file_ops = {
1da177e4
LT
150 .owner = THIS_MODULE,
151 .llseek = no_llseek,
8ae5e030
AV
152 .read_iter = sock_read_iter,
153 .write_iter = sock_write_iter,
1da177e4
LT
154 .poll = sock_poll,
155 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
156#ifdef CONFIG_COMPAT
157 .compat_ioctl = compat_sock_ioctl,
158#endif
1da177e4 159 .mmap = sock_mmap,
1da177e4
LT
160 .release = sock_close,
161 .fasync = sock_fasync,
5274f052
JA
162 .sendpage = sock_sendpage,
163 .splice_write = generic_splice_sendpage,
9c55e01c 164 .splice_read = sock_splice_read,
b4653342 165 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
166};
167
168/*
169 * The protocol list. Each protocol is registered in here.
170 */
171
1da177e4 172static DEFINE_SPINLOCK(net_family_lock);
190683a9 173static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 174
1da177e4 175/*
89bddce5
SH
176 * Support routines.
177 * Move socket addresses back and forth across the kernel/user
178 * divide and look after the messy bits.
1da177e4
LT
179 */
180
1da177e4
LT
181/**
182 * move_addr_to_kernel - copy a socket address into kernel space
183 * @uaddr: Address in user space
184 * @kaddr: Address in kernel space
185 * @ulen: Length in user space
186 *
187 * The address is copied into kernel space. If the provided address is
188 * too long an error code of -EINVAL is returned. If the copy gives
189 * invalid addresses -EFAULT is returned. On a success 0 is returned.
190 */
191
43db362d 192int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 193{
230b1839 194 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 195 return -EINVAL;
89bddce5 196 if (ulen == 0)
1da177e4 197 return 0;
89bddce5 198 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 199 return -EFAULT;
3ec3b2fb 200 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
201}
202
203/**
204 * move_addr_to_user - copy an address to user space
205 * @kaddr: kernel space address
206 * @klen: length of address in kernel
207 * @uaddr: user space address
208 * @ulen: pointer to user length field
209 *
210 * The value pointed to by ulen on entry is the buffer length available.
211 * This is overwritten with the buffer space used. -EINVAL is returned
212 * if an overlong buffer is specified or a negative buffer size. -EFAULT
213 * is returned if either the buffer or the length field are not
214 * accessible.
215 * After copying the data up to the limit the user specifies, the true
216 * length of the data is written over the length limit the user
217 * specified. Zero is returned for a success.
218 */
89bddce5 219
43db362d 220static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 221 void __user *uaddr, int __user *ulen)
1da177e4
LT
222{
223 int err;
224 int len;
225
68c6beb3 226 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
227 err = get_user(len, ulen);
228 if (err)
1da177e4 229 return err;
89bddce5
SH
230 if (len > klen)
231 len = klen;
68c6beb3 232 if (len < 0)
1da177e4 233 return -EINVAL;
89bddce5 234 if (len) {
d6fe3945
SG
235 if (audit_sockaddr(klen, kaddr))
236 return -ENOMEM;
89bddce5 237 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
238 return -EFAULT;
239 }
240 /*
89bddce5
SH
241 * "fromlen shall refer to the value before truncation.."
242 * 1003.1g
1da177e4
LT
243 */
244 return __put_user(klen, ulen);
245}
246
08009a76 247static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
248
249static struct inode *sock_alloc_inode(struct super_block *sb)
250{
251 struct socket_alloc *ei;
89bddce5 252
e94b1766 253 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
254 if (!ei)
255 return NULL;
333f7909
AV
256 init_waitqueue_head(&ei->socket.wq.wait);
257 ei->socket.wq.fasync_list = NULL;
258 ei->socket.wq.flags = 0;
89bddce5 259
1da177e4
LT
260 ei->socket.state = SS_UNCONNECTED;
261 ei->socket.flags = 0;
262 ei->socket.ops = NULL;
263 ei->socket.sk = NULL;
264 ei->socket.file = NULL;
1da177e4
LT
265
266 return &ei->vfs_inode;
267}
268
6d7855c5 269static void sock_free_inode(struct inode *inode)
1da177e4 270{
43815482
ED
271 struct socket_alloc *ei;
272
273 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 274 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
275}
276
51cc5068 277static void init_once(void *foo)
1da177e4 278{
89bddce5 279 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 280
a35afb83 281 inode_init_once(&ei->vfs_inode);
1da177e4 282}
89bddce5 283
1e911632 284static void init_inodecache(void)
1da177e4
LT
285{
286 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
287 sizeof(struct socket_alloc),
288 0,
289 (SLAB_HWCACHE_ALIGN |
290 SLAB_RECLAIM_ACCOUNT |
5d097056 291 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 292 init_once);
1e911632 293 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
294}
295
b87221de 296static const struct super_operations sockfs_ops = {
c6d409cf 297 .alloc_inode = sock_alloc_inode,
6d7855c5 298 .free_inode = sock_free_inode,
c6d409cf 299 .statfs = simple_statfs,
1da177e4
LT
300};
301
c23fbb6b
ED
302/*
303 * sockfs_dname() is called from d_path().
304 */
305static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
306{
307 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 308 d_inode(dentry)->i_ino);
c23fbb6b
ED
309}
310
3ba13d17 311static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 312 .d_dname = sockfs_dname,
1da177e4
LT
313};
314
bba0bd31
AG
315static int sockfs_xattr_get(const struct xattr_handler *handler,
316 struct dentry *dentry, struct inode *inode,
317 const char *suffix, void *value, size_t size)
318{
319 if (value) {
320 if (dentry->d_name.len + 1 > size)
321 return -ERANGE;
322 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
323 }
324 return dentry->d_name.len + 1;
325}
326
327#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
328#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
329#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
330
331static const struct xattr_handler sockfs_xattr_handler = {
332 .name = XATTR_NAME_SOCKPROTONAME,
333 .get = sockfs_xattr_get,
334};
335
4a590153
AG
336static int sockfs_security_xattr_set(const struct xattr_handler *handler,
337 struct dentry *dentry, struct inode *inode,
338 const char *suffix, const void *value,
339 size_t size, int flags)
340{
341 /* Handled by LSM. */
342 return -EAGAIN;
343}
344
345static const struct xattr_handler sockfs_security_xattr_handler = {
346 .prefix = XATTR_SECURITY_PREFIX,
347 .set = sockfs_security_xattr_set,
348};
349
bba0bd31
AG
350static const struct xattr_handler *sockfs_xattr_handlers[] = {
351 &sockfs_xattr_handler,
4a590153 352 &sockfs_security_xattr_handler,
bba0bd31
AG
353 NULL
354};
355
fba9be49 356static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 357{
fba9be49
DH
358 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
359 if (!ctx)
360 return -ENOMEM;
361 ctx->ops = &sockfs_ops;
362 ctx->dops = &sockfs_dentry_operations;
363 ctx->xattr = sockfs_xattr_handlers;
364 return 0;
c74a1cbb
AV
365}
366
367static struct vfsmount *sock_mnt __read_mostly;
368
369static struct file_system_type sock_fs_type = {
370 .name = "sockfs",
fba9be49 371 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
372 .kill_sb = kill_anon_super,
373};
374
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them into the fd
 *	space of the current process. On success the file descriptor is
 *	returned and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we no longer
 *	refer to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable with shared fd spaces; we
 *	cannot solve it inside the kernel, but we still take care of
 *	internal coherence.
 */
391
8a3c245c
PT
392/**
393 * sock_alloc_file - Bind a &socket to a &file
394 * @sock: socket
395 * @flags: file status flags
396 * @dname: protocol name
397 *
398 * Returns the &file bound with @sock, implicitly storing it
399 * in sock->file. If dname is %NULL, sets to "".
400 * On failure the return is a ERR pointer (see linux/err.h).
401 * This function uses GFP_KERNEL internally.
402 */
403
aab174f0 404struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 405{
7cbe66b6 406 struct file *file;
1da177e4 407
d93aa9d8
AV
408 if (!dname)
409 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 410
d93aa9d8
AV
411 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
412 O_RDWR | (flags & O_NONBLOCK),
413 &socket_file_ops);
b5ffe634 414 if (IS_ERR(file)) {
8e1611e2 415 sock_release(sock);
39b65252 416 return file;
cc3808f8
AV
417 }
418
419 sock->file = file;
39d8c1b6 420 file->private_data = sock;
d8e464ec 421 stream_open(SOCK_INODE(sock), file);
28407630 422 return file;
39d8c1b6 423}
56b31d1c 424EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 425
56b31d1c 426static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
427{
428 struct file *newfile;
28407630 429 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
430 if (unlikely(fd < 0)) {
431 sock_release(sock);
28407630 432 return fd;
ce4bb04c 433 }
39d8c1b6 434
aab174f0 435 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 436 if (!IS_ERR(newfile)) {
39d8c1b6 437 fd_install(fd, newfile);
28407630
AV
438 return fd;
439 }
7cbe66b6 440
28407630
AV
441 put_unused_fd(fd);
442 return PTR_ERR(newfile);
1da177e4
LT
443}
444
8a3c245c
PT
445/**
446 * sock_from_file - Return the &socket bounded to @file.
447 * @file: file
8a3c245c 448 *
dba4a925 449 * On failure returns %NULL.
8a3c245c
PT
450 */
451
dba4a925 452struct socket *sock_from_file(struct file *file)
6cb153ca 453{
6cb153ca
BL
454 if (file->f_op == &socket_file_ops)
455 return file->private_data; /* set in sock_map_fd */
456
23bb80d2 457 return NULL;
6cb153ca 458}
406a3c63 459EXPORT_SYMBOL(sock_from_file);
6cb153ca 460
1da177e4 461/**
c6d409cf 462 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
463 * @fd: file handle
464 * @err: pointer to an error code return
465 *
466 * The file handle passed in is locked and the socket it is bound
241c4667 467 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
468 * with a negative errno code and NULL is returned. The function checks
469 * for both invalid handles and passing a handle which is not a socket.
470 *
471 * On a success the socket object pointer is returned.
472 */
473
474struct socket *sockfd_lookup(int fd, int *err)
475{
476 struct file *file;
1da177e4
LT
477 struct socket *sock;
478
89bddce5
SH
479 file = fget(fd);
480 if (!file) {
1da177e4
LT
481 *err = -EBADF;
482 return NULL;
483 }
89bddce5 484
dba4a925
FR
485 sock = sock_from_file(file);
486 if (!sock) {
487 *err = -ENOTSOCK;
1da177e4 488 fput(file);
dba4a925 489 }
6cb153ca
BL
490 return sock;
491}
c6d409cf 492EXPORT_SYMBOL(sockfd_lookup);
1da177e4 493
6cb153ca
BL
494static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
495{
00e188ef 496 struct fd f = fdget(fd);
6cb153ca
BL
497 struct socket *sock;
498
3672558c 499 *err = -EBADF;
00e188ef 500 if (f.file) {
dba4a925 501 sock = sock_from_file(f.file);
00e188ef 502 if (likely(sock)) {
ce787a5a 503 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 504 return sock;
00e188ef 505 }
dba4a925 506 *err = -ENOTSOCK;
00e188ef 507 fdput(f);
1da177e4 508 }
6cb153ca 509 return NULL;
1da177e4
LT
510}
511
600e1779
MY
512static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
513 size_t size)
514{
515 ssize_t len;
516 ssize_t used = 0;
517
c5ef6035 518 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
519 if (len < 0)
520 return len;
521 used += len;
522 if (buffer) {
523 if (size < used)
524 return -ERANGE;
525 buffer += len;
526 }
527
528 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
529 used += len;
530 if (buffer) {
531 if (size < used)
532 return -ERANGE;
533 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
534 buffer += len;
535 }
536
537 return used;
538}
539
dc647ec8 540static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
541{
542 int err = simple_setattr(dentry, iattr);
543
e1a3a60a 544 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
545 struct socket *sock = SOCKET_I(d_inode(dentry));
546
6d8c50dc
CW
547 if (sock->sk)
548 sock->sk->sk_uid = iattr->ia_uid;
549 else
550 err = -ENOENT;
86741ec2
LC
551 }
552
553 return err;
554}
555
600e1779 556static const struct inode_operations sockfs_inode_ops = {
600e1779 557 .listxattr = sockfs_listxattr,
86741ec2 558 .setattr = sockfs_setattr,
600e1779
MY
559};
560
1da177e4 561/**
8a3c245c 562 * sock_alloc - allocate a socket
89bddce5 563 *
1da177e4
LT
564 * Allocate a new inode and socket object. The two are bound together
565 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 566 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
567 */
568
f4a00aac 569struct socket *sock_alloc(void)
1da177e4 570{
89bddce5
SH
571 struct inode *inode;
572 struct socket *sock;
1da177e4 573
a209dfc7 574 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
575 if (!inode)
576 return NULL;
577
578 sock = SOCKET_I(inode);
579
85fe4025 580 inode->i_ino = get_next_ino();
89bddce5 581 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
582 inode->i_uid = current_fsuid();
583 inode->i_gid = current_fsgid();
600e1779 584 inode->i_op = &sockfs_inode_ops;
1da177e4 585
1da177e4
LT
586 return sock;
587}
f4a00aac 588EXPORT_SYMBOL(sock_alloc);
1da177e4 589
6d8c50dc 590static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
591{
592 if (sock->ops) {
593 struct module *owner = sock->ops->owner;
594
6d8c50dc
CW
595 if (inode)
596 inode_lock(inode);
1da177e4 597 sock->ops->release(sock);
ff7b11aa 598 sock->sk = NULL;
6d8c50dc
CW
599 if (inode)
600 inode_unlock(inode);
1da177e4
LT
601 sock->ops = NULL;
602 module_put(owner);
603 }
604
333f7909 605 if (sock->wq.fasync_list)
3410f22e 606 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 607
1da177e4
LT
608 if (!sock->file) {
609 iput(SOCK_INODE(sock));
610 return;
611 }
89bddce5 612 sock->file = NULL;
1da177e4 613}
6d8c50dc 614
9a8ad9ac
AL
615/**
616 * sock_release - close a socket
617 * @sock: socket to close
618 *
619 * The socket is released from the protocol stack if it has a release
620 * callback, and the inode is then released if the socket is bound to
621 * an inode not a file.
622 */
6d8c50dc
CW
623void sock_release(struct socket *sock)
624{
625 __sock_release(sock, NULL);
626}
c6d409cf 627EXPORT_SYMBOL(sock_release);
1da177e4 628
c14ac945 629void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 630{
140c55d4
ED
631 u8 flags = *tx_flags;
632
c14ac945 633 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
634 flags |= SKBTX_HW_TSTAMP;
635
c14ac945 636 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
637 flags |= SKBTX_SW_TSTAMP;
638
c14ac945 639 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
640 flags |= SKBTX_SCHED_TSTAMP;
641
140c55d4 642 *tx_flags = flags;
20d49473 643}
67cc0d40 644EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 645
8c3c447b
PA
646INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
647 size_t));
a648a592
PA
648INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
649 size_t));
d8725c86 650static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 651{
a648a592
PA
652 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
653 inet_sendmsg, sock, msg,
654 msg_data_left(msg));
d8725c86
AV
655 BUG_ON(ret == -EIOCBQUEUED);
656 return ret;
1da177e4
LT
657}
658
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM. If the security
 *	hook returns non-zero, that value is returned and nothing is sent.
 *	Otherwise returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 675
8a3c245c
PT
676/**
677 * kernel_sendmsg - send a message through @sock (kernel-space)
678 * @sock: socket
679 * @msg: message header
680 * @vec: kernel vec
681 * @num: vec array length
682 * @size: total message data size
683 *
684 * Builds the message data with @vec and sends it through @sock.
685 * Returns the number of bytes sent, or an error code.
686 */
687
1da177e4
LT
688int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
689 struct kvec *vec, size_t num, size_t size)
690{
aa563d7b 691 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 692 return sock_sendmsg(sock, msg);
1da177e4 693}
c6d409cf 694EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 695
8a3c245c
PT
696/**
697 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
698 * @sk: sock
699 * @msg: message header
700 * @vec: output s/g array
701 * @num: output s/g array length
702 * @size: total message data size
703 *
704 * Builds the message data with @vec and sends it through @sock.
705 * Returns the number of bytes sent, or an error code.
706 * Caller must hold @sk.
707 */
708
306b13eb
TH
709int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
710 struct kvec *vec, size_t num, size_t size)
711{
712 struct socket *sock = sk->sk_socket;
713
714 if (!sock->ops->sendmsg_locked)
db5980d8 715 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 716
aa563d7b 717 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
718
719 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
720}
721EXPORT_SYMBOL(kernel_sendmsg_locked);
722
8605330a
SHY
723static bool skb_is_err_queue(const struct sk_buff *skb)
724{
725 /* pkt_type of skbs enqueued on the error queue are set to
726 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
727 * in recvmsg, since skbs received on a local socket will never
728 * have a pkt_type of PACKET_OUTGOING.
729 */
730 return skb->pkt_type == PACKET_OUTGOING;
731}
732
b50a5c70
ML
733/* On transmit, software and hardware timestamps are returned independently.
734 * As the two skb clones share the hardware timestamp, which may be updated
735 * before the software timestamp is received, a hardware TX timestamp may be
736 * returned only if there is no software TX timestamp. Ignore false software
737 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 738 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
739 * hardware timestamp.
740 */
741static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
742{
743 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
744}
745
aad9c8c4
ML
746static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
747{
748 struct scm_ts_pktinfo ts_pktinfo;
749 struct net_device *orig_dev;
750
751 if (!skb_mac_header_was_set(skb))
752 return;
753
754 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
755
756 rcu_read_lock();
757 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
758 if (orig_dev)
759 ts_pktinfo.if_index = orig_dev->ifindex;
760 rcu_read_unlock();
761
762 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
763 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
764 sizeof(ts_pktinfo), &ts_pktinfo);
765}
766
92f37fd2
ED
767/*
768 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
769 */
770void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
771 struct sk_buff *skb)
772{
20d49473 773 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 774 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
775 struct scm_timestamping_internal tss;
776
b50a5c70 777 int empty = 1, false_tstamp = 0;
20d49473
PO
778 struct skb_shared_hwtstamps *shhwtstamps =
779 skb_hwtstamps(skb);
780
781 /* Race occurred between timestamp enabling and packet
782 receiving. Fill in the current time for now. */
b50a5c70 783 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 784 __net_timestamp(skb);
b50a5c70
ML
785 false_tstamp = 1;
786 }
20d49473
PO
787
788 if (need_software_tstamp) {
789 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
790 if (new_tstamp) {
791 struct __kernel_sock_timeval tv;
792
793 skb_get_new_timestamp(skb, &tv);
794 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
795 sizeof(tv), &tv);
796 } else {
797 struct __kernel_old_timeval tv;
798
799 skb_get_timestamp(skb, &tv);
800 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
801 sizeof(tv), &tv);
802 }
20d49473 803 } else {
887feae3
DD
804 if (new_tstamp) {
805 struct __kernel_timespec ts;
806
807 skb_get_new_timestampns(skb, &ts);
808 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
809 sizeof(ts), &ts);
810 } else {
df1b4ba9 811 struct __kernel_old_timespec ts;
887feae3
DD
812
813 skb_get_timestampns(skb, &ts);
814 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
815 sizeof(ts), &ts);
816 }
20d49473
PO
817 }
818 }
819
f24b9be5 820 memset(&tss, 0, sizeof(tss));
c199105d 821 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 822 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 823 empty = 0;
4d276eb6 824 if (shhwtstamps &&
b9f40e21 825 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 826 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 827 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 828 empty = 0;
aad9c8c4
ML
829 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
830 !skb_is_err_queue(skb))
831 put_ts_pktinfo(msg, skb);
832 }
1c885808 833 if (!empty) {
9718475e
DD
834 if (sock_flag(sk, SOCK_TSTAMP_NEW))
835 put_cmsg_scm_timestamping64(msg, &tss);
836 else
837 put_cmsg_scm_timestamping(msg, &tss);
1c885808 838
8605330a 839 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 840 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
841 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
842 skb->len, skb->data);
843 }
92f37fd2 844}
7c81fd8b
ACM
845EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
846
6e3e939f
JB
847void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
848 struct sk_buff *skb)
849{
850 int ack;
851
852 if (!sock_flag(sk, SOCK_WIFI_STATUS))
853 return;
854 if (!skb->wifi_acked_valid)
855 return;
856
857 ack = skb->wifi_acked;
858
859 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
860}
861EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
862
11165f14 863static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
864 struct sk_buff *skb)
3b885787 865{
744d5a3e 866 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 867 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 868 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
869}
870
/*
 * Emit the receive-side ancillary data for @skb: first the timestamp
 * control messages, then the drop counter.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 878
8c3c447b 879INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
880 size_t, int));
881INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
882 size_t, int));
1b784140 883static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 884 int flags)
1da177e4 885{
a648a592
PA
886 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
887 inet_recvmsg, sock, msg, msg_data_left(msg),
888 flags);
1da177e4
LT
889}
890
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM. If the security
 *	hook returns non-zero, that value is returned and nothing is
 *	received. Otherwise returns the total number of bytes received,
 *	or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 907
c1249c0a 908/**
8a3c245c
PT
909 * kernel_recvmsg - Receive a message from a socket (kernel space)
910 * @sock: The socket to receive the message from
911 * @msg: Received message
912 * @vec: Input s/g array for message data
913 * @num: Size of input s/g array
914 * @size: Number of bytes to read
915 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 916 *
8a3c245c
PT
917 * On return the msg structure contains the scatter/gather array passed in the
918 * vec argument. The array is modified so that it consists of the unfilled
919 * portion of the original array.
c1249c0a 920 *
8a3c245c 921 * The returned value is the total number of bytes received, or an error.
c1249c0a 922 */
8a3c245c 923
89bddce5
SH
924int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
925 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4 926{
1f466e1f 927 msg->msg_control_is_user = false;
aa563d7b 928 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1f466e1f 929 return sock_recvmsg(sock, msg, flags);
1da177e4 930}
c6d409cf 931EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 932
ce1d4d3e
CH
933static ssize_t sock_sendpage(struct file *file, struct page *page,
934 int offset, size_t size, loff_t *ppos, int more)
1da177e4 935{
1da177e4
LT
936 struct socket *sock;
937 int flags;
938
ce1d4d3e
CH
939 sock = file->private_data;
940
35f9c09f
ED
941 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
942 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
943 flags |= more;
ce1d4d3e 944
e6949583 945 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 946}
1da177e4 947
9c55e01c 948static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 949 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
950 unsigned int flags)
951{
952 struct socket *sock = file->private_data;
953
997b37da 954 if (unlikely(!sock->ops->splice_read))
95506588 955 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 956
9c55e01c
JA
957 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
958}
959
8ae5e030 960static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 961{
6d652330
AV
962 struct file *file = iocb->ki_filp;
963 struct socket *sock = file->private_data;
0345f931 964 struct msghdr msg = {.msg_iter = *to,
965 .msg_iocb = iocb};
8ae5e030 966 ssize_t res;
ce1d4d3e 967
ebfcd895 968 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
969 msg.msg_flags = MSG_DONTWAIT;
970
971 if (iocb->ki_pos != 0)
1da177e4 972 return -ESPIPE;
027445c3 973
66ee59af 974 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
975 return 0;
976
2da62906 977 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
978 *to = msg.msg_iter;
979 return res;
1da177e4
LT
980}
981
8ae5e030 982static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 983{
6d652330
AV
984 struct file *file = iocb->ki_filp;
985 struct socket *sock = file->private_data;
0345f931 986 struct msghdr msg = {.msg_iter = *from,
987 .msg_iocb = iocb};
8ae5e030 988 ssize_t res;
1da177e4 989
8ae5e030 990 if (iocb->ki_pos != 0)
ce1d4d3e 991 return -ESPIPE;
027445c3 992
ebfcd895 993 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
994 msg.msg_flags = MSG_DONTWAIT;
995
6d652330
AV
996 if (sock->type == SOCK_SEQPACKET)
997 msg.msg_flags |= MSG_EOR;
998
d8725c86 999 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1000 *from = msg.msg_iter;
1001 return res;
1da177e4
LT
1002}
1003
1da177e4
LT
1004/*
1005 * Atomic setting of ioctl hooks to avoid race
1006 * with module unload.
1007 */
1008
4a3e2f71 1009static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1010static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1011
881d966b 1012void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1013{
4a3e2f71 1014 mutex_lock(&br_ioctl_mutex);
1da177e4 1015 br_ioctl_hook = hook;
4a3e2f71 1016 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1017}
1018EXPORT_SYMBOL(brioctl_set);
1019
4a3e2f71 1020static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1021static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1022
881d966b 1023void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1024{
4a3e2f71 1025 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1026 vlan_ioctl_hook = hook;
4a3e2f71 1027 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1028}
1029EXPORT_SYMBOL(vlan_ioctl_set);
1030
6b96018b 1031static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1032 unsigned int cmd, unsigned long arg)
6b96018b
AB
1033{
1034 int err;
1035 void __user *argp = (void __user *)arg;
1036
1037 err = sock->ops->ioctl(sock, cmd, arg);
1038
1039 /*
1040 * If this ioctl is unknown try to hand it down
1041 * to the NIC driver.
1042 */
36fd633e
AV
1043 if (err != -ENOIOCTLCMD)
1044 return err;
6b96018b 1045
36fd633e
AV
1046 if (cmd == SIOCGIFCONF) {
1047 struct ifconf ifc;
1048 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1049 return -EFAULT;
1050 rtnl_lock();
1051 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1052 rtnl_unlock();
1053 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1054 err = -EFAULT;
44c02a2c
AV
1055 } else {
1056 struct ifreq ifr;
1057 bool need_copyout;
63ff03ab 1058 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1059 return -EFAULT;
1060 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1061 if (!err && need_copyout)
63ff03ab 1062 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1063 return -EFAULT;
36fd633e 1064 }
6b96018b
AB
1065 return err;
1066}
1067
1da177e4
LT
1068/*
1069 * With an ioctl, arg may well be a user mode pointer, but we don't know
1070 * what to do with it - that's up to the protocol still.
1071 */
1072
8a3c245c
PT
1073/**
1074 * get_net_ns - increment the refcount of the network namespace
1075 * @ns: common namespace (net)
1076 *
1077 * Returns the net's common namespace.
1078 */
1079
d8d211a2 1080struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1081{
1082 return &get_net(container_of(ns, struct net, ns))->ns;
1083}
d8d211a2 1084EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1085
1da177e4
LT
1086static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1087{
1088 struct socket *sock;
881d966b 1089 struct sock *sk;
1da177e4
LT
1090 void __user *argp = (void __user *)arg;
1091 int pid, err;
881d966b 1092 struct net *net;
1da177e4 1093
b69aee04 1094 sock = file->private_data;
881d966b 1095 sk = sock->sk;
3b1e0a65 1096 net = sock_net(sk);
44c02a2c
AV
1097 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1098 struct ifreq ifr;
1099 bool need_copyout;
1100 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1101 return -EFAULT;
1102 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1103 if (!err && need_copyout)
1104 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1105 return -EFAULT;
1da177e4 1106 } else
3d23e349 1107#ifdef CONFIG_WEXT_CORE
1da177e4 1108 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1109 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1110 } else
3d23e349 1111#endif
89bddce5 1112 switch (cmd) {
1da177e4
LT
1113 case FIOSETOWN:
1114 case SIOCSPGRP:
1115 err = -EFAULT;
1116 if (get_user(pid, (int __user *)argp))
1117 break;
393cc3f5 1118 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1119 break;
1120 case FIOGETOWN:
1121 case SIOCGPGRP:
609d7fa9 1122 err = put_user(f_getown(sock->file),
89bddce5 1123 (int __user *)argp);
1da177e4
LT
1124 break;
1125 case SIOCGIFBR:
1126 case SIOCSIFBR:
1127 case SIOCBRADDBR:
1128 case SIOCBRDELBR:
1129 err = -ENOPKG;
1130 if (!br_ioctl_hook)
1131 request_module("bridge");
1132
4a3e2f71 1133 mutex_lock(&br_ioctl_mutex);
89bddce5 1134 if (br_ioctl_hook)
881d966b 1135 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1136 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1137 break;
1138 case SIOCGIFVLAN:
1139 case SIOCSIFVLAN:
1140 err = -ENOPKG;
1141 if (!vlan_ioctl_hook)
1142 request_module("8021q");
1143
4a3e2f71 1144 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1145 if (vlan_ioctl_hook)
881d966b 1146 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1147 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1148 break;
c62cce2c
AV
1149 case SIOCGSKNS:
1150 err = -EPERM;
1151 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1152 break;
1153
1154 err = open_related_ns(&net->ns, get_net_ns);
1155 break;
0768e170
AB
1156 case SIOCGSTAMP_OLD:
1157 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1158 if (!sock->ops->gettstamp) {
1159 err = -ENOIOCTLCMD;
1160 break;
1161 }
1162 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1163 cmd == SIOCGSTAMP_OLD,
1164 !IS_ENABLED(CONFIG_64BIT));
60747828 1165 break;
0768e170
AB
1166 case SIOCGSTAMP_NEW:
1167 case SIOCGSTAMPNS_NEW:
1168 if (!sock->ops->gettstamp) {
1169 err = -ENOIOCTLCMD;
1170 break;
1171 }
1172 err = sock->ops->gettstamp(sock, argp,
1173 cmd == SIOCGSTAMP_NEW,
1174 false);
c7cbdbf2 1175 break;
1da177e4 1176 default:
63ff03ab 1177 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1178 break;
89bddce5 1179 }
1da177e4
LT
1180 return err;
1181}
1182
8a3c245c
PT
1183/**
1184 * sock_create_lite - creates a socket
1185 * @family: protocol family (AF_INET, ...)
1186 * @type: communication type (SOCK_STREAM, ...)
1187 * @protocol: protocol (0, ...)
1188 * @res: new socket
1189 *
1190 * Creates a new socket and assigns it to @res, passing through LSM.
1191 * The new socket initialization is not complete, see kernel_accept().
1192 * Returns 0 or an error. On failure @res is set to %NULL.
1193 * This function internally uses GFP_KERNEL.
1194 */
1195
1da177e4
LT
1196int sock_create_lite(int family, int type, int protocol, struct socket **res)
1197{
1198 int err;
1199 struct socket *sock = NULL;
89bddce5 1200
1da177e4
LT
1201 err = security_socket_create(family, type, protocol, 1);
1202 if (err)
1203 goto out;
1204
1205 sock = sock_alloc();
1206 if (!sock) {
1207 err = -ENOMEM;
1208 goto out;
1209 }
1210
1da177e4 1211 sock->type = type;
7420ed23
VY
1212 err = security_socket_post_create(sock, family, type, protocol, 1);
1213 if (err)
1214 goto out_release;
1215
1da177e4
LT
1216out:
1217 *res = sock;
1218 return err;
7420ed23
VY
1219out_release:
1220 sock_release(sock);
1221 sock = NULL;
1222 goto out;
1da177e4 1223}
c6d409cf 1224EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1225
1226/* No kernel lock held - perfect */
ade994f4 1227static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1228{
3cafb376 1229 struct socket *sock = file->private_data;
a331de3b 1230 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1231
e88958e6
CH
1232 if (!sock->ops->poll)
1233 return 0;
f641f13b 1234
a331de3b
CH
1235 if (sk_can_busy_loop(sock->sk)) {
1236 /* poll once if requested by the syscall */
1237 if (events & POLL_BUSY_LOOP)
1238 sk_busy_loop(sock->sk, 1);
1239
1240 /* if this socket can poll_ll, tell the system call */
1241 flag = POLL_BUSY_LOOP;
1242 }
1243
1244 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1245}
1246
89bddce5 1247static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1248{
b69aee04 1249 struct socket *sock = file->private_data;
1da177e4
LT
1250
1251 return sock->ops->mmap(file, sock, vma);
1252}
1253
/*
 * ->release() file operation for sockets: releases the socket that is
 * embedded in @inode.  @filp is not used.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1259
1260/*
1261 * Update the socket async list
1262 *
1263 * Fasync_list locking strategy.
1264 *
1265 * 1. fasync_list is modified only under process context socket lock
1266 * i.e. under semaphore.
1267 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1268 * or under socket lock
1da177e4
LT
1269 */
1270
1271static int sock_fasync(int fd, struct file *filp, int on)
1272{
989a2979
ED
1273 struct socket *sock = filp->private_data;
1274 struct sock *sk = sock->sk;
333f7909 1275 struct socket_wq *wq = &sock->wq;
1da177e4 1276
989a2979 1277 if (sk == NULL)
1da177e4 1278 return -EINVAL;
1da177e4
LT
1279
1280 lock_sock(sk);
eaefd110 1281 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1282
eaefd110 1283 if (!wq->fasync_list)
989a2979
ED
1284 sock_reset_flag(sk, SOCK_FASYNC);
1285 else
bcdce719 1286 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1287
989a2979 1288 release_sock(sk);
1da177e4
LT
1289 return 0;
1290}
1291
ceb5d58b 1292/* This function may be called only under rcu_lock */
1da177e4 1293
ceb5d58b 1294int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1295{
ceb5d58b 1296 if (!wq || !wq->fasync_list)
1da177e4 1297 return -1;
ceb5d58b 1298
89bddce5 1299 switch (how) {
8d8ad9d7 1300 case SOCK_WAKE_WAITD:
ceb5d58b 1301 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1302 break;
1303 goto call_kill;
8d8ad9d7 1304 case SOCK_WAKE_SPACE:
ceb5d58b 1305 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1306 break;
7c7ab580 1307 fallthrough;
8d8ad9d7 1308 case SOCK_WAKE_IO:
89bddce5 1309call_kill:
43815482 1310 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1311 break;
8d8ad9d7 1312 case SOCK_WAKE_URG:
43815482 1313 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1314 }
ceb5d58b 1315
1da177e4
LT
1316 return 0;
1317}
c6d409cf 1318EXPORT_SYMBOL(sock_wake_async);
1da177e4 1319
8a3c245c
PT
1320/**
1321 * __sock_create - creates a socket
1322 * @net: net namespace
1323 * @family: protocol family (AF_INET, ...)
1324 * @type: communication type (SOCK_STREAM, ...)
1325 * @protocol: protocol (0, ...)
1326 * @res: new socket
1327 * @kern: boolean for kernel space sockets
1328 *
1329 * Creates a new socket and assigns it to @res, passing through LSM.
1330 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1331 * be set to true if the socket resides in kernel space.
1332 * This function internally uses GFP_KERNEL.
1333 */
1334
721db93a 1335int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1336 struct socket **res, int kern)
1da177e4
LT
1337{
1338 int err;
1339 struct socket *sock;
55737fda 1340 const struct net_proto_family *pf;
1da177e4
LT
1341
1342 /*
89bddce5 1343 * Check protocol is in range
1da177e4
LT
1344 */
1345 if (family < 0 || family >= NPROTO)
1346 return -EAFNOSUPPORT;
1347 if (type < 0 || type >= SOCK_MAX)
1348 return -EINVAL;
1349
1350 /* Compatibility.
1351
1352 This uglymoron is moved from INET layer to here to avoid
1353 deadlock in module load.
1354 */
1355 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1356 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1357 current->comm);
1da177e4
LT
1358 family = PF_PACKET;
1359 }
1360
1361 err = security_socket_create(family, type, protocol, kern);
1362 if (err)
1363 return err;
89bddce5 1364
55737fda
SH
1365 /*
1366 * Allocate the socket and allow the family to set things up. if
1367 * the protocol is 0, the family is instructed to select an appropriate
1368 * default.
1369 */
1370 sock = sock_alloc();
1371 if (!sock) {
e87cc472 1372 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1373 return -ENFILE; /* Not exactly a match, but its the
1374 closest posix thing */
1375 }
1376
1377 sock->type = type;
1378
95a5afca 1379#ifdef CONFIG_MODULES
89bddce5
SH
1380 /* Attempt to load a protocol module if the find failed.
1381 *
1382 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1383 * requested real, full-featured networking support upon configuration.
1384 * Otherwise module support will break!
1385 */
190683a9 1386 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1387 request_module("net-pf-%d", family);
1da177e4
LT
1388#endif
1389
55737fda
SH
1390 rcu_read_lock();
1391 pf = rcu_dereference(net_families[family]);
1392 err = -EAFNOSUPPORT;
1393 if (!pf)
1394 goto out_release;
1da177e4
LT
1395
1396 /*
1397 * We will call the ->create function, that possibly is in a loadable
1398 * module, so we have to bump that loadable module refcnt first.
1399 */
55737fda 1400 if (!try_module_get(pf->owner))
1da177e4
LT
1401 goto out_release;
1402
55737fda
SH
1403 /* Now protected by module ref count */
1404 rcu_read_unlock();
1405
3f378b68 1406 err = pf->create(net, sock, protocol, kern);
55737fda 1407 if (err < 0)
1da177e4 1408 goto out_module_put;
a79af59e 1409
1da177e4
LT
1410 /*
1411 * Now to bump the refcnt of the [loadable] module that owns this
1412 * socket at sock_release time we decrement its refcnt.
1413 */
55737fda
SH
1414 if (!try_module_get(sock->ops->owner))
1415 goto out_module_busy;
1416
1da177e4
LT
1417 /*
1418 * Now that we're done with the ->create function, the [loadable]
1419 * module can have its refcnt decremented
1420 */
55737fda 1421 module_put(pf->owner);
7420ed23
VY
1422 err = security_socket_post_create(sock, family, type, protocol, kern);
1423 if (err)
3b185525 1424 goto out_sock_release;
55737fda 1425 *res = sock;
1da177e4 1426
55737fda
SH
1427 return 0;
1428
1429out_module_busy:
1430 err = -EAFNOSUPPORT;
1da177e4 1431out_module_put:
55737fda
SH
1432 sock->ops = NULL;
1433 module_put(pf->owner);
1434out_sock_release:
1da177e4 1435 sock_release(sock);
55737fda
SH
1436 return err;
1437
1438out_release:
1439 rcu_read_unlock();
1440 goto out_sock_release;
1da177e4 1441}
721db93a 1442EXPORT_SYMBOL(__sock_create);
1da177e4 1443
8a3c245c
PT
1444/**
1445 * sock_create - creates a socket
1446 * @family: protocol family (AF_INET, ...)
1447 * @type: communication type (SOCK_STREAM, ...)
1448 * @protocol: protocol (0, ...)
1449 * @res: new socket
1450 *
1451 * A wrapper around __sock_create().
1452 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1453 */
1454
1da177e4
LT
1455int sock_create(int family, int type, int protocol, struct socket **res)
1456{
1b8d7ae4 1457 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1458}
c6d409cf 1459EXPORT_SYMBOL(sock_create);
1da177e4 1460
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	Calls __sock_create() in @net with @kern set to 1.
 *	Returns 0 or an error.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1478
9d6a15c3 1479int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1480{
1481 int retval;
1482 struct socket *sock;
a677a039
UD
1483 int flags;
1484
e38b36f3
UD
1485 /* Check the SOCK_* constants for consistency. */
1486 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1487 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1488 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1489 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1490
a677a039 1491 flags = type & ~SOCK_TYPE_MASK;
77d27200 1492 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1493 return -EINVAL;
1494 type &= SOCK_TYPE_MASK;
1da177e4 1495
aaca0bdc
UD
1496 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1497 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1498
1da177e4
LT
1499 retval = sock_create(family, type, protocol, &sock);
1500 if (retval < 0)
8e1611e2 1501 return retval;
1da177e4 1502
8e1611e2 1503 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1504}
1505
9d6a15c3
DB
1506SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1507{
1508 return __sys_socket(family, type, protocol);
1509}
1510
1da177e4
LT
1511/*
1512 * Create a pair of connected sockets.
1513 */
1514
6debc8d8 1515int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1516{
1517 struct socket *sock1, *sock2;
1518 int fd1, fd2, err;
db349509 1519 struct file *newfile1, *newfile2;
a677a039
UD
1520 int flags;
1521
1522 flags = type & ~SOCK_TYPE_MASK;
77d27200 1523 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1524 return -EINVAL;
1525 type &= SOCK_TYPE_MASK;
1da177e4 1526
aaca0bdc
UD
1527 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1528 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1529
016a266b
AV
1530 /*
1531 * reserve descriptors and make sure we won't fail
1532 * to return them to userland.
1533 */
1534 fd1 = get_unused_fd_flags(flags);
1535 if (unlikely(fd1 < 0))
1536 return fd1;
1537
1538 fd2 = get_unused_fd_flags(flags);
1539 if (unlikely(fd2 < 0)) {
1540 put_unused_fd(fd1);
1541 return fd2;
1542 }
1543
1544 err = put_user(fd1, &usockvec[0]);
1545 if (err)
1546 goto out;
1547
1548 err = put_user(fd2, &usockvec[1]);
1549 if (err)
1550 goto out;
1551
1da177e4
LT
1552 /*
1553 * Obtain the first socket and check if the underlying protocol
1554 * supports the socketpair call.
1555 */
1556
1557 err = sock_create(family, type, protocol, &sock1);
016a266b 1558 if (unlikely(err < 0))
1da177e4
LT
1559 goto out;
1560
1561 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1562 if (unlikely(err < 0)) {
1563 sock_release(sock1);
1564 goto out;
bf3c23d1 1565 }
d73aa286 1566
d47cd945
DH
1567 err = security_socket_socketpair(sock1, sock2);
1568 if (unlikely(err)) {
1569 sock_release(sock2);
1570 sock_release(sock1);
1571 goto out;
1572 }
1573
016a266b
AV
1574 err = sock1->ops->socketpair(sock1, sock2);
1575 if (unlikely(err < 0)) {
1576 sock_release(sock2);
1577 sock_release(sock1);
1578 goto out;
28407630
AV
1579 }
1580
aab174f0 1581 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1582 if (IS_ERR(newfile1)) {
28407630 1583 err = PTR_ERR(newfile1);
016a266b
AV
1584 sock_release(sock2);
1585 goto out;
28407630
AV
1586 }
1587
aab174f0 1588 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1589 if (IS_ERR(newfile2)) {
1590 err = PTR_ERR(newfile2);
016a266b
AV
1591 fput(newfile1);
1592 goto out;
db349509
AV
1593 }
1594
157cf649 1595 audit_fd_pair(fd1, fd2);
d73aa286 1596
db349509
AV
1597 fd_install(fd1, newfile1);
1598 fd_install(fd2, newfile2);
d73aa286 1599 return 0;
1da177e4 1600
016a266b 1601out:
d73aa286 1602 put_unused_fd(fd2);
d73aa286 1603 put_unused_fd(fd1);
1da177e4
LT
1604 return err;
1605}
1606
6debc8d8
DB
1607SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1608 int __user *, usockvec)
1609{
1610 return __sys_socketpair(family, type, protocol, usockvec);
1611}
1612
1da177e4
LT
1613/*
1614 * Bind a name to a socket. Nothing much to do here since it's
1615 * the protocol's responsibility to handle the local address.
1616 *
1617 * We move the socket address to kernel space before we call
1618 * the protocol layer (having also checked the address is ok).
1619 */
1620
a87d35d8 1621int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1622{
1623 struct socket *sock;
230b1839 1624 struct sockaddr_storage address;
6cb153ca 1625 int err, fput_needed;
1da177e4 1626
89bddce5 1627 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1628 if (sock) {
43db362d 1629 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1630 if (!err) {
89bddce5 1631 err = security_socket_bind(sock,
230b1839 1632 (struct sockaddr *)&address,
89bddce5 1633 addrlen);
6cb153ca
BL
1634 if (!err)
1635 err = sock->ops->bind(sock,
89bddce5 1636 (struct sockaddr *)
230b1839 1637 &address, addrlen);
1da177e4 1638 }
6cb153ca 1639 fput_light(sock->file, fput_needed);
89bddce5 1640 }
1da177e4
LT
1641 return err;
1642}
1643
a87d35d8
DB
1644SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1645{
1646 return __sys_bind(fd, umyaddr, addrlen);
1647}
1648
1da177e4
LT
1649/*
1650 * Perform a listen. Basically, we allow the protocol to do anything
1651 * necessary for a listen, and if that works, we mark the socket as
1652 * ready for listening.
1653 */
1654
25e290ee 1655int __sys_listen(int fd, int backlog)
1da177e4
LT
1656{
1657 struct socket *sock;
6cb153ca 1658 int err, fput_needed;
b8e1f9b5 1659 int somaxconn;
89bddce5
SH
1660
1661 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1662 if (sock) {
8efa6e93 1663 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1664 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1665 backlog = somaxconn;
1da177e4
LT
1666
1667 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1668 if (!err)
1669 err = sock->ops->listen(sock, backlog);
1da177e4 1670
6cb153ca 1671 fput_light(sock->file, fput_needed);
1da177e4
LT
1672 }
1673 return err;
1674}
1675
25e290ee
DB
1676SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1677{
1678 return __sys_listen(fd, backlog);
1679}
1680
de2ea4b6
JA
1681int __sys_accept4_file(struct file *file, unsigned file_flags,
1682 struct sockaddr __user *upeer_sockaddr,
09952e3e
JA
1683 int __user *upeer_addrlen, int flags,
1684 unsigned long nofile)
1da177e4
LT
1685{
1686 struct socket *sock, *newsock;
39d8c1b6 1687 struct file *newfile;
de2ea4b6 1688 int err, len, newfd;
230b1839 1689 struct sockaddr_storage address;
1da177e4 1690
77d27200 1691 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1692 return -EINVAL;
1693
1694 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1695 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1696
dba4a925
FR
1697 sock = sock_from_file(file);
1698 if (!sock) {
1699 err = -ENOTSOCK;
1da177e4 1700 goto out;
dba4a925 1701 }
1da177e4
LT
1702
1703 err = -ENFILE;
c6d409cf
ED
1704 newsock = sock_alloc();
1705 if (!newsock)
de2ea4b6 1706 goto out;
1da177e4
LT
1707
1708 newsock->type = sock->type;
1709 newsock->ops = sock->ops;
1710
1da177e4
LT
1711 /*
1712 * We don't need try_module_get here, as the listening socket (sock)
1713 * has the protocol module (sock->ops->owner) held.
1714 */
1715 __module_get(newsock->ops->owner);
1716
09952e3e 1717 newfd = __get_unused_fd_flags(flags, nofile);
39d8c1b6
DM
1718 if (unlikely(newfd < 0)) {
1719 err = newfd;
9a1875e6 1720 sock_release(newsock);
de2ea4b6 1721 goto out;
39d8c1b6 1722 }
aab174f0 1723 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1724 if (IS_ERR(newfile)) {
28407630
AV
1725 err = PTR_ERR(newfile);
1726 put_unused_fd(newfd);
de2ea4b6 1727 goto out;
28407630 1728 }
39d8c1b6 1729
a79af59e
FF
1730 err = security_socket_accept(sock, newsock);
1731 if (err)
39d8c1b6 1732 goto out_fd;
a79af59e 1733
de2ea4b6
JA
1734 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1735 false);
1da177e4 1736 if (err < 0)
39d8c1b6 1737 goto out_fd;
1da177e4
LT
1738
1739 if (upeer_sockaddr) {
9b2c45d4
DV
1740 len = newsock->ops->getname(newsock,
1741 (struct sockaddr *)&address, 2);
1742 if (len < 0) {
1da177e4 1743 err = -ECONNABORTED;
39d8c1b6 1744 goto out_fd;
1da177e4 1745 }
43db362d 1746 err = move_addr_to_user(&address,
230b1839 1747 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1748 if (err < 0)
39d8c1b6 1749 goto out_fd;
1da177e4
LT
1750 }
1751
1752 /* File flags are not inherited via accept() unlike another OSes. */
1753
39d8c1b6
DM
1754 fd_install(newfd, newfile);
1755 err = newfd;
1da177e4
LT
1756out:
1757 return err;
39d8c1b6 1758out_fd:
9606a216 1759 fput(newfile);
39d8c1b6 1760 put_unused_fd(newfd);
de2ea4b6
JA
1761 goto out;
1762
1763}
1764
1765/*
1766 * For accept, we attempt to create a new socket, set up the link
1767 * with the client, wake up the client, then return the new
1768 * connected fd. We collect the address of the connector in kernel
1769 * space and move it to user at the very end. This is unclean because
1770 * we open the socket then return an error.
1771 *
1772 * 1003.1g adds the ability to recvmsg() to query connection pending
1773 * status to recvmsg. We need to add that support in a way thats
1774 * clean when we restructure accept also.
1775 */
1776
1777int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1778 int __user *upeer_addrlen, int flags)
1779{
1780 int ret = -EBADF;
1781 struct fd f;
1782
1783 f = fdget(fd);
1784 if (f.file) {
1785 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
09952e3e
JA
1786 upeer_addrlen, flags,
1787 rlimit(RLIMIT_NOFILE));
6b07edeb 1788 fdput(f);
de2ea4b6
JA
1789 }
1790
1791 return ret;
1da177e4
LT
1792}
1793
4541e805
DB
1794SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1795 int __user *, upeer_addrlen, int, flags)
1796{
1797 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1798}
1799
20f37034
HC
1800SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1801 int __user *, upeer_addrlen)
aaca0bdc 1802{
4541e805 1803 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1804}
1805
1da177e4
LT
1806/*
1807 * Attempt to connect to a socket with the server address. The address
1808 * is in user space so we verify it is OK and move it to kernel space.
1809 *
1810 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1811 * break bindings
1812 *
1813 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1814 * other SEQPACKET protocols that take time to connect() as it doesn't
1815 * include the -EINPROGRESS status for such sockets.
1816 */
1817
f499a021 1818int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 1819 int addrlen, int file_flags)
1da177e4
LT
1820{
1821 struct socket *sock;
bd3ded31 1822 int err;
1da177e4 1823
dba4a925
FR
1824 sock = sock_from_file(file);
1825 if (!sock) {
1826 err = -ENOTSOCK;
1da177e4 1827 goto out;
dba4a925 1828 }
1da177e4 1829
89bddce5 1830 err =
f499a021 1831 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 1832 if (err)
bd3ded31 1833 goto out;
1da177e4 1834
f499a021 1835 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
bd3ded31 1836 sock->file->f_flags | file_flags);
1da177e4
LT
1837out:
1838 return err;
1839}
1840
bd3ded31
JA
1841int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1842{
1843 int ret = -EBADF;
1844 struct fd f;
1845
1846 f = fdget(fd);
1847 if (f.file) {
f499a021
JA
1848 struct sockaddr_storage address;
1849
1850 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1851 if (!ret)
1852 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 1853 fdput(f);
bd3ded31
JA
1854 }
1855
1856 return ret;
1857}
1858
1387c2c2
DB
1859SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1860 int, addrlen)
1861{
1862 return __sys_connect(fd, uservaddr, addrlen);
1863}
1864
1da177e4
LT
1865/*
1866 * Get the local address ('name') of a socket object. Move the obtained
1867 * name to user space.
1868 */
1869
8882a107
DB
1870int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1871 int __user *usockaddr_len)
1da177e4
LT
1872{
1873 struct socket *sock;
230b1839 1874 struct sockaddr_storage address;
9b2c45d4 1875 int err, fput_needed;
89bddce5 1876
6cb153ca 1877 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1878 if (!sock)
1879 goto out;
1880
1881 err = security_socket_getsockname(sock);
1882 if (err)
1883 goto out_put;
1884
9b2c45d4
DV
1885 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1886 if (err < 0)
1da177e4 1887 goto out_put;
9b2c45d4
DV
1888 /* "err" is actually length in this case */
1889 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1890
1891out_put:
6cb153ca 1892 fput_light(sock->file, fput_needed);
1da177e4
LT
1893out:
1894 return err;
1895}
1896
8882a107
DB
1897SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1898 int __user *, usockaddr_len)
1899{
1900 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1901}
1902
1da177e4
LT
1903/*
1904 * Get the remote address ('name') of a socket object. Move the obtained
1905 * name to user space.
1906 */
1907
b21c8f83
DB
1908int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1909 int __user *usockaddr_len)
1da177e4
LT
1910{
1911 struct socket *sock;
230b1839 1912 struct sockaddr_storage address;
9b2c45d4 1913 int err, fput_needed;
1da177e4 1914
89bddce5
SH
1915 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1916 if (sock != NULL) {
1da177e4
LT
1917 err = security_socket_getpeername(sock);
1918 if (err) {
6cb153ca 1919 fput_light(sock->file, fput_needed);
1da177e4
LT
1920 return err;
1921 }
1922
9b2c45d4
DV
1923 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1924 if (err >= 0)
1925 /* "err" is actually length in this case */
1926 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1927 usockaddr_len);
6cb153ca 1928 fput_light(sock->file, fput_needed);
1da177e4
LT
1929 }
1930 return err;
1931}
1932
b21c8f83
DB
1933SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1934 int __user *, usockaddr_len)
1935{
1936 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1937}
1938
1da177e4
LT
1939/*
1940 * Send a datagram to a given address. We move the address into kernel
1941 * space and check the user space data area is readable before invoking
1942 * the protocol.
1943 */
211b634b
DB
1944int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1945 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1946{
1947 struct socket *sock;
230b1839 1948 struct sockaddr_storage address;
1da177e4
LT
1949 int err;
1950 struct msghdr msg;
1951 struct iovec iov;
6cb153ca 1952 int fput_needed;
6cb153ca 1953
602bd0e9
AV
1954 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1955 if (unlikely(err))
1956 return err;
de0fa95c
PE
1957 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1958 if (!sock)
4387ff75 1959 goto out;
6cb153ca 1960
89bddce5 1961 msg.msg_name = NULL;
89bddce5
SH
1962 msg.msg_control = NULL;
1963 msg.msg_controllen = 0;
1964 msg.msg_namelen = 0;
6cb153ca 1965 if (addr) {
43db362d 1966 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1967 if (err < 0)
1968 goto out_put;
230b1839 1969 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1970 msg.msg_namelen = addr_len;
1da177e4
LT
1971 }
1972 if (sock->file->f_flags & O_NONBLOCK)
1973 flags |= MSG_DONTWAIT;
1974 msg.msg_flags = flags;
d8725c86 1975 err = sock_sendmsg(sock, &msg);
1da177e4 1976
89bddce5 1977out_put:
de0fa95c 1978 fput_light(sock->file, fput_needed);
4387ff75 1979out:
1da177e4
LT
1980 return err;
1981}
1982
211b634b
DB
1983SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1984 unsigned int, flags, struct sockaddr __user *, addr,
1985 int, addr_len)
1986{
1987 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1988}
1989
1da177e4 1990/*
89bddce5 1991 * Send a datagram down a socket.
1da177e4
LT
1992 */
1993
3e0fa65f 1994SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1995 unsigned int, flags)
1da177e4 1996{
211b634b 1997 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1998}
1999
2000/*
89bddce5 2001 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2002 * sender. We verify the buffers are writable and if needed move the
2003 * sender address from kernel to user space.
2004 */
7a09e1eb
DB
2005int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2006 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2007{
2008 struct socket *sock;
2009 struct iovec iov;
2010 struct msghdr msg;
230b1839 2011 struct sockaddr_storage address;
89bddce5 2012 int err, err2;
6cb153ca
BL
2013 int fput_needed;
2014
602bd0e9
AV
2015 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2016 if (unlikely(err))
2017 return err;
de0fa95c 2018 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2019 if (!sock)
de0fa95c 2020 goto out;
1da177e4 2021
89bddce5
SH
2022 msg.msg_control = NULL;
2023 msg.msg_controllen = 0;
f3d33426
HFS
2024 /* Save some cycles and don't copy the address if not needed */
2025 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2026 /* We assume all kernel code knows the size of sockaddr_storage */
2027 msg.msg_namelen = 0;
130ed5d1 2028 msg.msg_iocb = NULL;
9f138fa6 2029 msg.msg_flags = 0;
1da177e4
LT
2030 if (sock->file->f_flags & O_NONBLOCK)
2031 flags |= MSG_DONTWAIT;
2da62906 2032 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2033
89bddce5 2034 if (err >= 0 && addr != NULL) {
43db362d 2035 err2 = move_addr_to_user(&address,
230b1839 2036 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2037 if (err2 < 0)
2038 err = err2;
1da177e4 2039 }
de0fa95c
PE
2040
2041 fput_light(sock->file, fput_needed);
4387ff75 2042out:
1da177e4
LT
2043 return err;
2044}
2045
7a09e1eb
DB
2046SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2047 unsigned int, flags, struct sockaddr __user *, addr,
2048 int __user *, addr_len)
2049{
2050 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2051}
2052
1da177e4 2053/*
89bddce5 2054 * Receive a datagram from a socket.
1da177e4
LT
2055 */
2056
b7c0ddf5
JG
2057SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2058 unsigned int, flags)
1da177e4 2059{
7a09e1eb 2060 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2061}
2062
83f0c10b
FW
2063static bool sock_use_custom_sol_socket(const struct socket *sock)
2064{
2065 const struct sock *sk = sock->sk;
2066
2067 /* Use sock->ops->setsockopt() for MPTCP */
2068 return IS_ENABLED(CONFIG_MPTCP) &&
2069 sk->sk_protocol == IPPROTO_MPTCP &&
2070 sk->sk_type == SOCK_STREAM &&
2071 (sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
2072}
2073
1da177e4
LT
2074/*
2075 * Set a socket option. Because we don't know the option lengths we have
2076 * to pass the user mode parameter for the protocols to sort out.
2077 */
a7b75c5a 2078int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
55db9c0e 2079 int optlen)
1da177e4 2080{
519a8a6c 2081 sockptr_t optval = USER_SOCKPTR(user_optval);
0d01da6a 2082 char *kernel_optval = NULL;
6cb153ca 2083 int err, fput_needed;
1da177e4
LT
2084 struct socket *sock;
2085
2086 if (optlen < 0)
2087 return -EINVAL;
89bddce5
SH
2088
2089 sock = sockfd_lookup_light(fd, &err, &fput_needed);
4a367299
CH
2090 if (!sock)
2091 return err;
1da177e4 2092
4a367299
CH
2093 err = security_socket_setsockopt(sock, level, optname);
2094 if (err)
2095 goto out_put;
0d01da6a 2096
55db9c0e
CH
2097 if (!in_compat_syscall())
2098 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
a7b75c5a 2099 user_optval, &optlen,
55db9c0e 2100 &kernel_optval);
4a367299
CH
2101 if (err < 0)
2102 goto out_put;
2103 if (err > 0) {
2104 err = 0;
2105 goto out_put;
2106 }
0d01da6a 2107
a7b75c5a
CH
2108 if (kernel_optval)
2109 optval = KERNEL_SOCKPTR(kernel_optval);
4a367299 2110 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2111 err = sock_setsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2112 else if (unlikely(!sock->ops->setsockopt))
2113 err = -EOPNOTSUPP;
4a367299
CH
2114 else
2115 err = sock->ops->setsockopt(sock, level, optname, optval,
89bddce5 2116 optlen);
a7b75c5a 2117 kfree(kernel_optval);
4a367299
CH
2118out_put:
2119 fput_light(sock->file, fput_needed);
1da177e4
LT
2120 return err;
2121}
2122
cc36dca0
DB
2123SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2124 char __user *, optval, int, optlen)
2125{
2126 return __sys_setsockopt(fd, level, optname, optval, optlen);
2127}
2128
1da177e4
LT
2129/*
2130 * Get a socket option. Because we don't know the option lengths we have
2131 * to pass a user mode parameter for the protocols to sort out.
2132 */
55db9c0e
CH
2133int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2134 int __user *optlen)
1da177e4 2135{
6cb153ca 2136 int err, fput_needed;
1da177e4 2137 struct socket *sock;
0d01da6a 2138 int max_optlen;
1da177e4 2139
89bddce5 2140 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2141 if (!sock)
2142 return err;
2143
2144 err = security_socket_getsockopt(sock, level, optname);
2145 if (err)
2146 goto out_put;
1da177e4 2147
55db9c0e
CH
2148 if (!in_compat_syscall())
2149 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2150
d8a9b38f
CH
2151 if (level == SOL_SOCKET)
2152 err = sock_getsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2153 else if (unlikely(!sock->ops->getsockopt))
2154 err = -EOPNOTSUPP;
d8a9b38f
CH
2155 else
2156 err = sock->ops->getsockopt(sock, level, optname, optval,
89bddce5 2157 optlen);
0d01da6a 2158
55db9c0e
CH
2159 if (!in_compat_syscall())
2160 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2161 optval, optlen, max_optlen,
2162 err);
6cb153ca 2163out_put:
d8a9b38f 2164 fput_light(sock->file, fput_needed);
1da177e4
LT
2165 return err;
2166}
2167
13a2d70e
DB
2168SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2169 char __user *, optval, int __user *, optlen)
2170{
2171 return __sys_getsockopt(fd, level, optname, optval, optlen);
2172}
2173
1da177e4
LT
2174/*
2175 * Shutdown a socket.
2176 */
2177
005a1aea 2178int __sys_shutdown(int fd, int how)
1da177e4 2179{
6cb153ca 2180 int err, fput_needed;
1da177e4
LT
2181 struct socket *sock;
2182
89bddce5
SH
2183 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2184 if (sock != NULL) {
1da177e4 2185 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2186 if (!err)
2187 err = sock->ops->shutdown(sock, how);
2188 fput_light(sock->file, fput_needed);
1da177e4
LT
2189 }
2190 return err;
2191}
2192
005a1aea
DB
2193SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2194{
2195 return __sys_shutdown(fd, how);
2196}
2197
/*
 * Helpers that yield the address of a message-header field in either the
 * native or the compat (32-bit) layout, for fields which are the same type
 * (int / unsigned) on our platforms.
 *
 * The expansion site must have a variable named `flags` and, next to the
 * native pointer `msg`, a compat pointer named `msg_compat` in scope.
 */
#define COMPAT_MSG(msg, member)	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2204
c71d8ebe
TH
2205struct used_address {
2206 struct sockaddr_storage name;
2207 unsigned int name_len;
2208};
2209
0a384abf
JA
2210int __copy_msghdr_from_user(struct msghdr *kmsg,
2211 struct user_msghdr __user *umsg,
2212 struct sockaddr __user **save_addr,
2213 struct iovec __user **uiov, size_t *nsegs)
1661bf36 2214{
ffb07550 2215 struct user_msghdr msg;
08adb7da
AV
2216 ssize_t err;
2217
ffb07550 2218 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2219 return -EFAULT;
dbb490b9 2220
1f466e1f
CH
2221 kmsg->msg_control_is_user = true;
2222 kmsg->msg_control_user = msg.msg_control;
ffb07550
AV
2223 kmsg->msg_controllen = msg.msg_controllen;
2224 kmsg->msg_flags = msg.msg_flags;
2225
2226 kmsg->msg_namelen = msg.msg_namelen;
2227 if (!msg.msg_name)
6a2a2b3a
AS
2228 kmsg->msg_namelen = 0;
2229
dbb490b9
ML
2230 if (kmsg->msg_namelen < 0)
2231 return -EINVAL;
2232
1661bf36 2233 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2234 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2235
2236 if (save_addr)
ffb07550 2237 *save_addr = msg.msg_name;
08adb7da 2238
ffb07550 2239 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2240 if (!save_addr) {
864d9664
PA
2241 err = move_addr_to_kernel(msg.msg_name,
2242 kmsg->msg_namelen,
08adb7da
AV
2243 kmsg->msg_name);
2244 if (err < 0)
2245 return err;
2246 }
2247 } else {
2248 kmsg->msg_name = NULL;
2249 kmsg->msg_namelen = 0;
2250 }
2251
ffb07550 2252 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2253 return -EMSGSIZE;
2254
0345f931 2255 kmsg->msg_iocb = NULL;
0a384abf
JA
2256 *uiov = msg.msg_iov;
2257 *nsegs = msg.msg_iovlen;
2258 return 0;
2259}
2260
2261static int copy_msghdr_from_user(struct msghdr *kmsg,
2262 struct user_msghdr __user *umsg,
2263 struct sockaddr __user **save_addr,
2264 struct iovec **iov)
2265{
2266 struct user_msghdr msg;
2267 ssize_t err;
2268
2269 err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
2270 &msg.msg_iovlen);
2271 if (err)
2272 return err;
0345f931 2273
87e5e6da 2274 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2275 msg.msg_iov, msg.msg_iovlen,
da184284 2276 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2277 return err < 0 ? err : 0;
1661bf36
DC
2278}
2279
4257c8ca
JA
2280static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2281 unsigned int flags, struct used_address *used_address,
2282 unsigned int allowed_msghdr_flags)
1da177e4 2283{
b9d717a7 2284 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2285 __aligned(sizeof(__kernel_size_t));
89bddce5 2286 /* 20 is size of ipv6_pktinfo */
1da177e4 2287 unsigned char *ctl_buf = ctl;
d8725c86 2288 int ctl_len;
08adb7da 2289 ssize_t err;
89bddce5 2290
1da177e4
LT
2291 err = -ENOBUFS;
2292
228e548e 2293 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2294 goto out;
28a94d8f 2295 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2296 ctl_len = msg_sys->msg_controllen;
1da177e4 2297 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2298 err =
228e548e 2299 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2300 sizeof(ctl));
1da177e4 2301 if (err)
4257c8ca 2302 goto out;
228e548e
AB
2303 ctl_buf = msg_sys->msg_control;
2304 ctl_len = msg_sys->msg_controllen;
1da177e4 2305 } else if (ctl_len) {
ac4340fc
DM
2306 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2307 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2308 if (ctl_len > sizeof(ctl)) {
1da177e4 2309 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2310 if (ctl_buf == NULL)
4257c8ca 2311 goto out;
1da177e4
LT
2312 }
2313 err = -EFAULT;
1f466e1f 2314 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2315 goto out_freectl;
228e548e 2316 msg_sys->msg_control = ctl_buf;
1f466e1f 2317 msg_sys->msg_control_is_user = false;
1da177e4 2318 }
228e548e 2319 msg_sys->msg_flags = flags;
1da177e4
LT
2320
2321 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2322 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2323 /*
2324 * If this is sendmmsg() and current destination address is same as
2325 * previously succeeded address, omit asking LSM's decision.
2326 * used_address->name_len is initialized to UINT_MAX so that the first
2327 * destination address never matches.
2328 */
bc909d9d
MD
2329 if (used_address && msg_sys->msg_name &&
2330 used_address->name_len == msg_sys->msg_namelen &&
2331 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2332 used_address->name_len)) {
d8725c86 2333 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2334 goto out_freectl;
2335 }
d8725c86 2336 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2337 /*
2338 * If this is sendmmsg() and sending to current destination address was
2339 * successful, remember it.
2340 */
2341 if (used_address && err >= 0) {
2342 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2343 if (msg_sys->msg_name)
2344 memcpy(&used_address->name, msg_sys->msg_name,
2345 used_address->name_len);
c71d8ebe 2346 }
1da177e4
LT
2347
2348out_freectl:
89bddce5 2349 if (ctl_buf != ctl)
1da177e4 2350 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2351out:
2352 return err;
2353}
2354
03b1230c
JA
2355int sendmsg_copy_msghdr(struct msghdr *msg,
2356 struct user_msghdr __user *umsg, unsigned flags,
2357 struct iovec **iov)
4257c8ca
JA
2358{
2359 int err;
2360
2361 if (flags & MSG_CMSG_COMPAT) {
2362 struct compat_msghdr __user *msg_compat;
2363
2364 msg_compat = (struct compat_msghdr __user *) umsg;
2365 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2366 } else {
2367 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2368 }
2369 if (err < 0)
2370 return err;
2371
2372 return 0;
2373}
2374
2375static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2376 struct msghdr *msg_sys, unsigned int flags,
2377 struct used_address *used_address,
2378 unsigned int allowed_msghdr_flags)
2379{
2380 struct sockaddr_storage address;
2381 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2382 ssize_t err;
2383
2384 msg_sys->msg_name = &address;
2385
2386 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2387 if (err < 0)
2388 return err;
2389
2390 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2391 allowed_msghdr_flags);
da184284 2392 kfree(iov);
228e548e
AB
2393 return err;
2394}
2395
2396/*
2397 * BSD sendmsg interface
2398 */
03b1230c 2399long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2400 unsigned int flags)
2401{
d69e0779 2402 /* disallow ancillary data requests from this path */
03b1230c
JA
2403 if (msg->msg_control || msg->msg_controllen)
2404 return -EINVAL;
d69e0779 2405
03b1230c 2406 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2407}
228e548e 2408
e1834a32
DB
2409long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2410 bool forbid_cmsg_compat)
228e548e
AB
2411{
2412 int fput_needed, err;
2413 struct msghdr msg_sys;
1be374a0
AL
2414 struct socket *sock;
2415
e1834a32
DB
2416 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2417 return -EINVAL;
2418
1be374a0 2419 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2420 if (!sock)
2421 goto out;
2422
28a94d8f 2423 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2424
6cb153ca 2425 fput_light(sock->file, fput_needed);
89bddce5 2426out:
1da177e4
LT
2427 return err;
2428}
2429
666547ff 2430SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2431{
e1834a32 2432 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2433}
2434
228e548e
AB
2435/*
2436 * Linux sendmmsg interface
2437 */
2438
2439int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2440 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2441{
2442 int fput_needed, err, datagrams;
2443 struct socket *sock;
2444 struct mmsghdr __user *entry;
2445 struct compat_mmsghdr __user *compat_entry;
2446 struct msghdr msg_sys;
c71d8ebe 2447 struct used_address used_address;
f092276d 2448 unsigned int oflags = flags;
228e548e 2449
e1834a32
DB
2450 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2451 return -EINVAL;
2452
98382f41
AB
2453 if (vlen > UIO_MAXIOV)
2454 vlen = UIO_MAXIOV;
228e548e
AB
2455
2456 datagrams = 0;
2457
2458 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2459 if (!sock)
2460 return err;
2461
c71d8ebe 2462 used_address.name_len = UINT_MAX;
228e548e
AB
2463 entry = mmsg;
2464 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2465 err = 0;
f092276d 2466 flags |= MSG_BATCH;
228e548e
AB
2467
2468 while (datagrams < vlen) {
f092276d
TH
2469 if (datagrams == vlen - 1)
2470 flags = oflags;
2471
228e548e 2472 if (MSG_CMSG_COMPAT & flags) {
666547ff 2473 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2474 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2475 if (err < 0)
2476 break;
2477 err = __put_user(err, &compat_entry->msg_len);
2478 ++compat_entry;
2479 } else {
a7526eb5 2480 err = ___sys_sendmsg(sock,
666547ff 2481 (struct user_msghdr __user *)entry,
28a94d8f 2482 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2483 if (err < 0)
2484 break;
2485 err = put_user(err, &entry->msg_len);
2486 ++entry;
2487 }
2488
2489 if (err)
2490 break;
2491 ++datagrams;
3023898b
SHY
2492 if (msg_data_left(&msg_sys))
2493 break;
a78cb84c 2494 cond_resched();
228e548e
AB
2495 }
2496
228e548e
AB
2497 fput_light(sock->file, fput_needed);
2498
728ffb86
AB
2499 /* We only return an error if no datagrams were able to be sent */
2500 if (datagrams != 0)
228e548e
AB
2501 return datagrams;
2502
228e548e
AB
2503 return err;
2504}
2505
2506SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2507 unsigned int, vlen, unsigned int, flags)
2508{
e1834a32 2509 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2510}
2511
03b1230c
JA
2512int recvmsg_copy_msghdr(struct msghdr *msg,
2513 struct user_msghdr __user *umsg, unsigned flags,
2514 struct sockaddr __user **uaddr,
2515 struct iovec **iov)
1da177e4 2516{
08adb7da 2517 ssize_t err;
1da177e4 2518
4257c8ca
JA
2519 if (MSG_CMSG_COMPAT & flags) {
2520 struct compat_msghdr __user *msg_compat;
1da177e4 2521
4257c8ca
JA
2522 msg_compat = (struct compat_msghdr __user *) umsg;
2523 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2524 } else {
2525 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2526 }
1da177e4 2527 if (err < 0)
da184284 2528 return err;
1da177e4 2529
4257c8ca
JA
2530 return 0;
2531}
2532
2533static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2534 struct user_msghdr __user *msg,
2535 struct sockaddr __user *uaddr,
2536 unsigned int flags, int nosec)
2537{
2538 struct compat_msghdr __user *msg_compat =
2539 (struct compat_msghdr __user *) msg;
2540 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2541 struct sockaddr_storage addr;
2542 unsigned long cmsg_ptr;
2543 int len;
2544 ssize_t err;
2545
2546 msg_sys->msg_name = &addr;
a2e27255
ACM
2547 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2548 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2549
f3d33426
HFS
2550 /* We assume all kernel code knows the size of sockaddr_storage */
2551 msg_sys->msg_namelen = 0;
2552
1da177e4
LT
2553 if (sock->file->f_flags & O_NONBLOCK)
2554 flags |= MSG_DONTWAIT;
1af66221
ED
2555
2556 if (unlikely(nosec))
2557 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2558 else
2559 err = sock_recvmsg(sock, msg_sys, flags);
2560
1da177e4 2561 if (err < 0)
4257c8ca 2562 goto out;
1da177e4
LT
2563 len = err;
2564
2565 if (uaddr != NULL) {
43db362d 2566 err = move_addr_to_user(&addr,
a2e27255 2567 msg_sys->msg_namelen, uaddr,
89bddce5 2568 uaddr_len);
1da177e4 2569 if (err < 0)
4257c8ca 2570 goto out;
1da177e4 2571 }
a2e27255 2572 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2573 COMPAT_FLAGS(msg));
1da177e4 2574 if (err)
4257c8ca 2575 goto out;
1da177e4 2576 if (MSG_CMSG_COMPAT & flags)
a2e27255 2577 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2578 &msg_compat->msg_controllen);
2579 else
a2e27255 2580 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2581 &msg->msg_controllen);
2582 if (err)
4257c8ca 2583 goto out;
1da177e4 2584 err = len;
4257c8ca
JA
2585out:
2586 return err;
2587}
2588
2589static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2590 struct msghdr *msg_sys, unsigned int flags, int nosec)
2591{
2592 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2593 /* user mode address pointers */
2594 struct sockaddr __user *uaddr;
2595 ssize_t err;
2596
2597 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2598 if (err < 0)
2599 return err;
1da177e4 2600
4257c8ca 2601 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2602 kfree(iov);
a2e27255
ACM
2603 return err;
2604}
2605
2606/*
2607 * BSD recvmsg interface
2608 */
2609
03b1230c
JA
2610long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2611 struct user_msghdr __user *umsg,
2612 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2613{
583bbf06
LH
2614 if (msg->msg_control || msg->msg_controllen) {
2615 /* disallow ancillary data reqs unless cmsg is plain data */
2616 if (!(sock->ops->flags & PROTO_CMSG_DATA_ONLY))
2617 return -EINVAL;
2618 }
aa1fa28f 2619
03b1230c 2620 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2621}
2622
e1834a32
DB
2623long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2624 bool forbid_cmsg_compat)
a2e27255
ACM
2625{
2626 int fput_needed, err;
2627 struct msghdr msg_sys;
1be374a0
AL
2628 struct socket *sock;
2629
e1834a32
DB
2630 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2631 return -EINVAL;
2632
1be374a0 2633 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2634 if (!sock)
2635 goto out;
2636
a7526eb5 2637 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2638
6cb153ca 2639 fput_light(sock->file, fput_needed);
1da177e4
LT
2640out:
2641 return err;
2642}
2643
666547ff 2644SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2645 unsigned int, flags)
2646{
e1834a32 2647 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2648}
2649
a2e27255
ACM
2650/*
2651 * Linux recvmmsg interface
2652 */
2653
e11d4284
AB
2654static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2655 unsigned int vlen, unsigned int flags,
2656 struct timespec64 *timeout)
a2e27255
ACM
2657{
2658 int fput_needed, err, datagrams;
2659 struct socket *sock;
2660 struct mmsghdr __user *entry;
d7256d0e 2661 struct compat_mmsghdr __user *compat_entry;
a2e27255 2662 struct msghdr msg_sys;
766b9f92
DD
2663 struct timespec64 end_time;
2664 struct timespec64 timeout64;
a2e27255
ACM
2665
2666 if (timeout &&
2667 poll_select_set_timeout(&end_time, timeout->tv_sec,
2668 timeout->tv_nsec))
2669 return -EINVAL;
2670
2671 datagrams = 0;
2672
2673 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2674 if (!sock)
2675 return err;
2676
7797dc41
SHY
2677 if (likely(!(flags & MSG_ERRQUEUE))) {
2678 err = sock_error(sock->sk);
2679 if (err) {
2680 datagrams = err;
2681 goto out_put;
2682 }
e623a9e9 2683 }
a2e27255
ACM
2684
2685 entry = mmsg;
d7256d0e 2686 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2687
2688 while (datagrams < vlen) {
2689 /*
2690 * No need to ask LSM for more than the first datagram.
2691 */
d7256d0e 2692 if (MSG_CMSG_COMPAT & flags) {
666547ff 2693 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2694 &msg_sys, flags & ~MSG_WAITFORONE,
2695 datagrams);
d7256d0e
JMG
2696 if (err < 0)
2697 break;
2698 err = __put_user(err, &compat_entry->msg_len);
2699 ++compat_entry;
2700 } else {
a7526eb5 2701 err = ___sys_recvmsg(sock,
666547ff 2702 (struct user_msghdr __user *)entry,
a7526eb5
AL
2703 &msg_sys, flags & ~MSG_WAITFORONE,
2704 datagrams);
d7256d0e
JMG
2705 if (err < 0)
2706 break;
2707 err = put_user(err, &entry->msg_len);
2708 ++entry;
2709 }
2710
a2e27255
ACM
2711 if (err)
2712 break;
a2e27255
ACM
2713 ++datagrams;
2714
71c5c159
BB
2715 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2716 if (flags & MSG_WAITFORONE)
2717 flags |= MSG_DONTWAIT;
2718
a2e27255 2719 if (timeout) {
766b9f92 2720 ktime_get_ts64(&timeout64);
c2e6c856 2721 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2722 if (timeout->tv_sec < 0) {
2723 timeout->tv_sec = timeout->tv_nsec = 0;
2724 break;
2725 }
2726
2727 /* Timeout, return less than vlen datagrams */
2728 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2729 break;
2730 }
2731
2732 /* Out of band data, return right away */
2733 if (msg_sys.msg_flags & MSG_OOB)
2734 break;
a78cb84c 2735 cond_resched();
a2e27255
ACM
2736 }
2737
a2e27255 2738 if (err == 0)
34b88a68
ACM
2739 goto out_put;
2740
2741 if (datagrams == 0) {
2742 datagrams = err;
2743 goto out_put;
2744 }
a2e27255 2745
34b88a68
ACM
2746 /*
2747 * We may return less entries than requested (vlen) if the
2748 * sock is non block and there aren't enough datagrams...
2749 */
2750 if (err != -EAGAIN) {
a2e27255 2751 /*
34b88a68
ACM
2752 * ... or if recvmsg returns an error after we
2753 * received some datagrams, where we record the
2754 * error to return on the next call or if the
2755 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2756 */
34b88a68 2757 sock->sk->sk_err = -err;
a2e27255 2758 }
34b88a68
ACM
2759out_put:
2760 fput_light(sock->file, fput_needed);
a2e27255 2761
34b88a68 2762 return datagrams;
a2e27255
ACM
2763}
2764
e11d4284
AB
2765int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2766 unsigned int vlen, unsigned int flags,
2767 struct __kernel_timespec __user *timeout,
2768 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2769{
2770 int datagrams;
c2e6c856 2771 struct timespec64 timeout_sys;
a2e27255 2772
e11d4284
AB
2773 if (timeout && get_timespec64(&timeout_sys, timeout))
2774 return -EFAULT;
a2e27255 2775
e11d4284 2776 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2777 return -EFAULT;
2778
e11d4284
AB
2779 if (!timeout && !timeout32)
2780 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2781
2782 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2783
e11d4284
AB
2784 if (datagrams <= 0)
2785 return datagrams;
2786
2787 if (timeout && put_timespec64(&timeout_sys, timeout))
2788 datagrams = -EFAULT;
2789
2790 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2791 datagrams = -EFAULT;
2792
2793 return datagrams;
2794}
2795
1255e269
DB
2796SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2797 unsigned int, vlen, unsigned int, flags,
c2e6c856 2798 struct __kernel_timespec __user *, timeout)
1255e269 2799{
e11d4284
AB
2800 if (flags & MSG_CMSG_COMPAT)
2801 return -EINVAL;
2802
2803 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2804}
2805
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg(2) variant taking an old_timespec32 timeout. MSG_CMSG_COMPAT in
 * @flags is rejected with -EINVAL.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (!(flags & MSG_CMSG_COMPAT))
		return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);

	return -EINVAL;
}
#endif
1255e269 2817
a2e27255 2818#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call number.
 * Entry names follow the SYS_* call numbers from <linux/net.h>.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0: unused */
	AL(3),	/*  1: SYS_SOCKET */
	AL(3),	/*  2: SYS_BIND */
	AL(3),	/*  3: SYS_CONNECT */
	AL(2),	/*  4: SYS_LISTEN */
	AL(3),	/*  5: SYS_ACCEPT */
	AL(3),	/*  6: SYS_GETSOCKNAME */
	AL(3),	/*  7: SYS_GETPEERNAME */
	AL(4),	/*  8: SYS_SOCKETPAIR */
	AL(4),	/*  9: SYS_SEND */
	AL(4),	/* 10: SYS_RECV */
	AL(6),	/* 11: SYS_SENDTO */
	AL(6),	/* 12: SYS_RECVFROM */
	AL(2),	/* 13: SYS_SHUTDOWN */
	AL(5),	/* 14: SYS_SETSOCKOPT */
	AL(5),	/* 15: SYS_GETSOCKOPT */
	AL(3),	/* 16: SYS_SENDMSG */
	AL(3),	/* 17: SYS_RECVMSG */
	AL(4),	/* 18: SYS_ACCEPT4 */
	AL(5),	/* 19: SYS_RECVMMSG */
	AL(4)	/* 20: SYS_SENDMMSG */
};

#undef AL
2829
2830/*
89bddce5 2831 * System call vectors.
1da177e4
LT
2832 *
2833 * Argument checking cleaned up. Saved 20% in size.
2834 * This function doesn't need to set the kernel lock because
89bddce5 2835 * it is set by the callees.
1da177e4
LT
2836 */
2837
3e0fa65f 2838SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2839{
2950fa9d 2840 unsigned long a[AUDITSC_ARGS];
89bddce5 2841 unsigned long a0, a1;
1da177e4 2842 int err;
47379052 2843 unsigned int len;
1da177e4 2844
228e548e 2845 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2846 return -EINVAL;
c8e8cd57 2847 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2848
47379052
AV
2849 len = nargs[call];
2850 if (len > sizeof(a))
2851 return -EINVAL;
2852
1da177e4 2853 /* copy_from_user should be SMP safe. */
47379052 2854 if (copy_from_user(a, args, len))
1da177e4 2855 return -EFAULT;
3ec3b2fb 2856
2950fa9d
CG
2857 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2858 if (err)
2859 return err;
3ec3b2fb 2860
89bddce5
SH
2861 a0 = a[0];
2862 a1 = a[1];
2863
2864 switch (call) {
2865 case SYS_SOCKET:
9d6a15c3 2866 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2867 break;
2868 case SYS_BIND:
a87d35d8 2869 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2870 break;
2871 case SYS_CONNECT:
1387c2c2 2872 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2873 break;
2874 case SYS_LISTEN:
25e290ee 2875 err = __sys_listen(a0, a1);
89bddce5
SH
2876 break;
2877 case SYS_ACCEPT:
4541e805
DB
2878 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2879 (int __user *)a[2], 0);
89bddce5
SH
2880 break;
2881 case SYS_GETSOCKNAME:
2882 err =
8882a107
DB
2883 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2884 (int __user *)a[2]);
89bddce5
SH
2885 break;
2886 case SYS_GETPEERNAME:
2887 err =
b21c8f83
DB
2888 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2889 (int __user *)a[2]);
89bddce5
SH
2890 break;
2891 case SYS_SOCKETPAIR:
6debc8d8 2892 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2893 break;
2894 case SYS_SEND:
f3bf896b
DB
2895 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2896 NULL, 0);
89bddce5
SH
2897 break;
2898 case SYS_SENDTO:
211b634b
DB
2899 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2900 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2901 break;
2902 case SYS_RECV:
d27e9afc
DB
2903 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2904 NULL, NULL);
89bddce5
SH
2905 break;
2906 case SYS_RECVFROM:
7a09e1eb
DB
2907 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2908 (struct sockaddr __user *)a[4],
2909 (int __user *)a[5]);
89bddce5
SH
2910 break;
2911 case SYS_SHUTDOWN:
005a1aea 2912 err = __sys_shutdown(a0, a1);
89bddce5
SH
2913 break;
2914 case SYS_SETSOCKOPT:
cc36dca0
DB
2915 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2916 a[4]);
89bddce5
SH
2917 break;
2918 case SYS_GETSOCKOPT:
2919 err =
13a2d70e
DB
2920 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2921 (int __user *)a[4]);
89bddce5
SH
2922 break;
2923 case SYS_SENDMSG:
e1834a32
DB
2924 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2925 a[2], true);
89bddce5 2926 break;
228e548e 2927 case SYS_SENDMMSG:
e1834a32
DB
2928 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2929 a[3], true);
228e548e 2930 break;
89bddce5 2931 case SYS_RECVMSG:
e1834a32
DB
2932 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2933 a[2], true);
89bddce5 2934 break;
a2e27255 2935 case SYS_RECVMMSG:
3ca47e95 2936 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
2937 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2938 a[2], a[3],
2939 (struct __kernel_timespec __user *)a[4],
2940 NULL);
2941 else
2942 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2943 a[2], a[3], NULL,
2944 (struct old_timespec32 __user *)a[4]);
a2e27255 2945 break;
de11defe 2946 case SYS_ACCEPT4:
4541e805
DB
2947 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2948 (int __user *)a[2], a[3]);
aaca0bdc 2949 break;
89bddce5
SH
2950 default:
2951 err = -EINVAL;
2952 break;
1da177e4
LT
2953 }
2954 return err;
2955}
2956
89bddce5 2957#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2958
55737fda
SH
2959/**
2960 * sock_register - add a socket protocol handler
2961 * @ops: description of protocol
2962 *
1da177e4
LT
2963 * This function is called by a protocol handler that wants to
2964 * advertise its address family, and have it linked into the
e793c0f7 2965 * socket interface. The value ops->family corresponds to the
55737fda 2966 * socket system call protocol family.
1da177e4 2967 */
f0fd27d4 2968int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2969{
2970 int err;
2971
2972 if (ops->family >= NPROTO) {
3410f22e 2973 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2974 return -ENOBUFS;
2975 }
55737fda
SH
2976
2977 spin_lock(&net_family_lock);
190683a9
ED
2978 if (rcu_dereference_protected(net_families[ops->family],
2979 lockdep_is_held(&net_family_lock)))
55737fda
SH
2980 err = -EEXIST;
2981 else {
cf778b00 2982 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2983 err = 0;
2984 }
55737fda
SH
2985 spin_unlock(&net_family_lock);
2986
3410f22e 2987 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2988 return err;
2989}
c6d409cf 2990EXPORT_SYMBOL(sock_register);
1da177e4 2991
55737fda
SH
2992/**
2993 * sock_unregister - remove a protocol handler
2994 * @family: protocol family to remove
2995 *
1da177e4
LT
2996 * This function is called by a protocol handler that wants to
2997 * remove its address family, and have it unlinked from the
55737fda
SH
2998 * new socket creation.
2999 *
3000 * If protocol handler is a module, then it can use module reference
3001 * counts to protect against new references. If protocol handler is not
3002 * a module then it needs to provide its own protection in
3003 * the ops->create routine.
1da177e4 3004 */
f0fd27d4 3005void sock_unregister(int family)
1da177e4 3006{
f0fd27d4 3007 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3008
55737fda 3009 spin_lock(&net_family_lock);
a9b3cd7f 3010 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3011 spin_unlock(&net_family_lock);
3012
3013 synchronize_rcu();
3014
3410f22e 3015 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 3016}
c6d409cf 3017EXPORT_SYMBOL(sock_unregister);
1da177e4 3018
bf2ae2e4
XL
3019bool sock_is_registered(int family)
3020{
66b51b0a 3021 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3022}
3023
77d76ea3 3024static int __init sock_init(void)
1da177e4 3025{
b3e19d92 3026 int err;
2ca794e5
EB
3027 /*
3028 * Initialize the network sysctl infrastructure.
3029 */
3030 err = net_sysctl_init();
3031 if (err)
3032 goto out;
b3e19d92 3033
1da177e4 3034 /*
89bddce5 3035 * Initialize skbuff SLAB cache
1da177e4
LT
3036 */
3037 skb_init();
1da177e4
LT
3038
3039 /*
89bddce5 3040 * Initialize the protocols module.
1da177e4
LT
3041 */
3042
3043 init_inodecache();
b3e19d92
NP
3044
3045 err = register_filesystem(&sock_fs_type);
3046 if (err)
47260ba9 3047 goto out;
1da177e4 3048 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3049 if (IS_ERR(sock_mnt)) {
3050 err = PTR_ERR(sock_mnt);
3051 goto out_mount;
3052 }
77d76ea3
AK
3053
3054 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3055 */
3056
3057#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3058 err = netfilter_init();
3059 if (err)
3060 goto out;
1da177e4 3061#endif
cbeb321a 3062
408eccce 3063 ptp_classifier_init();
c1f19b51 3064
b3e19d92
NP
3065out:
3066 return err;
3067
3068out_mount:
3069 unregister_filesystem(&sock_fs_type);
b3e19d92 3070 goto out;
1da177e4
LT
3071}
3072
77d76ea3
AK
3073core_initcall(sock_init); /* early initcall */
3074
1da177e4
LT
3075#ifdef CONFIG_PROC_FS
3076void socket_seq_show(struct seq_file *seq)
3077{
648845ab
TZ
3078 seq_printf(seq, "sockets: used %d\n",
3079 sock_inuse_get(seq->private));
1da177e4 3080}
89bddce5 3081#endif /* CONFIG_PROC_FS */
1da177e4 3082
89bbfc95 3083#ifdef CONFIG_COMPAT
36fd633e 3084static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 3085{
6b96018b 3086 struct compat_ifconf ifc32;
7a229387 3087 struct ifconf ifc;
7a229387
AB
3088 int err;
3089
6b96018b 3090 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3091 return -EFAULT;
3092
36fd633e
AV
3093 ifc.ifc_len = ifc32.ifc_len;
3094 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 3095
36fd633e
AV
3096 rtnl_lock();
3097 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
3098 rtnl_unlock();
7a229387
AB
3099 if (err)
3100 return err;
3101
36fd633e 3102 ifc32.ifc_len = ifc.ifc_len;
6b96018b 3103 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3104 return -EFAULT;
3105
3106 return 0;
3107}
3108
6b96018b 3109static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 3110{
3a7da39d
BH
3111 struct compat_ethtool_rxnfc __user *compat_rxnfc;
3112 bool convert_in = false, convert_out = false;
44c02a2c
AV
3113 size_t buf_size = 0;
3114 struct ethtool_rxnfc __user *rxnfc = NULL;
3115 struct ifreq ifr;
3a7da39d
BH
3116 u32 rule_cnt = 0, actual_rule_cnt;
3117 u32 ethcmd;
7a229387 3118 u32 data;
3a7da39d 3119 int ret;
7a229387 3120
3a7da39d
BH
3121 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
3122 return -EFAULT;
7a229387 3123
3a7da39d
BH
3124 compat_rxnfc = compat_ptr(data);
3125
3126 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
3127 return -EFAULT;
3128
3a7da39d
BH
3129 /* Most ethtool structures are defined without padding.
3130 * Unfortunately struct ethtool_rxnfc is an exception.
3131 */
3132 switch (ethcmd) {
3133 default:
3134 break;
3135 case ETHTOOL_GRXCLSRLALL:
3136 /* Buffer size is variable */
3137 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3138 return -EFAULT;
3139 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3140 return -ENOMEM;
3141 buf_size += rule_cnt * sizeof(u32);
7c7ab580 3142 fallthrough;
3a7da39d
BH
3143 case ETHTOOL_GRXRINGS:
3144 case ETHTOOL_GRXCLSRLCNT:
3145 case ETHTOOL_GRXCLSRULE:
55664f32 3146 case ETHTOOL_SRXCLSRLINS:
3a7da39d 3147 convert_out = true;
7c7ab580 3148 fallthrough;
3a7da39d 3149 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3150 buf_size += sizeof(struct ethtool_rxnfc);
3151 convert_in = true;
44c02a2c 3152 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3153 break;
3154 }
3155
44c02a2c 3156 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3157 return -EFAULT;
3158
44c02a2c 3159 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3160
3a7da39d 3161 if (convert_in) {
127fe533 3162 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3163 * fs.ring_cookie and at the end of fs, but nowhere else.
3164 */
127fe533
AD
3165 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3166 sizeof(compat_rxnfc->fs.m_ext) !=
3167 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3168 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3169 BUILD_BUG_ON(
3170 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3171 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3172 offsetof(struct ethtool_rxnfc, fs.location) -
3173 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3174
3175 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3176 (void __user *)(&rxnfc->fs.m_ext + 1) -
3177 (void __user *)rxnfc) ||
3a7da39d
BH
3178 copy_in_user(&rxnfc->fs.ring_cookie,
3179 &compat_rxnfc->fs.ring_cookie,
954b1244 3180 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3181 (void __user *)&rxnfc->fs.ring_cookie))
3182 return -EFAULT;
3183 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3184 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3185 return -EFAULT;
3186 } else if (copy_in_user(&rxnfc->rule_cnt,
3187 &compat_rxnfc->rule_cnt,
3188 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3189 return -EFAULT;
3190 }
3191
44c02a2c 3192 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3193 if (ret)
3194 return ret;
3195
3196 if (convert_out) {
3197 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3198 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3199 (const void __user *)rxnfc) ||
3a7da39d
BH
3200 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3201 &rxnfc->fs.ring_cookie,
954b1244
SH
3202 (const void __user *)(&rxnfc->fs.location + 1) -
3203 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3204 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3205 sizeof(rxnfc->rule_cnt)))
3206 return -EFAULT;
3207
3208 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3209 /* As an optimisation, we only copy the actual
3210 * number of rules that the underlying
3211 * function returned. Since Mallory might
3212 * change the rule count in user memory, we
3213 * check that it is less than the rule count
3214 * originally given (as the user buffer size),
3215 * which has been range-checked.
3216 */
3217 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3218 return -EFAULT;
3219 if (actual_rule_cnt < rule_cnt)
3220 rule_cnt = actual_rule_cnt;
3221 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3222 &rxnfc->rule_locs[0],
3223 rule_cnt * sizeof(u32)))
3224 return -EFAULT;
3225 }
3226 }
3227
3228 return 0;
7a229387
AB
3229}
3230
7a50a240
AB
3231static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3232{
7a50a240 3233 compat_uptr_t uptr32;
44c02a2c
AV
3234 struct ifreq ifr;
3235 void __user *saved;
3236 int err;
7a50a240 3237
44c02a2c 3238 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3239 return -EFAULT;
3240
3241 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3242 return -EFAULT;
3243
44c02a2c
AV
3244 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3245 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3246
44c02a2c
AV
3247 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3248 if (!err) {
3249 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3250 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3251 err = -EFAULT;
ccbd6a5a 3252 }
44c02a2c 3253 return err;
7a229387
AB
3254}
3255
590d4693
BH
3256/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3257static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3258 struct compat_ifreq __user *u_ifreq32)
7a229387 3259{
44c02a2c 3260 struct ifreq ifreq;
7a229387
AB
3261 u32 data32;
3262
44c02a2c 3263 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3264 return -EFAULT;
44c02a2c 3265 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3266 return -EFAULT;
44c02a2c 3267 ifreq.ifr_data = compat_ptr(data32);
7a229387 3268
44c02a2c 3269 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3270}
3271
37ac39bd
JB
3272static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3273 unsigned int cmd,
3274 struct compat_ifreq __user *uifr32)
3275{
3276 struct ifreq __user *uifr;
3277 int err;
3278
3279 /* Handle the fact that while struct ifreq has the same *layout* on
3280 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3281 * which are handled elsewhere, it still has different *size* due to
3282 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3283 * resulting in struct ifreq being 32 and 40 bytes respectively).
3284 * As a result, if the struct happens to be at the end of a page and
3285 * the next page isn't readable/writable, we get a fault. To prevent
3286 * that, copy back and forth to the full size.
3287 */
3288
3289 uifr = compat_alloc_user_space(sizeof(*uifr));
3290 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3291 return -EFAULT;
3292
3293 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3294
3295 if (!err) {
3296 switch (cmd) {
3297 case SIOCGIFFLAGS:
3298 case SIOCGIFMETRIC:
3299 case SIOCGIFMTU:
3300 case SIOCGIFMEM:
3301 case SIOCGIFHWADDR:
3302 case SIOCGIFINDEX:
3303 case SIOCGIFADDR:
3304 case SIOCGIFBRDADDR:
3305 case SIOCGIFDSTADDR:
3306 case SIOCGIFNETMASK:
3307 case SIOCGIFPFLAGS:
3308 case SIOCGIFTXQLEN:
3309 case SIOCGMIIPHY:
3310 case SIOCGMIIREG:
c6c9fee3 3311 case SIOCGIFNAME:
37ac39bd
JB
3312 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3313 err = -EFAULT;
3314 break;
3315 }
3316 }
3317 return err;
3318}
3319
a2116ed2
AB
3320static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3321 struct compat_ifreq __user *uifr32)
3322{
3323 struct ifreq ifr;
3324 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3325 int err;
3326
3327 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3328 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3329 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3330 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3331 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3332 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3333 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3334 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3335 if (err)
3336 return -EFAULT;
3337
44c02a2c 3338 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3339
3340 if (cmd == SIOCGIFMAP && !err) {
3341 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3342 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3343 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3344 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3345 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3346 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3347 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3348 if (err)
3349 err = -EFAULT;
3350 }
3351 return err;
3352}
3353
7a229387
AB
3354/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3355 * for some operations; this forces use of the newer bridge-utils that
25985edc 3356 * use compatible ioctls
7a229387 3357 */
6b96018b 3358static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3359{
6b96018b 3360 compat_ulong_t tmp;
7a229387 3361
6b96018b 3362 if (get_user(tmp, argp))
7a229387
AB
3363 return -EFAULT;
3364 if (tmp == BRCTL_GET_VERSION)
3365 return BRCTL_VERSION + 1;
3366 return -EINVAL;
3367}
3368
6b96018b
AB
3369static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3370 unsigned int cmd, unsigned long arg)
3371{
3372 void __user *argp = compat_ptr(arg);
3373 struct sock *sk = sock->sk;
3374 struct net *net = sock_net(sk);
7a229387 3375
6b96018b 3376 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3377 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3378
3379 switch (cmd) {
3380 case SIOCSIFBR:
3381 case SIOCGIFBR:
3382 return old_bridge_ioctl(argp);
6b96018b 3383 case SIOCGIFCONF:
36fd633e 3384 return compat_dev_ifconf(net, argp);
6b96018b
AB
3385 case SIOCETHTOOL:
3386 return ethtool_ioctl(net, argp);
7a50a240
AB
3387 case SIOCWANDEV:
3388 return compat_siocwandev(net, argp);
a2116ed2
AB
3389 case SIOCGIFMAP:
3390 case SIOCSIFMAP:
3391 return compat_sioc_ifmap(net, cmd, argp);
0768e170
AB
3392 case SIOCGSTAMP_OLD:
3393 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3394 if (!sock->ops->gettstamp)
3395 return -ENOIOCTLCMD;
0768e170 3396 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3397 !COMPAT_USE_64BIT_TIME);
3398
590d4693
BH
3399 case SIOCBONDSLAVEINFOQUERY:
3400 case SIOCBONDINFOQUERY:
a2116ed2 3401 case SIOCSHWTSTAMP:
fd468c74 3402 case SIOCGHWTSTAMP:
590d4693 3403 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3404
3405 case FIOSETOWN:
3406 case SIOCSPGRP:
3407 case FIOGETOWN:
3408 case SIOCGPGRP:
3409 case SIOCBRADDBR:
3410 case SIOCBRDELBR:
3411 case SIOCGIFVLAN:
3412 case SIOCSIFVLAN:
c62cce2c 3413 case SIOCGSKNS:
0768e170
AB
3414 case SIOCGSTAMP_NEW:
3415 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3416 return sock_ioctl(file, cmd, arg);
3417
3418 case SIOCGIFFLAGS:
3419 case SIOCSIFFLAGS:
3420 case SIOCGIFMETRIC:
3421 case SIOCSIFMETRIC:
3422 case SIOCGIFMTU:
3423 case SIOCSIFMTU:
3424 case SIOCGIFMEM:
3425 case SIOCSIFMEM:
3426 case SIOCGIFHWADDR:
3427 case SIOCSIFHWADDR:
3428 case SIOCADDMULTI:
3429 case SIOCDELMULTI:
3430 case SIOCGIFINDEX:
6b96018b
AB
3431 case SIOCGIFADDR:
3432 case SIOCSIFADDR:
3433 case SIOCSIFHWBROADCAST:
6b96018b 3434 case SIOCDIFADDR:
6b96018b
AB
3435 case SIOCGIFBRDADDR:
3436 case SIOCSIFBRDADDR:
3437 case SIOCGIFDSTADDR:
3438 case SIOCSIFDSTADDR:
3439 case SIOCGIFNETMASK:
3440 case SIOCSIFNETMASK:
3441 case SIOCSIFPFLAGS:
3442 case SIOCGIFPFLAGS:
3443 case SIOCGIFTXQLEN:
3444 case SIOCSIFTXQLEN:
3445 case SIOCBRADDIF:
3446 case SIOCBRDELIF:
c6c9fee3 3447 case SIOCGIFNAME:
9177efd3
AB
3448 case SIOCSIFNAME:
3449 case SIOCGMIIPHY:
3450 case SIOCGMIIREG:
3451 case SIOCSMIIREG:
f92d4fc9
AV
3452 case SIOCBONDENSLAVE:
3453 case SIOCBONDRELEASE:
3454 case SIOCBONDSETHWADDR:
3455 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3456 return compat_ifreq_ioctl(net, sock, cmd, argp);
3457
6b96018b
AB
3458 case SIOCSARP:
3459 case SIOCGARP:
3460 case SIOCDARP:
c7dc504e 3461 case SIOCOUTQ:
9d7bf41f 3462 case SIOCOUTQNSD:
6b96018b 3463 case SIOCATMARK:
63ff03ab 3464 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3465 }
3466
6b96018b
AB
3467 return -ENOIOCTLCMD;
3468}
7a229387 3469
95c96174 3470static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3471 unsigned long arg)
89bbfc95
SP
3472{
3473 struct socket *sock = file->private_data;
3474 int ret = -ENOIOCTLCMD;
87de87d5
DM
3475 struct sock *sk;
3476 struct net *net;
3477
3478 sk = sock->sk;
3479 net = sock_net(sk);
89bbfc95
SP
3480
3481 if (sock->ops->compat_ioctl)
3482 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3483
87de87d5
DM
3484 if (ret == -ENOIOCTLCMD &&
3485 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3486 ret = compat_wext_handle_ioctl(net, cmd, arg);
3487
6b96018b
AB
3488 if (ret == -ENOIOCTLCMD)
3489 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3490
89bbfc95
SP
3491 return ret;
3492}
3493#endif
3494
8a3c245c
PT
3495/**
3496 * kernel_bind - bind an address to a socket (kernel space)
3497 * @sock: socket
3498 * @addr: address
3499 * @addrlen: length of address
3500 *
3501 * Returns 0 or an error.
3502 */
3503
ac5a488e
SS
3504int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3505{
3506 return sock->ops->bind(sock, addr, addrlen);
3507}
c6d409cf 3508EXPORT_SYMBOL(kernel_bind);
ac5a488e 3509
8a3c245c
PT
3510/**
3511 * kernel_listen - move socket to listening state (kernel space)
3512 * @sock: socket
3513 * @backlog: pending connections queue size
3514 *
3515 * Returns 0 or an error.
3516 */
3517
ac5a488e
SS
3518int kernel_listen(struct socket *sock, int backlog)
3519{
3520 return sock->ops->listen(sock, backlog);
3521}
c6d409cf 3522EXPORT_SYMBOL(kernel_listen);
ac5a488e 3523
8a3c245c
PT
3524/**
3525 * kernel_accept - accept a connection (kernel space)
3526 * @sock: listening socket
3527 * @newsock: new connected socket
3528 * @flags: flags
3529 *
3530 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3531 * If it fails, @newsock is guaranteed to be %NULL.
3532 * Returns 0 or an error.
3533 */
3534
ac5a488e
SS
3535int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3536{
3537 struct sock *sk = sock->sk;
3538 int err;
3539
3540 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3541 newsock);
3542 if (err < 0)
3543 goto done;
3544
cdfbabfb 3545 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3546 if (err < 0) {
3547 sock_release(*newsock);
fa8705b0 3548 *newsock = NULL;
ac5a488e
SS
3549 goto done;
3550 }
3551
3552 (*newsock)->ops = sock->ops;
1b08534e 3553 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3554
3555done:
3556 return err;
3557}
c6d409cf 3558EXPORT_SYMBOL(kernel_accept);
ac5a488e 3559
8a3c245c
PT
3560/**
3561 * kernel_connect - connect a socket (kernel space)
3562 * @sock: socket
3563 * @addr: address
3564 * @addrlen: address length
3565 * @flags: flags (O_NONBLOCK, ...)
3566 *
3567 * For datagram sockets, @addr is the addres to which datagrams are sent
3568 * by default, and the only address from which datagrams are received.
3569 * For stream sockets, attempts to connect to @addr.
3570 * Returns 0 or an error code.
3571 */
3572
ac5a488e 3573int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3574 int flags)
ac5a488e
SS
3575{
3576 return sock->ops->connect(sock, addr, addrlen, flags);
3577}
c6d409cf 3578EXPORT_SYMBOL(kernel_connect);
ac5a488e 3579
8a3c245c
PT
3580/**
3581 * kernel_getsockname - get the address which the socket is bound (kernel space)
3582 * @sock: socket
3583 * @addr: address holder
3584 *
3585 * Fills the @addr pointer with the address which the socket is bound.
3586 * Returns 0 or an error code.
3587 */
3588
9b2c45d4 3589int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3590{
9b2c45d4 3591 return sock->ops->getname(sock, addr, 0);
ac5a488e 3592}
c6d409cf 3593EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3594
8a3c245c 3595/**
645f0897 3596 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3597 * @sock: socket
3598 * @addr: address holder
3599 *
3600 * Fills the @addr pointer with the address which the socket is connected.
3601 * Returns 0 or an error code.
3602 */
3603
9b2c45d4 3604int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3605{
9b2c45d4 3606 return sock->ops->getname(sock, addr, 1);
ac5a488e 3607}
c6d409cf 3608EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3609
8a3c245c
PT
3610/**
3611 * kernel_sendpage - send a &page through a socket (kernel space)
3612 * @sock: socket
3613 * @page: page
3614 * @offset: page offset
3615 * @size: total size in bytes
3616 * @flags: flags (MSG_DONTWAIT, ...)
3617 *
3618 * Returns the total amount sent in bytes or an error.
3619 */
3620
ac5a488e
SS
3621int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3622 size_t size, int flags)
3623{
7b62d31d
CL
3624 if (sock->ops->sendpage) {
3625 /* Warn in case the improper page to zero-copy send */
3626 WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
ac5a488e 3627 return sock->ops->sendpage(sock, page, offset, size, flags);
7b62d31d 3628 }
ac5a488e
SS
3629 return sock_no_sendpage(sock, page, offset, size, flags);
3630}
c6d409cf 3631EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3632
8a3c245c
PT
3633/**
3634 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3635 * @sk: sock
3636 * @page: page
3637 * @offset: page offset
3638 * @size: total size in bytes
3639 * @flags: flags (MSG_DONTWAIT, ...)
3640 *
3641 * Returns the total amount sent in bytes or an error.
3642 * Caller must hold @sk.
3643 */
3644
306b13eb
TH
3645int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3646 size_t size, int flags)
3647{
3648 struct socket *sock = sk->sk_socket;
3649
3650 if (sock->ops->sendpage_locked)
3651 return sock->ops->sendpage_locked(sk, page, offset, size,
3652 flags);
3653
3654 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3655}
3656EXPORT_SYMBOL(kernel_sendpage_locked);
3657
8a3c245c 3658/**
645f0897 3659 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3660 * @sock: socket
3661 * @how: connection part
3662 *
3663 * Returns 0 or an error.
3664 */
3665
91cf45f0
TM
3666int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3667{
3668 return sock->ops->shutdown(sock, how);
3669}
91cf45f0 3670EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3671
8a3c245c
PT
3672/**
3673 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3674 * @sk: socket
3675 *
3676 * This routine returns the IP overhead imposed by a socket i.e.
3677 * the length of the underlying IP header, depending on whether
3678 * this is an IPv4 or IPv6 socket and the length from IP options turned
3679 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3680 */
8a3c245c 3681
113c3075
P
3682u32 kernel_sock_ip_overhead(struct sock *sk)
3683{
3684 struct inet_sock *inet;
3685 struct ip_options_rcu *opt;
3686 u32 overhead = 0;
113c3075
P
3687#if IS_ENABLED(CONFIG_IPV6)
3688 struct ipv6_pinfo *np;
3689 struct ipv6_txoptions *optv6 = NULL;
3690#endif /* IS_ENABLED(CONFIG_IPV6) */
3691
3692 if (!sk)
3693 return overhead;
3694
113c3075
P
3695 switch (sk->sk_family) {
3696 case AF_INET:
3697 inet = inet_sk(sk);
3698 overhead += sizeof(struct iphdr);
3699 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3700 sock_owned_by_user(sk));
113c3075
P
3701 if (opt)
3702 overhead += opt->opt.optlen;
3703 return overhead;
3704#if IS_ENABLED(CONFIG_IPV6)
3705 case AF_INET6:
3706 np = inet6_sk(sk);
3707 overhead += sizeof(struct ipv6hdr);
3708 if (np)
3709 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3710 sock_owned_by_user(sk));
113c3075
P
3711 if (optv6)
3712 overhead += (optv6->opt_flen + optv6->opt_nflen);
3713 return overhead;
3714#endif /* IS_ENABLED(CONFIG_IPV6) */
3715 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3716 return overhead;
3717 }
3718}
3719EXPORT_SYMBOL(kernel_sock_ip_overhead);