Merge tag 'wireless-drivers-2021-03-03' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-block.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
cc69837f 55#include <linux/ethtool.h>
1da177e4 56#include <linux/mm.h>
1da177e4
LT
57#include <linux/socket.h>
58#include <linux/file.h>
59#include <linux/net.h>
60#include <linux/interrupt.h>
aaca0bdc 61#include <linux/thread_info.h>
55737fda 62#include <linux/rcupdate.h>
1da177e4
LT
63#include <linux/netdevice.h>
64#include <linux/proc_fs.h>
65#include <linux/seq_file.h>
4a3e2f71 66#include <linux/mutex.h>
1da177e4 67#include <linux/if_bridge.h>
20380731 68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
c7dc504e 103#include <linux/termios.h>
6b96018b 104#include <linux/sockios.h>
076bb0c8 105#include <net/busy_poll.h>
f24b9be5 106#include <linux/errqueue.h>
06021292 107
e0d1095a 108#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
109unsigned int sysctl_net_busy_read __read_mostly;
110unsigned int sysctl_net_busy_poll __read_mostly;
06021292 111#endif
6b96018b 112
8ae5e030
AV
113static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
114static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 115static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
116
117static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
118static __poll_t sock_poll(struct file *file,
119 struct poll_table_struct *wait);
89bddce5 120static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
121#ifdef CONFIG_COMPAT
122static long compat_sock_ioctl(struct file *file,
89bddce5 123 unsigned int cmd, unsigned long arg);
89bbfc95 124#endif
1da177e4 125static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
126static ssize_t sock_sendpage(struct file *file, struct page *page,
127 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 128static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 129 struct pipe_inode_info *pipe, size_t len,
9c55e01c 130 unsigned int flags);
542d3065
AB
131
#ifdef CONFIG_PROC_FS
/*
 * Emit the fdinfo text for a socket file by delegating to the protocol's
 * ->show_fdinfo() hook. Protocols that do not provide the hook print nothing.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;

	if (!sock->ops->show_fdinfo)
		return;

	sock->ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
#endif
1da177e4 143
1da177e4
LT
144/*
145 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
146 * in the operation structures but are done directly via the socketcall() multiplexor.
147 */
148
da7071d7 149static const struct file_operations socket_file_ops = {
1da177e4
LT
150 .owner = THIS_MODULE,
151 .llseek = no_llseek,
8ae5e030
AV
152 .read_iter = sock_read_iter,
153 .write_iter = sock_write_iter,
1da177e4
LT
154 .poll = sock_poll,
155 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
156#ifdef CONFIG_COMPAT
157 .compat_ioctl = compat_sock_ioctl,
158#endif
1da177e4 159 .mmap = sock_mmap,
1da177e4
LT
160 .release = sock_close,
161 .fasync = sock_fasync,
5274f052
JA
162 .sendpage = sock_sendpage,
163 .splice_write = generic_splice_sendpage,
9c55e01c 164 .splice_read = sock_splice_read,
b4653342 165 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
166};
167
168/*
169 * The protocol list. Each protocol is registered in here.
170 */
171
1da177e4 172static DEFINE_SPINLOCK(net_family_lock);
190683a9 173static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 174
1da177e4 175/*
89bddce5
SH
176 * Support routines.
177 * Move socket addresses back and forth across the kernel/user
178 * divide and look after the messy bits.
1da177e4
LT
179 */
180
1da177e4
LT
181/**
182 * move_addr_to_kernel - copy a socket address into kernel space
183 * @uaddr: Address in user space
184 * @kaddr: Address in kernel space
185 * @ulen: Length in user space
186 *
187 * The address is copied into kernel space. If the provided address is
188 * too long an error code of -EINVAL is returned. If the copy gives
189 * invalid addresses -EFAULT is returned. On a success 0 is returned.
190 */
191
43db362d 192int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 193{
230b1839 194 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 195 return -EINVAL;
89bddce5 196 if (ulen == 0)
1da177e4 197 return 0;
89bddce5 198 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 199 return -EFAULT;
3ec3b2fb 200 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
201}
202
203/**
204 * move_addr_to_user - copy an address to user space
205 * @kaddr: kernel space address
206 * @klen: length of address in kernel
207 * @uaddr: user space address
208 * @ulen: pointer to user length field
209 *
210 * The value pointed to by ulen on entry is the buffer length available.
211 * This is overwritten with the buffer space used. -EINVAL is returned
212 * if an overlong buffer is specified or a negative buffer size. -EFAULT
213 * is returned if either the buffer or the length field are not
214 * accessible.
215 * After copying the data up to the limit the user specifies, the true
216 * length of the data is written over the length limit the user
217 * specified. Zero is returned for a success.
218 */
89bddce5 219
43db362d 220static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 221 void __user *uaddr, int __user *ulen)
1da177e4
LT
222{
223 int err;
224 int len;
225
68c6beb3 226 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
227 err = get_user(len, ulen);
228 if (err)
1da177e4 229 return err;
89bddce5
SH
230 if (len > klen)
231 len = klen;
68c6beb3 232 if (len < 0)
1da177e4 233 return -EINVAL;
89bddce5 234 if (len) {
d6fe3945
SG
235 if (audit_sockaddr(klen, kaddr))
236 return -ENOMEM;
89bddce5 237 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
238 return -EFAULT;
239 }
240 /*
89bddce5
SH
241 * "fromlen shall refer to the value before truncation.."
242 * 1003.1g
1da177e4
LT
243 */
244 return __put_user(klen, ulen);
245}
246
08009a76 247static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
248
249static struct inode *sock_alloc_inode(struct super_block *sb)
250{
251 struct socket_alloc *ei;
89bddce5 252
e94b1766 253 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
254 if (!ei)
255 return NULL;
333f7909
AV
256 init_waitqueue_head(&ei->socket.wq.wait);
257 ei->socket.wq.fasync_list = NULL;
258 ei->socket.wq.flags = 0;
89bddce5 259
1da177e4
LT
260 ei->socket.state = SS_UNCONNECTED;
261 ei->socket.flags = 0;
262 ei->socket.ops = NULL;
263 ei->socket.sk = NULL;
264 ei->socket.file = NULL;
1da177e4
LT
265
266 return &ei->vfs_inode;
267}
268
6d7855c5 269static void sock_free_inode(struct inode *inode)
1da177e4 270{
43815482
ED
271 struct socket_alloc *ei;
272
273 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 274 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
275}
276
51cc5068 277static void init_once(void *foo)
1da177e4 278{
89bddce5 279 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 280
a35afb83 281 inode_init_once(&ei->vfs_inode);
1da177e4 282}
89bddce5 283
1e911632 284static void init_inodecache(void)
1da177e4
LT
285{
286 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
287 sizeof(struct socket_alloc),
288 0,
289 (SLAB_HWCACHE_ALIGN |
290 SLAB_RECLAIM_ACCOUNT |
5d097056 291 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 292 init_once);
1e911632 293 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
294}
295
b87221de 296static const struct super_operations sockfs_ops = {
c6d409cf 297 .alloc_inode = sock_alloc_inode,
6d7855c5 298 .free_inode = sock_free_inode,
c6d409cf 299 .statfs = simple_statfs,
1da177e4
LT
300};
301
c23fbb6b
ED
302/*
303 * sockfs_dname() is called from d_path().
304 */
305static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
306{
307 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 308 d_inode(dentry)->i_ino);
c23fbb6b
ED
309}
310
3ba13d17 311static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 312 .d_dname = sockfs_dname,
1da177e4
LT
313};
314
bba0bd31
AG
315static int sockfs_xattr_get(const struct xattr_handler *handler,
316 struct dentry *dentry, struct inode *inode,
317 const char *suffix, void *value, size_t size)
318{
319 if (value) {
320 if (dentry->d_name.len + 1 > size)
321 return -ERANGE;
322 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
323 }
324 return dentry->d_name.len + 1;
325}
326
327#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
328#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
329#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
330
331static const struct xattr_handler sockfs_xattr_handler = {
332 .name = XATTR_NAME_SOCKPROTONAME,
333 .get = sockfs_xattr_get,
334};
335
4a590153 336static int sockfs_security_xattr_set(const struct xattr_handler *handler,
e65ce2a5 337 struct user_namespace *mnt_userns,
4a590153
AG
338 struct dentry *dentry, struct inode *inode,
339 const char *suffix, const void *value,
340 size_t size, int flags)
341{
342 /* Handled by LSM. */
343 return -EAGAIN;
344}
345
346static const struct xattr_handler sockfs_security_xattr_handler = {
347 .prefix = XATTR_SECURITY_PREFIX,
348 .set = sockfs_security_xattr_set,
349};
350
bba0bd31
AG
351static const struct xattr_handler *sockfs_xattr_handlers[] = {
352 &sockfs_xattr_handler,
4a590153 353 &sockfs_security_xattr_handler,
bba0bd31
AG
354 NULL
355};
356
fba9be49 357static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 358{
fba9be49
DH
359 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
360 if (!ctx)
361 return -ENOMEM;
362 ctx->ops = &sockfs_ops;
363 ctx->dops = &sockfs_dentry_operations;
364 ctx->xattr = sockfs_xattr_handlers;
365 return 0;
c74a1cbb
AV
366}
367
368static struct vfsmount *sock_mnt __read_mostly;
369
370static struct file_system_type sock_fs_type = {
371 .name = "sockfs",
fba9be49 372 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
373 .kill_sb = kill_anon_super,
374};
375
1da177e4
LT
376/*
377 * Obtains the first available file descriptor and sets it up for use.
378 *
39d8c1b6
DM
379 * These functions create file structures and maps them to fd space
380 * of the current process. On success it returns file descriptor
1da177e4
LT
381 * and file struct implicitly stored in sock->file.
382 * Note that another thread may close file descriptor before we return
383 * from this function. We use the fact that now we do not refer
384 * to socket after mapping. If one day we will need it, this
385 * function will increment ref. count on file by 1.
386 *
387 * In any case returned fd MAY BE not valid!
388 * This race condition is unavoidable
389 * with shared fd spaces, we cannot solve it inside kernel,
390 * but we take care of internal coherence yet.
391 */
392
8a3c245c
PT
393/**
394 * sock_alloc_file - Bind a &socket to a &file
395 * @sock: socket
396 * @flags: file status flags
397 * @dname: protocol name
398 *
399 * Returns the &file bound with @sock, implicitly storing it
400 * in sock->file. If dname is %NULL, sets to "".
401 * On failure the return is a ERR pointer (see linux/err.h).
402 * This function uses GFP_KERNEL internally.
403 */
404
aab174f0 405struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 406{
7cbe66b6 407 struct file *file;
1da177e4 408
d93aa9d8
AV
409 if (!dname)
410 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 411
d93aa9d8
AV
412 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
413 O_RDWR | (flags & O_NONBLOCK),
414 &socket_file_ops);
b5ffe634 415 if (IS_ERR(file)) {
8e1611e2 416 sock_release(sock);
39b65252 417 return file;
cc3808f8
AV
418 }
419
420 sock->file = file;
39d8c1b6 421 file->private_data = sock;
d8e464ec 422 stream_open(SOCK_INODE(sock), file);
28407630 423 return file;
39d8c1b6 424}
56b31d1c 425EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 426
56b31d1c 427static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
428{
429 struct file *newfile;
28407630 430 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
431 if (unlikely(fd < 0)) {
432 sock_release(sock);
28407630 433 return fd;
ce4bb04c 434 }
39d8c1b6 435
aab174f0 436 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 437 if (!IS_ERR(newfile)) {
39d8c1b6 438 fd_install(fd, newfile);
28407630
AV
439 return fd;
440 }
7cbe66b6 441
28407630
AV
442 put_unused_fd(fd);
443 return PTR_ERR(newfile);
1da177e4
LT
444}
445
8a3c245c
PT
446/**
447 * sock_from_file - Return the &socket bounded to @file.
448 * @file: file
8a3c245c 449 *
dba4a925 450 * On failure returns %NULL.
8a3c245c
PT
451 */
452
dba4a925 453struct socket *sock_from_file(struct file *file)
6cb153ca 454{
6cb153ca
BL
455 if (file->f_op == &socket_file_ops)
456 return file->private_data; /* set in sock_map_fd */
457
23bb80d2 458 return NULL;
6cb153ca 459}
406a3c63 460EXPORT_SYMBOL(sock_from_file);
6cb153ca 461
1da177e4 462/**
c6d409cf 463 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
464 * @fd: file handle
465 * @err: pointer to an error code return
466 *
467 * The file handle passed in is locked and the socket it is bound
241c4667 468 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
469 * with a negative errno code and NULL is returned. The function checks
470 * for both invalid handles and passing a handle which is not a socket.
471 *
472 * On a success the socket object pointer is returned.
473 */
474
475struct socket *sockfd_lookup(int fd, int *err)
476{
477 struct file *file;
1da177e4
LT
478 struct socket *sock;
479
89bddce5
SH
480 file = fget(fd);
481 if (!file) {
1da177e4
LT
482 *err = -EBADF;
483 return NULL;
484 }
89bddce5 485
dba4a925
FR
486 sock = sock_from_file(file);
487 if (!sock) {
488 *err = -ENOTSOCK;
1da177e4 489 fput(file);
dba4a925 490 }
6cb153ca
BL
491 return sock;
492}
c6d409cf 493EXPORT_SYMBOL(sockfd_lookup);
1da177e4 494
6cb153ca
BL
495static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
496{
00e188ef 497 struct fd f = fdget(fd);
6cb153ca
BL
498 struct socket *sock;
499
3672558c 500 *err = -EBADF;
00e188ef 501 if (f.file) {
dba4a925 502 sock = sock_from_file(f.file);
00e188ef 503 if (likely(sock)) {
ce787a5a 504 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 505 return sock;
00e188ef 506 }
dba4a925 507 *err = -ENOTSOCK;
00e188ef 508 fdput(f);
1da177e4 509 }
6cb153ca 510 return NULL;
1da177e4
LT
511}
512
600e1779
MY
513static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
514 size_t size)
515{
516 ssize_t len;
517 ssize_t used = 0;
518
c5ef6035 519 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
520 if (len < 0)
521 return len;
522 used += len;
523 if (buffer) {
524 if (size < used)
525 return -ERANGE;
526 buffer += len;
527 }
528
529 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
530 used += len;
531 if (buffer) {
532 if (size < used)
533 return -ERANGE;
534 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
535 buffer += len;
536 }
537
538 return used;
539}
540
549c7297
CB
541static int sockfs_setattr(struct user_namespace *mnt_userns,
542 struct dentry *dentry, struct iattr *iattr)
86741ec2 543{
549c7297 544 int err = simple_setattr(&init_user_ns, dentry, iattr);
86741ec2 545
e1a3a60a 546 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
547 struct socket *sock = SOCKET_I(d_inode(dentry));
548
6d8c50dc
CW
549 if (sock->sk)
550 sock->sk->sk_uid = iattr->ia_uid;
551 else
552 err = -ENOENT;
86741ec2
LC
553 }
554
555 return err;
556}
557
600e1779 558static const struct inode_operations sockfs_inode_ops = {
600e1779 559 .listxattr = sockfs_listxattr,
86741ec2 560 .setattr = sockfs_setattr,
600e1779
MY
561};
562
1da177e4 563/**
8a3c245c 564 * sock_alloc - allocate a socket
89bddce5 565 *
1da177e4
LT
566 * Allocate a new inode and socket object. The two are bound together
567 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 568 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
569 */
570
f4a00aac 571struct socket *sock_alloc(void)
1da177e4 572{
89bddce5
SH
573 struct inode *inode;
574 struct socket *sock;
1da177e4 575
a209dfc7 576 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
577 if (!inode)
578 return NULL;
579
580 sock = SOCKET_I(inode);
581
85fe4025 582 inode->i_ino = get_next_ino();
89bddce5 583 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
584 inode->i_uid = current_fsuid();
585 inode->i_gid = current_fsgid();
600e1779 586 inode->i_op = &sockfs_inode_ops;
1da177e4 587
1da177e4
LT
588 return sock;
589}
f4a00aac 590EXPORT_SYMBOL(sock_alloc);
1da177e4 591
6d8c50dc 592static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
593{
594 if (sock->ops) {
595 struct module *owner = sock->ops->owner;
596
6d8c50dc
CW
597 if (inode)
598 inode_lock(inode);
1da177e4 599 sock->ops->release(sock);
ff7b11aa 600 sock->sk = NULL;
6d8c50dc
CW
601 if (inode)
602 inode_unlock(inode);
1da177e4
LT
603 sock->ops = NULL;
604 module_put(owner);
605 }
606
333f7909 607 if (sock->wq.fasync_list)
3410f22e 608 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 609
1da177e4
LT
610 if (!sock->file) {
611 iput(SOCK_INODE(sock));
612 return;
613 }
89bddce5 614 sock->file = NULL;
1da177e4 615}
6d8c50dc 616
9a8ad9ac
AL
617/**
618 * sock_release - close a socket
619 * @sock: socket to close
620 *
621 * The socket is released from the protocol stack if it has a release
622 * callback, and the inode is then released if the socket is bound to
623 * an inode not a file.
624 */
6d8c50dc
CW
625void sock_release(struct socket *sock)
626{
627 __sock_release(sock, NULL);
628}
c6d409cf 629EXPORT_SYMBOL(sock_release);
1da177e4 630
c14ac945 631void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 632{
140c55d4
ED
633 u8 flags = *tx_flags;
634
c14ac945 635 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
636 flags |= SKBTX_HW_TSTAMP;
637
c14ac945 638 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
639 flags |= SKBTX_SW_TSTAMP;
640
c14ac945 641 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
642 flags |= SKBTX_SCHED_TSTAMP;
643
140c55d4 644 *tx_flags = flags;
20d49473 645}
67cc0d40 646EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 647
8c3c447b
PA
648INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
649 size_t));
a648a592
PA
650INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
651 size_t));
d8725c86 652static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 653{
a648a592
PA
654 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
655 inet_sendmsg, sock, msg,
656 msg_data_left(msg));
d8725c86
AV
657 BUG_ON(ret == -EIOCBQUEUED);
658 return ret;
1da177e4
LT
659}
660
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM. If the security hook
 *	rejects the send, its error is returned and nothing is sent.
 *	Otherwise returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 677
8a3c245c
PT
678/**
679 * kernel_sendmsg - send a message through @sock (kernel-space)
680 * @sock: socket
681 * @msg: message header
682 * @vec: kernel vec
683 * @num: vec array length
684 * @size: total message data size
685 *
686 * Builds the message data with @vec and sends it through @sock.
687 * Returns the number of bytes sent, or an error code.
688 */
689
1da177e4
LT
690int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
691 struct kvec *vec, size_t num, size_t size)
692{
aa563d7b 693 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 694 return sock_sendmsg(sock, msg);
1da177e4 695}
c6d409cf 696EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 697
8a3c245c
PT
698/**
699 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
700 * @sk: sock
701 * @msg: message header
702 * @vec: output s/g array
703 * @num: output s/g array length
704 * @size: total message data size
705 *
706 * Builds the message data with @vec and sends it through @sock.
707 * Returns the number of bytes sent, or an error code.
708 * Caller must hold @sk.
709 */
710
306b13eb
TH
711int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
712 struct kvec *vec, size_t num, size_t size)
713{
714 struct socket *sock = sk->sk_socket;
715
716 if (!sock->ops->sendmsg_locked)
db5980d8 717 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 718
aa563d7b 719 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
720
721 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
722}
723EXPORT_SYMBOL(kernel_sendmsg_locked);
724
8605330a
SHY
725static bool skb_is_err_queue(const struct sk_buff *skb)
726{
727 /* pkt_type of skbs enqueued on the error queue are set to
728 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
729 * in recvmsg, since skbs received on a local socket will never
730 * have a pkt_type of PACKET_OUTGOING.
731 */
732 return skb->pkt_type == PACKET_OUTGOING;
733}
734
b50a5c70
ML
735/* On transmit, software and hardware timestamps are returned independently.
736 * As the two skb clones share the hardware timestamp, which may be updated
737 * before the software timestamp is received, a hardware TX timestamp may be
738 * returned only if there is no software TX timestamp. Ignore false software
739 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 740 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
741 * hardware timestamp.
742 */
743static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
744{
745 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
746}
747
aad9c8c4
ML
748static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
749{
750 struct scm_ts_pktinfo ts_pktinfo;
751 struct net_device *orig_dev;
752
753 if (!skb_mac_header_was_set(skb))
754 return;
755
756 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
757
758 rcu_read_lock();
759 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
760 if (orig_dev)
761 ts_pktinfo.if_index = orig_dev->ifindex;
762 rcu_read_unlock();
763
764 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
765 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
766 sizeof(ts_pktinfo), &ts_pktinfo);
767}
768
92f37fd2
ED
769/*
770 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
771 */
772void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
773 struct sk_buff *skb)
774{
20d49473 775 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 776 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
777 struct scm_timestamping_internal tss;
778
b50a5c70 779 int empty = 1, false_tstamp = 0;
20d49473
PO
780 struct skb_shared_hwtstamps *shhwtstamps =
781 skb_hwtstamps(skb);
782
783 /* Race occurred between timestamp enabling and packet
784 receiving. Fill in the current time for now. */
b50a5c70 785 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 786 __net_timestamp(skb);
b50a5c70
ML
787 false_tstamp = 1;
788 }
20d49473
PO
789
790 if (need_software_tstamp) {
791 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
792 if (new_tstamp) {
793 struct __kernel_sock_timeval tv;
794
795 skb_get_new_timestamp(skb, &tv);
796 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
797 sizeof(tv), &tv);
798 } else {
799 struct __kernel_old_timeval tv;
800
801 skb_get_timestamp(skb, &tv);
802 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
803 sizeof(tv), &tv);
804 }
20d49473 805 } else {
887feae3
DD
806 if (new_tstamp) {
807 struct __kernel_timespec ts;
808
809 skb_get_new_timestampns(skb, &ts);
810 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
811 sizeof(ts), &ts);
812 } else {
df1b4ba9 813 struct __kernel_old_timespec ts;
887feae3
DD
814
815 skb_get_timestampns(skb, &ts);
816 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
817 sizeof(ts), &ts);
818 }
20d49473
PO
819 }
820 }
821
f24b9be5 822 memset(&tss, 0, sizeof(tss));
c199105d 823 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 824 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 825 empty = 0;
4d276eb6 826 if (shhwtstamps &&
b9f40e21 827 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 828 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 829 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 830 empty = 0;
aad9c8c4
ML
831 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
832 !skb_is_err_queue(skb))
833 put_ts_pktinfo(msg, skb);
834 }
1c885808 835 if (!empty) {
9718475e
DD
836 if (sock_flag(sk, SOCK_TSTAMP_NEW))
837 put_cmsg_scm_timestamping64(msg, &tss);
838 else
839 put_cmsg_scm_timestamping(msg, &tss);
1c885808 840
8605330a 841 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 842 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
843 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
844 skb->len, skb->data);
845 }
92f37fd2 846}
7c81fd8b
ACM
847EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
848
6e3e939f
JB
849void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
850 struct sk_buff *skb)
851{
852 int ack;
853
854 if (!sock_flag(sk, SOCK_WIFI_STATUS))
855 return;
856 if (!skb->wifi_acked_valid)
857 return;
858
859 ack = skb->wifi_acked;
860
861 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
862}
863EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
864
11165f14 865static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
866 struct sk_buff *skb)
3b885787 867{
744d5a3e 868 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 869 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 870 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
871}
872
/*
 * Convenience wrapper: attach the timestamp control messages for @skb
 * first, then the receive-queue drop counter.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 880
8c3c447b 881INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
882 size_t, int));
883INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
884 size_t, int));
1b784140 885static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 886 int flags)
1da177e4 887{
a648a592
PA
888 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
889 inet_recvmsg, sock, msg, msg_data_left(msg),
890 flags);
1da177e4
LT
891}
892
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM. If the security hook
 *	rejects the receive, its error is returned and nothing is read.
 *	Otherwise returns the total number of bytes received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 909
c1249c0a 910/**
8a3c245c
PT
911 * kernel_recvmsg - Receive a message from a socket (kernel space)
912 * @sock: The socket to receive the message from
913 * @msg: Received message
914 * @vec: Input s/g array for message data
915 * @num: Size of input s/g array
916 * @size: Number of bytes to read
917 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 918 *
8a3c245c
PT
919 * On return the msg structure contains the scatter/gather array passed in the
920 * vec argument. The array is modified so that it consists of the unfilled
921 * portion of the original array.
c1249c0a 922 *
8a3c245c 923 * The returned value is the total number of bytes received, or an error.
c1249c0a 924 */
8a3c245c 925
89bddce5
SH
926int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
927 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4 928{
1f466e1f 929 msg->msg_control_is_user = false;
aa563d7b 930 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1f466e1f 931 return sock_recvmsg(sock, msg, flags);
1da177e4 932}
c6d409cf 933EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 934
ce1d4d3e
CH
935static ssize_t sock_sendpage(struct file *file, struct page *page,
936 int offset, size_t size, loff_t *ppos, int more)
1da177e4 937{
1da177e4
LT
938 struct socket *sock;
939 int flags;
940
ce1d4d3e
CH
941 sock = file->private_data;
942
35f9c09f
ED
943 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
944 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
945 flags |= more;
ce1d4d3e 946
e6949583 947 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 948}
1da177e4 949
9c55e01c 950static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 951 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
952 unsigned int flags)
953{
954 struct socket *sock = file->private_data;
955
997b37da 956 if (unlikely(!sock->ops->splice_read))
95506588 957 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 958
9c55e01c
JA
959 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
960}
961
8ae5e030 962static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 963{
6d652330
AV
964 struct file *file = iocb->ki_filp;
965 struct socket *sock = file->private_data;
0345f931 966 struct msghdr msg = {.msg_iter = *to,
967 .msg_iocb = iocb};
8ae5e030 968 ssize_t res;
ce1d4d3e 969
ebfcd895 970 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
971 msg.msg_flags = MSG_DONTWAIT;
972
973 if (iocb->ki_pos != 0)
1da177e4 974 return -ESPIPE;
027445c3 975
66ee59af 976 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
977 return 0;
978
2da62906 979 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
980 *to = msg.msg_iter;
981 return res;
1da177e4
LT
982}
983
8ae5e030 984static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 985{
6d652330
AV
986 struct file *file = iocb->ki_filp;
987 struct socket *sock = file->private_data;
0345f931 988 struct msghdr msg = {.msg_iter = *from,
989 .msg_iocb = iocb};
8ae5e030 990 ssize_t res;
1da177e4 991
8ae5e030 992 if (iocb->ki_pos != 0)
ce1d4d3e 993 return -ESPIPE;
027445c3 994
ebfcd895 995 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
996 msg.msg_flags = MSG_DONTWAIT;
997
6d652330
AV
998 if (sock->type == SOCK_SEQPACKET)
999 msg.msg_flags |= MSG_EOR;
1000
d8725c86 1001 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1002 *from = msg.msg_iter;
1003 return res;
1da177e4
LT
1004}
1005
1da177e4
LT
1006/*
1007 * Atomic setting of ioctl hooks to avoid race
1008 * with module unload.
1009 */
1010
4a3e2f71 1011static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1012static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1013
881d966b 1014void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1015{
4a3e2f71 1016 mutex_lock(&br_ioctl_mutex);
1da177e4 1017 br_ioctl_hook = hook;
4a3e2f71 1018 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1019}
1020EXPORT_SYMBOL(brioctl_set);
1021
4a3e2f71 1022static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1023static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1024
881d966b 1025void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1026{
4a3e2f71 1027 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1028 vlan_ioctl_hook = hook;
4a3e2f71 1029 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1030}
1031EXPORT_SYMBOL(vlan_ioctl_set);
1032
6b96018b 1033static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1034 unsigned int cmd, unsigned long arg)
6b96018b
AB
1035{
1036 int err;
1037 void __user *argp = (void __user *)arg;
1038
1039 err = sock->ops->ioctl(sock, cmd, arg);
1040
1041 /*
1042 * If this ioctl is unknown try to hand it down
1043 * to the NIC driver.
1044 */
36fd633e
AV
1045 if (err != -ENOIOCTLCMD)
1046 return err;
6b96018b 1047
36fd633e
AV
1048 if (cmd == SIOCGIFCONF) {
1049 struct ifconf ifc;
1050 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1051 return -EFAULT;
1052 rtnl_lock();
1053 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1054 rtnl_unlock();
1055 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1056 err = -EFAULT;
44c02a2c
AV
1057 } else {
1058 struct ifreq ifr;
1059 bool need_copyout;
63ff03ab 1060 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1061 return -EFAULT;
1062 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1063 if (!err && need_copyout)
63ff03ab 1064 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1065 return -EFAULT;
36fd633e 1066 }
6b96018b
AB
1067 return err;
1068}
1069
1da177e4
LT
1070/*
1071 * With an ioctl, arg may well be a user mode pointer, but we don't know
1072 * what to do with it - that's up to the protocol still.
1073 */
1074
8a3c245c
PT
1075/**
1076 * get_net_ns - increment the refcount of the network namespace
1077 * @ns: common namespace (net)
1078 *
1079 * Returns the net's common namespace.
1080 */
1081
d8d211a2 1082struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1083{
1084 return &get_net(container_of(ns, struct net, ns))->ns;
1085}
d8d211a2 1086EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1087
1da177e4
LT
1088static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1089{
1090 struct socket *sock;
881d966b 1091 struct sock *sk;
1da177e4
LT
1092 void __user *argp = (void __user *)arg;
1093 int pid, err;
881d966b 1094 struct net *net;
1da177e4 1095
b69aee04 1096 sock = file->private_data;
881d966b 1097 sk = sock->sk;
3b1e0a65 1098 net = sock_net(sk);
44c02a2c
AV
1099 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1100 struct ifreq ifr;
1101 bool need_copyout;
1102 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1103 return -EFAULT;
1104 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1105 if (!err && need_copyout)
1106 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1107 return -EFAULT;
1da177e4 1108 } else
3d23e349 1109#ifdef CONFIG_WEXT_CORE
1da177e4 1110 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1111 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1112 } else
3d23e349 1113#endif
89bddce5 1114 switch (cmd) {
1da177e4
LT
1115 case FIOSETOWN:
1116 case SIOCSPGRP:
1117 err = -EFAULT;
1118 if (get_user(pid, (int __user *)argp))
1119 break;
393cc3f5 1120 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1121 break;
1122 case FIOGETOWN:
1123 case SIOCGPGRP:
609d7fa9 1124 err = put_user(f_getown(sock->file),
89bddce5 1125 (int __user *)argp);
1da177e4
LT
1126 break;
1127 case SIOCGIFBR:
1128 case SIOCSIFBR:
1129 case SIOCBRADDBR:
1130 case SIOCBRDELBR:
1131 err = -ENOPKG;
1132 if (!br_ioctl_hook)
1133 request_module("bridge");
1134
4a3e2f71 1135 mutex_lock(&br_ioctl_mutex);
89bddce5 1136 if (br_ioctl_hook)
881d966b 1137 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1138 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1139 break;
1140 case SIOCGIFVLAN:
1141 case SIOCSIFVLAN:
1142 err = -ENOPKG;
1143 if (!vlan_ioctl_hook)
1144 request_module("8021q");
1145
4a3e2f71 1146 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1147 if (vlan_ioctl_hook)
881d966b 1148 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1149 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1150 break;
c62cce2c
AV
1151 case SIOCGSKNS:
1152 err = -EPERM;
1153 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1154 break;
1155
1156 err = open_related_ns(&net->ns, get_net_ns);
1157 break;
0768e170
AB
1158 case SIOCGSTAMP_OLD:
1159 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1160 if (!sock->ops->gettstamp) {
1161 err = -ENOIOCTLCMD;
1162 break;
1163 }
1164 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1165 cmd == SIOCGSTAMP_OLD,
1166 !IS_ENABLED(CONFIG_64BIT));
60747828 1167 break;
0768e170
AB
1168 case SIOCGSTAMP_NEW:
1169 case SIOCGSTAMPNS_NEW:
1170 if (!sock->ops->gettstamp) {
1171 err = -ENOIOCTLCMD;
1172 break;
1173 }
1174 err = sock->ops->gettstamp(sock, argp,
1175 cmd == SIOCGSTAMP_NEW,
1176 false);
c7cbdbf2 1177 break;
1da177e4 1178 default:
63ff03ab 1179 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1180 break;
89bddce5 1181 }
1da177e4
LT
1182 return err;
1183}
1184
8a3c245c
PT
1185/**
1186 * sock_create_lite - creates a socket
1187 * @family: protocol family (AF_INET, ...)
1188 * @type: communication type (SOCK_STREAM, ...)
1189 * @protocol: protocol (0, ...)
1190 * @res: new socket
1191 *
1192 * Creates a new socket and assigns it to @res, passing through LSM.
1193 * The new socket initialization is not complete, see kernel_accept().
1194 * Returns 0 or an error. On failure @res is set to %NULL.
1195 * This function internally uses GFP_KERNEL.
1196 */
1197
1da177e4
LT
1198int sock_create_lite(int family, int type, int protocol, struct socket **res)
1199{
1200 int err;
1201 struct socket *sock = NULL;
89bddce5 1202
1da177e4
LT
1203 err = security_socket_create(family, type, protocol, 1);
1204 if (err)
1205 goto out;
1206
1207 sock = sock_alloc();
1208 if (!sock) {
1209 err = -ENOMEM;
1210 goto out;
1211 }
1212
1da177e4 1213 sock->type = type;
7420ed23
VY
1214 err = security_socket_post_create(sock, family, type, protocol, 1);
1215 if (err)
1216 goto out_release;
1217
1da177e4
LT
1218out:
1219 *res = sock;
1220 return err;
7420ed23
VY
1221out_release:
1222 sock_release(sock);
1223 sock = NULL;
1224 goto out;
1da177e4 1225}
c6d409cf 1226EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1227
1228/* No kernel lock held - perfect */
ade994f4 1229static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1230{
3cafb376 1231 struct socket *sock = file->private_data;
a331de3b 1232 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1233
e88958e6
CH
1234 if (!sock->ops->poll)
1235 return 0;
f641f13b 1236
a331de3b
CH
1237 if (sk_can_busy_loop(sock->sk)) {
1238 /* poll once if requested by the syscall */
1239 if (events & POLL_BUSY_LOOP)
1240 sk_busy_loop(sock->sk, 1);
1241
1242 /* if this socket can poll_ll, tell the system call */
1243 flag = POLL_BUSY_LOOP;
1244 }
1245
1246 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1247}
1248
89bddce5 1249static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1250{
b69aee04 1251 struct socket *sock = file->private_data;
1da177e4
LT
1252
1253 return sock->ops->mmap(file, sock, vma);
1254}
1255
/*
 * ->release() file operation for sockets: release the socket embedded
 * in @inode.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1261
1262/*
1263 * Update the socket async list
1264 *
1265 * Fasync_list locking strategy.
1266 *
1267 * 1. fasync_list is modified only under process context socket lock
1268 * i.e. under semaphore.
1269 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1270 * or under socket lock
1da177e4
LT
1271 */
1272
1273static int sock_fasync(int fd, struct file *filp, int on)
1274{
989a2979
ED
1275 struct socket *sock = filp->private_data;
1276 struct sock *sk = sock->sk;
333f7909 1277 struct socket_wq *wq = &sock->wq;
1da177e4 1278
989a2979 1279 if (sk == NULL)
1da177e4 1280 return -EINVAL;
1da177e4
LT
1281
1282 lock_sock(sk);
eaefd110 1283 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1284
eaefd110 1285 if (!wq->fasync_list)
989a2979
ED
1286 sock_reset_flag(sk, SOCK_FASYNC);
1287 else
bcdce719 1288 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1289
989a2979 1290 release_sock(sk);
1da177e4
LT
1291 return 0;
1292}
1293
ceb5d58b 1294/* This function may be called only under rcu_lock */
1da177e4 1295
ceb5d58b 1296int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1297{
ceb5d58b 1298 if (!wq || !wq->fasync_list)
1da177e4 1299 return -1;
ceb5d58b 1300
89bddce5 1301 switch (how) {
8d8ad9d7 1302 case SOCK_WAKE_WAITD:
ceb5d58b 1303 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1304 break;
1305 goto call_kill;
8d8ad9d7 1306 case SOCK_WAKE_SPACE:
ceb5d58b 1307 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1308 break;
7c7ab580 1309 fallthrough;
8d8ad9d7 1310 case SOCK_WAKE_IO:
89bddce5 1311call_kill:
43815482 1312 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1313 break;
8d8ad9d7 1314 case SOCK_WAKE_URG:
43815482 1315 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1316 }
ceb5d58b 1317
1da177e4
LT
1318 return 0;
1319}
c6d409cf 1320EXPORT_SYMBOL(sock_wake_async);
1da177e4 1321
8a3c245c
PT
1322/**
1323 * __sock_create - creates a socket
1324 * @net: net namespace
1325 * @family: protocol family (AF_INET, ...)
1326 * @type: communication type (SOCK_STREAM, ...)
1327 * @protocol: protocol (0, ...)
1328 * @res: new socket
1329 * @kern: boolean for kernel space sockets
1330 *
1331 * Creates a new socket and assigns it to @res, passing through LSM.
1332 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1333 * be set to true if the socket resides in kernel space.
1334 * This function internally uses GFP_KERNEL.
1335 */
1336
721db93a 1337int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1338 struct socket **res, int kern)
1da177e4
LT
1339{
1340 int err;
1341 struct socket *sock;
55737fda 1342 const struct net_proto_family *pf;
1da177e4
LT
1343
1344 /*
89bddce5 1345 * Check protocol is in range
1da177e4
LT
1346 */
1347 if (family < 0 || family >= NPROTO)
1348 return -EAFNOSUPPORT;
1349 if (type < 0 || type >= SOCK_MAX)
1350 return -EINVAL;
1351
1352 /* Compatibility.
1353
1354 This uglymoron is moved from INET layer to here to avoid
1355 deadlock in module load.
1356 */
1357 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1358 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1359 current->comm);
1da177e4
LT
1360 family = PF_PACKET;
1361 }
1362
1363 err = security_socket_create(family, type, protocol, kern);
1364 if (err)
1365 return err;
89bddce5 1366
55737fda
SH
1367 /*
1368 * Allocate the socket and allow the family to set things up. if
1369 * the protocol is 0, the family is instructed to select an appropriate
1370 * default.
1371 */
1372 sock = sock_alloc();
1373 if (!sock) {
e87cc472 1374 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1375 return -ENFILE; /* Not exactly a match, but its the
1376 closest posix thing */
1377 }
1378
1379 sock->type = type;
1380
95a5afca 1381#ifdef CONFIG_MODULES
89bddce5
SH
1382 /* Attempt to load a protocol module if the find failed.
1383 *
1384 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1385 * requested real, full-featured networking support upon configuration.
1386 * Otherwise module support will break!
1387 */
190683a9 1388 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1389 request_module("net-pf-%d", family);
1da177e4
LT
1390#endif
1391
55737fda
SH
1392 rcu_read_lock();
1393 pf = rcu_dereference(net_families[family]);
1394 err = -EAFNOSUPPORT;
1395 if (!pf)
1396 goto out_release;
1da177e4
LT
1397
1398 /*
1399 * We will call the ->create function, that possibly is in a loadable
1400 * module, so we have to bump that loadable module refcnt first.
1401 */
55737fda 1402 if (!try_module_get(pf->owner))
1da177e4
LT
1403 goto out_release;
1404
55737fda
SH
1405 /* Now protected by module ref count */
1406 rcu_read_unlock();
1407
3f378b68 1408 err = pf->create(net, sock, protocol, kern);
55737fda 1409 if (err < 0)
1da177e4 1410 goto out_module_put;
a79af59e 1411
1da177e4
LT
1412 /*
1413 * Now to bump the refcnt of the [loadable] module that owns this
1414 * socket at sock_release time we decrement its refcnt.
1415 */
55737fda
SH
1416 if (!try_module_get(sock->ops->owner))
1417 goto out_module_busy;
1418
1da177e4
LT
1419 /*
1420 * Now that we're done with the ->create function, the [loadable]
1421 * module can have its refcnt decremented
1422 */
55737fda 1423 module_put(pf->owner);
7420ed23
VY
1424 err = security_socket_post_create(sock, family, type, protocol, kern);
1425 if (err)
3b185525 1426 goto out_sock_release;
55737fda 1427 *res = sock;
1da177e4 1428
55737fda
SH
1429 return 0;
1430
1431out_module_busy:
1432 err = -EAFNOSUPPORT;
1da177e4 1433out_module_put:
55737fda
SH
1434 sock->ops = NULL;
1435 module_put(pf->owner);
1436out_sock_release:
1da177e4 1437 sock_release(sock);
55737fda
SH
1438 return err;
1439
1440out_release:
1441 rcu_read_unlock();
1442 goto out_sock_release;
1da177e4 1443}
721db93a 1444EXPORT_SYMBOL(__sock_create);
1da177e4 1445
8a3c245c
PT
1446/**
1447 * sock_create - creates a socket
1448 * @family: protocol family (AF_INET, ...)
1449 * @type: communication type (SOCK_STREAM, ...)
1450 * @protocol: protocol (0, ...)
1451 * @res: new socket
1452 *
1453 * A wrapper around __sock_create().
1454 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1455 */
1456
1da177e4
LT
1457int sock_create(int family, int type, int protocol, struct socket **res)
1458{
1b8d7ae4 1459 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1460}
c6d409cf 1461EXPORT_SYMBOL(sock_create);
1da177e4 1462
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes 1 for the kern
 *	argument.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1480
9d6a15c3 1481int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1482{
1483 int retval;
1484 struct socket *sock;
a677a039
UD
1485 int flags;
1486
e38b36f3
UD
1487 /* Check the SOCK_* constants for consistency. */
1488 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1489 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1490 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1491 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1492
a677a039 1493 flags = type & ~SOCK_TYPE_MASK;
77d27200 1494 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1495 return -EINVAL;
1496 type &= SOCK_TYPE_MASK;
1da177e4 1497
aaca0bdc
UD
1498 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1499 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1500
1da177e4
LT
1501 retval = sock_create(family, type, protocol, &sock);
1502 if (retval < 0)
8e1611e2 1503 return retval;
1da177e4 1504
8e1611e2 1505 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1506}
1507
9d6a15c3
DB
1508SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1509{
1510 return __sys_socket(family, type, protocol);
1511}
1512
1da177e4
LT
1513/*
1514 * Create a pair of connected sockets.
1515 */
1516
6debc8d8 1517int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1518{
1519 struct socket *sock1, *sock2;
1520 int fd1, fd2, err;
db349509 1521 struct file *newfile1, *newfile2;
a677a039
UD
1522 int flags;
1523
1524 flags = type & ~SOCK_TYPE_MASK;
77d27200 1525 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1526 return -EINVAL;
1527 type &= SOCK_TYPE_MASK;
1da177e4 1528
aaca0bdc
UD
1529 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1530 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1531
016a266b
AV
1532 /*
1533 * reserve descriptors and make sure we won't fail
1534 * to return them to userland.
1535 */
1536 fd1 = get_unused_fd_flags(flags);
1537 if (unlikely(fd1 < 0))
1538 return fd1;
1539
1540 fd2 = get_unused_fd_flags(flags);
1541 if (unlikely(fd2 < 0)) {
1542 put_unused_fd(fd1);
1543 return fd2;
1544 }
1545
1546 err = put_user(fd1, &usockvec[0]);
1547 if (err)
1548 goto out;
1549
1550 err = put_user(fd2, &usockvec[1]);
1551 if (err)
1552 goto out;
1553
1da177e4
LT
1554 /*
1555 * Obtain the first socket and check if the underlying protocol
1556 * supports the socketpair call.
1557 */
1558
1559 err = sock_create(family, type, protocol, &sock1);
016a266b 1560 if (unlikely(err < 0))
1da177e4
LT
1561 goto out;
1562
1563 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1564 if (unlikely(err < 0)) {
1565 sock_release(sock1);
1566 goto out;
bf3c23d1 1567 }
d73aa286 1568
d47cd945
DH
1569 err = security_socket_socketpair(sock1, sock2);
1570 if (unlikely(err)) {
1571 sock_release(sock2);
1572 sock_release(sock1);
1573 goto out;
1574 }
1575
016a266b
AV
1576 err = sock1->ops->socketpair(sock1, sock2);
1577 if (unlikely(err < 0)) {
1578 sock_release(sock2);
1579 sock_release(sock1);
1580 goto out;
28407630
AV
1581 }
1582
aab174f0 1583 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1584 if (IS_ERR(newfile1)) {
28407630 1585 err = PTR_ERR(newfile1);
016a266b
AV
1586 sock_release(sock2);
1587 goto out;
28407630
AV
1588 }
1589
aab174f0 1590 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1591 if (IS_ERR(newfile2)) {
1592 err = PTR_ERR(newfile2);
016a266b
AV
1593 fput(newfile1);
1594 goto out;
db349509
AV
1595 }
1596
157cf649 1597 audit_fd_pair(fd1, fd2);
d73aa286 1598
db349509
AV
1599 fd_install(fd1, newfile1);
1600 fd_install(fd2, newfile2);
d73aa286 1601 return 0;
1da177e4 1602
016a266b 1603out:
d73aa286 1604 put_unused_fd(fd2);
d73aa286 1605 put_unused_fd(fd1);
1da177e4
LT
1606 return err;
1607}
1608
6debc8d8
DB
1609SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1610 int __user *, usockvec)
1611{
1612 return __sys_socketpair(family, type, protocol, usockvec);
1613}
1614
1da177e4
LT
1615/*
1616 * Bind a name to a socket. Nothing much to do here since it's
1617 * the protocol's responsibility to handle the local address.
1618 *
1619 * We move the socket address to kernel space before we call
1620 * the protocol layer (having also checked the address is ok).
1621 */
1622
a87d35d8 1623int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1624{
1625 struct socket *sock;
230b1839 1626 struct sockaddr_storage address;
6cb153ca 1627 int err, fput_needed;
1da177e4 1628
89bddce5 1629 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1630 if (sock) {
43db362d 1631 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1632 if (!err) {
89bddce5 1633 err = security_socket_bind(sock,
230b1839 1634 (struct sockaddr *)&address,
89bddce5 1635 addrlen);
6cb153ca
BL
1636 if (!err)
1637 err = sock->ops->bind(sock,
89bddce5 1638 (struct sockaddr *)
230b1839 1639 &address, addrlen);
1da177e4 1640 }
6cb153ca 1641 fput_light(sock->file, fput_needed);
89bddce5 1642 }
1da177e4
LT
1643 return err;
1644}
1645
a87d35d8
DB
1646SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1647{
1648 return __sys_bind(fd, umyaddr, addrlen);
1649}
1650
1da177e4
LT
1651/*
1652 * Perform a listen. Basically, we allow the protocol to do anything
1653 * necessary for a listen, and if that works, we mark the socket as
1654 * ready for listening.
1655 */
1656
25e290ee 1657int __sys_listen(int fd, int backlog)
1da177e4
LT
1658{
1659 struct socket *sock;
6cb153ca 1660 int err, fput_needed;
b8e1f9b5 1661 int somaxconn;
89bddce5
SH
1662
1663 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1664 if (sock) {
8efa6e93 1665 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1666 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1667 backlog = somaxconn;
1da177e4
LT
1668
1669 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1670 if (!err)
1671 err = sock->ops->listen(sock, backlog);
1da177e4 1672
6cb153ca 1673 fput_light(sock->file, fput_needed);
1da177e4
LT
1674 }
1675 return err;
1676}
1677
25e290ee
DB
1678SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1679{
1680 return __sys_listen(fd, backlog);
1681}
1682
de2ea4b6
JA
1683int __sys_accept4_file(struct file *file, unsigned file_flags,
1684 struct sockaddr __user *upeer_sockaddr,
09952e3e
JA
1685 int __user *upeer_addrlen, int flags,
1686 unsigned long nofile)
1da177e4
LT
1687{
1688 struct socket *sock, *newsock;
39d8c1b6 1689 struct file *newfile;
de2ea4b6 1690 int err, len, newfd;
230b1839 1691 struct sockaddr_storage address;
1da177e4 1692
77d27200 1693 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1694 return -EINVAL;
1695
1696 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1697 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1698
dba4a925
FR
1699 sock = sock_from_file(file);
1700 if (!sock) {
1701 err = -ENOTSOCK;
1da177e4 1702 goto out;
dba4a925 1703 }
1da177e4
LT
1704
1705 err = -ENFILE;
c6d409cf
ED
1706 newsock = sock_alloc();
1707 if (!newsock)
de2ea4b6 1708 goto out;
1da177e4
LT
1709
1710 newsock->type = sock->type;
1711 newsock->ops = sock->ops;
1712
1da177e4
LT
1713 /*
1714 * We don't need try_module_get here, as the listening socket (sock)
1715 * has the protocol module (sock->ops->owner) held.
1716 */
1717 __module_get(newsock->ops->owner);
1718
09952e3e 1719 newfd = __get_unused_fd_flags(flags, nofile);
39d8c1b6
DM
1720 if (unlikely(newfd < 0)) {
1721 err = newfd;
9a1875e6 1722 sock_release(newsock);
de2ea4b6 1723 goto out;
39d8c1b6 1724 }
aab174f0 1725 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1726 if (IS_ERR(newfile)) {
28407630
AV
1727 err = PTR_ERR(newfile);
1728 put_unused_fd(newfd);
de2ea4b6 1729 goto out;
28407630 1730 }
39d8c1b6 1731
a79af59e
FF
1732 err = security_socket_accept(sock, newsock);
1733 if (err)
39d8c1b6 1734 goto out_fd;
a79af59e 1735
de2ea4b6
JA
1736 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1737 false);
1da177e4 1738 if (err < 0)
39d8c1b6 1739 goto out_fd;
1da177e4
LT
1740
1741 if (upeer_sockaddr) {
9b2c45d4
DV
1742 len = newsock->ops->getname(newsock,
1743 (struct sockaddr *)&address, 2);
1744 if (len < 0) {
1da177e4 1745 err = -ECONNABORTED;
39d8c1b6 1746 goto out_fd;
1da177e4 1747 }
43db362d 1748 err = move_addr_to_user(&address,
230b1839 1749 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1750 if (err < 0)
39d8c1b6 1751 goto out_fd;
1da177e4
LT
1752 }
1753
1754 /* File flags are not inherited via accept() unlike another OSes. */
1755
39d8c1b6
DM
1756 fd_install(newfd, newfile);
1757 err = newfd;
1da177e4
LT
1758out:
1759 return err;
39d8c1b6 1760out_fd:
9606a216 1761 fput(newfile);
39d8c1b6 1762 put_unused_fd(newfd);
de2ea4b6
JA
1763 goto out;
1764
1765}
1766
1767/*
1768 * For accept, we attempt to create a new socket, set up the link
1769 * with the client, wake up the client, then return the new
1770 * connected fd. We collect the address of the connector in kernel
1771 * space and move it to user at the very end. This is unclean because
1772 * we open the socket then return an error.
1773 *
1774 * 1003.1g adds the ability to recvmsg() to query connection pending
1775 * status to recvmsg. We need to add that support in a way thats
1776 * clean when we restructure accept also.
1777 */
1778
1779int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1780 int __user *upeer_addrlen, int flags)
1781{
1782 int ret = -EBADF;
1783 struct fd f;
1784
1785 f = fdget(fd);
1786 if (f.file) {
1787 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
09952e3e
JA
1788 upeer_addrlen, flags,
1789 rlimit(RLIMIT_NOFILE));
6b07edeb 1790 fdput(f);
de2ea4b6
JA
1791 }
1792
1793 return ret;
1da177e4
LT
1794}
1795
4541e805
DB
1796SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1797 int __user *, upeer_addrlen, int, flags)
1798{
1799 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1800}
1801
20f37034
HC
1802SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1803 int __user *, upeer_addrlen)
aaca0bdc 1804{
4541e805 1805 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1806}
1807
1da177e4
LT
1808/*
1809 * Attempt to connect to a socket with the server address. The address
1810 * is in user space so we verify it is OK and move it to kernel space.
1811 *
1812 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1813 * break bindings
1814 *
1815 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1816 * other SEQPACKET protocols that take time to connect() as it doesn't
1817 * include the -EINPROGRESS status for such sockets.
1818 */
1819
f499a021 1820int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 1821 int addrlen, int file_flags)
1da177e4
LT
1822{
1823 struct socket *sock;
bd3ded31 1824 int err;
1da177e4 1825
dba4a925
FR
1826 sock = sock_from_file(file);
1827 if (!sock) {
1828 err = -ENOTSOCK;
1da177e4 1829 goto out;
dba4a925 1830 }
1da177e4 1831
89bddce5 1832 err =
f499a021 1833 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 1834 if (err)
bd3ded31 1835 goto out;
1da177e4 1836
f499a021 1837 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
bd3ded31 1838 sock->file->f_flags | file_flags);
1da177e4
LT
1839out:
1840 return err;
1841}
1842
bd3ded31
JA
1843int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1844{
1845 int ret = -EBADF;
1846 struct fd f;
1847
1848 f = fdget(fd);
1849 if (f.file) {
f499a021
JA
1850 struct sockaddr_storage address;
1851
1852 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1853 if (!ret)
1854 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 1855 fdput(f);
bd3ded31
JA
1856 }
1857
1858 return ret;
1859}
1860
1387c2c2
DB
1861SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1862 int, addrlen)
1863{
1864 return __sys_connect(fd, uservaddr, addrlen);
1865}
1866
1da177e4
LT
1867/*
1868 * Get the local address ('name') of a socket object. Move the obtained
1869 * name to user space.
1870 */
1871
8882a107
DB
1872int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1873 int __user *usockaddr_len)
1da177e4
LT
1874{
1875 struct socket *sock;
230b1839 1876 struct sockaddr_storage address;
9b2c45d4 1877 int err, fput_needed;
89bddce5 1878
6cb153ca 1879 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1880 if (!sock)
1881 goto out;
1882
1883 err = security_socket_getsockname(sock);
1884 if (err)
1885 goto out_put;
1886
9b2c45d4
DV
1887 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1888 if (err < 0)
1da177e4 1889 goto out_put;
9b2c45d4
DV
1890 /* "err" is actually length in this case */
1891 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1892
1893out_put:
6cb153ca 1894 fput_light(sock->file, fput_needed);
1da177e4
LT
1895out:
1896 return err;
1897}
1898
8882a107
DB
1899SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1900 int __user *, usockaddr_len)
1901{
1902 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1903}
1904
1da177e4
LT
1905/*
1906 * Get the remote address ('name') of a socket object. Move the obtained
1907 * name to user space.
1908 */
1909
b21c8f83
DB
1910int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1911 int __user *usockaddr_len)
1da177e4
LT
1912{
1913 struct socket *sock;
230b1839 1914 struct sockaddr_storage address;
9b2c45d4 1915 int err, fput_needed;
1da177e4 1916
89bddce5
SH
1917 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1918 if (sock != NULL) {
1da177e4
LT
1919 err = security_socket_getpeername(sock);
1920 if (err) {
6cb153ca 1921 fput_light(sock->file, fput_needed);
1da177e4
LT
1922 return err;
1923 }
1924
9b2c45d4
DV
1925 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1926 if (err >= 0)
1927 /* "err" is actually length in this case */
1928 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1929 usockaddr_len);
6cb153ca 1930 fput_light(sock->file, fput_needed);
1da177e4
LT
1931 }
1932 return err;
1933}
1934
b21c8f83
DB
1935SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1936 int __user *, usockaddr_len)
1937{
1938 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1939}
1940
1da177e4
LT
1941/*
1942 * Send a datagram to a given address. We move the address into kernel
1943 * space and check the user space data area is readable before invoking
1944 * the protocol.
1945 */
211b634b
DB
1946int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1947 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1948{
1949 struct socket *sock;
230b1839 1950 struct sockaddr_storage address;
1da177e4
LT
1951 int err;
1952 struct msghdr msg;
1953 struct iovec iov;
6cb153ca 1954 int fput_needed;
6cb153ca 1955
602bd0e9
AV
1956 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1957 if (unlikely(err))
1958 return err;
de0fa95c
PE
1959 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1960 if (!sock)
4387ff75 1961 goto out;
6cb153ca 1962
89bddce5 1963 msg.msg_name = NULL;
89bddce5
SH
1964 msg.msg_control = NULL;
1965 msg.msg_controllen = 0;
1966 msg.msg_namelen = 0;
6cb153ca 1967 if (addr) {
43db362d 1968 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1969 if (err < 0)
1970 goto out_put;
230b1839 1971 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1972 msg.msg_namelen = addr_len;
1da177e4
LT
1973 }
1974 if (sock->file->f_flags & O_NONBLOCK)
1975 flags |= MSG_DONTWAIT;
1976 msg.msg_flags = flags;
d8725c86 1977 err = sock_sendmsg(sock, &msg);
1da177e4 1978
89bddce5 1979out_put:
de0fa95c 1980 fput_light(sock->file, fput_needed);
4387ff75 1981out:
1da177e4
LT
1982 return err;
1983}
1984
211b634b
DB
1985SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1986 unsigned int, flags, struct sockaddr __user *, addr,
1987 int, addr_len)
1988{
1989 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1990}
1991
1da177e4 1992/*
89bddce5 1993 * Send a datagram down a socket.
1da177e4
LT
1994 */
1995
3e0fa65f 1996SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1997 unsigned int, flags)
1da177e4 1998{
211b634b 1999 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2000}
2001
2002/*
89bddce5 2003 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2004 * sender. We verify the buffers are writable and if needed move the
2005 * sender address from kernel to user space.
2006 */
7a09e1eb
DB
2007int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2008 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2009{
2010 struct socket *sock;
2011 struct iovec iov;
2012 struct msghdr msg;
230b1839 2013 struct sockaddr_storage address;
89bddce5 2014 int err, err2;
6cb153ca
BL
2015 int fput_needed;
2016
602bd0e9
AV
2017 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2018 if (unlikely(err))
2019 return err;
de0fa95c 2020 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2021 if (!sock)
de0fa95c 2022 goto out;
1da177e4 2023
89bddce5
SH
2024 msg.msg_control = NULL;
2025 msg.msg_controllen = 0;
f3d33426
HFS
2026 /* Save some cycles and don't copy the address if not needed */
2027 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2028 /* We assume all kernel code knows the size of sockaddr_storage */
2029 msg.msg_namelen = 0;
130ed5d1 2030 msg.msg_iocb = NULL;
9f138fa6 2031 msg.msg_flags = 0;
1da177e4
LT
2032 if (sock->file->f_flags & O_NONBLOCK)
2033 flags |= MSG_DONTWAIT;
2da62906 2034 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2035
89bddce5 2036 if (err >= 0 && addr != NULL) {
43db362d 2037 err2 = move_addr_to_user(&address,
230b1839 2038 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2039 if (err2 < 0)
2040 err = err2;
1da177e4 2041 }
de0fa95c
PE
2042
2043 fput_light(sock->file, fput_needed);
4387ff75 2044out:
1da177e4
LT
2045 return err;
2046}
2047
7a09e1eb
DB
2048SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2049 unsigned int, flags, struct sockaddr __user *, addr,
2050 int __user *, addr_len)
2051{
2052 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2053}
2054
1da177e4 2055/*
89bddce5 2056 * Receive a datagram from a socket.
1da177e4
LT
2057 */
2058
b7c0ddf5
JG
2059SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2060 unsigned int, flags)
1da177e4 2061{
7a09e1eb 2062 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2063}
2064
83f0c10b
FW
2065static bool sock_use_custom_sol_socket(const struct socket *sock)
2066{
2067 const struct sock *sk = sock->sk;
2068
2069 /* Use sock->ops->setsockopt() for MPTCP */
2070 return IS_ENABLED(CONFIG_MPTCP) &&
2071 sk->sk_protocol == IPPROTO_MPTCP &&
2072 sk->sk_type == SOCK_STREAM &&
2073 (sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
2074}
2075
1da177e4
LT
2076/*
2077 * Set a socket option. Because we don't know the option lengths we have
2078 * to pass the user mode parameter for the protocols to sort out.
2079 */
a7b75c5a 2080int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
55db9c0e 2081 int optlen)
1da177e4 2082{
519a8a6c 2083 sockptr_t optval = USER_SOCKPTR(user_optval);
0d01da6a 2084 char *kernel_optval = NULL;
6cb153ca 2085 int err, fput_needed;
1da177e4
LT
2086 struct socket *sock;
2087
2088 if (optlen < 0)
2089 return -EINVAL;
89bddce5
SH
2090
2091 sock = sockfd_lookup_light(fd, &err, &fput_needed);
4a367299
CH
2092 if (!sock)
2093 return err;
1da177e4 2094
4a367299
CH
2095 err = security_socket_setsockopt(sock, level, optname);
2096 if (err)
2097 goto out_put;
0d01da6a 2098
55db9c0e
CH
2099 if (!in_compat_syscall())
2100 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
a7b75c5a 2101 user_optval, &optlen,
55db9c0e 2102 &kernel_optval);
4a367299
CH
2103 if (err < 0)
2104 goto out_put;
2105 if (err > 0) {
2106 err = 0;
2107 goto out_put;
2108 }
0d01da6a 2109
a7b75c5a
CH
2110 if (kernel_optval)
2111 optval = KERNEL_SOCKPTR(kernel_optval);
4a367299 2112 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2113 err = sock_setsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2114 else if (unlikely(!sock->ops->setsockopt))
2115 err = -EOPNOTSUPP;
4a367299
CH
2116 else
2117 err = sock->ops->setsockopt(sock, level, optname, optval,
89bddce5 2118 optlen);
a7b75c5a 2119 kfree(kernel_optval);
4a367299
CH
2120out_put:
2121 fput_light(sock->file, fput_needed);
1da177e4
LT
2122 return err;
2123}
2124
cc36dca0
DB
2125SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2126 char __user *, optval, int, optlen)
2127{
2128 return __sys_setsockopt(fd, level, optname, optval, optlen);
2129}
2130
9cacf81f
SF
2131INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2132 int optname));
2133
1da177e4
LT
2134/*
2135 * Get a socket option. Because we don't know the option lengths we have
2136 * to pass a user mode parameter for the protocols to sort out.
2137 */
55db9c0e
CH
2138int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2139 int __user *optlen)
1da177e4 2140{
6cb153ca 2141 int err, fput_needed;
1da177e4 2142 struct socket *sock;
0d01da6a 2143 int max_optlen;
1da177e4 2144
89bddce5 2145 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2146 if (!sock)
2147 return err;
2148
2149 err = security_socket_getsockopt(sock, level, optname);
2150 if (err)
2151 goto out_put;
1da177e4 2152
55db9c0e
CH
2153 if (!in_compat_syscall())
2154 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2155
d8a9b38f
CH
2156 if (level == SOL_SOCKET)
2157 err = sock_getsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2158 else if (unlikely(!sock->ops->getsockopt))
2159 err = -EOPNOTSUPP;
d8a9b38f
CH
2160 else
2161 err = sock->ops->getsockopt(sock, level, optname, optval,
89bddce5 2162 optlen);
0d01da6a 2163
55db9c0e
CH
2164 if (!in_compat_syscall())
2165 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2166 optval, optlen, max_optlen,
2167 err);
6cb153ca 2168out_put:
d8a9b38f 2169 fput_light(sock->file, fput_needed);
1da177e4
LT
2170 return err;
2171}
2172
13a2d70e
DB
2173SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2174 char __user *, optval, int __user *, optlen)
2175{
2176 return __sys_getsockopt(fd, level, optname, optval, optlen);
2177}
2178
1da177e4
LT
2179/*
2180 * Shutdown a socket.
2181 */
2182
b713c195
JA
2183int __sys_shutdown_sock(struct socket *sock, int how)
2184{
2185 int err;
2186
2187 err = security_socket_shutdown(sock, how);
2188 if (!err)
2189 err = sock->ops->shutdown(sock, how);
2190
2191 return err;
2192}
2193
005a1aea 2194int __sys_shutdown(int fd, int how)
1da177e4 2195{
6cb153ca 2196 int err, fput_needed;
1da177e4
LT
2197 struct socket *sock;
2198
89bddce5
SH
2199 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2200 if (sock != NULL) {
b713c195 2201 err = __sys_shutdown_sock(sock, how);
6cb153ca 2202 fput_light(sock->file, fput_needed);
1da177e4
LT
2203 }
2204 return err;
2205}
2206
005a1aea
DB
2207SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2208{
2209 return __sys_shutdown(fd, how);
2210}
2211
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * They expand in the caller's scope: a variable named "flags" and a
 * "<msg>_compat" pointer alongside "<msg>" must be visible at the point
 * of use.
 */
#define COMPAT_MSG(msg, member)						\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member		\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2218
c71d8ebe
TH
2219struct used_address {
2220 struct sockaddr_storage name;
2221 unsigned int name_len;
2222};
2223
0a384abf
JA
2224int __copy_msghdr_from_user(struct msghdr *kmsg,
2225 struct user_msghdr __user *umsg,
2226 struct sockaddr __user **save_addr,
2227 struct iovec __user **uiov, size_t *nsegs)
1661bf36 2228{
ffb07550 2229 struct user_msghdr msg;
08adb7da
AV
2230 ssize_t err;
2231
ffb07550 2232 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2233 return -EFAULT;
dbb490b9 2234
1f466e1f
CH
2235 kmsg->msg_control_is_user = true;
2236 kmsg->msg_control_user = msg.msg_control;
ffb07550
AV
2237 kmsg->msg_controllen = msg.msg_controllen;
2238 kmsg->msg_flags = msg.msg_flags;
2239
2240 kmsg->msg_namelen = msg.msg_namelen;
2241 if (!msg.msg_name)
6a2a2b3a
AS
2242 kmsg->msg_namelen = 0;
2243
dbb490b9
ML
2244 if (kmsg->msg_namelen < 0)
2245 return -EINVAL;
2246
1661bf36 2247 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2248 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2249
2250 if (save_addr)
ffb07550 2251 *save_addr = msg.msg_name;
08adb7da 2252
ffb07550 2253 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2254 if (!save_addr) {
864d9664
PA
2255 err = move_addr_to_kernel(msg.msg_name,
2256 kmsg->msg_namelen,
08adb7da
AV
2257 kmsg->msg_name);
2258 if (err < 0)
2259 return err;
2260 }
2261 } else {
2262 kmsg->msg_name = NULL;
2263 kmsg->msg_namelen = 0;
2264 }
2265
ffb07550 2266 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2267 return -EMSGSIZE;
2268
0345f931 2269 kmsg->msg_iocb = NULL;
0a384abf
JA
2270 *uiov = msg.msg_iov;
2271 *nsegs = msg.msg_iovlen;
2272 return 0;
2273}
2274
2275static int copy_msghdr_from_user(struct msghdr *kmsg,
2276 struct user_msghdr __user *umsg,
2277 struct sockaddr __user **save_addr,
2278 struct iovec **iov)
2279{
2280 struct user_msghdr msg;
2281 ssize_t err;
2282
2283 err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
2284 &msg.msg_iovlen);
2285 if (err)
2286 return err;
0345f931 2287
87e5e6da 2288 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2289 msg.msg_iov, msg.msg_iovlen,
da184284 2290 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2291 return err < 0 ? err : 0;
1661bf36
DC
2292}
2293
4257c8ca
JA
2294static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2295 unsigned int flags, struct used_address *used_address,
2296 unsigned int allowed_msghdr_flags)
1da177e4 2297{
b9d717a7 2298 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2299 __aligned(sizeof(__kernel_size_t));
89bddce5 2300 /* 20 is size of ipv6_pktinfo */
1da177e4 2301 unsigned char *ctl_buf = ctl;
d8725c86 2302 int ctl_len;
08adb7da 2303 ssize_t err;
89bddce5 2304
1da177e4
LT
2305 err = -ENOBUFS;
2306
228e548e 2307 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2308 goto out;
28a94d8f 2309 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2310 ctl_len = msg_sys->msg_controllen;
1da177e4 2311 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2312 err =
228e548e 2313 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2314 sizeof(ctl));
1da177e4 2315 if (err)
4257c8ca 2316 goto out;
228e548e
AB
2317 ctl_buf = msg_sys->msg_control;
2318 ctl_len = msg_sys->msg_controllen;
1da177e4 2319 } else if (ctl_len) {
ac4340fc
DM
2320 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2321 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2322 if (ctl_len > sizeof(ctl)) {
1da177e4 2323 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2324 if (ctl_buf == NULL)
4257c8ca 2325 goto out;
1da177e4
LT
2326 }
2327 err = -EFAULT;
1f466e1f 2328 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2329 goto out_freectl;
228e548e 2330 msg_sys->msg_control = ctl_buf;
1f466e1f 2331 msg_sys->msg_control_is_user = false;
1da177e4 2332 }
228e548e 2333 msg_sys->msg_flags = flags;
1da177e4
LT
2334
2335 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2336 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2337 /*
2338 * If this is sendmmsg() and current destination address is same as
2339 * previously succeeded address, omit asking LSM's decision.
2340 * used_address->name_len is initialized to UINT_MAX so that the first
2341 * destination address never matches.
2342 */
bc909d9d
MD
2343 if (used_address && msg_sys->msg_name &&
2344 used_address->name_len == msg_sys->msg_namelen &&
2345 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2346 used_address->name_len)) {
d8725c86 2347 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2348 goto out_freectl;
2349 }
d8725c86 2350 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2351 /*
2352 * If this is sendmmsg() and sending to current destination address was
2353 * successful, remember it.
2354 */
2355 if (used_address && err >= 0) {
2356 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2357 if (msg_sys->msg_name)
2358 memcpy(&used_address->name, msg_sys->msg_name,
2359 used_address->name_len);
c71d8ebe 2360 }
1da177e4
LT
2361
2362out_freectl:
89bddce5 2363 if (ctl_buf != ctl)
1da177e4 2364 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2365out:
2366 return err;
2367}
2368
03b1230c
JA
2369int sendmsg_copy_msghdr(struct msghdr *msg,
2370 struct user_msghdr __user *umsg, unsigned flags,
2371 struct iovec **iov)
4257c8ca
JA
2372{
2373 int err;
2374
2375 if (flags & MSG_CMSG_COMPAT) {
2376 struct compat_msghdr __user *msg_compat;
2377
2378 msg_compat = (struct compat_msghdr __user *) umsg;
2379 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2380 } else {
2381 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2382 }
2383 if (err < 0)
2384 return err;
2385
2386 return 0;
2387}
2388
2389static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2390 struct msghdr *msg_sys, unsigned int flags,
2391 struct used_address *used_address,
2392 unsigned int allowed_msghdr_flags)
2393{
2394 struct sockaddr_storage address;
2395 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2396 ssize_t err;
2397
2398 msg_sys->msg_name = &address;
2399
2400 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2401 if (err < 0)
2402 return err;
2403
2404 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2405 allowed_msghdr_flags);
da184284 2406 kfree(iov);
228e548e
AB
2407 return err;
2408}
2409
2410/*
2411 * BSD sendmsg interface
2412 */
03b1230c 2413long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2414 unsigned int flags)
2415{
d69e0779 2416 /* disallow ancillary data requests from this path */
03b1230c
JA
2417 if (msg->msg_control || msg->msg_controllen)
2418 return -EINVAL;
d69e0779 2419
03b1230c 2420 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2421}
228e548e 2422
e1834a32
DB
2423long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2424 bool forbid_cmsg_compat)
228e548e
AB
2425{
2426 int fput_needed, err;
2427 struct msghdr msg_sys;
1be374a0
AL
2428 struct socket *sock;
2429
e1834a32
DB
2430 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2431 return -EINVAL;
2432
1be374a0 2433 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2434 if (!sock)
2435 goto out;
2436
28a94d8f 2437 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2438
6cb153ca 2439 fput_light(sock->file, fput_needed);
89bddce5 2440out:
1da177e4
LT
2441 return err;
2442}
2443
666547ff 2444SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2445{
e1834a32 2446 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2447}
2448
228e548e
AB
2449/*
2450 * Linux sendmmsg interface
2451 */
2452
2453int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2454 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2455{
2456 int fput_needed, err, datagrams;
2457 struct socket *sock;
2458 struct mmsghdr __user *entry;
2459 struct compat_mmsghdr __user *compat_entry;
2460 struct msghdr msg_sys;
c71d8ebe 2461 struct used_address used_address;
f092276d 2462 unsigned int oflags = flags;
228e548e 2463
e1834a32
DB
2464 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2465 return -EINVAL;
2466
98382f41
AB
2467 if (vlen > UIO_MAXIOV)
2468 vlen = UIO_MAXIOV;
228e548e
AB
2469
2470 datagrams = 0;
2471
2472 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2473 if (!sock)
2474 return err;
2475
c71d8ebe 2476 used_address.name_len = UINT_MAX;
228e548e
AB
2477 entry = mmsg;
2478 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2479 err = 0;
f092276d 2480 flags |= MSG_BATCH;
228e548e
AB
2481
2482 while (datagrams < vlen) {
f092276d
TH
2483 if (datagrams == vlen - 1)
2484 flags = oflags;
2485
228e548e 2486 if (MSG_CMSG_COMPAT & flags) {
666547ff 2487 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2488 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2489 if (err < 0)
2490 break;
2491 err = __put_user(err, &compat_entry->msg_len);
2492 ++compat_entry;
2493 } else {
a7526eb5 2494 err = ___sys_sendmsg(sock,
666547ff 2495 (struct user_msghdr __user *)entry,
28a94d8f 2496 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2497 if (err < 0)
2498 break;
2499 err = put_user(err, &entry->msg_len);
2500 ++entry;
2501 }
2502
2503 if (err)
2504 break;
2505 ++datagrams;
3023898b
SHY
2506 if (msg_data_left(&msg_sys))
2507 break;
a78cb84c 2508 cond_resched();
228e548e
AB
2509 }
2510
228e548e
AB
2511 fput_light(sock->file, fput_needed);
2512
728ffb86
AB
2513 /* We only return an error if no datagrams were able to be sent */
2514 if (datagrams != 0)
228e548e
AB
2515 return datagrams;
2516
228e548e
AB
2517 return err;
2518}
2519
2520SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2521 unsigned int, vlen, unsigned int, flags)
2522{
e1834a32 2523 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2524}
2525
03b1230c
JA
2526int recvmsg_copy_msghdr(struct msghdr *msg,
2527 struct user_msghdr __user *umsg, unsigned flags,
2528 struct sockaddr __user **uaddr,
2529 struct iovec **iov)
1da177e4 2530{
08adb7da 2531 ssize_t err;
1da177e4 2532
4257c8ca
JA
2533 if (MSG_CMSG_COMPAT & flags) {
2534 struct compat_msghdr __user *msg_compat;
1da177e4 2535
4257c8ca
JA
2536 msg_compat = (struct compat_msghdr __user *) umsg;
2537 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2538 } else {
2539 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2540 }
1da177e4 2541 if (err < 0)
da184284 2542 return err;
1da177e4 2543
4257c8ca
JA
2544 return 0;
2545}
2546
2547static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2548 struct user_msghdr __user *msg,
2549 struct sockaddr __user *uaddr,
2550 unsigned int flags, int nosec)
2551{
2552 struct compat_msghdr __user *msg_compat =
2553 (struct compat_msghdr __user *) msg;
2554 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2555 struct sockaddr_storage addr;
2556 unsigned long cmsg_ptr;
2557 int len;
2558 ssize_t err;
2559
2560 msg_sys->msg_name = &addr;
a2e27255
ACM
2561 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2562 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2563
f3d33426
HFS
2564 /* We assume all kernel code knows the size of sockaddr_storage */
2565 msg_sys->msg_namelen = 0;
2566
1da177e4
LT
2567 if (sock->file->f_flags & O_NONBLOCK)
2568 flags |= MSG_DONTWAIT;
1af66221
ED
2569
2570 if (unlikely(nosec))
2571 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2572 else
2573 err = sock_recvmsg(sock, msg_sys, flags);
2574
1da177e4 2575 if (err < 0)
4257c8ca 2576 goto out;
1da177e4
LT
2577 len = err;
2578
2579 if (uaddr != NULL) {
43db362d 2580 err = move_addr_to_user(&addr,
a2e27255 2581 msg_sys->msg_namelen, uaddr,
89bddce5 2582 uaddr_len);
1da177e4 2583 if (err < 0)
4257c8ca 2584 goto out;
1da177e4 2585 }
a2e27255 2586 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2587 COMPAT_FLAGS(msg));
1da177e4 2588 if (err)
4257c8ca 2589 goto out;
1da177e4 2590 if (MSG_CMSG_COMPAT & flags)
a2e27255 2591 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2592 &msg_compat->msg_controllen);
2593 else
a2e27255 2594 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2595 &msg->msg_controllen);
2596 if (err)
4257c8ca 2597 goto out;
1da177e4 2598 err = len;
4257c8ca
JA
2599out:
2600 return err;
2601}
2602
2603static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2604 struct msghdr *msg_sys, unsigned int flags, int nosec)
2605{
2606 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2607 /* user mode address pointers */
2608 struct sockaddr __user *uaddr;
2609 ssize_t err;
2610
2611 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2612 if (err < 0)
2613 return err;
1da177e4 2614
4257c8ca 2615 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2616 kfree(iov);
a2e27255
ACM
2617 return err;
2618}
2619
2620/*
2621 * BSD recvmsg interface
2622 */
2623
03b1230c
JA
2624long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2625 struct user_msghdr __user *umsg,
2626 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2627{
583bbf06
LH
2628 if (msg->msg_control || msg->msg_controllen) {
2629 /* disallow ancillary data reqs unless cmsg is plain data */
2630 if (!(sock->ops->flags & PROTO_CMSG_DATA_ONLY))
2631 return -EINVAL;
2632 }
aa1fa28f 2633
03b1230c 2634 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2635}
2636
e1834a32
DB
2637long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2638 bool forbid_cmsg_compat)
a2e27255
ACM
2639{
2640 int fput_needed, err;
2641 struct msghdr msg_sys;
1be374a0
AL
2642 struct socket *sock;
2643
e1834a32
DB
2644 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2645 return -EINVAL;
2646
1be374a0 2647 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2648 if (!sock)
2649 goto out;
2650
a7526eb5 2651 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2652
6cb153ca 2653 fput_light(sock->file, fput_needed);
1da177e4
LT
2654out:
2655 return err;
2656}
2657
666547ff 2658SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2659 unsigned int, flags)
2660{
e1834a32 2661 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2662}
2663
a2e27255
ACM
2664/*
2665 * Linux recvmmsg interface
2666 */
2667
e11d4284
AB
2668static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2669 unsigned int vlen, unsigned int flags,
2670 struct timespec64 *timeout)
a2e27255
ACM
2671{
2672 int fput_needed, err, datagrams;
2673 struct socket *sock;
2674 struct mmsghdr __user *entry;
d7256d0e 2675 struct compat_mmsghdr __user *compat_entry;
a2e27255 2676 struct msghdr msg_sys;
766b9f92
DD
2677 struct timespec64 end_time;
2678 struct timespec64 timeout64;
a2e27255
ACM
2679
2680 if (timeout &&
2681 poll_select_set_timeout(&end_time, timeout->tv_sec,
2682 timeout->tv_nsec))
2683 return -EINVAL;
2684
2685 datagrams = 0;
2686
2687 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2688 if (!sock)
2689 return err;
2690
7797dc41
SHY
2691 if (likely(!(flags & MSG_ERRQUEUE))) {
2692 err = sock_error(sock->sk);
2693 if (err) {
2694 datagrams = err;
2695 goto out_put;
2696 }
e623a9e9 2697 }
a2e27255
ACM
2698
2699 entry = mmsg;
d7256d0e 2700 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2701
2702 while (datagrams < vlen) {
2703 /*
2704 * No need to ask LSM for more than the first datagram.
2705 */
d7256d0e 2706 if (MSG_CMSG_COMPAT & flags) {
666547ff 2707 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2708 &msg_sys, flags & ~MSG_WAITFORONE,
2709 datagrams);
d7256d0e
JMG
2710 if (err < 0)
2711 break;
2712 err = __put_user(err, &compat_entry->msg_len);
2713 ++compat_entry;
2714 } else {
a7526eb5 2715 err = ___sys_recvmsg(sock,
666547ff 2716 (struct user_msghdr __user *)entry,
a7526eb5
AL
2717 &msg_sys, flags & ~MSG_WAITFORONE,
2718 datagrams);
d7256d0e
JMG
2719 if (err < 0)
2720 break;
2721 err = put_user(err, &entry->msg_len);
2722 ++entry;
2723 }
2724
a2e27255
ACM
2725 if (err)
2726 break;
a2e27255
ACM
2727 ++datagrams;
2728
71c5c159
BB
2729 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2730 if (flags & MSG_WAITFORONE)
2731 flags |= MSG_DONTWAIT;
2732
a2e27255 2733 if (timeout) {
766b9f92 2734 ktime_get_ts64(&timeout64);
c2e6c856 2735 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2736 if (timeout->tv_sec < 0) {
2737 timeout->tv_sec = timeout->tv_nsec = 0;
2738 break;
2739 }
2740
2741 /* Timeout, return less than vlen datagrams */
2742 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2743 break;
2744 }
2745
2746 /* Out of band data, return right away */
2747 if (msg_sys.msg_flags & MSG_OOB)
2748 break;
a78cb84c 2749 cond_resched();
a2e27255
ACM
2750 }
2751
a2e27255 2752 if (err == 0)
34b88a68
ACM
2753 goto out_put;
2754
2755 if (datagrams == 0) {
2756 datagrams = err;
2757 goto out_put;
2758 }
a2e27255 2759
34b88a68
ACM
2760 /*
2761 * We may return less entries than requested (vlen) if the
2762 * sock is non block and there aren't enough datagrams...
2763 */
2764 if (err != -EAGAIN) {
a2e27255 2765 /*
34b88a68
ACM
2766 * ... or if recvmsg returns an error after we
2767 * received some datagrams, where we record the
2768 * error to return on the next call or if the
2769 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2770 */
34b88a68 2771 sock->sk->sk_err = -err;
a2e27255 2772 }
34b88a68
ACM
2773out_put:
2774 fput_light(sock->file, fput_needed);
a2e27255 2775
34b88a68 2776 return datagrams;
a2e27255
ACM
2777}
2778
e11d4284
AB
2779int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2780 unsigned int vlen, unsigned int flags,
2781 struct __kernel_timespec __user *timeout,
2782 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2783{
2784 int datagrams;
c2e6c856 2785 struct timespec64 timeout_sys;
a2e27255 2786
e11d4284
AB
2787 if (timeout && get_timespec64(&timeout_sys, timeout))
2788 return -EFAULT;
a2e27255 2789
e11d4284 2790 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2791 return -EFAULT;
2792
e11d4284
AB
2793 if (!timeout && !timeout32)
2794 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2795
2796 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2797
e11d4284
AB
2798 if (datagrams <= 0)
2799 return datagrams;
2800
2801 if (timeout && put_timespec64(&timeout_sys, timeout))
2802 datagrams = -EFAULT;
2803
2804 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2805 datagrams = -EFAULT;
2806
2807 return datagrams;
2808}
2809
1255e269
DB
2810SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2811 unsigned int, vlen, unsigned int, flags,
c2e6c856 2812 struct __kernel_timespec __user *, timeout)
1255e269 2813{
e11d4284
AB
2814 if (flags & MSG_CMSG_COMPAT)
2815 return -EINVAL;
2816
2817 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2818}
2819
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg(2) variant taking an old_timespec32 timeout.
 * MSG_CMSG_COMPAT in @flags is rejected with -EINVAL.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (!(flags & MSG_CMSG_COMPAT))
		return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);

	return -EINVAL;
}
#endif
1255e269 2831
a2e27255 2832#ifdef __ARCH_WANT_SYS_SOCKETCALL
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call
 * number. AL(n) is the size of n unsigned long arguments.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	/*  0      */ AL(0),
	/*  1 -  5 */ AL(3), AL(3), AL(3), AL(2), AL(3),
	/*  6 - 10 */ AL(3), AL(3), AL(4), AL(4), AL(4),
	/* 11 - 15 */ AL(6), AL(6), AL(2), AL(5), AL(5),
	/* 16 - 20 */ AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2843
2844/*
89bddce5 2845 * System call vectors.
1da177e4
LT
2846 *
2847 * Argument checking cleaned up. Saved 20% in size.
2848 * This function doesn't need to set the kernel lock because
89bddce5 2849 * it is set by the callees.
1da177e4
LT
2850 */
2851
3e0fa65f 2852SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2853{
2950fa9d 2854 unsigned long a[AUDITSC_ARGS];
89bddce5 2855 unsigned long a0, a1;
1da177e4 2856 int err;
47379052 2857 unsigned int len;
1da177e4 2858
228e548e 2859 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2860 return -EINVAL;
c8e8cd57 2861 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2862
47379052
AV
2863 len = nargs[call];
2864 if (len > sizeof(a))
2865 return -EINVAL;
2866
1da177e4 2867 /* copy_from_user should be SMP safe. */
47379052 2868 if (copy_from_user(a, args, len))
1da177e4 2869 return -EFAULT;
3ec3b2fb 2870
2950fa9d
CG
2871 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2872 if (err)
2873 return err;
3ec3b2fb 2874
89bddce5
SH
2875 a0 = a[0];
2876 a1 = a[1];
2877
2878 switch (call) {
2879 case SYS_SOCKET:
9d6a15c3 2880 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2881 break;
2882 case SYS_BIND:
a87d35d8 2883 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2884 break;
2885 case SYS_CONNECT:
1387c2c2 2886 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2887 break;
2888 case SYS_LISTEN:
25e290ee 2889 err = __sys_listen(a0, a1);
89bddce5
SH
2890 break;
2891 case SYS_ACCEPT:
4541e805
DB
2892 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2893 (int __user *)a[2], 0);
89bddce5
SH
2894 break;
2895 case SYS_GETSOCKNAME:
2896 err =
8882a107
DB
2897 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2898 (int __user *)a[2]);
89bddce5
SH
2899 break;
2900 case SYS_GETPEERNAME:
2901 err =
b21c8f83
DB
2902 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2903 (int __user *)a[2]);
89bddce5
SH
2904 break;
2905 case SYS_SOCKETPAIR:
6debc8d8 2906 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2907 break;
2908 case SYS_SEND:
f3bf896b
DB
2909 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2910 NULL, 0);
89bddce5
SH
2911 break;
2912 case SYS_SENDTO:
211b634b
DB
2913 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2914 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2915 break;
2916 case SYS_RECV:
d27e9afc
DB
2917 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2918 NULL, NULL);
89bddce5
SH
2919 break;
2920 case SYS_RECVFROM:
7a09e1eb
DB
2921 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2922 (struct sockaddr __user *)a[4],
2923 (int __user *)a[5]);
89bddce5
SH
2924 break;
2925 case SYS_SHUTDOWN:
005a1aea 2926 err = __sys_shutdown(a0, a1);
89bddce5
SH
2927 break;
2928 case SYS_SETSOCKOPT:
cc36dca0
DB
2929 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2930 a[4]);
89bddce5
SH
2931 break;
2932 case SYS_GETSOCKOPT:
2933 err =
13a2d70e
DB
2934 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2935 (int __user *)a[4]);
89bddce5
SH
2936 break;
2937 case SYS_SENDMSG:
e1834a32
DB
2938 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2939 a[2], true);
89bddce5 2940 break;
228e548e 2941 case SYS_SENDMMSG:
e1834a32
DB
2942 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2943 a[3], true);
228e548e 2944 break;
89bddce5 2945 case SYS_RECVMSG:
e1834a32
DB
2946 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2947 a[2], true);
89bddce5 2948 break;
a2e27255 2949 case SYS_RECVMMSG:
3ca47e95 2950 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
2951 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2952 a[2], a[3],
2953 (struct __kernel_timespec __user *)a[4],
2954 NULL);
2955 else
2956 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2957 a[2], a[3], NULL,
2958 (struct old_timespec32 __user *)a[4]);
a2e27255 2959 break;
de11defe 2960 case SYS_ACCEPT4:
4541e805
DB
2961 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2962 (int __user *)a[2], a[3]);
aaca0bdc 2963 break;
89bddce5
SH
2964 default:
2965 err = -EINVAL;
2966 break;
1da177e4
LT
2967 }
2968 return err;
2969}
2970
89bddce5 2971#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2972
55737fda
SH
2973/**
2974 * sock_register - add a socket protocol handler
2975 * @ops: description of protocol
2976 *
1da177e4
LT
2977 * This function is called by a protocol handler that wants to
2978 * advertise its address family, and have it linked into the
e793c0f7 2979 * socket interface. The value ops->family corresponds to the
55737fda 2980 * socket system call protocol family.
1da177e4 2981 */
f0fd27d4 2982int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2983{
2984 int err;
2985
2986 if (ops->family >= NPROTO) {
3410f22e 2987 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2988 return -ENOBUFS;
2989 }
55737fda
SH
2990
2991 spin_lock(&net_family_lock);
190683a9
ED
2992 if (rcu_dereference_protected(net_families[ops->family],
2993 lockdep_is_held(&net_family_lock)))
55737fda
SH
2994 err = -EEXIST;
2995 else {
cf778b00 2996 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2997 err = 0;
2998 }
55737fda
SH
2999 spin_unlock(&net_family_lock);
3000
3410f22e 3001 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
3002 return err;
3003}
c6d409cf 3004EXPORT_SYMBOL(sock_register);
1da177e4 3005
55737fda
SH
3006/**
3007 * sock_unregister - remove a protocol handler
3008 * @family: protocol family to remove
3009 *
1da177e4
LT
3010 * This function is called by a protocol handler that wants to
3011 * remove its address family, and have it unlinked from the
55737fda
SH
3012 * new socket creation.
3013 *
3014 * If protocol handler is a module, then it can use module reference
3015 * counts to protect against new references. If protocol handler is not
3016 * a module then it needs to provide its own protection in
3017 * the ops->create routine.
1da177e4 3018 */
f0fd27d4 3019void sock_unregister(int family)
1da177e4 3020{
f0fd27d4 3021 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3022
55737fda 3023 spin_lock(&net_family_lock);
a9b3cd7f 3024 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3025 spin_unlock(&net_family_lock);
3026
3027 synchronize_rcu();
3028
3410f22e 3029 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 3030}
c6d409cf 3031EXPORT_SYMBOL(sock_unregister);
1da177e4 3032
bf2ae2e4
XL
3033bool sock_is_registered(int family)
3034{
66b51b0a 3035 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3036}
3037
77d76ea3 3038static int __init sock_init(void)
1da177e4 3039{
b3e19d92 3040 int err;
2ca794e5
EB
3041 /*
3042 * Initialize the network sysctl infrastructure.
3043 */
3044 err = net_sysctl_init();
3045 if (err)
3046 goto out;
b3e19d92 3047
1da177e4 3048 /*
89bddce5 3049 * Initialize skbuff SLAB cache
1da177e4
LT
3050 */
3051 skb_init();
1da177e4
LT
3052
3053 /*
89bddce5 3054 * Initialize the protocols module.
1da177e4
LT
3055 */
3056
3057 init_inodecache();
b3e19d92
NP
3058
3059 err = register_filesystem(&sock_fs_type);
3060 if (err)
47260ba9 3061 goto out;
1da177e4 3062 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3063 if (IS_ERR(sock_mnt)) {
3064 err = PTR_ERR(sock_mnt);
3065 goto out_mount;
3066 }
77d76ea3
AK
3067
3068 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3069 */
3070
3071#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3072 err = netfilter_init();
3073 if (err)
3074 goto out;
1da177e4 3075#endif
cbeb321a 3076
408eccce 3077 ptp_classifier_init();
c1f19b51 3078
b3e19d92
NP
3079out:
3080 return err;
3081
3082out_mount:
3083 unregister_filesystem(&sock_fs_type);
b3e19d92 3084 goto out;
1da177e4
LT
3085}
3086
77d76ea3
AK
3087core_initcall(sock_init); /* early initcall */
3088
1da177e4
LT
3089#ifdef CONFIG_PROC_FS
3090void socket_seq_show(struct seq_file *seq)
3091{
648845ab
TZ
3092 seq_printf(seq, "sockets: used %d\n",
3093 sock_inuse_get(seq->private));
1da177e4 3094}
89bddce5 3095#endif /* CONFIG_PROC_FS */
1da177e4 3096
89bbfc95 3097#ifdef CONFIG_COMPAT
36fd633e 3098static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 3099{
6b96018b 3100 struct compat_ifconf ifc32;
7a229387 3101 struct ifconf ifc;
7a229387
AB
3102 int err;
3103
6b96018b 3104 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3105 return -EFAULT;
3106
36fd633e
AV
3107 ifc.ifc_len = ifc32.ifc_len;
3108 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 3109
36fd633e
AV
3110 rtnl_lock();
3111 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
3112 rtnl_unlock();
7a229387
AB
3113 if (err)
3114 return err;
3115
36fd633e 3116 ifc32.ifc_len = ifc.ifc_len;
6b96018b 3117 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3118 return -EFAULT;
3119
3120 return 0;
3121}
3122
6b96018b 3123static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 3124{
3a7da39d
BH
3125 struct compat_ethtool_rxnfc __user *compat_rxnfc;
3126 bool convert_in = false, convert_out = false;
44c02a2c
AV
3127 size_t buf_size = 0;
3128 struct ethtool_rxnfc __user *rxnfc = NULL;
3129 struct ifreq ifr;
3a7da39d
BH
3130 u32 rule_cnt = 0, actual_rule_cnt;
3131 u32 ethcmd;
7a229387 3132 u32 data;
3a7da39d 3133 int ret;
7a229387 3134
3a7da39d
BH
3135 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
3136 return -EFAULT;
7a229387 3137
3a7da39d
BH
3138 compat_rxnfc = compat_ptr(data);
3139
3140 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
3141 return -EFAULT;
3142
3a7da39d
BH
3143 /* Most ethtool structures are defined without padding.
3144 * Unfortunately struct ethtool_rxnfc is an exception.
3145 */
3146 switch (ethcmd) {
3147 default:
3148 break;
3149 case ETHTOOL_GRXCLSRLALL:
3150 /* Buffer size is variable */
3151 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3152 return -EFAULT;
3153 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3154 return -ENOMEM;
3155 buf_size += rule_cnt * sizeof(u32);
7c7ab580 3156 fallthrough;
3a7da39d
BH
3157 case ETHTOOL_GRXRINGS:
3158 case ETHTOOL_GRXCLSRLCNT:
3159 case ETHTOOL_GRXCLSRULE:
55664f32 3160 case ETHTOOL_SRXCLSRLINS:
3a7da39d 3161 convert_out = true;
7c7ab580 3162 fallthrough;
3a7da39d 3163 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3164 buf_size += sizeof(struct ethtool_rxnfc);
3165 convert_in = true;
44c02a2c 3166 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3167 break;
3168 }
3169
44c02a2c 3170 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3171 return -EFAULT;
3172
44c02a2c 3173 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3174
3a7da39d 3175 if (convert_in) {
127fe533 3176 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3177 * fs.ring_cookie and at the end of fs, but nowhere else.
3178 */
127fe533
AD
3179 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3180 sizeof(compat_rxnfc->fs.m_ext) !=
3181 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3182 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3183 BUILD_BUG_ON(
3184 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3185 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3186 offsetof(struct ethtool_rxnfc, fs.location) -
3187 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3188
3189 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3190 (void __user *)(&rxnfc->fs.m_ext + 1) -
3191 (void __user *)rxnfc) ||
3a7da39d
BH
3192 copy_in_user(&rxnfc->fs.ring_cookie,
3193 &compat_rxnfc->fs.ring_cookie,
954b1244 3194 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3195 (void __user *)&rxnfc->fs.ring_cookie))
3196 return -EFAULT;
3197 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3198 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3199 return -EFAULT;
3200 } else if (copy_in_user(&rxnfc->rule_cnt,
3201 &compat_rxnfc->rule_cnt,
3202 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3203 return -EFAULT;
3204 }
3205
44c02a2c 3206 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3207 if (ret)
3208 return ret;
3209
3210 if (convert_out) {
3211 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3212 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3213 (const void __user *)rxnfc) ||
3a7da39d
BH
3214 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3215 &rxnfc->fs.ring_cookie,
954b1244
SH
3216 (const void __user *)(&rxnfc->fs.location + 1) -
3217 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3218 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3219 sizeof(rxnfc->rule_cnt)))
3220 return -EFAULT;
3221
3222 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3223 /* As an optimisation, we only copy the actual
3224 * number of rules that the underlying
3225 * function returned. Since Mallory might
3226 * change the rule count in user memory, we
3227 * check that it is less than the rule count
3228 * originally given (as the user buffer size),
3229 * which has been range-checked.
3230 */
3231 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3232 return -EFAULT;
3233 if (actual_rule_cnt < rule_cnt)
3234 rule_cnt = actual_rule_cnt;
3235 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3236 &rxnfc->rule_locs[0],
3237 rule_cnt * sizeof(u32)))
3238 return -EFAULT;
3239 }
3240 }
3241
3242 return 0;
7a229387
AB
3243}
3244
7a50a240
AB
3245static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3246{
7a50a240 3247 compat_uptr_t uptr32;
44c02a2c
AV
3248 struct ifreq ifr;
3249 void __user *saved;
3250 int err;
7a50a240 3251
44c02a2c 3252 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3253 return -EFAULT;
3254
3255 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3256 return -EFAULT;
3257
44c02a2c
AV
3258 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3259 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3260
44c02a2c
AV
3261 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3262 if (!err) {
3263 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3264 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3265 err = -EFAULT;
ccbd6a5a 3266 }
44c02a2c 3267 return err;
7a229387
AB
3268}
3269
590d4693
BH
3270/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3271static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3272 struct compat_ifreq __user *u_ifreq32)
7a229387 3273{
44c02a2c 3274 struct ifreq ifreq;
7a229387
AB
3275 u32 data32;
3276
44c02a2c 3277 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3278 return -EFAULT;
44c02a2c 3279 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3280 return -EFAULT;
44c02a2c 3281 ifreq.ifr_data = compat_ptr(data32);
7a229387 3282
44c02a2c 3283 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3284}
3285
37ac39bd
JB
3286static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3287 unsigned int cmd,
3288 struct compat_ifreq __user *uifr32)
3289{
3290 struct ifreq __user *uifr;
3291 int err;
3292
3293 /* Handle the fact that while struct ifreq has the same *layout* on
3294 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3295 * which are handled elsewhere, it still has different *size* due to
3296 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3297 * resulting in struct ifreq being 32 and 40 bytes respectively).
3298 * As a result, if the struct happens to be at the end of a page and
3299 * the next page isn't readable/writable, we get a fault. To prevent
3300 * that, copy back and forth to the full size.
3301 */
3302
3303 uifr = compat_alloc_user_space(sizeof(*uifr));
3304 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3305 return -EFAULT;
3306
3307 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3308
3309 if (!err) {
3310 switch (cmd) {
3311 case SIOCGIFFLAGS:
3312 case SIOCGIFMETRIC:
3313 case SIOCGIFMTU:
3314 case SIOCGIFMEM:
3315 case SIOCGIFHWADDR:
3316 case SIOCGIFINDEX:
3317 case SIOCGIFADDR:
3318 case SIOCGIFBRDADDR:
3319 case SIOCGIFDSTADDR:
3320 case SIOCGIFNETMASK:
3321 case SIOCGIFPFLAGS:
3322 case SIOCGIFTXQLEN:
3323 case SIOCGMIIPHY:
3324 case SIOCGMIIREG:
c6c9fee3 3325 case SIOCGIFNAME:
37ac39bd
JB
3326 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3327 err = -EFAULT;
3328 break;
3329 }
3330 }
3331 return err;
3332}
3333
a2116ed2
AB
3334static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3335 struct compat_ifreq __user *uifr32)
3336{
3337 struct ifreq ifr;
3338 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3339 int err;
3340
3341 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3342 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3343 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3344 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3345 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3346 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3347 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3348 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3349 if (err)
3350 return -EFAULT;
3351
44c02a2c 3352 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3353
3354 if (cmd == SIOCGIFMAP && !err) {
3355 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3356 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3357 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3358 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3359 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3360 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3361 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3362 if (err)
3363 err = -EFAULT;
3364 }
3365 return err;
3366}
3367
7a229387
AB
3368/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3369 * for some operations; this forces use of the newer bridge-utils that
25985edc 3370 * use compatible ioctls
7a229387 3371 */
6b96018b 3372static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3373{
6b96018b 3374 compat_ulong_t tmp;
7a229387 3375
6b96018b 3376 if (get_user(tmp, argp))
7a229387
AB
3377 return -EFAULT;
3378 if (tmp == BRCTL_GET_VERSION)
3379 return BRCTL_VERSION + 1;
3380 return -EINVAL;
3381}
3382
6b96018b
AB
3383static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3384 unsigned int cmd, unsigned long arg)
3385{
3386 void __user *argp = compat_ptr(arg);
3387 struct sock *sk = sock->sk;
3388 struct net *net = sock_net(sk);
7a229387 3389
6b96018b 3390 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3391 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3392
3393 switch (cmd) {
3394 case SIOCSIFBR:
3395 case SIOCGIFBR:
3396 return old_bridge_ioctl(argp);
6b96018b 3397 case SIOCGIFCONF:
36fd633e 3398 return compat_dev_ifconf(net, argp);
6b96018b
AB
3399 case SIOCETHTOOL:
3400 return ethtool_ioctl(net, argp);
7a50a240
AB
3401 case SIOCWANDEV:
3402 return compat_siocwandev(net, argp);
a2116ed2
AB
3403 case SIOCGIFMAP:
3404 case SIOCSIFMAP:
3405 return compat_sioc_ifmap(net, cmd, argp);
0768e170
AB
3406 case SIOCGSTAMP_OLD:
3407 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3408 if (!sock->ops->gettstamp)
3409 return -ENOIOCTLCMD;
0768e170 3410 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3411 !COMPAT_USE_64BIT_TIME);
3412
590d4693
BH
3413 case SIOCBONDSLAVEINFOQUERY:
3414 case SIOCBONDINFOQUERY:
a2116ed2 3415 case SIOCSHWTSTAMP:
fd468c74 3416 case SIOCGHWTSTAMP:
590d4693 3417 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3418
3419 case FIOSETOWN:
3420 case SIOCSPGRP:
3421 case FIOGETOWN:
3422 case SIOCGPGRP:
3423 case SIOCBRADDBR:
3424 case SIOCBRDELBR:
3425 case SIOCGIFVLAN:
3426 case SIOCSIFVLAN:
c62cce2c 3427 case SIOCGSKNS:
0768e170
AB
3428 case SIOCGSTAMP_NEW:
3429 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3430 return sock_ioctl(file, cmd, arg);
3431
3432 case SIOCGIFFLAGS:
3433 case SIOCSIFFLAGS:
3434 case SIOCGIFMETRIC:
3435 case SIOCSIFMETRIC:
3436 case SIOCGIFMTU:
3437 case SIOCSIFMTU:
3438 case SIOCGIFMEM:
3439 case SIOCSIFMEM:
3440 case SIOCGIFHWADDR:
3441 case SIOCSIFHWADDR:
3442 case SIOCADDMULTI:
3443 case SIOCDELMULTI:
3444 case SIOCGIFINDEX:
6b96018b
AB
3445 case SIOCGIFADDR:
3446 case SIOCSIFADDR:
3447 case SIOCSIFHWBROADCAST:
6b96018b 3448 case SIOCDIFADDR:
6b96018b
AB
3449 case SIOCGIFBRDADDR:
3450 case SIOCSIFBRDADDR:
3451 case SIOCGIFDSTADDR:
3452 case SIOCSIFDSTADDR:
3453 case SIOCGIFNETMASK:
3454 case SIOCSIFNETMASK:
3455 case SIOCSIFPFLAGS:
3456 case SIOCGIFPFLAGS:
3457 case SIOCGIFTXQLEN:
3458 case SIOCSIFTXQLEN:
3459 case SIOCBRADDIF:
3460 case SIOCBRDELIF:
c6c9fee3 3461 case SIOCGIFNAME:
9177efd3
AB
3462 case SIOCSIFNAME:
3463 case SIOCGMIIPHY:
3464 case SIOCGMIIREG:
3465 case SIOCSMIIREG:
f92d4fc9
AV
3466 case SIOCBONDENSLAVE:
3467 case SIOCBONDRELEASE:
3468 case SIOCBONDSETHWADDR:
3469 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3470 return compat_ifreq_ioctl(net, sock, cmd, argp);
3471
6b96018b
AB
3472 case SIOCSARP:
3473 case SIOCGARP:
3474 case SIOCDARP:
c7dc504e 3475 case SIOCOUTQ:
9d7bf41f 3476 case SIOCOUTQNSD:
6b96018b 3477 case SIOCATMARK:
63ff03ab 3478 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3479 }
3480
6b96018b
AB
3481 return -ENOIOCTLCMD;
3482}
7a229387 3483
95c96174 3484static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3485 unsigned long arg)
89bbfc95
SP
3486{
3487 struct socket *sock = file->private_data;
3488 int ret = -ENOIOCTLCMD;
87de87d5
DM
3489 struct sock *sk;
3490 struct net *net;
3491
3492 sk = sock->sk;
3493 net = sock_net(sk);
89bbfc95
SP
3494
3495 if (sock->ops->compat_ioctl)
3496 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3497
87de87d5
DM
3498 if (ret == -ENOIOCTLCMD &&
3499 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3500 ret = compat_wext_handle_ioctl(net, cmd, arg);
3501
6b96018b
AB
3502 if (ret == -ENOIOCTLCMD)
3503 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3504
89bbfc95
SP
3505 return ret;
3506}
3507#endif
3508
8a3c245c
PT
3509/**
3510 * kernel_bind - bind an address to a socket (kernel space)
3511 * @sock: socket
3512 * @addr: address
3513 * @addrlen: length of address
3514 *
3515 * Returns 0 or an error.
3516 */
3517
ac5a488e
SS
3518int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3519{
3520 return sock->ops->bind(sock, addr, addrlen);
3521}
c6d409cf 3522EXPORT_SYMBOL(kernel_bind);
ac5a488e 3523
8a3c245c
PT
3524/**
3525 * kernel_listen - move socket to listening state (kernel space)
3526 * @sock: socket
3527 * @backlog: pending connections queue size
3528 *
3529 * Returns 0 or an error.
3530 */
3531
ac5a488e
SS
3532int kernel_listen(struct socket *sock, int backlog)
3533{
3534 return sock->ops->listen(sock, backlog);
3535}
c6d409cf 3536EXPORT_SYMBOL(kernel_listen);
ac5a488e 3537
8a3c245c
PT
3538/**
3539 * kernel_accept - accept a connection (kernel space)
3540 * @sock: listening socket
3541 * @newsock: new connected socket
3542 * @flags: flags
3543 *
3544 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3545 * If it fails, @newsock is guaranteed to be %NULL.
3546 * Returns 0 or an error.
3547 */
3548
ac5a488e
SS
3549int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3550{
3551 struct sock *sk = sock->sk;
3552 int err;
3553
3554 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3555 newsock);
3556 if (err < 0)
3557 goto done;
3558
cdfbabfb 3559 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3560 if (err < 0) {
3561 sock_release(*newsock);
fa8705b0 3562 *newsock = NULL;
ac5a488e
SS
3563 goto done;
3564 }
3565
3566 (*newsock)->ops = sock->ops;
1b08534e 3567 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3568
3569done:
3570 return err;
3571}
c6d409cf 3572EXPORT_SYMBOL(kernel_accept);
ac5a488e 3573
8a3c245c
PT
3574/**
3575 * kernel_connect - connect a socket (kernel space)
3576 * @sock: socket
3577 * @addr: address
3578 * @addrlen: address length
3579 * @flags: flags (O_NONBLOCK, ...)
3580 *
3581 * For datagram sockets, @addr is the addres to which datagrams are sent
3582 * by default, and the only address from which datagrams are received.
3583 * For stream sockets, attempts to connect to @addr.
3584 * Returns 0 or an error code.
3585 */
3586
ac5a488e 3587int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3588 int flags)
ac5a488e
SS
3589{
3590 return sock->ops->connect(sock, addr, addrlen, flags);
3591}
c6d409cf 3592EXPORT_SYMBOL(kernel_connect);
ac5a488e 3593
8a3c245c
PT
3594/**
3595 * kernel_getsockname - get the address which the socket is bound (kernel space)
3596 * @sock: socket
3597 * @addr: address holder
3598 *
3599 * Fills the @addr pointer with the address which the socket is bound.
3600 * Returns 0 or an error code.
3601 */
3602
9b2c45d4 3603int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3604{
9b2c45d4 3605 return sock->ops->getname(sock, addr, 0);
ac5a488e 3606}
c6d409cf 3607EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3608
8a3c245c 3609/**
645f0897 3610 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3611 * @sock: socket
3612 * @addr: address holder
3613 *
3614 * Fills the @addr pointer with the address which the socket is connected.
3615 * Returns 0 or an error code.
3616 */
3617
9b2c45d4 3618int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3619{
9b2c45d4 3620 return sock->ops->getname(sock, addr, 1);
ac5a488e 3621}
c6d409cf 3622EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3623
8a3c245c
PT
3624/**
3625 * kernel_sendpage - send a &page through a socket (kernel space)
3626 * @sock: socket
3627 * @page: page
3628 * @offset: page offset
3629 * @size: total size in bytes
3630 * @flags: flags (MSG_DONTWAIT, ...)
3631 *
3632 * Returns the total amount sent in bytes or an error.
3633 */
3634
ac5a488e
SS
3635int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3636 size_t size, int flags)
3637{
7b62d31d
CL
3638 if (sock->ops->sendpage) {
3639 /* Warn in case the improper page to zero-copy send */
3640 WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
ac5a488e 3641 return sock->ops->sendpage(sock, page, offset, size, flags);
7b62d31d 3642 }
ac5a488e
SS
3643 return sock_no_sendpage(sock, page, offset, size, flags);
3644}
c6d409cf 3645EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3646
8a3c245c
PT
3647/**
3648 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3649 * @sk: sock
3650 * @page: page
3651 * @offset: page offset
3652 * @size: total size in bytes
3653 * @flags: flags (MSG_DONTWAIT, ...)
3654 *
3655 * Returns the total amount sent in bytes or an error.
3656 * Caller must hold @sk.
3657 */
3658
306b13eb
TH
3659int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3660 size_t size, int flags)
3661{
3662 struct socket *sock = sk->sk_socket;
3663
3664 if (sock->ops->sendpage_locked)
3665 return sock->ops->sendpage_locked(sk, page, offset, size,
3666 flags);
3667
3668 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3669}
3670EXPORT_SYMBOL(kernel_sendpage_locked);
3671
8a3c245c 3672/**
645f0897 3673 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3674 * @sock: socket
3675 * @how: connection part
3676 *
3677 * Returns 0 or an error.
3678 */
3679
91cf45f0
TM
3680int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3681{
3682 return sock->ops->shutdown(sock, how);
3683}
91cf45f0 3684EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3685
8a3c245c
PT
3686/**
3687 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3688 * @sk: socket
3689 *
3690 * This routine returns the IP overhead imposed by a socket i.e.
3691 * the length of the underlying IP header, depending on whether
3692 * this is an IPv4 or IPv6 socket and the length from IP options turned
3693 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3694 */
8a3c245c 3695
113c3075
P
3696u32 kernel_sock_ip_overhead(struct sock *sk)
3697{
3698 struct inet_sock *inet;
3699 struct ip_options_rcu *opt;
3700 u32 overhead = 0;
113c3075
P
3701#if IS_ENABLED(CONFIG_IPV6)
3702 struct ipv6_pinfo *np;
3703 struct ipv6_txoptions *optv6 = NULL;
3704#endif /* IS_ENABLED(CONFIG_IPV6) */
3705
3706 if (!sk)
3707 return overhead;
3708
113c3075
P
3709 switch (sk->sk_family) {
3710 case AF_INET:
3711 inet = inet_sk(sk);
3712 overhead += sizeof(struct iphdr);
3713 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3714 sock_owned_by_user(sk));
113c3075
P
3715 if (opt)
3716 overhead += opt->opt.optlen;
3717 return overhead;
3718#if IS_ENABLED(CONFIG_IPV6)
3719 case AF_INET6:
3720 np = inet6_sk(sk);
3721 overhead += sizeof(struct ipv6hdr);
3722 if (np)
3723 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3724 sock_owned_by_user(sk));
113c3075
P
3725 if (optv6)
3726 overhead += (optv6->opt_flen + optv6->opt_nflen);
3727 return overhead;
3728#endif /* IS_ENABLED(CONFIG_IPV6) */
3729 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3730 return overhead;
3731 }
3732}
3733EXPORT_SYMBOL(kernel_sock_ip_overhead);