Merge tag 'v6.4-p2' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
[linux-block.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
aef2feda 55#include <linux/bpf-cgroup.h>
cc69837f 56#include <linux/ethtool.h>
1da177e4 57#include <linux/mm.h>
1da177e4
LT
58#include <linux/socket.h>
59#include <linux/file.h>
60#include <linux/net.h>
61#include <linux/interrupt.h>
aaca0bdc 62#include <linux/thread_info.h>
55737fda 63#include <linux/rcupdate.h>
1da177e4
LT
64#include <linux/netdevice.h>
65#include <linux/proc_fs.h>
66#include <linux/seq_file.h>
4a3e2f71 67#include <linux/mutex.h>
1da177e4 68#include <linux/if_bridge.h>
20380731 69#include <linux/if_vlan.h>
408eccce 70#include <linux/ptp_classify.h>
1da177e4
LT
71#include <linux/init.h>
72#include <linux/poll.h>
73#include <linux/cache.h>
74#include <linux/module.h>
75#include <linux/highmem.h>
1da177e4 76#include <linux/mount.h>
fba9be49 77#include <linux/pseudo_fs.h>
1da177e4
LT
78#include <linux/security.h>
79#include <linux/syscalls.h>
80#include <linux/compat.h>
81#include <linux/kmod.h>
3ec3b2fb 82#include <linux/audit.h>
d86b5e0e 83#include <linux/wireless.h>
1b8d7ae4 84#include <linux/nsproxy.h>
1fd7317d 85#include <linux/magic.h>
5a0e3ad6 86#include <linux/slab.h>
600e1779 87#include <linux/xattr.h>
c8e8cd57 88#include <linux/nospec.h>
8c3c447b 89#include <linux/indirect_call_wrapper.h>
1da177e4 90
7c0f6ba6 91#include <linux/uaccess.h>
1da177e4
LT
92#include <asm/unistd.h>
93
94#include <net/compat.h>
87de87d5 95#include <net/wext.h>
f8451725 96#include <net/cls_cgroup.h>
1da177e4
LT
97
98#include <net/sock.h>
99#include <linux/netfilter.h>
100
6b96018b
AB
101#include <linux/if_tun.h>
102#include <linux/ipv6_route.h>
103#include <linux/route.h>
c7dc504e 104#include <linux/termios.h>
6b96018b 105#include <linux/sockios.h>
076bb0c8 106#include <net/busy_poll.h>
f24b9be5 107#include <linux/errqueue.h>
d7c08826 108#include <linux/ptp_clock_kernel.h>
6e6eda44 109#include <trace/events/sock.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
121static __poll_t sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
542d3065
AB
134
135#ifdef CONFIG_PROC_FS
136static void sock_show_fdinfo(struct seq_file *m, struct file *f)
137{
138 struct socket *sock = f->private_data;
139
140 if (sock->ops->show_fdinfo)
141 sock->ops->show_fdinfo(m, sock);
142}
143#else
144#define sock_show_fdinfo NULL
145#endif
1da177e4 146
1da177e4
LT
147/*
148 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
149 * in the operation structures but are done directly via the socketcall() multiplexor.
150 */
151
da7071d7 152static const struct file_operations socket_file_ops = {
1da177e4
LT
153 .owner = THIS_MODULE,
154 .llseek = no_llseek,
8ae5e030
AV
155 .read_iter = sock_read_iter,
156 .write_iter = sock_write_iter,
1da177e4
LT
157 .poll = sock_poll,
158 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
159#ifdef CONFIG_COMPAT
160 .compat_ioctl = compat_sock_ioctl,
161#endif
1da177e4 162 .mmap = sock_mmap,
1da177e4
LT
163 .release = sock_close,
164 .fasync = sock_fasync,
5274f052
JA
165 .sendpage = sock_sendpage,
166 .splice_write = generic_splice_sendpage,
9c55e01c 167 .splice_read = sock_splice_read,
b4653342 168 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
169};
170
fe0bdbde
YD
171static const char * const pf_family_names[] = {
172 [PF_UNSPEC] = "PF_UNSPEC",
173 [PF_UNIX] = "PF_UNIX/PF_LOCAL",
174 [PF_INET] = "PF_INET",
175 [PF_AX25] = "PF_AX25",
176 [PF_IPX] = "PF_IPX",
177 [PF_APPLETALK] = "PF_APPLETALK",
178 [PF_NETROM] = "PF_NETROM",
179 [PF_BRIDGE] = "PF_BRIDGE",
180 [PF_ATMPVC] = "PF_ATMPVC",
181 [PF_X25] = "PF_X25",
182 [PF_INET6] = "PF_INET6",
183 [PF_ROSE] = "PF_ROSE",
184 [PF_DECnet] = "PF_DECnet",
185 [PF_NETBEUI] = "PF_NETBEUI",
186 [PF_SECURITY] = "PF_SECURITY",
187 [PF_KEY] = "PF_KEY",
188 [PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
189 [PF_PACKET] = "PF_PACKET",
190 [PF_ASH] = "PF_ASH",
191 [PF_ECONET] = "PF_ECONET",
192 [PF_ATMSVC] = "PF_ATMSVC",
193 [PF_RDS] = "PF_RDS",
194 [PF_SNA] = "PF_SNA",
195 [PF_IRDA] = "PF_IRDA",
196 [PF_PPPOX] = "PF_PPPOX",
197 [PF_WANPIPE] = "PF_WANPIPE",
198 [PF_LLC] = "PF_LLC",
199 [PF_IB] = "PF_IB",
200 [PF_MPLS] = "PF_MPLS",
201 [PF_CAN] = "PF_CAN",
202 [PF_TIPC] = "PF_TIPC",
203 [PF_BLUETOOTH] = "PF_BLUETOOTH",
204 [PF_IUCV] = "PF_IUCV",
205 [PF_RXRPC] = "PF_RXRPC",
206 [PF_ISDN] = "PF_ISDN",
207 [PF_PHONET] = "PF_PHONET",
208 [PF_IEEE802154] = "PF_IEEE802154",
209 [PF_CAIF] = "PF_CAIF",
210 [PF_ALG] = "PF_ALG",
211 [PF_NFC] = "PF_NFC",
212 [PF_VSOCK] = "PF_VSOCK",
213 [PF_KCM] = "PF_KCM",
214 [PF_QIPCRTR] = "PF_QIPCRTR",
215 [PF_SMC] = "PF_SMC",
216 [PF_XDP] = "PF_XDP",
bc49d816 217 [PF_MCTP] = "PF_MCTP",
fe0bdbde
YD
218};
219
1da177e4
LT
220/*
221 * The protocol list. Each protocol is registered in here.
222 */
223
1da177e4 224static DEFINE_SPINLOCK(net_family_lock);
190683a9 225static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 226
1da177e4 227/*
89bddce5
SH
228 * Support routines.
229 * Move socket addresses back and forth across the kernel/user
230 * divide and look after the messy bits.
1da177e4
LT
231 */
232
1da177e4
LT
233/**
234 * move_addr_to_kernel - copy a socket address into kernel space
235 * @uaddr: Address in user space
236 * @kaddr: Address in kernel space
237 * @ulen: Length in user space
238 *
239 * The address is copied into kernel space. If the provided address is
240 * too long an error code of -EINVAL is returned. If the copy gives
241 * invalid addresses -EFAULT is returned. On a success 0 is returned.
242 */
243
43db362d 244int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 245{
230b1839 246 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 247 return -EINVAL;
89bddce5 248 if (ulen == 0)
1da177e4 249 return 0;
89bddce5 250 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 251 return -EFAULT;
3ec3b2fb 252 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
253}
254
255/**
256 * move_addr_to_user - copy an address to user space
257 * @kaddr: kernel space address
258 * @klen: length of address in kernel
259 * @uaddr: user space address
260 * @ulen: pointer to user length field
261 *
262 * The value pointed to by ulen on entry is the buffer length available.
263 * This is overwritten with the true length of the address. -EINVAL is
264 * returned if a negative buffer size is specified; a buffer longer than
265 * the address is not an error, only @klen bytes are copied. -EFAULT is
266 * returned if either the buffer or the length field are not accessible.
267 * After copying the data up to the limit the user specifies, the true
268 * length of the data is written over the length limit the user
269 * specified. Zero is returned for a success.
270 */
89bddce5 271
43db362d 272static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 273 void __user *uaddr, int __user *ulen)
1da177e4
LT
274{
275 int err;
276 int len;
277
68c6beb3 278 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
279 err = get_user(len, ulen);
280 if (err)
1da177e4 281 return err;
89bddce5
SH
282 if (len > klen)
283 len = klen;
68c6beb3 284 if (len < 0)
1da177e4 285 return -EINVAL;
89bddce5 286 if (len) {
d6fe3945
SG
287 if (audit_sockaddr(klen, kaddr))
288 return -ENOMEM;
89bddce5 289 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
290 return -EFAULT;
291 }
292 /*
89bddce5
SH
293 * "fromlen shall refer to the value before truncation.."
294 * 1003.1g
1da177e4
LT
295 */
296 return __put_user(klen, ulen);
297}
298
08009a76 299static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
300
301static struct inode *sock_alloc_inode(struct super_block *sb)
302{
303 struct socket_alloc *ei;
89bddce5 304
fd60b288 305 ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
306 if (!ei)
307 return NULL;
333f7909
AV
308 init_waitqueue_head(&ei->socket.wq.wait);
309 ei->socket.wq.fasync_list = NULL;
310 ei->socket.wq.flags = 0;
89bddce5 311
1da177e4
LT
312 ei->socket.state = SS_UNCONNECTED;
313 ei->socket.flags = 0;
314 ei->socket.ops = NULL;
315 ei->socket.sk = NULL;
316 ei->socket.file = NULL;
1da177e4
LT
317
318 return &ei->vfs_inode;
319}
320
6d7855c5 321static void sock_free_inode(struct inode *inode)
1da177e4 322{
43815482
ED
323 struct socket_alloc *ei;
324
325 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 326 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
327}
328
51cc5068 329static void init_once(void *foo)
1da177e4 330{
89bddce5 331 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 332
a35afb83 333 inode_init_once(&ei->vfs_inode);
1da177e4 334}
89bddce5 335
1e911632 336static void init_inodecache(void)
1da177e4
LT
337{
338 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
339 sizeof(struct socket_alloc),
340 0,
341 (SLAB_HWCACHE_ALIGN |
342 SLAB_RECLAIM_ACCOUNT |
5d097056 343 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 344 init_once);
1e911632 345 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
346}
347
b87221de 348static const struct super_operations sockfs_ops = {
c6d409cf 349 .alloc_inode = sock_alloc_inode,
6d7855c5 350 .free_inode = sock_free_inode,
c6d409cf 351 .statfs = simple_statfs,
1da177e4
LT
352};
353
c23fbb6b
ED
354/*
355 * sockfs_dname() is called from d_path().
356 */
357static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
358{
0f60d288 359 return dynamic_dname(buffer, buflen, "socket:[%lu]",
c5ef6035 360 d_inode(dentry)->i_ino);
c23fbb6b
ED
361}
362
3ba13d17 363static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 364 .d_dname = sockfs_dname,
1da177e4
LT
365};
366
bba0bd31
AG
367static int sockfs_xattr_get(const struct xattr_handler *handler,
368 struct dentry *dentry, struct inode *inode,
369 const char *suffix, void *value, size_t size)
370{
371 if (value) {
372 if (dentry->d_name.len + 1 > size)
373 return -ERANGE;
374 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
375 }
376 return dentry->d_name.len + 1;
377}
378
379#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
380#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
381#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
382
383static const struct xattr_handler sockfs_xattr_handler = {
384 .name = XATTR_NAME_SOCKPROTONAME,
385 .get = sockfs_xattr_get,
386};
387
4a590153 388static int sockfs_security_xattr_set(const struct xattr_handler *handler,
39f60c1c 389 struct mnt_idmap *idmap,
4a590153
AG
390 struct dentry *dentry, struct inode *inode,
391 const char *suffix, const void *value,
392 size_t size, int flags)
393{
394 /* Handled by LSM. */
395 return -EAGAIN;
396}
397
398static const struct xattr_handler sockfs_security_xattr_handler = {
399 .prefix = XATTR_SECURITY_PREFIX,
400 .set = sockfs_security_xattr_set,
401};
402
bba0bd31
AG
403static const struct xattr_handler *sockfs_xattr_handlers[] = {
404 &sockfs_xattr_handler,
4a590153 405 &sockfs_security_xattr_handler,
bba0bd31
AG
406 NULL
407};
408
fba9be49 409static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 410{
fba9be49
DH
411 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
412 if (!ctx)
413 return -ENOMEM;
414 ctx->ops = &sockfs_ops;
415 ctx->dops = &sockfs_dentry_operations;
416 ctx->xattr = sockfs_xattr_handlers;
417 return 0;
c74a1cbb
AV
418}
419
420static struct vfsmount *sock_mnt __read_mostly;
421
422static struct file_system_type sock_fs_type = {
423 .name = "sockfs",
fba9be49 424 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
425 .kill_sb = kill_anon_super,
426};
427
1da177e4
LT
428/*
429 * Obtains the first available file descriptor and sets it up for use.
430 *
39d8c1b6
DM
431 * These functions create file structures and map them to fd space
432 * of the current process. On success it returns file descriptor
1da177e4
LT
433 * and file struct implicitly stored in sock->file.
434 * Note that another thread may close file descriptor before we return
435 * from this function. We use the fact that now we do not refer
436 * to socket after mapping. If one day we will need it, this
437 * function will increment ref. count on file by 1.
438 *
439 * In any case the returned fd MAY NOT be valid!
440 * This race condition is unavoidable
441 * with shared fd spaces, we cannot solve it inside kernel,
442 * but we take care of internal coherence yet.
443 */
444
8a3c245c
PT
445/**
446 * sock_alloc_file - Bind a &socket to a &file
447 * @sock: socket
448 * @flags: file status flags
449 * @dname: protocol name
450 *
451 * Returns the &file bound with @sock, implicitly storing it
452 * in sock->file. If dname is %NULL, sets to "".
649c15c7
TLSC
453 *
454 * On failure @sock is released, and an ERR pointer is returned.
455 *
8a3c245c
PT
456 * This function uses GFP_KERNEL internally.
457 */
458
aab174f0 459struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 460{
7cbe66b6 461 struct file *file;
1da177e4 462
d93aa9d8
AV
463 if (!dname)
464 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 465
d93aa9d8
AV
466 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
467 O_RDWR | (flags & O_NONBLOCK),
468 &socket_file_ops);
b5ffe634 469 if (IS_ERR(file)) {
8e1611e2 470 sock_release(sock);
39b65252 471 return file;
cc3808f8
AV
472 }
473
474 sock->file = file;
39d8c1b6 475 file->private_data = sock;
d8e464ec 476 stream_open(SOCK_INODE(sock), file);
28407630 477 return file;
39d8c1b6 478}
56b31d1c 479EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 480
56b31d1c 481static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
482{
483 struct file *newfile;
28407630 484 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
485 if (unlikely(fd < 0)) {
486 sock_release(sock);
28407630 487 return fd;
ce4bb04c 488 }
39d8c1b6 489
aab174f0 490 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 491 if (!IS_ERR(newfile)) {
39d8c1b6 492 fd_install(fd, newfile);
28407630
AV
493 return fd;
494 }
7cbe66b6 495
28407630
AV
496 put_unused_fd(fd);
497 return PTR_ERR(newfile);
1da177e4
LT
498}
499
8a3c245c
PT
500/**
501 * sock_from_file - Return the &socket bound to @file.
502 * @file: file
8a3c245c 503 *
dba4a925 504 * On failure returns %NULL.
8a3c245c
PT
505 */
506
dba4a925 507struct socket *sock_from_file(struct file *file)
6cb153ca 508{
6cb153ca 509 if (file->f_op == &socket_file_ops)
da214a47 510 return file->private_data; /* set in sock_alloc_file */
6cb153ca 511
23bb80d2 512 return NULL;
6cb153ca 513}
406a3c63 514EXPORT_SYMBOL(sock_from_file);
6cb153ca 515
1da177e4 516/**
c6d409cf 517 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
518 * @fd: file handle
519 * @err: pointer to an error code return
520 *
521 * The file handle passed in is locked and the socket it is bound
241c4667 522 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
523 * with a negative errno code and NULL is returned. The function checks
524 * for both invalid handles and passing a handle which is not a socket.
525 *
526 * On a success the socket object pointer is returned.
527 */
528
529struct socket *sockfd_lookup(int fd, int *err)
530{
531 struct file *file;
1da177e4
LT
532 struct socket *sock;
533
89bddce5
SH
534 file = fget(fd);
535 if (!file) {
1da177e4
LT
536 *err = -EBADF;
537 return NULL;
538 }
89bddce5 539
dba4a925
FR
540 sock = sock_from_file(file);
541 if (!sock) {
542 *err = -ENOTSOCK;
1da177e4 543 fput(file);
dba4a925 544 }
6cb153ca
BL
545 return sock;
546}
c6d409cf 547EXPORT_SYMBOL(sockfd_lookup);
1da177e4 548
6cb153ca
BL
549static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
550{
00e188ef 551 struct fd f = fdget(fd);
6cb153ca
BL
552 struct socket *sock;
553
3672558c 554 *err = -EBADF;
00e188ef 555 if (f.file) {
dba4a925 556 sock = sock_from_file(f.file);
00e188ef 557 if (likely(sock)) {
ce787a5a 558 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 559 return sock;
00e188ef 560 }
dba4a925 561 *err = -ENOTSOCK;
00e188ef 562 fdput(f);
1da177e4 563 }
6cb153ca 564 return NULL;
1da177e4
LT
565}
566
600e1779
MY
567static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
568 size_t size)
569{
570 ssize_t len;
571 ssize_t used = 0;
572
c5ef6035 573 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
574 if (len < 0)
575 return len;
576 used += len;
577 if (buffer) {
578 if (size < used)
579 return -ERANGE;
580 buffer += len;
581 }
582
583 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
584 used += len;
585 if (buffer) {
586 if (size < used)
587 return -ERANGE;
588 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
589 buffer += len;
590 }
591
592 return used;
593}
594
c1632a0f 595static int sockfs_setattr(struct mnt_idmap *idmap,
549c7297 596 struct dentry *dentry, struct iattr *iattr)
86741ec2 597{
c1632a0f 598 int err = simple_setattr(&nop_mnt_idmap, dentry, iattr);
86741ec2 599
e1a3a60a 600 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
601 struct socket *sock = SOCKET_I(d_inode(dentry));
602
6d8c50dc
CW
603 if (sock->sk)
604 sock->sk->sk_uid = iattr->ia_uid;
605 else
606 err = -ENOENT;
86741ec2
LC
607 }
608
609 return err;
610}
611
600e1779 612static const struct inode_operations sockfs_inode_ops = {
600e1779 613 .listxattr = sockfs_listxattr,
86741ec2 614 .setattr = sockfs_setattr,
600e1779
MY
615};
616
1da177e4 617/**
8a3c245c 618 * sock_alloc - allocate a socket
89bddce5 619 *
1da177e4
LT
620 * Allocate a new inode and socket object. The two are bound together
621 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 622 * NULL is returned. This function uses GFP_KERNEL internally.
1da177e4
LT
623 */
624
f4a00aac 625struct socket *sock_alloc(void)
1da177e4 626{
89bddce5
SH
627 struct inode *inode;
628 struct socket *sock;
1da177e4 629
a209dfc7 630 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
631 if (!inode)
632 return NULL;
633
634 sock = SOCKET_I(inode);
635
85fe4025 636 inode->i_ino = get_next_ino();
89bddce5 637 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
638 inode->i_uid = current_fsuid();
639 inode->i_gid = current_fsgid();
600e1779 640 inode->i_op = &sockfs_inode_ops;
1da177e4 641
1da177e4
LT
642 return sock;
643}
f4a00aac 644EXPORT_SYMBOL(sock_alloc);
1da177e4 645
6d8c50dc 646static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
647{
648 if (sock->ops) {
649 struct module *owner = sock->ops->owner;
650
6d8c50dc
CW
651 if (inode)
652 inode_lock(inode);
1da177e4 653 sock->ops->release(sock);
ff7b11aa 654 sock->sk = NULL;
6d8c50dc
CW
655 if (inode)
656 inode_unlock(inode);
1da177e4
LT
657 sock->ops = NULL;
658 module_put(owner);
659 }
660
333f7909 661 if (sock->wq.fasync_list)
3410f22e 662 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 663
1da177e4
LT
664 if (!sock->file) {
665 iput(SOCK_INODE(sock));
666 return;
667 }
89bddce5 668 sock->file = NULL;
1da177e4 669}
6d8c50dc 670
9a8ad9ac
AL
671/**
672 * sock_release - close a socket
673 * @sock: socket to close
674 *
675 * The socket is released from the protocol stack if it has a release
676 * callback, and the inode is then released if the socket is bound to
677 * an inode not a file.
678 */
6d8c50dc
CW
679void sock_release(struct socket *sock)
680{
681 __sock_release(sock, NULL);
682}
c6d409cf 683EXPORT_SYMBOL(sock_release);
1da177e4 684
c14ac945 685void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 686{
140c55d4
ED
687 u8 flags = *tx_flags;
688
51eb7492 689 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
140c55d4
ED
690 flags |= SKBTX_HW_TSTAMP;
691
51eb7492
GE
692 /* PTP hardware clocks can provide a free running cycle counter
693 * as a time base for virtual clocks. Tell driver to use the
694 * free running cycle counter for timestamp if socket is bound
695 * to virtual clock.
696 */
697 if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
698 flags |= SKBTX_HW_TSTAMP_USE_CYCLES;
699 }
700
c14ac945 701 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
702 flags |= SKBTX_SW_TSTAMP;
703
c14ac945 704 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
705 flags |= SKBTX_SCHED_TSTAMP;
706
140c55d4 707 *tx_flags = flags;
20d49473 708}
67cc0d40 709EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 710
8c3c447b
PA
711INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
712 size_t));
a648a592
PA
713INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
714 size_t));
6e6eda44
YC
715
716static noinline void call_trace_sock_send_length(struct sock *sk, int ret,
717 int flags)
718{
719 trace_sock_send_length(sk, ret, 0);
720}
721
d8725c86 722static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 723{
a648a592
PA
724 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
725 inet_sendmsg, sock, msg,
726 msg_data_left(msg));
d8725c86 727 BUG_ON(ret == -EIOCBQUEUED);
6e6eda44
YC
728
729 if (trace_sock_send_length_enabled())
730 call_trace_sock_send_length(sock->sk, ret, 0);
d8725c86 731 return ret;
1da177e4
LT
732}
733
85806af0
RD
734/**
735 * sock_sendmsg - send a message through @sock
736 * @sock: socket
737 * @msg: message to send
738 *
739 * Sends @msg through @sock, passing through LSM.
740 * Returns the number of bytes sent, or an error code.
741 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	/* A non-zero result from the LSM hook is returned instead of sending. */
	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
c6d409cf 749EXPORT_SYMBOL(sock_sendmsg);
1da177e4 750
8a3c245c
PT
751/**
752 * kernel_sendmsg - send a message through @sock (kernel-space)
753 * @sock: socket
754 * @msg: message header
755 * @vec: kernel vec
756 * @num: vec array length
757 * @size: total message data size
758 *
759 * Builds the message data with @vec and sends it through @sock.
760 * Returns the number of bytes sent, or an error code.
761 */
762
1da177e4
LT
763int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
764 struct kvec *vec, size_t num, size_t size)
765{
de4eda9d 766 iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
d8725c86 767 return sock_sendmsg(sock, msg);
1da177e4 768}
c6d409cf 769EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 770
8a3c245c
PT
771/**
772 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
773 * @sk: sock
774 * @msg: message header
775 * @vec: output s/g array
776 * @num: output s/g array length
777 * @size: total message data size
778 *
779 * Builds the message data with @vec and sends it through @sk's socket.
780 * Returns the number of bytes sent, or an error code.
781 * Caller must hold @sk.
782 */
783
306b13eb
TH
784int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
785 struct kvec *vec, size_t num, size_t size)
786{
787 struct socket *sock = sk->sk_socket;
788
789 if (!sock->ops->sendmsg_locked)
db5980d8 790 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 791
de4eda9d 792 iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
306b13eb
TH
793
794 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
795}
796EXPORT_SYMBOL(kernel_sendmsg_locked);
797
8605330a
SHY
798static bool skb_is_err_queue(const struct sk_buff *skb)
799{
800 /* pkt_type of skbs enqueued on the error queue are set to
801 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
802 * in recvmsg, since skbs received on a local socket will never
803 * have a pkt_type of PACKET_OUTGOING.
804 */
805 return skb->pkt_type == PACKET_OUTGOING;
806}
807
b50a5c70
ML
808/* On transmit, software and hardware timestamps are returned independently.
809 * As the two skb clones share the hardware timestamp, which may be updated
810 * before the software timestamp is received, a hardware TX timestamp may be
811 * returned only if there is no software TX timestamp. Ignore false software
812 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 813 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
814 * hardware timestamp.
815 */
816static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
817{
818 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
819}
820
97dc7cd9
GE
821static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
822{
823 bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC;
824 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
825 struct net_device *orig_dev;
826 ktime_t hwtstamp;
827
828 rcu_read_lock();
829 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
830 if (orig_dev) {
831 *if_index = orig_dev->ifindex;
832 hwtstamp = netdev_get_tstamp(orig_dev, shhwtstamps, cycles);
833 } else {
834 hwtstamp = shhwtstamps->hwtstamp;
835 }
836 rcu_read_unlock();
837
838 return hwtstamp;
839}
840
841static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb,
842 int if_index)
aad9c8c4
ML
843{
844 struct scm_ts_pktinfo ts_pktinfo;
845 struct net_device *orig_dev;
846
847 if (!skb_mac_header_was_set(skb))
848 return;
849
850 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
851
97dc7cd9
GE
852 if (!if_index) {
853 rcu_read_lock();
854 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
855 if (orig_dev)
856 if_index = orig_dev->ifindex;
857 rcu_read_unlock();
858 }
859 ts_pktinfo.if_index = if_index;
aad9c8c4
ML
860
861 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
862 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
863 sizeof(ts_pktinfo), &ts_pktinfo);
864}
865
92f37fd2
ED
866/*
867 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
868 */
869void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
870 struct sk_buff *skb)
871{
20d49473 872 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 873 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
874 struct scm_timestamping_internal tss;
875
b50a5c70 876 int empty = 1, false_tstamp = 0;
20d49473
PO
877 struct skb_shared_hwtstamps *shhwtstamps =
878 skb_hwtstamps(skb);
97dc7cd9 879 int if_index;
007747a9 880 ktime_t hwtstamp;
20d49473
PO
881
882 /* Race occurred between timestamp enabling and packet
883 receiving. Fill in the current time for now. */
b50a5c70 884 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 885 __net_timestamp(skb);
b50a5c70
ML
886 false_tstamp = 1;
887 }
20d49473
PO
888
889 if (need_software_tstamp) {
890 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
891 if (new_tstamp) {
892 struct __kernel_sock_timeval tv;
893
894 skb_get_new_timestamp(skb, &tv);
895 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
896 sizeof(tv), &tv);
897 } else {
898 struct __kernel_old_timeval tv;
899
900 skb_get_timestamp(skb, &tv);
901 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
902 sizeof(tv), &tv);
903 }
20d49473 904 } else {
887feae3
DD
905 if (new_tstamp) {
906 struct __kernel_timespec ts;
907
908 skb_get_new_timestampns(skb, &ts);
909 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
910 sizeof(ts), &ts);
911 } else {
df1b4ba9 912 struct __kernel_old_timespec ts;
887feae3
DD
913
914 skb_get_timestampns(skb, &ts);
915 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
916 sizeof(ts), &ts);
917 }
20d49473
PO
918 }
919 }
920
f24b9be5 921 memset(&tss, 0, sizeof(tss));
c199105d 922 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 923 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 924 empty = 0;
4d276eb6 925 if (shhwtstamps &&
b9f40e21 926 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
d7c08826 927 !skb_is_swtx_tstamp(skb, false_tstamp)) {
97dc7cd9
GE
928 if_index = 0;
929 if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
930 hwtstamp = get_timestamp(sk, skb, &if_index);
007747a9
ML
931 else
932 hwtstamp = shhwtstamps->hwtstamp;
d7c08826 933
97dc7cd9
GE
934 if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
935 hwtstamp = ptp_convert_timestamp(&hwtstamp,
936 sk->sk_bind_phc);
937
007747a9 938 if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
d7c08826
YL
939 empty = 0;
940
941 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
942 !skb_is_err_queue(skb))
97dc7cd9 943 put_ts_pktinfo(msg, skb, if_index);
d7c08826 944 }
aad9c8c4 945 }
1c885808 946 if (!empty) {
9718475e
DD
947 if (sock_flag(sk, SOCK_TSTAMP_NEW))
948 put_cmsg_scm_timestamping64(msg, &tss);
949 else
950 put_cmsg_scm_timestamping(msg, &tss);
1c885808 951
8605330a 952 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 953 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
954 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
955 skb->len, skb->data);
956 }
92f37fd2 957}
7c81fd8b
ACM
958EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
959
eb6fba75 960#ifdef CONFIG_WIRELESS
6e3e939f
JB
961void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
962 struct sk_buff *skb)
963{
964 int ack;
965
966 if (!sock_flag(sk, SOCK_WIFI_STATUS))
967 return;
968 if (!skb->wifi_acked_valid)
969 return;
970
971 ack = skb->wifi_acked;
972
973 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
974}
975EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
eb6fba75 976#endif
6e3e939f 977
11165f14 978static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
979 struct sk_buff *skb)
3b885787 980{
744d5a3e 981 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 982 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 983 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
984}
985
6fd1d51c
EM
986static void sock_recv_mark(struct msghdr *msg, struct sock *sk,
987 struct sk_buff *skb)
988{
2558b803
ED
989 if (sock_flag(sk, SOCK_RCVMARK) && skb) {
990 /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */
991 __u32 mark = skb->mark;
992
993 put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), &mark);
994 }
6fd1d51c
EM
995}
996
/*
 * Append the generic receive-side control messages for @skb to @msg:
 * timestamp first, then the drop counter, then the mark.
 */
void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
		       struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
	sock_recv_mark(msg, sk, skb);
}
6fd1d51c 1004EXPORT_SYMBOL_GPL(__sock_recv_cmsgs);
3b885787 1005
8c3c447b 1006INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
1007 size_t, int));
1008INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
1009 size_t, int));
6e6eda44
YC
1010
1011static noinline void call_trace_sock_recv_length(struct sock *sk, int ret, int flags)
1012{
1013 trace_sock_recv_length(sk, ret, flags);
1014}
1015
1b784140 1016static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 1017 int flags)
1da177e4 1018{
6e6eda44
YC
1019 int ret = INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
1020 inet_recvmsg, sock, msg,
1021 msg_data_left(msg), flags);
1022 if (trace_sock_recv_length_enabled())
1023 call_trace_sock_recv_length(sock->sk, ret, flags);
1024 return ret;
1da177e4
LT
1025}
1026
85806af0
RD
1027/**
1028 * sock_recvmsg - receive a message from @sock
1029 * @sock: socket
1030 * @msg: message to receive
1031 * @flags: message flags
1032 *
1033 * Receives @msg from @sock, passing through LSM. Returns the total number
1034 * of bytes received, or an error.
1035 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	/* A non-zero LSM verdict is returned as-is; the protocol is not called. */
	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
c6d409cf 1042EXPORT_SYMBOL(sock_recvmsg);
1da177e4 1043
c1249c0a 1044/**
8a3c245c
PT
1045 * kernel_recvmsg - Receive a message from a socket (kernel space)
1046 * @sock: The socket to receive the message from
1047 * @msg: Received message
1048 * @vec: Input s/g array for message data
1049 * @num: Size of input s/g array
1050 * @size: Number of bytes to read
1051 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 1052 *
8a3c245c
PT
1053 * On return the msg structure contains the scatter/gather array passed in the
1054 * vec argument. The array is modified so that it consists of the unfilled
1055 * portion of the original array.
c1249c0a 1056 *
8a3c245c 1057 * The returned value is the total number of bytes received, or an error.
c1249c0a 1058 */
8a3c245c 1059
89bddce5
SH
1060int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
1061 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4 1062{
1f466e1f 1063 msg->msg_control_is_user = false;
de4eda9d 1064 iov_iter_kvec(&msg->msg_iter, ITER_DEST, vec, num, size);
1f466e1f 1065 return sock_recvmsg(sock, msg, flags);
1da177e4 1066}
c6d409cf 1067EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 1068
ce1d4d3e
CH
1069static ssize_t sock_sendpage(struct file *file, struct page *page,
1070 int offset, size_t size, loff_t *ppos, int more)
1da177e4 1071{
1da177e4
LT
1072 struct socket *sock;
1073 int flags;
6e6eda44 1074 int ret;
1da177e4 1075
ce1d4d3e
CH
1076 sock = file->private_data;
1077
35f9c09f
ED
1078 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
1079 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
1080 flags |= more;
ce1d4d3e 1081
6e6eda44
YC
1082 ret = kernel_sendpage(sock, page, offset, size, flags);
1083
1084 if (trace_sock_send_length_enabled())
1085 call_trace_sock_send_length(sock->sk, ret, 0);
1086 return ret;
ce1d4d3e 1087}
1da177e4 1088
9c55e01c 1089static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 1090 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
1091 unsigned int flags)
1092{
1093 struct socket *sock = file->private_data;
1094
997b37da 1095 if (unlikely(!sock->ops->splice_read))
95506588 1096 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 1097
9c55e01c
JA
1098 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
1099}
1100
8ae5e030 1101static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 1102{
6d652330
AV
1103 struct file *file = iocb->ki_filp;
1104 struct socket *sock = file->private_data;
0345f931 1105 struct msghdr msg = {.msg_iter = *to,
1106 .msg_iocb = iocb};
8ae5e030 1107 ssize_t res;
ce1d4d3e 1108
ebfcd895 1109 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1110 msg.msg_flags = MSG_DONTWAIT;
1111
1112 if (iocb->ki_pos != 0)
1da177e4 1113 return -ESPIPE;
027445c3 1114
66ee59af 1115 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
1116 return 0;
1117
2da62906 1118 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
1119 *to = msg.msg_iter;
1120 return res;
1da177e4
LT
1121}
1122
8ae5e030 1123static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 1124{
6d652330
AV
1125 struct file *file = iocb->ki_filp;
1126 struct socket *sock = file->private_data;
0345f931 1127 struct msghdr msg = {.msg_iter = *from,
1128 .msg_iocb = iocb};
8ae5e030 1129 ssize_t res;
1da177e4 1130
8ae5e030 1131 if (iocb->ki_pos != 0)
ce1d4d3e 1132 return -ESPIPE;
027445c3 1133
ebfcd895 1134 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1135 msg.msg_flags = MSG_DONTWAIT;
1136
6d652330
AV
1137 if (sock->type == SOCK_SEQPACKET)
1138 msg.msg_flags |= MSG_EOR;
1139
d8725c86 1140 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1141 *from = msg.msg_iter;
1142 return res;
1da177e4
LT
1143}
1144
1da177e4
LT
1145/*
1146 * Atomic setting of ioctl hooks to avoid race
1147 * with module unload.
1148 */
1149
4a3e2f71 1150static DEFINE_MUTEX(br_ioctl_mutex);
ad2f99ae
AB
1151static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br,
1152 unsigned int cmd, struct ifreq *ifr,
1153 void __user *uarg);
1da177e4 1154
ad2f99ae
AB
1155void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
1156 unsigned int cmd, struct ifreq *ifr,
1157 void __user *uarg))
1da177e4 1158{
4a3e2f71 1159 mutex_lock(&br_ioctl_mutex);
1da177e4 1160 br_ioctl_hook = hook;
4a3e2f71 1161 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1162}
1163EXPORT_SYMBOL(brioctl_set);
1164
ad2f99ae
AB
1165int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
1166 struct ifreq *ifr, void __user *uarg)
1167{
1168 int err = -ENOPKG;
1169
1170 if (!br_ioctl_hook)
1171 request_module("bridge");
1172
1173 mutex_lock(&br_ioctl_mutex);
1174 if (br_ioctl_hook)
1175 err = br_ioctl_hook(net, br, cmd, ifr, uarg);
1176 mutex_unlock(&br_ioctl_mutex);
1177
1178 return err;
1179}
1180
4a3e2f71 1181static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1182static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1183
881d966b 1184void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1185{
4a3e2f71 1186 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1187 vlan_ioctl_hook = hook;
4a3e2f71 1188 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1189}
1190EXPORT_SYMBOL(vlan_ioctl_set);
1191
6b96018b 1192static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1193 unsigned int cmd, unsigned long arg)
6b96018b 1194{
876f0bf9
AB
1195 struct ifreq ifr;
1196 bool need_copyout;
6b96018b
AB
1197 int err;
1198 void __user *argp = (void __user *)arg;
a554bf96 1199 void __user *data;
6b96018b
AB
1200
1201 err = sock->ops->ioctl(sock, cmd, arg);
1202
1203 /*
1204 * If this ioctl is unknown try to hand it down
1205 * to the NIC driver.
1206 */
36fd633e
AV
1207 if (err != -ENOIOCTLCMD)
1208 return err;
6b96018b 1209
29ce8f97
JK
1210 if (!is_socket_ioctl_cmd(cmd))
1211 return -ENOTTY;
1212
a554bf96 1213 if (get_user_ifreq(&ifr, &data, argp))
876f0bf9 1214 return -EFAULT;
a554bf96 1215 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
876f0bf9 1216 if (!err && need_copyout)
a554bf96 1217 if (put_user_ifreq(&ifr, argp))
44c02a2c 1218 return -EFAULT;
876f0bf9 1219
6b96018b
AB
1220 return err;
1221}
1222
1da177e4
LT
1223/*
1224 * With an ioctl, arg may well be a user mode pointer, but we don't know
1225 * what to do with it - that's up to the protocol still.
1226 */
1227
1228static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1229{
1230 struct socket *sock;
881d966b 1231 struct sock *sk;
1da177e4
LT
1232 void __user *argp = (void __user *)arg;
1233 int pid, err;
881d966b 1234 struct net *net;
1da177e4 1235
b69aee04 1236 sock = file->private_data;
881d966b 1237 sk = sock->sk;
3b1e0a65 1238 net = sock_net(sk);
44c02a2c
AV
1239 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1240 struct ifreq ifr;
a554bf96 1241 void __user *data;
44c02a2c 1242 bool need_copyout;
a554bf96 1243 if (get_user_ifreq(&ifr, &data, argp))
44c02a2c 1244 return -EFAULT;
a554bf96 1245 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
44c02a2c 1246 if (!err && need_copyout)
a554bf96 1247 if (put_user_ifreq(&ifr, argp))
44c02a2c 1248 return -EFAULT;
1da177e4 1249 } else
3d23e349 1250#ifdef CONFIG_WEXT_CORE
1da177e4 1251 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1252 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1253 } else
3d23e349 1254#endif
89bddce5 1255 switch (cmd) {
1da177e4
LT
1256 case FIOSETOWN:
1257 case SIOCSPGRP:
1258 err = -EFAULT;
1259 if (get_user(pid, (int __user *)argp))
1260 break;
393cc3f5 1261 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1262 break;
1263 case FIOGETOWN:
1264 case SIOCGPGRP:
609d7fa9 1265 err = put_user(f_getown(sock->file),
89bddce5 1266 (int __user *)argp);
1da177e4
LT
1267 break;
1268 case SIOCGIFBR:
1269 case SIOCSIFBR:
1270 case SIOCBRADDBR:
1271 case SIOCBRDELBR:
ad2f99ae 1272 err = br_ioctl_call(net, NULL, cmd, NULL, argp);
1da177e4
LT
1273 break;
1274 case SIOCGIFVLAN:
1275 case SIOCSIFVLAN:
1276 err = -ENOPKG;
1277 if (!vlan_ioctl_hook)
1278 request_module("8021q");
1279
4a3e2f71 1280 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1281 if (vlan_ioctl_hook)
881d966b 1282 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1283 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1284 break;
c62cce2c
AV
1285 case SIOCGSKNS:
1286 err = -EPERM;
1287 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1288 break;
1289
1290 err = open_related_ns(&net->ns, get_net_ns);
1291 break;
0768e170
AB
1292 case SIOCGSTAMP_OLD:
1293 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1294 if (!sock->ops->gettstamp) {
1295 err = -ENOIOCTLCMD;
1296 break;
1297 }
1298 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1299 cmd == SIOCGSTAMP_OLD,
1300 !IS_ENABLED(CONFIG_64BIT));
60747828 1301 break;
0768e170
AB
1302 case SIOCGSTAMP_NEW:
1303 case SIOCGSTAMPNS_NEW:
1304 if (!sock->ops->gettstamp) {
1305 err = -ENOIOCTLCMD;
1306 break;
1307 }
1308 err = sock->ops->gettstamp(sock, argp,
1309 cmd == SIOCGSTAMP_NEW,
1310 false);
c7cbdbf2 1311 break;
876f0bf9
AB
1312
1313 case SIOCGIFCONF:
1314 err = dev_ifconf(net, argp);
1315 break;
1316
1da177e4 1317 default:
63ff03ab 1318 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1319 break;
89bddce5 1320 }
1da177e4
LT
1321 return err;
1322}
1323
8a3c245c
PT
1324/**
1325 * sock_create_lite - creates a socket
1326 * @family: protocol family (AF_INET, ...)
1327 * @type: communication type (SOCK_STREAM, ...)
1328 * @protocol: protocol (0, ...)
1329 * @res: new socket
1330 *
1331 * Creates a new socket and assigns it to @res, passing through LSM.
1332 * The new socket initialization is not complete, see kernel_accept().
1333 * Returns 0 or an error. On failure @res is set to %NULL.
1334 * This function internally uses GFP_KERNEL.
1335 */
1336
1da177e4
LT
1337int sock_create_lite(int family, int type, int protocol, struct socket **res)
1338{
1339 int err;
1340 struct socket *sock = NULL;
89bddce5 1341
1da177e4
LT
1342 err = security_socket_create(family, type, protocol, 1);
1343 if (err)
1344 goto out;
1345
1346 sock = sock_alloc();
1347 if (!sock) {
1348 err = -ENOMEM;
1349 goto out;
1350 }
1351
1da177e4 1352 sock->type = type;
7420ed23
VY
1353 err = security_socket_post_create(sock, family, type, protocol, 1);
1354 if (err)
1355 goto out_release;
1356
1da177e4
LT
1357out:
1358 *res = sock;
1359 return err;
7420ed23
VY
1360out_release:
1361 sock_release(sock);
1362 sock = NULL;
1363 goto out;
1da177e4 1364}
c6d409cf 1365EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1366
1367/* No kernel lock held - perfect */
ade994f4 1368static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1369{
3cafb376 1370 struct socket *sock = file->private_data;
a331de3b 1371 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1372
e88958e6
CH
1373 if (!sock->ops->poll)
1374 return 0;
f641f13b 1375
a331de3b
CH
1376 if (sk_can_busy_loop(sock->sk)) {
1377 /* poll once if requested by the syscall */
1378 if (events & POLL_BUSY_LOOP)
1379 sk_busy_loop(sock->sk, 1);
1380
1381 /* if this socket can poll_ll, tell the system call */
1382 flag = POLL_BUSY_LOOP;
1383 }
1384
1385 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1386}
1387
89bddce5 1388static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1389{
b69aee04 1390 struct socket *sock = file->private_data;
1da177e4
LT
1391
1392 return sock->ops->mmap(file, sock, vma);
1393}
1394
/*
 * ->release() file operation for sockets: release the socket embedded
 * in @inode. Always returns 0; @filp is not used.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1400
1401/*
1402 * Update the socket async list
1403 *
1404 * Fasync_list locking strategy.
1405 *
1406 * 1. fasync_list is modified only under process context socket lock
1407 * i.e. under semaphore.
1408 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1409 * or under socket lock
1da177e4
LT
1410 */
1411
1412static int sock_fasync(int fd, struct file *filp, int on)
1413{
989a2979
ED
1414 struct socket *sock = filp->private_data;
1415 struct sock *sk = sock->sk;
333f7909 1416 struct socket_wq *wq = &sock->wq;
1da177e4 1417
989a2979 1418 if (sk == NULL)
1da177e4 1419 return -EINVAL;
1da177e4
LT
1420
1421 lock_sock(sk);
eaefd110 1422 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1423
eaefd110 1424 if (!wq->fasync_list)
989a2979
ED
1425 sock_reset_flag(sk, SOCK_FASYNC);
1426 else
bcdce719 1427 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1428
989a2979 1429 release_sock(sk);
1da177e4
LT
1430 return 0;
1431}
1432
ceb5d58b 1433/* This function may be called only under rcu_lock */
1da177e4 1434
ceb5d58b 1435int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1436{
ceb5d58b 1437 if (!wq || !wq->fasync_list)
1da177e4 1438 return -1;
ceb5d58b 1439
89bddce5 1440 switch (how) {
8d8ad9d7 1441 case SOCK_WAKE_WAITD:
ceb5d58b 1442 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1443 break;
1444 goto call_kill;
8d8ad9d7 1445 case SOCK_WAKE_SPACE:
ceb5d58b 1446 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1447 break;
7c7ab580 1448 fallthrough;
8d8ad9d7 1449 case SOCK_WAKE_IO:
89bddce5 1450call_kill:
43815482 1451 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1452 break;
8d8ad9d7 1453 case SOCK_WAKE_URG:
43815482 1454 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1455 }
ceb5d58b 1456
1da177e4
LT
1457 return 0;
1458}
c6d409cf 1459EXPORT_SYMBOL(sock_wake_async);
1da177e4 1460
8a3c245c
PT
/**
 *	__sock_create - creates a socket
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *	@kern: boolean for kernel space sockets
 *
 *	Creates a new socket and assigns it to @res, passing through LSM.
 *	Returns 0 or an error. @res is written only on success; on failure
 *	it is left untouched. @kern must be set to true if the socket
 *	resides in kernel space.
 *	This function internally uses GFP_KERNEL.
 */
1475
721db93a 1476int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1477 struct socket **res, int kern)
1da177e4
LT
1478{
1479 int err;
1480 struct socket *sock;
55737fda 1481 const struct net_proto_family *pf;
1da177e4
LT
1482
1483 /*
89bddce5 1484 * Check protocol is in range
1da177e4
LT
1485 */
1486 if (family < 0 || family >= NPROTO)
1487 return -EAFNOSUPPORT;
1488 if (type < 0 || type >= SOCK_MAX)
1489 return -EINVAL;
1490
1491 /* Compatibility.
1492
1493 This uglymoron is moved from INET layer to here to avoid
1494 deadlock in module load.
1495 */
1496 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1497 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1498 current->comm);
1da177e4
LT
1499 family = PF_PACKET;
1500 }
1501
1502 err = security_socket_create(family, type, protocol, kern);
1503 if (err)
1504 return err;
89bddce5 1505
55737fda
SH
1506 /*
1507 * Allocate the socket and allow the family to set things up. if
1508 * the protocol is 0, the family is instructed to select an appropriate
1509 * default.
1510 */
1511 sock = sock_alloc();
1512 if (!sock) {
e87cc472 1513 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1514 return -ENFILE; /* Not exactly a match, but its the
1515 closest posix thing */
1516 }
1517
1518 sock->type = type;
1519
95a5afca 1520#ifdef CONFIG_MODULES
89bddce5
SH
1521 /* Attempt to load a protocol module if the find failed.
1522 *
1523 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1524 * requested real, full-featured networking support upon configuration.
1525 * Otherwise module support will break!
1526 */
190683a9 1527 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1528 request_module("net-pf-%d", family);
1da177e4
LT
1529#endif
1530
55737fda
SH
1531 rcu_read_lock();
1532 pf = rcu_dereference(net_families[family]);
1533 err = -EAFNOSUPPORT;
1534 if (!pf)
1535 goto out_release;
1da177e4
LT
1536
1537 /*
1538 * We will call the ->create function, that possibly is in a loadable
1539 * module, so we have to bump that loadable module refcnt first.
1540 */
55737fda 1541 if (!try_module_get(pf->owner))
1da177e4
LT
1542 goto out_release;
1543
55737fda
SH
1544 /* Now protected by module ref count */
1545 rcu_read_unlock();
1546
3f378b68 1547 err = pf->create(net, sock, protocol, kern);
55737fda 1548 if (err < 0)
1da177e4 1549 goto out_module_put;
a79af59e 1550
1da177e4
LT
1551 /*
1552 * Now to bump the refcnt of the [loadable] module that owns this
1553 * socket at sock_release time we decrement its refcnt.
1554 */
55737fda
SH
1555 if (!try_module_get(sock->ops->owner))
1556 goto out_module_busy;
1557
1da177e4
LT
1558 /*
1559 * Now that we're done with the ->create function, the [loadable]
1560 * module can have its refcnt decremented
1561 */
55737fda 1562 module_put(pf->owner);
7420ed23
VY
1563 err = security_socket_post_create(sock, family, type, protocol, kern);
1564 if (err)
3b185525 1565 goto out_sock_release;
55737fda 1566 *res = sock;
1da177e4 1567
55737fda
SH
1568 return 0;
1569
1570out_module_busy:
1571 err = -EAFNOSUPPORT;
1da177e4 1572out_module_put:
55737fda
SH
1573 sock->ops = NULL;
1574 module_put(pf->owner);
1575out_sock_release:
1da177e4 1576 sock_release(sock);
55737fda
SH
1577 return err;
1578
1579out_release:
1580 rcu_read_unlock();
1581 goto out_sock_release;
1da177e4 1582}
721db93a 1583EXPORT_SYMBOL(__sock_create);
1da177e4 1584
8a3c245c
PT
1585/**
1586 * sock_create - creates a socket
1587 * @family: protocol family (AF_INET, ...)
1588 * @type: communication type (SOCK_STREAM, ...)
1589 * @protocol: protocol (0, ...)
1590 * @res: new socket
1591 *
1592 * A wrapper around __sock_create().
1593 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1594 */
1595
1da177e4
LT
1596int sock_create(int family, int type, int protocol, struct socket **res)
1597{
1b8d7ae4 1598 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1599}
c6d409cf 1600EXPORT_SYMBOL(sock_create);
1da177e4 1601
8a3c245c
PT
1602/**
1603 * sock_create_kern - creates a socket (kernel space)
1604 * @net: net namespace
1605 * @family: protocol family (AF_INET, ...)
1606 * @type: communication type (SOCK_STREAM, ...)
1607 * @protocol: protocol (0, ...)
1608 * @res: new socket
1609 *
1610 * A wrapper around __sock_create().
1611 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1612 */
1613
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	/* kern == 1: the socket resides in kernel space. */
	return __sock_create(net, family, type, protocol, res, 1);
}
c6d409cf 1618EXPORT_SYMBOL(sock_create_kern);
1da177e4 1619
da214a47 1620static struct socket *__sys_socket_create(int family, int type, int protocol)
1da177e4 1621{
1da177e4 1622 struct socket *sock;
da214a47 1623 int retval;
a677a039 1624
e38b36f3
UD
1625 /* Check the SOCK_* constants for consistency. */
1626 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1627 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1628 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1629 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1630
da214a47
JA
1631 if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1632 return ERR_PTR(-EINVAL);
a677a039 1633 type &= SOCK_TYPE_MASK;
1da177e4 1634
da214a47
JA
1635 retval = sock_create(family, type, protocol, &sock);
1636 if (retval < 0)
1637 return ERR_PTR(retval);
1638
1639 return sock;
1640}
1641
1642struct file *__sys_socket_file(int family, int type, int protocol)
1643{
1644 struct socket *sock;
da214a47
JA
1645 int flags;
1646
1647 sock = __sys_socket_create(family, type, protocol);
1648 if (IS_ERR(sock))
1649 return ERR_CAST(sock);
1650
1651 flags = type & ~SOCK_TYPE_MASK;
aaca0bdc
UD
1652 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1653 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1654
649c15c7 1655 return sock_alloc_file(sock, flags, NULL);
da214a47
JA
1656}
1657
1658int __sys_socket(int family, int type, int protocol)
1659{
1660 struct socket *sock;
1661 int flags;
1662
1663 sock = __sys_socket_create(family, type, protocol);
1664 if (IS_ERR(sock))
1665 return PTR_ERR(sock);
1666
1667 flags = type & ~SOCK_TYPE_MASK;
1668 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1669 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1da177e4 1670
8e1611e2 1671 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1672}
1673
9d6a15c3
DB
1674SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1675{
1676 return __sys_socket(family, type, protocol);
1677}
1678
1da177e4
LT
1679/*
1680 * Create a pair of connected sockets.
1681 */
1682
6debc8d8 1683int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1684{
1685 struct socket *sock1, *sock2;
1686 int fd1, fd2, err;
db349509 1687 struct file *newfile1, *newfile2;
a677a039
UD
1688 int flags;
1689
1690 flags = type & ~SOCK_TYPE_MASK;
77d27200 1691 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1692 return -EINVAL;
1693 type &= SOCK_TYPE_MASK;
1da177e4 1694
aaca0bdc
UD
1695 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1696 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1697
016a266b
AV
1698 /*
1699 * reserve descriptors and make sure we won't fail
1700 * to return them to userland.
1701 */
1702 fd1 = get_unused_fd_flags(flags);
1703 if (unlikely(fd1 < 0))
1704 return fd1;
1705
1706 fd2 = get_unused_fd_flags(flags);
1707 if (unlikely(fd2 < 0)) {
1708 put_unused_fd(fd1);
1709 return fd2;
1710 }
1711
1712 err = put_user(fd1, &usockvec[0]);
1713 if (err)
1714 goto out;
1715
1716 err = put_user(fd2, &usockvec[1]);
1717 if (err)
1718 goto out;
1719
1da177e4
LT
1720 /*
1721 * Obtain the first socket and check if the underlying protocol
1722 * supports the socketpair call.
1723 */
1724
1725 err = sock_create(family, type, protocol, &sock1);
016a266b 1726 if (unlikely(err < 0))
1da177e4
LT
1727 goto out;
1728
1729 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1730 if (unlikely(err < 0)) {
1731 sock_release(sock1);
1732 goto out;
bf3c23d1 1733 }
d73aa286 1734
d47cd945
DH
1735 err = security_socket_socketpair(sock1, sock2);
1736 if (unlikely(err)) {
1737 sock_release(sock2);
1738 sock_release(sock1);
1739 goto out;
1740 }
1741
016a266b
AV
1742 err = sock1->ops->socketpair(sock1, sock2);
1743 if (unlikely(err < 0)) {
1744 sock_release(sock2);
1745 sock_release(sock1);
1746 goto out;
28407630
AV
1747 }
1748
aab174f0 1749 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1750 if (IS_ERR(newfile1)) {
28407630 1751 err = PTR_ERR(newfile1);
016a266b
AV
1752 sock_release(sock2);
1753 goto out;
28407630
AV
1754 }
1755
aab174f0 1756 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1757 if (IS_ERR(newfile2)) {
1758 err = PTR_ERR(newfile2);
016a266b
AV
1759 fput(newfile1);
1760 goto out;
db349509
AV
1761 }
1762
157cf649 1763 audit_fd_pair(fd1, fd2);
d73aa286 1764
db349509
AV
1765 fd_install(fd1, newfile1);
1766 fd_install(fd2, newfile2);
d73aa286 1767 return 0;
1da177e4 1768
016a266b 1769out:
d73aa286 1770 put_unused_fd(fd2);
d73aa286 1771 put_unused_fd(fd1);
1da177e4
LT
1772 return err;
1773}
1774
6debc8d8
DB
1775SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1776 int __user *, usockvec)
1777{
1778 return __sys_socketpair(family, type, protocol, usockvec);
1779}
1780
1da177e4
LT
1781/*
1782 * Bind a name to a socket. Nothing much to do here since it's
1783 * the protocol's responsibility to handle the local address.
1784 *
1785 * We move the socket address to kernel space before we call
1786 * the protocol layer (having also checked the address is ok).
1787 */
1788
a87d35d8 1789int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1790{
1791 struct socket *sock;
230b1839 1792 struct sockaddr_storage address;
6cb153ca 1793 int err, fput_needed;
1da177e4 1794
89bddce5 1795 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1796 if (sock) {
43db362d 1797 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1798 if (!err) {
89bddce5 1799 err = security_socket_bind(sock,
230b1839 1800 (struct sockaddr *)&address,
89bddce5 1801 addrlen);
6cb153ca
BL
1802 if (!err)
1803 err = sock->ops->bind(sock,
89bddce5 1804 (struct sockaddr *)
230b1839 1805 &address, addrlen);
1da177e4 1806 }
6cb153ca 1807 fput_light(sock->file, fput_needed);
89bddce5 1808 }
1da177e4
LT
1809 return err;
1810}
1811
a87d35d8
DB
1812SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1813{
1814 return __sys_bind(fd, umyaddr, addrlen);
1815}
1816
1da177e4
LT
1817/*
1818 * Perform a listen. Basically, we allow the protocol to do anything
1819 * necessary for a listen, and if that works, we mark the socket as
1820 * ready for listening.
1821 */
1822
25e290ee 1823int __sys_listen(int fd, int backlog)
1da177e4
LT
1824{
1825 struct socket *sock;
6cb153ca 1826 int err, fput_needed;
b8e1f9b5 1827 int somaxconn;
89bddce5
SH
1828
1829 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1830 if (sock) {
3c9ba81d 1831 somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
95c96174 1832 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1833 backlog = somaxconn;
1da177e4
LT
1834
1835 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1836 if (!err)
1837 err = sock->ops->listen(sock, backlog);
1da177e4 1838
6cb153ca 1839 fput_light(sock->file, fput_needed);
1da177e4
LT
1840 }
1841 return err;
1842}
1843
25e290ee
DB
1844SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1845{
1846 return __sys_listen(fd, backlog);
1847}
1848
d32f89da 1849struct file *do_accept(struct file *file, unsigned file_flags,
de2ea4b6 1850 struct sockaddr __user *upeer_sockaddr,
d32f89da 1851 int __user *upeer_addrlen, int flags)
1da177e4
LT
1852{
1853 struct socket *sock, *newsock;
39d8c1b6 1854 struct file *newfile;
d32f89da 1855 int err, len;
230b1839 1856 struct sockaddr_storage address;
1da177e4 1857
dba4a925 1858 sock = sock_from_file(file);
d32f89da
PB
1859 if (!sock)
1860 return ERR_PTR(-ENOTSOCK);
1da177e4 1861
c6d409cf
ED
1862 newsock = sock_alloc();
1863 if (!newsock)
d32f89da 1864 return ERR_PTR(-ENFILE);
1da177e4
LT
1865
1866 newsock->type = sock->type;
1867 newsock->ops = sock->ops;
1868
1da177e4
LT
1869 /*
1870 * We don't need try_module_get here, as the listening socket (sock)
1871 * has the protocol module (sock->ops->owner) held.
1872 */
1873 __module_get(newsock->ops->owner);
1874
aab174f0 1875 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
d32f89da
PB
1876 if (IS_ERR(newfile))
1877 return newfile;
39d8c1b6 1878
a79af59e
FF
1879 err = security_socket_accept(sock, newsock);
1880 if (err)
39d8c1b6 1881 goto out_fd;
a79af59e 1882
de2ea4b6
JA
1883 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1884 false);
1da177e4 1885 if (err < 0)
39d8c1b6 1886 goto out_fd;
1da177e4
LT
1887
1888 if (upeer_sockaddr) {
9b2c45d4
DV
1889 len = newsock->ops->getname(newsock,
1890 (struct sockaddr *)&address, 2);
1891 if (len < 0) {
1da177e4 1892 err = -ECONNABORTED;
39d8c1b6 1893 goto out_fd;
1da177e4 1894 }
43db362d 1895 err = move_addr_to_user(&address,
230b1839 1896 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1897 if (err < 0)
39d8c1b6 1898 goto out_fd;
1da177e4
LT
1899 }
1900
1901 /* File flags are not inherited via accept() unlike another OSes. */
d32f89da 1902 return newfile;
39d8c1b6 1903out_fd:
9606a216 1904 fput(newfile);
d32f89da
PB
1905 return ERR_PTR(err);
1906}
1907
c0424532
YD
1908static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr,
1909 int __user *upeer_addrlen, int flags)
d32f89da
PB
1910{
1911 struct file *newfile;
1912 int newfd;
1913
1914 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1915 return -EINVAL;
1916
1917 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1918 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
de2ea4b6 1919
c0424532 1920 newfd = get_unused_fd_flags(flags);
d32f89da
PB
1921 if (unlikely(newfd < 0))
1922 return newfd;
1923
c0424532 1924 newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen,
d32f89da
PB
1925 flags);
1926 if (IS_ERR(newfile)) {
1927 put_unused_fd(newfd);
1928 return PTR_ERR(newfile);
1929 }
1930 fd_install(newfd, newfile);
1931 return newfd;
de2ea4b6
JA
1932}
1933
/*
 *	For accept, we attempt to create a new socket, set up the link
 *	with the client, wake up the client, then return the new
 *	connected fd. We collect the address of the connector in kernel
 *	space and move it to user at the very end. This is unclean because
 *	we open the socket then return an error.
 *
 *	1003.1g adds the ability for recvmsg() to query connection pending
 *	status. We need to add that support in a way that's clean when we
 *	restructure accept also.
 */
1945
1946int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1947 int __user *upeer_addrlen, int flags)
1948{
1949 int ret = -EBADF;
1950 struct fd f;
1951
1952 f = fdget(fd);
1953 if (f.file) {
c0424532
YD
1954 ret = __sys_accept4_file(f.file, upeer_sockaddr,
1955 upeer_addrlen, flags);
6b07edeb 1956 fdput(f);
de2ea4b6
JA
1957 }
1958
1959 return ret;
1da177e4
LT
1960}
1961
4541e805
DB
1962SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1963 int __user *, upeer_addrlen, int, flags)
1964{
1965 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1966}
1967
20f37034
HC
1968SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1969 int __user *, upeer_addrlen)
aaca0bdc 1970{
4541e805 1971 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1972}
1973
1da177e4
LT
1974/*
1975 * Attempt to connect to a socket with the server address. The address
1976 * is in user space so we verify it is OK and move it to kernel space.
1977 *
1978 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1979 * break bindings
1980 *
1981 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1982 * other SEQPACKET protocols that take time to connect() as it doesn't
1983 * include the -EINPROGRESS status for such sockets.
1984 */
1985
f499a021 1986int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 1987 int addrlen, int file_flags)
1da177e4
LT
1988{
1989 struct socket *sock;
bd3ded31 1990 int err;
1da177e4 1991
dba4a925
FR
1992 sock = sock_from_file(file);
1993 if (!sock) {
1994 err = -ENOTSOCK;
1da177e4 1995 goto out;
dba4a925 1996 }
1da177e4 1997
89bddce5 1998 err =
f499a021 1999 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 2000 if (err)
bd3ded31 2001 goto out;
1da177e4 2002
f499a021 2003 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
bd3ded31 2004 sock->file->f_flags | file_flags);
1da177e4
LT
2005out:
2006 return err;
2007}
2008
bd3ded31
JA
2009int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
2010{
2011 int ret = -EBADF;
2012 struct fd f;
2013
2014 f = fdget(fd);
2015 if (f.file) {
f499a021
JA
2016 struct sockaddr_storage address;
2017
2018 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
2019 if (!ret)
2020 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 2021 fdput(f);
bd3ded31
JA
2022 }
2023
2024 return ret;
2025}
2026
1387c2c2
DB
2027SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
2028 int, addrlen)
2029{
2030 return __sys_connect(fd, uservaddr, addrlen);
2031}
2032
1da177e4
LT
2033/*
2034 * Get the local address ('name') of a socket object. Move the obtained
2035 * name to user space.
2036 */
2037
8882a107
DB
2038int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
2039 int __user *usockaddr_len)
1da177e4
LT
2040{
2041 struct socket *sock;
230b1839 2042 struct sockaddr_storage address;
9b2c45d4 2043 int err, fput_needed;
89bddce5 2044
6cb153ca 2045 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
2046 if (!sock)
2047 goto out;
2048
2049 err = security_socket_getsockname(sock);
2050 if (err)
2051 goto out_put;
2052
9b2c45d4
DV
2053 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
2054 if (err < 0)
1da177e4 2055 goto out_put;
e44ef1d4 2056 /* "err" is actually length in this case */
9b2c45d4 2057 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
2058
2059out_put:
6cb153ca 2060 fput_light(sock->file, fput_needed);
1da177e4
LT
2061out:
2062 return err;
2063}
2064
8882a107
DB
2065SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
2066 int __user *, usockaddr_len)
2067{
2068 return __sys_getsockname(fd, usockaddr, usockaddr_len);
2069}
2070
1da177e4
LT
2071/*
2072 * Get the remote address ('name') of a socket object. Move the obtained
2073 * name to user space.
2074 */
2075
b21c8f83
DB
2076int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
2077 int __user *usockaddr_len)
1da177e4
LT
2078{
2079 struct socket *sock;
230b1839 2080 struct sockaddr_storage address;
9b2c45d4 2081 int err, fput_needed;
1da177e4 2082
89bddce5
SH
2083 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2084 if (sock != NULL) {
1da177e4
LT
2085 err = security_socket_getpeername(sock);
2086 if (err) {
6cb153ca 2087 fput_light(sock->file, fput_needed);
1da177e4
LT
2088 return err;
2089 }
2090
9b2c45d4
DV
2091 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
2092 if (err >= 0)
2093 /* "err" is actually length in this case */
2094 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 2095 usockaddr_len);
6cb153ca 2096 fput_light(sock->file, fput_needed);
1da177e4
LT
2097 }
2098 return err;
2099}
2100
b21c8f83
DB
2101SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
2102 int __user *, usockaddr_len)
2103{
2104 return __sys_getpeername(fd, usockaddr, usockaddr_len);
2105}
2106
1da177e4
LT
2107/*
2108 * Send a datagram to a given address. We move the address into kernel
2109 * space and check the user space data area is readable before invoking
2110 * the protocol.
2111 */
211b634b
DB
2112int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
2113 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
2114{
2115 struct socket *sock;
230b1839 2116 struct sockaddr_storage address;
1da177e4
LT
2117 int err;
2118 struct msghdr msg;
2119 struct iovec iov;
6cb153ca 2120 int fput_needed;
6cb153ca 2121
de4eda9d 2122 err = import_single_range(ITER_SOURCE, buff, len, &iov, &msg.msg_iter);
602bd0e9
AV
2123 if (unlikely(err))
2124 return err;
de0fa95c
PE
2125 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2126 if (!sock)
4387ff75 2127 goto out;
6cb153ca 2128
89bddce5 2129 msg.msg_name = NULL;
89bddce5
SH
2130 msg.msg_control = NULL;
2131 msg.msg_controllen = 0;
2132 msg.msg_namelen = 0;
7c701d92 2133 msg.msg_ubuf = NULL;
6cb153ca 2134 if (addr) {
43db362d 2135 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
2136 if (err < 0)
2137 goto out_put;
230b1839 2138 msg.msg_name = (struct sockaddr *)&address;
89bddce5 2139 msg.msg_namelen = addr_len;
1da177e4
LT
2140 }
2141 if (sock->file->f_flags & O_NONBLOCK)
2142 flags |= MSG_DONTWAIT;
2143 msg.msg_flags = flags;
d8725c86 2144 err = sock_sendmsg(sock, &msg);
1da177e4 2145
89bddce5 2146out_put:
de0fa95c 2147 fput_light(sock->file, fput_needed);
4387ff75 2148out:
1da177e4
LT
2149 return err;
2150}
2151
211b634b
DB
2152SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2153 unsigned int, flags, struct sockaddr __user *, addr,
2154 int, addr_len)
2155{
2156 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2157}
2158
1da177e4 2159/*
89bddce5 2160 * Send a datagram down a socket.
1da177e4
LT
2161 */
2162
3e0fa65f 2163SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2164 unsigned int, flags)
1da177e4 2165{
211b634b 2166 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2167}
2168
2169/*
89bddce5 2170 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2171 * sender. We verify the buffers are writable and if needed move the
2172 * sender address from kernel to user space.
2173 */
7a09e1eb
DB
2174int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2175 struct sockaddr __user *addr, int __user *addr_len)
1da177e4 2176{
1228b34c
ED
2177 struct sockaddr_storage address;
2178 struct msghdr msg = {
2179 /* Save some cycles and don't copy the address if not needed */
2180 .msg_name = addr ? (struct sockaddr *)&address : NULL,
2181 };
1da177e4
LT
2182 struct socket *sock;
2183 struct iovec iov;
89bddce5 2184 int err, err2;
6cb153ca
BL
2185 int fput_needed;
2186
de4eda9d 2187 err = import_single_range(ITER_DEST, ubuf, size, &iov, &msg.msg_iter);
602bd0e9
AV
2188 if (unlikely(err))
2189 return err;
de0fa95c 2190 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2191 if (!sock)
de0fa95c 2192 goto out;
1da177e4 2193
1da177e4
LT
2194 if (sock->file->f_flags & O_NONBLOCK)
2195 flags |= MSG_DONTWAIT;
2da62906 2196 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2197
89bddce5 2198 if (err >= 0 && addr != NULL) {
43db362d 2199 err2 = move_addr_to_user(&address,
230b1839 2200 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2201 if (err2 < 0)
2202 err = err2;
1da177e4 2203 }
de0fa95c
PE
2204
2205 fput_light(sock->file, fput_needed);
4387ff75 2206out:
1da177e4
LT
2207 return err;
2208}
2209
7a09e1eb
DB
2210SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2211 unsigned int, flags, struct sockaddr __user *, addr,
2212 int __user *, addr_len)
2213{
2214 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2215}
2216
1da177e4 2217/*
89bddce5 2218 * Receive a datagram from a socket.
1da177e4
LT
2219 */
2220
b7c0ddf5
JG
2221SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2222 unsigned int, flags)
1da177e4 2223{
7a09e1eb 2224 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2225}
2226
83f0c10b
FW
2227static bool sock_use_custom_sol_socket(const struct socket *sock)
2228{
a5ef058d 2229 return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
83f0c10b
FW
2230}
2231
1da177e4
LT
2232/*
2233 * Set a socket option. Because we don't know the option lengths we have
2234 * to pass the user mode parameter for the protocols to sort out.
2235 */
a7b75c5a 2236int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
55db9c0e 2237 int optlen)
1da177e4 2238{
519a8a6c 2239 sockptr_t optval = USER_SOCKPTR(user_optval);
0d01da6a 2240 char *kernel_optval = NULL;
6cb153ca 2241 int err, fput_needed;
1da177e4
LT
2242 struct socket *sock;
2243
2244 if (optlen < 0)
2245 return -EINVAL;
89bddce5
SH
2246
2247 sock = sockfd_lookup_light(fd, &err, &fput_needed);
4a367299
CH
2248 if (!sock)
2249 return err;
1da177e4 2250
4a367299
CH
2251 err = security_socket_setsockopt(sock, level, optname);
2252 if (err)
2253 goto out_put;
0d01da6a 2254
55db9c0e
CH
2255 if (!in_compat_syscall())
2256 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
a7b75c5a 2257 user_optval, &optlen,
55db9c0e 2258 &kernel_optval);
4a367299
CH
2259 if (err < 0)
2260 goto out_put;
2261 if (err > 0) {
2262 err = 0;
2263 goto out_put;
2264 }
0d01da6a 2265
a7b75c5a
CH
2266 if (kernel_optval)
2267 optval = KERNEL_SOCKPTR(kernel_optval);
4a367299 2268 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2269 err = sock_setsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2270 else if (unlikely(!sock->ops->setsockopt))
2271 err = -EOPNOTSUPP;
4a367299
CH
2272 else
2273 err = sock->ops->setsockopt(sock, level, optname, optval,
89bddce5 2274 optlen);
a7b75c5a 2275 kfree(kernel_optval);
4a367299
CH
2276out_put:
2277 fput_light(sock->file, fput_needed);
1da177e4
LT
2278 return err;
2279}
2280
cc36dca0
DB
2281SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2282 char __user *, optval, int, optlen)
2283{
2284 return __sys_setsockopt(fd, level, optname, optval, optlen);
2285}
2286
9cacf81f
SF
2287INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2288 int optname));
2289
1da177e4
LT
2290/*
2291 * Get a socket option. Because we don't know the option lengths we have
2292 * to pass a user mode parameter for the protocols to sort out.
2293 */
55db9c0e
CH
2294int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2295 int __user *optlen)
1da177e4 2296{
ad4bf5f2 2297 int max_optlen __maybe_unused;
6cb153ca 2298 int err, fput_needed;
1da177e4
LT
2299 struct socket *sock;
2300
89bddce5 2301 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2302 if (!sock)
2303 return err;
2304
2305 err = security_socket_getsockopt(sock, level, optname);
2306 if (err)
2307 goto out_put;
1da177e4 2308
55db9c0e
CH
2309 if (!in_compat_syscall())
2310 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2311
d8a9b38f
CH
2312 if (level == SOL_SOCKET)
2313 err = sock_getsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2314 else if (unlikely(!sock->ops->getsockopt))
2315 err = -EOPNOTSUPP;
d8a9b38f
CH
2316 else
2317 err = sock->ops->getsockopt(sock, level, optname, optval,
89bddce5 2318 optlen);
0d01da6a 2319
55db9c0e
CH
2320 if (!in_compat_syscall())
2321 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2322 optval, optlen, max_optlen,
2323 err);
6cb153ca 2324out_put:
d8a9b38f 2325 fput_light(sock->file, fput_needed);
1da177e4
LT
2326 return err;
2327}
2328
13a2d70e
DB
2329SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2330 char __user *, optval, int __user *, optlen)
2331{
2332 return __sys_getsockopt(fd, level, optname, optval, optlen);
2333}
2334
1da177e4
LT
2335/*
2336 * Shutdown a socket.
2337 */
2338
b713c195
JA
2339int __sys_shutdown_sock(struct socket *sock, int how)
2340{
2341 int err;
2342
2343 err = security_socket_shutdown(sock, how);
2344 if (!err)
2345 err = sock->ops->shutdown(sock, how);
2346
2347 return err;
2348}
2349
005a1aea 2350int __sys_shutdown(int fd, int how)
1da177e4 2351{
6cb153ca 2352 int err, fput_needed;
1da177e4
LT
2353 struct socket *sock;
2354
89bddce5
SH
2355 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2356 if (sock != NULL) {
b713c195 2357 err = __sys_shutdown_sock(sock, how);
6cb153ca 2358 fput_light(sock->file, fput_needed);
1da177e4
LT
2359 }
2360 return err;
2361}
2362
005a1aea
DB
2363SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2364{
2365 return __sys_shutdown(fd, how);
2366}
2367
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG(msg, member) expands to &msg_compat->member when
 * MSG_CMSG_COMPAT is set in `flags`, and to &msg->member otherwise.  The
 * expansion site must therefore have `flags` and `<msg>_compat` in scope.
 */
#define COMPAT_MSG(msg, member)					\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2374
c71d8ebe
TH
2375struct used_address {
2376 struct sockaddr_storage name;
2377 unsigned int name_len;
2378};
2379
7fa875b8
DY
2380int __copy_msghdr(struct msghdr *kmsg,
2381 struct user_msghdr *msg,
2382 struct sockaddr __user **save_addr)
1661bf36 2383{
08adb7da
AV
2384 ssize_t err;
2385
1f466e1f 2386 kmsg->msg_control_is_user = true;
1228b34c 2387 kmsg->msg_get_inq = 0;
7fa875b8
DY
2388 kmsg->msg_control_user = msg->msg_control;
2389 kmsg->msg_controllen = msg->msg_controllen;
2390 kmsg->msg_flags = msg->msg_flags;
ffb07550 2391
7fa875b8
DY
2392 kmsg->msg_namelen = msg->msg_namelen;
2393 if (!msg->msg_name)
6a2a2b3a
AS
2394 kmsg->msg_namelen = 0;
2395
dbb490b9
ML
2396 if (kmsg->msg_namelen < 0)
2397 return -EINVAL;
2398
1661bf36 2399 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2400 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2401
2402 if (save_addr)
7fa875b8 2403 *save_addr = msg->msg_name;
08adb7da 2404
7fa875b8 2405 if (msg->msg_name && kmsg->msg_namelen) {
08adb7da 2406 if (!save_addr) {
7fa875b8 2407 err = move_addr_to_kernel(msg->msg_name,
864d9664 2408 kmsg->msg_namelen,
08adb7da
AV
2409 kmsg->msg_name);
2410 if (err < 0)
2411 return err;
2412 }
2413 } else {
2414 kmsg->msg_name = NULL;
2415 kmsg->msg_namelen = 0;
2416 }
2417
7fa875b8 2418 if (msg->msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2419 return -EMSGSIZE;
2420
0345f931 2421 kmsg->msg_iocb = NULL;
7c701d92 2422 kmsg->msg_ubuf = NULL;
0a384abf
JA
2423 return 0;
2424}
2425
2426static int copy_msghdr_from_user(struct msghdr *kmsg,
2427 struct user_msghdr __user *umsg,
2428 struct sockaddr __user **save_addr,
2429 struct iovec **iov)
2430{
2431 struct user_msghdr msg;
2432 ssize_t err;
2433
7fa875b8
DY
2434 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
2435 return -EFAULT;
2436
2437 err = __copy_msghdr(kmsg, &msg, save_addr);
0a384abf
JA
2438 if (err)
2439 return err;
0345f931 2440
de4eda9d 2441 err = import_iovec(save_addr ? ITER_DEST : ITER_SOURCE,
ffb07550 2442 msg.msg_iov, msg.msg_iovlen,
da184284 2443 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2444 return err < 0 ? err : 0;
1661bf36
DC
2445}
2446
4257c8ca
JA
2447static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2448 unsigned int flags, struct used_address *used_address,
2449 unsigned int allowed_msghdr_flags)
1da177e4 2450{
b9d717a7 2451 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2452 __aligned(sizeof(__kernel_size_t));
89bddce5 2453 /* 20 is size of ipv6_pktinfo */
1da177e4 2454 unsigned char *ctl_buf = ctl;
d8725c86 2455 int ctl_len;
08adb7da 2456 ssize_t err;
89bddce5 2457
1da177e4
LT
2458 err = -ENOBUFS;
2459
228e548e 2460 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2461 goto out;
28a94d8f 2462 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2463 ctl_len = msg_sys->msg_controllen;
1da177e4 2464 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2465 err =
228e548e 2466 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2467 sizeof(ctl));
1da177e4 2468 if (err)
4257c8ca 2469 goto out;
228e548e
AB
2470 ctl_buf = msg_sys->msg_control;
2471 ctl_len = msg_sys->msg_controllen;
1da177e4 2472 } else if (ctl_len) {
ac4340fc
DM
2473 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2474 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2475 if (ctl_len > sizeof(ctl)) {
1da177e4 2476 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2477 if (ctl_buf == NULL)
4257c8ca 2478 goto out;
1da177e4
LT
2479 }
2480 err = -EFAULT;
1f466e1f 2481 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2482 goto out_freectl;
228e548e 2483 msg_sys->msg_control = ctl_buf;
1f466e1f 2484 msg_sys->msg_control_is_user = false;
1da177e4 2485 }
228e548e 2486 msg_sys->msg_flags = flags;
1da177e4
LT
2487
2488 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2489 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2490 /*
2491 * If this is sendmmsg() and current destination address is same as
2492 * previously succeeded address, omit asking LSM's decision.
2493 * used_address->name_len is initialized to UINT_MAX so that the first
2494 * destination address never matches.
2495 */
bc909d9d
MD
2496 if (used_address && msg_sys->msg_name &&
2497 used_address->name_len == msg_sys->msg_namelen &&
2498 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2499 used_address->name_len)) {
d8725c86 2500 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2501 goto out_freectl;
2502 }
d8725c86 2503 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2504 /*
2505 * If this is sendmmsg() and sending to current destination address was
2506 * successful, remember it.
2507 */
2508 if (used_address && err >= 0) {
2509 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2510 if (msg_sys->msg_name)
2511 memcpy(&used_address->name, msg_sys->msg_name,
2512 used_address->name_len);
c71d8ebe 2513 }
1da177e4
LT
2514
2515out_freectl:
89bddce5 2516 if (ctl_buf != ctl)
1da177e4 2517 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2518out:
2519 return err;
2520}
2521
03b1230c
JA
2522int sendmsg_copy_msghdr(struct msghdr *msg,
2523 struct user_msghdr __user *umsg, unsigned flags,
2524 struct iovec **iov)
4257c8ca
JA
2525{
2526 int err;
2527
2528 if (flags & MSG_CMSG_COMPAT) {
2529 struct compat_msghdr __user *msg_compat;
2530
2531 msg_compat = (struct compat_msghdr __user *) umsg;
2532 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2533 } else {
2534 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2535 }
2536 if (err < 0)
2537 return err;
2538
2539 return 0;
2540}
2541
2542static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2543 struct msghdr *msg_sys, unsigned int flags,
2544 struct used_address *used_address,
2545 unsigned int allowed_msghdr_flags)
2546{
2547 struct sockaddr_storage address;
2548 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2549 ssize_t err;
2550
2551 msg_sys->msg_name = &address;
2552
2553 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2554 if (err < 0)
2555 return err;
2556
2557 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2558 allowed_msghdr_flags);
da184284 2559 kfree(iov);
228e548e
AB
2560 return err;
2561}
2562
2563/*
2564 * BSD sendmsg interface
2565 */
03b1230c 2566long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2567 unsigned int flags)
2568{
03b1230c 2569 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2570}
228e548e 2571
e1834a32
DB
2572long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2573 bool forbid_cmsg_compat)
228e548e
AB
2574{
2575 int fput_needed, err;
2576 struct msghdr msg_sys;
1be374a0
AL
2577 struct socket *sock;
2578
e1834a32
DB
2579 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2580 return -EINVAL;
2581
1be374a0 2582 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2583 if (!sock)
2584 goto out;
2585
28a94d8f 2586 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2587
6cb153ca 2588 fput_light(sock->file, fput_needed);
89bddce5 2589out:
1da177e4
LT
2590 return err;
2591}
2592
666547ff 2593SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2594{
e1834a32 2595 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2596}
2597
228e548e
AB
2598/*
2599 * Linux sendmmsg interface
2600 */
2601
2602int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2603 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2604{
2605 int fput_needed, err, datagrams;
2606 struct socket *sock;
2607 struct mmsghdr __user *entry;
2608 struct compat_mmsghdr __user *compat_entry;
2609 struct msghdr msg_sys;
c71d8ebe 2610 struct used_address used_address;
f092276d 2611 unsigned int oflags = flags;
228e548e 2612
e1834a32
DB
2613 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2614 return -EINVAL;
2615
98382f41
AB
2616 if (vlen > UIO_MAXIOV)
2617 vlen = UIO_MAXIOV;
228e548e
AB
2618
2619 datagrams = 0;
2620
2621 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2622 if (!sock)
2623 return err;
2624
c71d8ebe 2625 used_address.name_len = UINT_MAX;
228e548e
AB
2626 entry = mmsg;
2627 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2628 err = 0;
f092276d 2629 flags |= MSG_BATCH;
228e548e
AB
2630
2631 while (datagrams < vlen) {
f092276d
TH
2632 if (datagrams == vlen - 1)
2633 flags = oflags;
2634
228e548e 2635 if (MSG_CMSG_COMPAT & flags) {
666547ff 2636 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2637 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2638 if (err < 0)
2639 break;
2640 err = __put_user(err, &compat_entry->msg_len);
2641 ++compat_entry;
2642 } else {
a7526eb5 2643 err = ___sys_sendmsg(sock,
666547ff 2644 (struct user_msghdr __user *)entry,
28a94d8f 2645 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2646 if (err < 0)
2647 break;
2648 err = put_user(err, &entry->msg_len);
2649 ++entry;
2650 }
2651
2652 if (err)
2653 break;
2654 ++datagrams;
3023898b
SHY
2655 if (msg_data_left(&msg_sys))
2656 break;
a78cb84c 2657 cond_resched();
228e548e
AB
2658 }
2659
228e548e
AB
2660 fput_light(sock->file, fput_needed);
2661
728ffb86
AB
2662 /* We only return an error if no datagrams were able to be sent */
2663 if (datagrams != 0)
228e548e
AB
2664 return datagrams;
2665
228e548e
AB
2666 return err;
2667}
2668
2669SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2670 unsigned int, vlen, unsigned int, flags)
2671{
e1834a32 2672 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2673}
2674
03b1230c
JA
2675int recvmsg_copy_msghdr(struct msghdr *msg,
2676 struct user_msghdr __user *umsg, unsigned flags,
2677 struct sockaddr __user **uaddr,
2678 struct iovec **iov)
1da177e4 2679{
08adb7da 2680 ssize_t err;
1da177e4 2681
4257c8ca
JA
2682 if (MSG_CMSG_COMPAT & flags) {
2683 struct compat_msghdr __user *msg_compat;
1da177e4 2684
4257c8ca
JA
2685 msg_compat = (struct compat_msghdr __user *) umsg;
2686 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2687 } else {
2688 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2689 }
1da177e4 2690 if (err < 0)
da184284 2691 return err;
1da177e4 2692
4257c8ca
JA
2693 return 0;
2694}
2695
2696static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2697 struct user_msghdr __user *msg,
2698 struct sockaddr __user *uaddr,
2699 unsigned int flags, int nosec)
2700{
2701 struct compat_msghdr __user *msg_compat =
2702 (struct compat_msghdr __user *) msg;
2703 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2704 struct sockaddr_storage addr;
2705 unsigned long cmsg_ptr;
2706 int len;
2707 ssize_t err;
2708
2709 msg_sys->msg_name = &addr;
a2e27255
ACM
2710 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2711 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2712
f3d33426
HFS
2713 /* We assume all kernel code knows the size of sockaddr_storage */
2714 msg_sys->msg_namelen = 0;
2715
1da177e4
LT
2716 if (sock->file->f_flags & O_NONBLOCK)
2717 flags |= MSG_DONTWAIT;
1af66221
ED
2718
2719 if (unlikely(nosec))
2720 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2721 else
2722 err = sock_recvmsg(sock, msg_sys, flags);
2723
1da177e4 2724 if (err < 0)
4257c8ca 2725 goto out;
1da177e4
LT
2726 len = err;
2727
2728 if (uaddr != NULL) {
43db362d 2729 err = move_addr_to_user(&addr,
a2e27255 2730 msg_sys->msg_namelen, uaddr,
89bddce5 2731 uaddr_len);
1da177e4 2732 if (err < 0)
4257c8ca 2733 goto out;
1da177e4 2734 }
a2e27255 2735 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2736 COMPAT_FLAGS(msg));
1da177e4 2737 if (err)
4257c8ca 2738 goto out;
1da177e4 2739 if (MSG_CMSG_COMPAT & flags)
a2e27255 2740 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2741 &msg_compat->msg_controllen);
2742 else
a2e27255 2743 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2744 &msg->msg_controllen);
2745 if (err)
4257c8ca 2746 goto out;
1da177e4 2747 err = len;
4257c8ca
JA
2748out:
2749 return err;
2750}
2751
2752static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2753 struct msghdr *msg_sys, unsigned int flags, int nosec)
2754{
2755 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2756 /* user mode address pointers */
2757 struct sockaddr __user *uaddr;
2758 ssize_t err;
2759
2760 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2761 if (err < 0)
2762 return err;
1da177e4 2763
4257c8ca 2764 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2765 kfree(iov);
a2e27255
ACM
2766 return err;
2767}
2768
2769/*
2770 * BSD recvmsg interface
2771 */
2772
03b1230c
JA
2773long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2774 struct user_msghdr __user *umsg,
2775 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2776{
03b1230c 2777 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2778}
2779
e1834a32
DB
2780long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2781 bool forbid_cmsg_compat)
a2e27255
ACM
2782{
2783 int fput_needed, err;
2784 struct msghdr msg_sys;
1be374a0
AL
2785 struct socket *sock;
2786
e1834a32
DB
2787 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2788 return -EINVAL;
2789
1be374a0 2790 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2791 if (!sock)
2792 goto out;
2793
a7526eb5 2794 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2795
6cb153ca 2796 fput_light(sock->file, fput_needed);
1da177e4
LT
2797out:
2798 return err;
2799}
2800
666547ff 2801SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2802 unsigned int, flags)
2803{
e1834a32 2804 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2805}
2806
a2e27255
ACM
2807/*
2808 * Linux recvmmsg interface
2809 */
2810
e11d4284
AB
2811static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2812 unsigned int vlen, unsigned int flags,
2813 struct timespec64 *timeout)
a2e27255
ACM
2814{
2815 int fput_needed, err, datagrams;
2816 struct socket *sock;
2817 struct mmsghdr __user *entry;
d7256d0e 2818 struct compat_mmsghdr __user *compat_entry;
a2e27255 2819 struct msghdr msg_sys;
766b9f92
DD
2820 struct timespec64 end_time;
2821 struct timespec64 timeout64;
a2e27255
ACM
2822
2823 if (timeout &&
2824 poll_select_set_timeout(&end_time, timeout->tv_sec,
2825 timeout->tv_nsec))
2826 return -EINVAL;
2827
2828 datagrams = 0;
2829
2830 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2831 if (!sock)
2832 return err;
2833
7797dc41
SHY
2834 if (likely(!(flags & MSG_ERRQUEUE))) {
2835 err = sock_error(sock->sk);
2836 if (err) {
2837 datagrams = err;
2838 goto out_put;
2839 }
e623a9e9 2840 }
a2e27255
ACM
2841
2842 entry = mmsg;
d7256d0e 2843 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2844
2845 while (datagrams < vlen) {
2846 /*
2847 * No need to ask LSM for more than the first datagram.
2848 */
d7256d0e 2849 if (MSG_CMSG_COMPAT & flags) {
666547ff 2850 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2851 &msg_sys, flags & ~MSG_WAITFORONE,
2852 datagrams);
d7256d0e
JMG
2853 if (err < 0)
2854 break;
2855 err = __put_user(err, &compat_entry->msg_len);
2856 ++compat_entry;
2857 } else {
a7526eb5 2858 err = ___sys_recvmsg(sock,
666547ff 2859 (struct user_msghdr __user *)entry,
a7526eb5
AL
2860 &msg_sys, flags & ~MSG_WAITFORONE,
2861 datagrams);
d7256d0e
JMG
2862 if (err < 0)
2863 break;
2864 err = put_user(err, &entry->msg_len);
2865 ++entry;
2866 }
2867
a2e27255
ACM
2868 if (err)
2869 break;
a2e27255
ACM
2870 ++datagrams;
2871
71c5c159
BB
2872 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2873 if (flags & MSG_WAITFORONE)
2874 flags |= MSG_DONTWAIT;
2875
a2e27255 2876 if (timeout) {
766b9f92 2877 ktime_get_ts64(&timeout64);
c2e6c856 2878 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2879 if (timeout->tv_sec < 0) {
2880 timeout->tv_sec = timeout->tv_nsec = 0;
2881 break;
2882 }
2883
2884 /* Timeout, return less than vlen datagrams */
2885 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2886 break;
2887 }
2888
2889 /* Out of band data, return right away */
2890 if (msg_sys.msg_flags & MSG_OOB)
2891 break;
a78cb84c 2892 cond_resched();
a2e27255
ACM
2893 }
2894
a2e27255 2895 if (err == 0)
34b88a68
ACM
2896 goto out_put;
2897
2898 if (datagrams == 0) {
2899 datagrams = err;
2900 goto out_put;
2901 }
a2e27255 2902
34b88a68
ACM
2903 /*
2904 * We may return less entries than requested (vlen) if the
2905 * sock is non block and there aren't enough datagrams...
2906 */
2907 if (err != -EAGAIN) {
a2e27255 2908 /*
34b88a68
ACM
2909 * ... or if recvmsg returns an error after we
2910 * received some datagrams, where we record the
2911 * error to return on the next call or if the
2912 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2913 */
34b88a68 2914 sock->sk->sk_err = -err;
a2e27255 2915 }
34b88a68
ACM
2916out_put:
2917 fput_light(sock->file, fput_needed);
a2e27255 2918
34b88a68 2919 return datagrams;
a2e27255
ACM
2920}
2921
e11d4284
AB
2922int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2923 unsigned int vlen, unsigned int flags,
2924 struct __kernel_timespec __user *timeout,
2925 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2926{
2927 int datagrams;
c2e6c856 2928 struct timespec64 timeout_sys;
a2e27255 2929
e11d4284
AB
2930 if (timeout && get_timespec64(&timeout_sys, timeout))
2931 return -EFAULT;
a2e27255 2932
e11d4284 2933 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2934 return -EFAULT;
2935
e11d4284
AB
2936 if (!timeout && !timeout32)
2937 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2938
2939 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2940
e11d4284
AB
2941 if (datagrams <= 0)
2942 return datagrams;
2943
2944 if (timeout && put_timespec64(&timeout_sys, timeout))
2945 datagrams = -EFAULT;
2946
2947 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2948 datagrams = -EFAULT;
2949
2950 return datagrams;
2951}
2952
1255e269
DB
2953SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2954 unsigned int, vlen, unsigned int, flags,
c2e6c856 2955 struct __kernel_timespec __user *, timeout)
1255e269 2956{
e11d4284
AB
2957 if (flags & MSG_CMSG_COMPAT)
2958 return -EINVAL;
2959
2960 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2961}
2962
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg() variant taking the old 32-bit timespec layout.
 * MSG_CMSG_COMPAT is rejected with -EINVAL, as in recvmmsg().
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	/* Old layout: pass the timeout in the 32-bit slot only. */
	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2974
a2e27255 2975#ifdef __ARCH_WANT_SYS_SOCKETCALL
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by the
 * socketcall call number (entry 0 is unused by the range check there).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	/*  0 ..  6 */ AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3),
	/*  7 .. 13 */ AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2),
	/* 14 .. 20 */ AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2986
2987/*
89bddce5 2988 * System call vectors.
1da177e4
LT
2989 *
2990 * Argument checking cleaned up. Saved 20% in size.
2991 * This function doesn't need to set the kernel lock because
89bddce5 2992 * it is set by the callees.
1da177e4
LT
2993 */
2994
3e0fa65f 2995SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2996{
2950fa9d 2997 unsigned long a[AUDITSC_ARGS];
89bddce5 2998 unsigned long a0, a1;
1da177e4 2999 int err;
47379052 3000 unsigned int len;
1da177e4 3001
228e548e 3002 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 3003 return -EINVAL;
c8e8cd57 3004 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 3005
47379052
AV
3006 len = nargs[call];
3007 if (len > sizeof(a))
3008 return -EINVAL;
3009
1da177e4 3010 /* copy_from_user should be SMP safe. */
47379052 3011 if (copy_from_user(a, args, len))
1da177e4 3012 return -EFAULT;
3ec3b2fb 3013
2950fa9d
CG
3014 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3015 if (err)
3016 return err;
3ec3b2fb 3017
89bddce5
SH
3018 a0 = a[0];
3019 a1 = a[1];
3020
3021 switch (call) {
3022 case SYS_SOCKET:
9d6a15c3 3023 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
3024 break;
3025 case SYS_BIND:
a87d35d8 3026 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
3027 break;
3028 case SYS_CONNECT:
1387c2c2 3029 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
3030 break;
3031 case SYS_LISTEN:
25e290ee 3032 err = __sys_listen(a0, a1);
89bddce5
SH
3033 break;
3034 case SYS_ACCEPT:
4541e805
DB
3035 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3036 (int __user *)a[2], 0);
89bddce5
SH
3037 break;
3038 case SYS_GETSOCKNAME:
3039 err =
8882a107
DB
3040 __sys_getsockname(a0, (struct sockaddr __user *)a1,
3041 (int __user *)a[2]);
89bddce5
SH
3042 break;
3043 case SYS_GETPEERNAME:
3044 err =
b21c8f83
DB
3045 __sys_getpeername(a0, (struct sockaddr __user *)a1,
3046 (int __user *)a[2]);
89bddce5
SH
3047 break;
3048 case SYS_SOCKETPAIR:
6debc8d8 3049 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
3050 break;
3051 case SYS_SEND:
f3bf896b
DB
3052 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
3053 NULL, 0);
89bddce5
SH
3054 break;
3055 case SYS_SENDTO:
211b634b
DB
3056 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
3057 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
3058 break;
3059 case SYS_RECV:
d27e9afc
DB
3060 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
3061 NULL, NULL);
89bddce5
SH
3062 break;
3063 case SYS_RECVFROM:
7a09e1eb
DB
3064 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
3065 (struct sockaddr __user *)a[4],
3066 (int __user *)a[5]);
89bddce5
SH
3067 break;
3068 case SYS_SHUTDOWN:
005a1aea 3069 err = __sys_shutdown(a0, a1);
89bddce5
SH
3070 break;
3071 case SYS_SETSOCKOPT:
cc36dca0
DB
3072 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
3073 a[4]);
89bddce5
SH
3074 break;
3075 case SYS_GETSOCKOPT:
3076 err =
13a2d70e
DB
3077 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
3078 (int __user *)a[4]);
89bddce5
SH
3079 break;
3080 case SYS_SENDMSG:
e1834a32
DB
3081 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
3082 a[2], true);
89bddce5 3083 break;
228e548e 3084 case SYS_SENDMMSG:
e1834a32
DB
3085 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
3086 a[3], true);
228e548e 3087 break;
89bddce5 3088 case SYS_RECVMSG:
e1834a32
DB
3089 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
3090 a[2], true);
89bddce5 3091 break;
a2e27255 3092 case SYS_RECVMMSG:
3ca47e95 3093 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
3094 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3095 a[2], a[3],
3096 (struct __kernel_timespec __user *)a[4],
3097 NULL);
3098 else
3099 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3100 a[2], a[3], NULL,
3101 (struct old_timespec32 __user *)a[4]);
a2e27255 3102 break;
de11defe 3103 case SYS_ACCEPT4:
4541e805
DB
3104 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3105 (int __user *)a[2], a[3]);
aaca0bdc 3106 break;
89bddce5
SH
3107 default:
3108 err = -EINVAL;
3109 break;
1da177e4
LT
3110 }
3111 return err;
3112}
3113
89bddce5 3114#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 3115
55737fda
SH
3116/**
3117 * sock_register - add a socket protocol handler
3118 * @ops: description of protocol
3119 *
1da177e4
LT
3120 * This function is called by a protocol handler that wants to
3121 * advertise its address family, and have it linked into the
e793c0f7 3122 * socket interface. The value ops->family corresponds to the
55737fda 3123 * socket system call protocol family.
1da177e4 3124 */
f0fd27d4 3125int sock_register(const struct net_proto_family *ops)
1da177e4
LT
3126{
3127 int err;
3128
3129 if (ops->family >= NPROTO) {
3410f22e 3130 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
3131 return -ENOBUFS;
3132 }
55737fda
SH
3133
3134 spin_lock(&net_family_lock);
190683a9
ED
3135 if (rcu_dereference_protected(net_families[ops->family],
3136 lockdep_is_held(&net_family_lock)))
55737fda
SH
3137 err = -EEXIST;
3138 else {
cf778b00 3139 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
3140 err = 0;
3141 }
55737fda
SH
3142 spin_unlock(&net_family_lock);
3143
fe0bdbde 3144 pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
1da177e4
LT
3145 return err;
3146}
c6d409cf 3147EXPORT_SYMBOL(sock_register);
1da177e4 3148
55737fda
SH
3149/**
3150 * sock_unregister - remove a protocol handler
3151 * @family: protocol family to remove
3152 *
1da177e4
LT
3153 * This function is called by a protocol handler that wants to
3154 * remove its address family, and have it unlinked from the
55737fda
SH
3155 * new socket creation.
3156 *
3157 * If protocol handler is a module, then it can use module reference
3158 * counts to protect against new references. If protocol handler is not
3159 * a module then it needs to provide its own protection in
3160 * the ops->create routine.
1da177e4 3161 */
f0fd27d4 3162void sock_unregister(int family)
1da177e4 3163{
f0fd27d4 3164 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3165
55737fda 3166 spin_lock(&net_family_lock);
a9b3cd7f 3167 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3168 spin_unlock(&net_family_lock);
3169
3170 synchronize_rcu();
3171
fe0bdbde 3172 pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
1da177e4 3173}
c6d409cf 3174EXPORT_SYMBOL(sock_unregister);
1da177e4 3175
bf2ae2e4
XL
3176bool sock_is_registered(int family)
3177{
66b51b0a 3178 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3179}
3180
77d76ea3 3181static int __init sock_init(void)
1da177e4 3182{
b3e19d92 3183 int err;
2ca794e5
EB
3184 /*
3185 * Initialize the network sysctl infrastructure.
3186 */
3187 err = net_sysctl_init();
3188 if (err)
3189 goto out;
b3e19d92 3190
1da177e4 3191 /*
89bddce5 3192 * Initialize skbuff SLAB cache
1da177e4
LT
3193 */
3194 skb_init();
1da177e4
LT
3195
3196 /*
89bddce5 3197 * Initialize the protocols module.
1da177e4
LT
3198 */
3199
3200 init_inodecache();
b3e19d92
NP
3201
3202 err = register_filesystem(&sock_fs_type);
3203 if (err)
47260ba9 3204 goto out;
1da177e4 3205 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3206 if (IS_ERR(sock_mnt)) {
3207 err = PTR_ERR(sock_mnt);
3208 goto out_mount;
3209 }
77d76ea3
AK
3210
3211 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3212 */
3213
3214#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3215 err = netfilter_init();
3216 if (err)
3217 goto out;
1da177e4 3218#endif
cbeb321a 3219
408eccce 3220 ptp_classifier_init();
c1f19b51 3221
b3e19d92
NP
3222out:
3223 return err;
3224
3225out_mount:
3226 unregister_filesystem(&sock_fs_type);
b3e19d92 3227 goto out;
1da177e4
LT
3228}
3229
77d76ea3
AK
3230core_initcall(sock_init); /* early initcall */
3231
#ifdef CONFIG_PROC_FS
/*
 * Print the "sockets: used N" line to @seq, where N is what
 * sock_inuse_get() reports for seq->private.
 */
void socket_seq_show(struct seq_file *seq)
{
	seq_printf(seq, "sockets: used %d\n", sock_inuse_get(seq->private));
}
#endif				/* CONFIG_PROC_FS */
1da177e4 3239
29c49648
AB
3240/* Handle the fact that while struct ifreq has the same *layout* on
3241 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3242 * which are handled elsewhere, it still has different *size* due to
3243 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3244 * resulting in struct ifreq being 32 and 40 bytes respectively).
3245 * As a result, if the struct happens to be at the end of a page and
3246 * the next page isn't readable/writable, we get a fault. To prevent
3247 * that, copy back and forth to the full size.
3248 */
3249int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
7a229387 3250{
29c49648
AB
3251 if (in_compat_syscall()) {
3252 struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
7a229387 3253
29c49648
AB
3254 memset(ifr, 0, sizeof(*ifr));
3255 if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
3256 return -EFAULT;
7a229387 3257
29c49648
AB
3258 if (ifrdata)
3259 *ifrdata = compat_ptr(ifr32->ifr_data);
7a229387 3260
29c49648
AB
3261 return 0;
3262 }
7a229387 3263
29c49648 3264 if (copy_from_user(ifr, arg, sizeof(*ifr)))
7a229387
AB
3265 return -EFAULT;
3266
29c49648
AB
3267 if (ifrdata)
3268 *ifrdata = ifr->ifr_data;
3269
7a229387
AB
3270 return 0;
3271}
29c49648 3272EXPORT_SYMBOL(get_user_ifreq);
7a229387 3273
29c49648 3274int put_user_ifreq(struct ifreq *ifr, void __user *arg)
7a229387 3275{
29c49648 3276 size_t size = sizeof(*ifr);
7a229387 3277
29c49648
AB
3278 if (in_compat_syscall())
3279 size = sizeof(struct compat_ifreq);
7a229387 3280
29c49648 3281 if (copy_to_user(arg, ifr, size))
7a229387
AB
3282 return -EFAULT;
3283
3a7da39d 3284 return 0;
7a229387 3285}
29c49648 3286EXPORT_SYMBOL(put_user_ifreq);
7a229387 3287
89bbfc95 3288#ifdef CONFIG_COMPAT
7a50a240
AB
3289static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3290{
7a50a240 3291 compat_uptr_t uptr32;
44c02a2c
AV
3292 struct ifreq ifr;
3293 void __user *saved;
3294 int err;
7a50a240 3295
29c49648 3296 if (get_user_ifreq(&ifr, NULL, uifr32))
7a50a240
AB
3297 return -EFAULT;
3298
3299 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3300 return -EFAULT;
3301
44c02a2c
AV
3302 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3303 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3304
a554bf96 3305 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
44c02a2c
AV
3306 if (!err) {
3307 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
29c49648 3308 if (put_user_ifreq(&ifr, uifr32))
44c02a2c 3309 err = -EFAULT;
ccbd6a5a 3310 }
44c02a2c 3311 return err;
7a229387
AB
3312}
3313
590d4693
BH
3314/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3315static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3316 struct compat_ifreq __user *u_ifreq32)
7a229387 3317{
44c02a2c 3318 struct ifreq ifreq;
a554bf96 3319 void __user *data;
7a229387 3320
d0efb162
PC
3321 if (!is_socket_ioctl_cmd(cmd))
3322 return -ENOTTY;
a554bf96 3323 if (get_user_ifreq(&ifreq, &data, u_ifreq32))
7a229387 3324 return -EFAULT;
a554bf96 3325 ifreq.ifr_data = data;
7a229387 3326
a554bf96 3327 return dev_ioctl(net, cmd, &ifreq, data, NULL);
a2116ed2
AB
3328}
3329
6b96018b
AB
3330static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3331 unsigned int cmd, unsigned long arg)
3332{
3333 void __user *argp = compat_ptr(arg);
3334 struct sock *sk = sock->sk;
3335 struct net *net = sock_net(sk);
7a229387 3336
6b96018b 3337 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
88fc023f 3338 return sock_ioctl(file, cmd, (unsigned long)argp);
6b96018b
AB
3339
3340 switch (cmd) {
7a50a240
AB
3341 case SIOCWANDEV:
3342 return compat_siocwandev(net, argp);
0768e170
AB
3343 case SIOCGSTAMP_OLD:
3344 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3345 if (!sock->ops->gettstamp)
3346 return -ENOIOCTLCMD;
0768e170 3347 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3348 !COMPAT_USE_64BIT_TIME);
3349
dd98d289 3350 case SIOCETHTOOL:
590d4693
BH
3351 case SIOCBONDSLAVEINFOQUERY:
3352 case SIOCBONDINFOQUERY:
a2116ed2 3353 case SIOCSHWTSTAMP:
fd468c74 3354 case SIOCGHWTSTAMP:
590d4693 3355 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3356
3357 case FIOSETOWN:
3358 case SIOCSPGRP:
3359 case FIOGETOWN:
3360 case SIOCGPGRP:
3361 case SIOCBRADDBR:
3362 case SIOCBRDELBR:
3363 case SIOCGIFVLAN:
3364 case SIOCSIFVLAN:
c62cce2c 3365 case SIOCGSKNS:
0768e170
AB
3366 case SIOCGSTAMP_NEW:
3367 case SIOCGSTAMPNS_NEW:
876f0bf9 3368 case SIOCGIFCONF:
fd3a4590
RP
3369 case SIOCSIFBR:
3370 case SIOCGIFBR:
6b96018b
AB
3371 return sock_ioctl(file, cmd, arg);
3372
3373 case SIOCGIFFLAGS:
3374 case SIOCSIFFLAGS:
709566d7
AB
3375 case SIOCGIFMAP:
3376 case SIOCSIFMAP:
6b96018b
AB
3377 case SIOCGIFMETRIC:
3378 case SIOCSIFMETRIC:
3379 case SIOCGIFMTU:
3380 case SIOCSIFMTU:
3381 case SIOCGIFMEM:
3382 case SIOCSIFMEM:
3383 case SIOCGIFHWADDR:
3384 case SIOCSIFHWADDR:
3385 case SIOCADDMULTI:
3386 case SIOCDELMULTI:
3387 case SIOCGIFINDEX:
6b96018b
AB
3388 case SIOCGIFADDR:
3389 case SIOCSIFADDR:
3390 case SIOCSIFHWBROADCAST:
6b96018b 3391 case SIOCDIFADDR:
6b96018b
AB
3392 case SIOCGIFBRDADDR:
3393 case SIOCSIFBRDADDR:
3394 case SIOCGIFDSTADDR:
3395 case SIOCSIFDSTADDR:
3396 case SIOCGIFNETMASK:
3397 case SIOCSIFNETMASK:
3398 case SIOCSIFPFLAGS:
3399 case SIOCGIFPFLAGS:
3400 case SIOCGIFTXQLEN:
3401 case SIOCSIFTXQLEN:
3402 case SIOCBRADDIF:
3403 case SIOCBRDELIF:
c6c9fee3 3404 case SIOCGIFNAME:
9177efd3
AB
3405 case SIOCSIFNAME:
3406 case SIOCGMIIPHY:
3407 case SIOCGMIIREG:
3408 case SIOCSMIIREG:
f92d4fc9
AV
3409 case SIOCBONDENSLAVE:
3410 case SIOCBONDRELEASE:
3411 case SIOCBONDSETHWADDR:
3412 case SIOCBONDCHANGEACTIVE:
6b96018b
AB
3413 case SIOCSARP:
3414 case SIOCGARP:
3415 case SIOCDARP:
c7dc504e 3416 case SIOCOUTQ:
9d7bf41f 3417 case SIOCOUTQNSD:
6b96018b 3418 case SIOCATMARK:
63ff03ab 3419 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3420 }
3421
6b96018b
AB
3422 return -ENOIOCTLCMD;
3423}
7a229387 3424
95c96174 3425static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3426 unsigned long arg)
89bbfc95
SP
3427{
3428 struct socket *sock = file->private_data;
3429 int ret = -ENOIOCTLCMD;
87de87d5
DM
3430 struct sock *sk;
3431 struct net *net;
3432
3433 sk = sock->sk;
3434 net = sock_net(sk);
89bbfc95
SP
3435
3436 if (sock->ops->compat_ioctl)
3437 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3438
87de87d5
DM
3439 if (ret == -ENOIOCTLCMD &&
3440 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3441 ret = compat_wext_handle_ioctl(net, cmd, arg);
3442
6b96018b
AB
3443 if (ret == -ENOIOCTLCMD)
3444 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3445
89bbfc95
SP
3446 return ret;
3447}
3448#endif
3449
8a3c245c
PT
3450/**
3451 * kernel_bind - bind an address to a socket (kernel space)
3452 * @sock: socket
3453 * @addr: address
3454 * @addrlen: length of address
3455 *
3456 * Returns 0 or an error.
3457 */
3458
ac5a488e
SS
3459int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3460{
3461 return sock->ops->bind(sock, addr, addrlen);
3462}
c6d409cf 3463EXPORT_SYMBOL(kernel_bind);
ac5a488e 3464
8a3c245c
PT
3465/**
3466 * kernel_listen - move socket to listening state (kernel space)
3467 * @sock: socket
3468 * @backlog: pending connections queue size
3469 *
3470 * Returns 0 or an error.
3471 */
3472
ac5a488e
SS
3473int kernel_listen(struct socket *sock, int backlog)
3474{
3475 return sock->ops->listen(sock, backlog);
3476}
c6d409cf 3477EXPORT_SYMBOL(kernel_listen);
ac5a488e 3478
8a3c245c
PT
3479/**
3480 * kernel_accept - accept a connection (kernel space)
3481 * @sock: listening socket
3482 * @newsock: new connected socket
3483 * @flags: flags
3484 *
3485 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3486 * If it fails, @newsock is guaranteed to be %NULL.
3487 * Returns 0 or an error.
3488 */
3489
ac5a488e
SS
3490int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3491{
3492 struct sock *sk = sock->sk;
3493 int err;
3494
3495 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3496 newsock);
3497 if (err < 0)
3498 goto done;
3499
cdfbabfb 3500 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3501 if (err < 0) {
3502 sock_release(*newsock);
fa8705b0 3503 *newsock = NULL;
ac5a488e
SS
3504 goto done;
3505 }
3506
3507 (*newsock)->ops = sock->ops;
1b08534e 3508 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3509
3510done:
3511 return err;
3512}
c6d409cf 3513EXPORT_SYMBOL(kernel_accept);
ac5a488e 3514
8a3c245c
PT
3515/**
3516 * kernel_connect - connect a socket (kernel space)
3517 * @sock: socket
3518 * @addr: address
3519 * @addrlen: address length
3520 * @flags: flags (O_NONBLOCK, ...)
3521 *
f1dcffcc 3522 * For datagram sockets, @addr is the address to which datagrams are sent
8a3c245c
PT
3523 * by default, and the only address from which datagrams are received.
3524 * For stream sockets, attempts to connect to @addr.
3525 * Returns 0 or an error code.
3526 */
3527
ac5a488e 3528int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3529 int flags)
ac5a488e
SS
3530{
3531 return sock->ops->connect(sock, addr, addrlen, flags);
3532}
c6d409cf 3533EXPORT_SYMBOL(kernel_connect);
ac5a488e 3534
8a3c245c
PT
3535/**
3536 * kernel_getsockname - get the address which the socket is bound (kernel space)
3537 * @sock: socket
3538 * @addr: address holder
3539 *
3540 * Fills the @addr pointer with the address which the socket is bound.
0fc95dec 3541 * Returns the length of the address in bytes or an error code.
8a3c245c
PT
3542 */
3543
9b2c45d4 3544int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3545{
9b2c45d4 3546 return sock->ops->getname(sock, addr, 0);
ac5a488e 3547}
c6d409cf 3548EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3549
8a3c245c 3550/**
645f0897 3551 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3552 * @sock: socket
3553 * @addr: address holder
3554 *
3555 * Fills the @addr pointer with the address which the socket is connected.
0fc95dec 3556 * Returns the length of the address in bytes or an error code.
8a3c245c
PT
3557 */
3558
9b2c45d4 3559int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3560{
9b2c45d4 3561 return sock->ops->getname(sock, addr, 1);
ac5a488e 3562}
c6d409cf 3563EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3564
8a3c245c
PT
3565/**
3566 * kernel_sendpage - send a &page through a socket (kernel space)
3567 * @sock: socket
3568 * @page: page
3569 * @offset: page offset
3570 * @size: total size in bytes
3571 * @flags: flags (MSG_DONTWAIT, ...)
3572 *
3573 * Returns the total amount sent in bytes or an error.
3574 */
3575
ac5a488e
SS
3576int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3577 size_t size, int flags)
3578{
7b62d31d
CL
3579 if (sock->ops->sendpage) {
3580 /* Warn in case the improper page to zero-copy send */
3581 WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
ac5a488e 3582 return sock->ops->sendpage(sock, page, offset, size, flags);
7b62d31d 3583 }
ac5a488e
SS
3584 return sock_no_sendpage(sock, page, offset, size, flags);
3585}
c6d409cf 3586EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3587
8a3c245c
PT
3588/**
3589 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3590 * @sk: sock
3591 * @page: page
3592 * @offset: page offset
3593 * @size: total size in bytes
3594 * @flags: flags (MSG_DONTWAIT, ...)
3595 *
3596 * Returns the total amount sent in bytes or an error.
3597 * Caller must hold @sk.
3598 */
3599
306b13eb
TH
3600int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3601 size_t size, int flags)
3602{
3603 struct socket *sock = sk->sk_socket;
3604
3605 if (sock->ops->sendpage_locked)
3606 return sock->ops->sendpage_locked(sk, page, offset, size,
3607 flags);
3608
3609 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3610}
3611EXPORT_SYMBOL(kernel_sendpage_locked);
3612
8a3c245c 3613/**
645f0897 3614 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3615 * @sock: socket
3616 * @how: connection part
3617 *
3618 * Returns 0 or an error.
3619 */
3620
91cf45f0
TM
3621int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3622{
3623 return sock->ops->shutdown(sock, how);
3624}
91cf45f0 3625EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3626
8a3c245c
PT
3627/**
3628 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3629 * @sk: socket
3630 *
3631 * This routine returns the IP overhead imposed by a socket i.e.
3632 * the length of the underlying IP header, depending on whether
3633 * this is an IPv4 or IPv6 socket and the length from IP options turned
3634 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3635 */
8a3c245c 3636
113c3075
P
3637u32 kernel_sock_ip_overhead(struct sock *sk)
3638{
3639 struct inet_sock *inet;
3640 struct ip_options_rcu *opt;
3641 u32 overhead = 0;
113c3075
P
3642#if IS_ENABLED(CONFIG_IPV6)
3643 struct ipv6_pinfo *np;
3644 struct ipv6_txoptions *optv6 = NULL;
3645#endif /* IS_ENABLED(CONFIG_IPV6) */
3646
3647 if (!sk)
3648 return overhead;
3649
113c3075
P
3650 switch (sk->sk_family) {
3651 case AF_INET:
3652 inet = inet_sk(sk);
3653 overhead += sizeof(struct iphdr);
3654 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3655 sock_owned_by_user(sk));
113c3075
P
3656 if (opt)
3657 overhead += opt->opt.optlen;
3658 return overhead;
3659#if IS_ENABLED(CONFIG_IPV6)
3660 case AF_INET6:
3661 np = inet6_sk(sk);
3662 overhead += sizeof(struct ipv6hdr);
3663 if (np)
3664 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3665 sock_owned_by_user(sk));
113c3075
P
3666 if (optv6)
3667 overhead += (optv6->opt_flen + optv6->opt_nflen);
3668 return overhead;
3669#endif /* IS_ENABLED(CONFIG_IPV6) */
3670 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3671 return overhead;
3672 }
3673}
3674EXPORT_SYMBOL(kernel_sock_ip_overhead);