ublk: add UBLK_CMD_DEL_DEV_ASYNC
[linux-block.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
aef2feda 55#include <linux/bpf-cgroup.h>
cc69837f 56#include <linux/ethtool.h>
1da177e4 57#include <linux/mm.h>
1da177e4
LT
58#include <linux/socket.h>
59#include <linux/file.h>
2dc334f1 60#include <linux/splice.h>
1da177e4
LT
61#include <linux/net.h>
62#include <linux/interrupt.h>
aaca0bdc 63#include <linux/thread_info.h>
55737fda 64#include <linux/rcupdate.h>
1da177e4
LT
65#include <linux/netdevice.h>
66#include <linux/proc_fs.h>
67#include <linux/seq_file.h>
4a3e2f71 68#include <linux/mutex.h>
1da177e4 69#include <linux/if_bridge.h>
20380731 70#include <linux/if_vlan.h>
408eccce 71#include <linux/ptp_classify.h>
1da177e4
LT
72#include <linux/init.h>
73#include <linux/poll.h>
74#include <linux/cache.h>
75#include <linux/module.h>
76#include <linux/highmem.h>
1da177e4 77#include <linux/mount.h>
fba9be49 78#include <linux/pseudo_fs.h>
1da177e4
LT
79#include <linux/security.h>
80#include <linux/syscalls.h>
81#include <linux/compat.h>
82#include <linux/kmod.h>
3ec3b2fb 83#include <linux/audit.h>
d86b5e0e 84#include <linux/wireless.h>
1b8d7ae4 85#include <linux/nsproxy.h>
1fd7317d 86#include <linux/magic.h>
5a0e3ad6 87#include <linux/slab.h>
600e1779 88#include <linux/xattr.h>
c8e8cd57 89#include <linux/nospec.h>
8c3c447b 90#include <linux/indirect_call_wrapper.h>
8e9fad0e 91#include <linux/io_uring.h>
1da177e4 92
7c0f6ba6 93#include <linux/uaccess.h>
1da177e4
LT
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
c7dc504e 106#include <linux/termios.h>
6b96018b 107#include <linux/sockios.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
d7c08826 110#include <linux/ptp_clock_kernel.h>
6e6eda44 111#include <trace/events/sock.h>
06021292 112
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Only defined when CONFIG_NET_RX_BUSY_POLL is enabled. */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
6b96018b 117
8ae5e030
AV
118static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
119static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 120static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
121
122static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
123static __poll_t sock_poll(struct file *file,
124 struct poll_table_struct *wait);
89bddce5 125static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
126#ifdef CONFIG_COMPAT
127static long compat_sock_ioctl(struct file *file,
89bddce5 128 unsigned int cmd, unsigned long arg);
89bbfc95 129#endif
1da177e4 130static int sock_fasync(int fd, struct file *filp, int on);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
2bfc6685 134static void sock_splice_eof(struct file *file);
542d3065
AB
135
#ifdef CONFIG_PROC_FS
/*
 * Hand the fdinfo request to the socket's protocol ops, but only when
 * the protocol supplies a show_fdinfo hook.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;
	const struct proto_ops *ops = READ_ONCE(sock->ops);

	if (!ops->show_fdinfo)
		return;

	ops->show_fdinfo(m, sock);
}
#else
/* Without CONFIG_PROC_FS the file_operations slot is simply left NULL. */
#define sock_show_fdinfo NULL
#endif
1da177e4 148
1da177e4
LT
149/*
150 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
151 * in the operation structures but are done directly via the socketcall() multiplexor.
152 */
153
da7071d7 154static const struct file_operations socket_file_ops = {
1da177e4
LT
155 .owner = THIS_MODULE,
156 .llseek = no_llseek,
8ae5e030
AV
157 .read_iter = sock_read_iter,
158 .write_iter = sock_write_iter,
1da177e4
LT
159 .poll = sock_poll,
160 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
161#ifdef CONFIG_COMPAT
162 .compat_ioctl = compat_sock_ioctl,
163#endif
8e9fad0e 164 .uring_cmd = io_uring_cmd_sock,
1da177e4 165 .mmap = sock_mmap,
1da177e4
LT
166 .release = sock_close,
167 .fasync = sock_fasync,
2dc334f1 168 .splice_write = splice_to_socket,
9c55e01c 169 .splice_read = sock_splice_read,
2bfc6685 170 .splice_eof = sock_splice_eof,
b4653342 171 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
172};
173
fe0bdbde
YD
174static const char * const pf_family_names[] = {
175 [PF_UNSPEC] = "PF_UNSPEC",
176 [PF_UNIX] = "PF_UNIX/PF_LOCAL",
177 [PF_INET] = "PF_INET",
178 [PF_AX25] = "PF_AX25",
179 [PF_IPX] = "PF_IPX",
180 [PF_APPLETALK] = "PF_APPLETALK",
181 [PF_NETROM] = "PF_NETROM",
182 [PF_BRIDGE] = "PF_BRIDGE",
183 [PF_ATMPVC] = "PF_ATMPVC",
184 [PF_X25] = "PF_X25",
185 [PF_INET6] = "PF_INET6",
186 [PF_ROSE] = "PF_ROSE",
187 [PF_DECnet] = "PF_DECnet",
188 [PF_NETBEUI] = "PF_NETBEUI",
189 [PF_SECURITY] = "PF_SECURITY",
190 [PF_KEY] = "PF_KEY",
191 [PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
192 [PF_PACKET] = "PF_PACKET",
193 [PF_ASH] = "PF_ASH",
194 [PF_ECONET] = "PF_ECONET",
195 [PF_ATMSVC] = "PF_ATMSVC",
196 [PF_RDS] = "PF_RDS",
197 [PF_SNA] = "PF_SNA",
198 [PF_IRDA] = "PF_IRDA",
199 [PF_PPPOX] = "PF_PPPOX",
200 [PF_WANPIPE] = "PF_WANPIPE",
201 [PF_LLC] = "PF_LLC",
202 [PF_IB] = "PF_IB",
203 [PF_MPLS] = "PF_MPLS",
204 [PF_CAN] = "PF_CAN",
205 [PF_TIPC] = "PF_TIPC",
206 [PF_BLUETOOTH] = "PF_BLUETOOTH",
207 [PF_IUCV] = "PF_IUCV",
208 [PF_RXRPC] = "PF_RXRPC",
209 [PF_ISDN] = "PF_ISDN",
210 [PF_PHONET] = "PF_PHONET",
211 [PF_IEEE802154] = "PF_IEEE802154",
212 [PF_CAIF] = "PF_CAIF",
213 [PF_ALG] = "PF_ALG",
214 [PF_NFC] = "PF_NFC",
215 [PF_VSOCK] = "PF_VSOCK",
216 [PF_KCM] = "PF_KCM",
217 [PF_QIPCRTR] = "PF_QIPCRTR",
218 [PF_SMC] = "PF_SMC",
219 [PF_XDP] = "PF_XDP",
bc49d816 220 [PF_MCTP] = "PF_MCTP",
fe0bdbde
YD
221};
222
1da177e4
LT
223/*
224 * The protocol list. Each protocol is registered in here.
225 */
226
1da177e4 227static DEFINE_SPINLOCK(net_family_lock);
190683a9 228static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 229
1da177e4 230/*
89bddce5
SH
231 * Support routines.
232 * Move socket addresses back and forth across the kernel/user
233 * divide and look after the messy bits.
1da177e4
LT
234 */
235
1da177e4
LT
236/**
237 * move_addr_to_kernel - copy a socket address into kernel space
238 * @uaddr: Address in user space
239 * @kaddr: Address in kernel space
240 * @ulen: Length in user space
241 *
242 * The address is copied into kernel space. If the provided address is
243 * too long an error code of -EINVAL is returned. If the copy gives
244 * invalid addresses -EFAULT is returned. On a success 0 is returned.
245 */
246
43db362d 247int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 248{
230b1839 249 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 250 return -EINVAL;
89bddce5 251 if (ulen == 0)
1da177e4 252 return 0;
89bddce5 253 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 254 return -EFAULT;
3ec3b2fb 255 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
256}
257
258/**
259 * move_addr_to_user - copy an address to user space
260 * @kaddr: kernel space address
261 * @klen: length of address in kernel
262 * @uaddr: user space address
263 * @ulen: pointer to user length field
264 *
265 * The value pointed to by ulen on entry is the buffer length available.
266 * This is overwritten with the buffer space used. -EINVAL is returned
267 * if an overlong buffer is specified or a negative buffer size. -EFAULT
268 * is returned if either the buffer or the length field are not
269 * accessible.
270 * After copying the data up to the limit the user specifies, the true
271 * length of the data is written over the length limit the user
272 * specified. Zero is returned for a success.
273 */
89bddce5 274
43db362d 275static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 276 void __user *uaddr, int __user *ulen)
1da177e4
LT
277{
278 int err;
279 int len;
280
68c6beb3 281 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
282 err = get_user(len, ulen);
283 if (err)
1da177e4 284 return err;
89bddce5
SH
285 if (len > klen)
286 len = klen;
68c6beb3 287 if (len < 0)
1da177e4 288 return -EINVAL;
89bddce5 289 if (len) {
d6fe3945
SG
290 if (audit_sockaddr(klen, kaddr))
291 return -ENOMEM;
89bddce5 292 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
293 return -EFAULT;
294 }
295 /*
89bddce5
SH
296 * "fromlen shall refer to the value before truncation.."
297 * 1003.1g
1da177e4
LT
298 */
299 return __put_user(klen, ulen);
300}
301
08009a76 302static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
303
304static struct inode *sock_alloc_inode(struct super_block *sb)
305{
306 struct socket_alloc *ei;
89bddce5 307
fd60b288 308 ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
309 if (!ei)
310 return NULL;
333f7909
AV
311 init_waitqueue_head(&ei->socket.wq.wait);
312 ei->socket.wq.fasync_list = NULL;
313 ei->socket.wq.flags = 0;
89bddce5 314
1da177e4
LT
315 ei->socket.state = SS_UNCONNECTED;
316 ei->socket.flags = 0;
317 ei->socket.ops = NULL;
318 ei->socket.sk = NULL;
319 ei->socket.file = NULL;
1da177e4
LT
320
321 return &ei->vfs_inode;
322}
323
6d7855c5 324static void sock_free_inode(struct inode *inode)
1da177e4 325{
43815482
ED
326 struct socket_alloc *ei;
327
328 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 329 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
330}
331
51cc5068 332static void init_once(void *foo)
1da177e4 333{
89bddce5 334 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 335
a35afb83 336 inode_init_once(&ei->vfs_inode);
1da177e4 337}
89bddce5 338
1e911632 339static void init_inodecache(void)
1da177e4
LT
340{
341 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
342 sizeof(struct socket_alloc),
343 0,
344 (SLAB_HWCACHE_ALIGN |
345 SLAB_RECLAIM_ACCOUNT |
5d097056 346 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 347 init_once);
1e911632 348 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
349}
350
b87221de 351static const struct super_operations sockfs_ops = {
c6d409cf 352 .alloc_inode = sock_alloc_inode,
6d7855c5 353 .free_inode = sock_free_inode,
c6d409cf 354 .statfs = simple_statfs,
1da177e4
LT
355};
356
c23fbb6b
ED
357/*
358 * sockfs_dname() is called from d_path().
359 */
360static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
361{
0f60d288 362 return dynamic_dname(buffer, buflen, "socket:[%lu]",
c5ef6035 363 d_inode(dentry)->i_ino);
c23fbb6b
ED
364}
365
3ba13d17 366static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 367 .d_dname = sockfs_dname,
1da177e4
LT
368};
369
bba0bd31
AG
370static int sockfs_xattr_get(const struct xattr_handler *handler,
371 struct dentry *dentry, struct inode *inode,
372 const char *suffix, void *value, size_t size)
373{
374 if (value) {
375 if (dentry->d_name.len + 1 > size)
376 return -ERANGE;
377 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
378 }
379 return dentry->d_name.len + 1;
380}
381
/* Full xattr name = system prefix + suffix; _LEN excludes the trailing NUL. */
#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
385
386static const struct xattr_handler sockfs_xattr_handler = {
387 .name = XATTR_NAME_SOCKPROTONAME,
388 .get = sockfs_xattr_get,
389};
390
4a590153 391static int sockfs_security_xattr_set(const struct xattr_handler *handler,
39f60c1c 392 struct mnt_idmap *idmap,
4a590153
AG
393 struct dentry *dentry, struct inode *inode,
394 const char *suffix, const void *value,
395 size_t size, int flags)
396{
397 /* Handled by LSM. */
398 return -EAGAIN;
399}
400
401static const struct xattr_handler sockfs_security_xattr_handler = {
402 .prefix = XATTR_SECURITY_PREFIX,
403 .set = sockfs_security_xattr_set,
404};
405
295d3c44 406static const struct xattr_handler * const sockfs_xattr_handlers[] = {
bba0bd31 407 &sockfs_xattr_handler,
4a590153 408 &sockfs_security_xattr_handler,
bba0bd31
AG
409 NULL
410};
411
fba9be49 412static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 413{
fba9be49
DH
414 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
415 if (!ctx)
416 return -ENOMEM;
417 ctx->ops = &sockfs_ops;
418 ctx->dops = &sockfs_dentry_operations;
419 ctx->xattr = sockfs_xattr_handlers;
420 return 0;
c74a1cbb
AV
421}
422
423static struct vfsmount *sock_mnt __read_mostly;
424
425static struct file_system_type sock_fs_type = {
426 .name = "sockfs",
fba9be49 427 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
428 .kill_sb = kill_anon_super,
429};
430
1da177e4
LT
431/*
432 * Obtains the first available file descriptor and sets it up for use.
433 *
39d8c1b6
DM
434 * These functions create file structures and map them to fd space
435 * of the current process. On success it returns file descriptor
1da177e4
LT
436 * and file struct implicitly stored in sock->file.
437 * Note that another thread may close file descriptor before we return
438 * from this function. We use the fact that now we do not refer
439 * to socket after mapping. If one day we will need it, this
440 * function will increment ref. count on file by 1.
441 *
442 * In any case returned fd MAY BE not valid!
443 * This race condition is unavoidable
444 * with shared fd spaces, we cannot solve it inside kernel,
445 * but we take care of internal coherence yet.
446 */
447
8a3c245c
PT
448/**
449 * sock_alloc_file - Bind a &socket to a &file
450 * @sock: socket
451 * @flags: file status flags
452 * @dname: protocol name
453 *
454 * Returns the &file bound with @sock, implicitly storing it
455 * in sock->file. If dname is %NULL, sets to "".
649c15c7
TLSC
456 *
457 * On failure @sock is released, and an ERR pointer is returned.
458 *
8a3c245c
PT
459 * This function uses GFP_KERNEL internally.
460 */
461
aab174f0 462struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 463{
7cbe66b6 464 struct file *file;
1da177e4 465
d93aa9d8
AV
466 if (!dname)
467 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 468
d93aa9d8
AV
469 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
470 O_RDWR | (flags & O_NONBLOCK),
471 &socket_file_ops);
b5ffe634 472 if (IS_ERR(file)) {
8e1611e2 473 sock_release(sock);
39b65252 474 return file;
cc3808f8
AV
475 }
476
fe34db06 477 file->f_mode |= FMODE_NOWAIT;
cc3808f8 478 sock->file = file;
39d8c1b6 479 file->private_data = sock;
d8e464ec 480 stream_open(SOCK_INODE(sock), file);
28407630 481 return file;
39d8c1b6 482}
56b31d1c 483EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 484
56b31d1c 485static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
486{
487 struct file *newfile;
28407630 488 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
489 if (unlikely(fd < 0)) {
490 sock_release(sock);
28407630 491 return fd;
ce4bb04c 492 }
39d8c1b6 493
aab174f0 494 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 495 if (!IS_ERR(newfile)) {
39d8c1b6 496 fd_install(fd, newfile);
28407630
AV
497 return fd;
498 }
7cbe66b6 499
28407630
AV
500 put_unused_fd(fd);
501 return PTR_ERR(newfile);
1da177e4
LT
502}
503
8a3c245c
PT
504/**
505 * sock_from_file - Return the &socket bound to @file.
506 * @file: file
8a3c245c 507 *
dba4a925 508 * On failure returns %NULL.
8a3c245c
PT
509 */
510
dba4a925 511struct socket *sock_from_file(struct file *file)
6cb153ca 512{
6cb153ca 513 if (file->f_op == &socket_file_ops)
da214a47 514 return file->private_data; /* set in sock_alloc_file */
6cb153ca 515
23bb80d2 516 return NULL;
6cb153ca 517}
406a3c63 518EXPORT_SYMBOL(sock_from_file);
6cb153ca 519
1da177e4 520/**
c6d409cf 521 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
522 * @fd: file handle
523 * @err: pointer to an error code return
524 *
525 * The file handle passed in is locked and the socket it is bound
241c4667 526 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
527 * with a negative errno code and NULL is returned. The function checks
528 * for both invalid handles and passing a handle which is not a socket.
529 *
530 * On a success the socket object pointer is returned.
531 */
532
533struct socket *sockfd_lookup(int fd, int *err)
534{
535 struct file *file;
1da177e4
LT
536 struct socket *sock;
537
89bddce5
SH
538 file = fget(fd);
539 if (!file) {
1da177e4
LT
540 *err = -EBADF;
541 return NULL;
542 }
89bddce5 543
dba4a925
FR
544 sock = sock_from_file(file);
545 if (!sock) {
546 *err = -ENOTSOCK;
1da177e4 547 fput(file);
dba4a925 548 }
6cb153ca
BL
549 return sock;
550}
c6d409cf 551EXPORT_SYMBOL(sockfd_lookup);
1da177e4 552
6cb153ca
BL
553static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
554{
00e188ef 555 struct fd f = fdget(fd);
6cb153ca
BL
556 struct socket *sock;
557
3672558c 558 *err = -EBADF;
00e188ef 559 if (f.file) {
dba4a925 560 sock = sock_from_file(f.file);
00e188ef 561 if (likely(sock)) {
ce787a5a 562 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 563 return sock;
00e188ef 564 }
dba4a925 565 *err = -ENOTSOCK;
00e188ef 566 fdput(f);
1da177e4 567 }
6cb153ca 568 return NULL;
1da177e4
LT
569}
570
600e1779
MY
571static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
572 size_t size)
573{
574 ssize_t len;
575 ssize_t used = 0;
576
c5ef6035 577 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
578 if (len < 0)
579 return len;
580 used += len;
581 if (buffer) {
582 if (size < used)
583 return -ERANGE;
584 buffer += len;
585 }
586
587 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
588 used += len;
589 if (buffer) {
590 if (size < used)
591 return -ERANGE;
592 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
593 buffer += len;
594 }
595
596 return used;
597}
598
c1632a0f 599static int sockfs_setattr(struct mnt_idmap *idmap,
549c7297 600 struct dentry *dentry, struct iattr *iattr)
86741ec2 601{
c1632a0f 602 int err = simple_setattr(&nop_mnt_idmap, dentry, iattr);
86741ec2 603
e1a3a60a 604 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
605 struct socket *sock = SOCKET_I(d_inode(dentry));
606
6d8c50dc
CW
607 if (sock->sk)
608 sock->sk->sk_uid = iattr->ia_uid;
609 else
610 err = -ENOENT;
86741ec2
LC
611 }
612
613 return err;
614}
615
600e1779 616static const struct inode_operations sockfs_inode_ops = {
600e1779 617 .listxattr = sockfs_listxattr,
86741ec2 618 .setattr = sockfs_setattr,
600e1779
MY
619};
620
1da177e4 621/**
8a3c245c 622 * sock_alloc - allocate a socket
89bddce5 623 *
1da177e4
LT
624 * Allocate a new inode and socket object. The two are bound together
625 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 626 * NULL is returned. This function uses GFP_KERNEL internally.
1da177e4
LT
627 */
628
f4a00aac 629struct socket *sock_alloc(void)
1da177e4 630{
89bddce5
SH
631 struct inode *inode;
632 struct socket *sock;
1da177e4 633
a209dfc7 634 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
635 if (!inode)
636 return NULL;
637
638 sock = SOCKET_I(inode);
639
85fe4025 640 inode->i_ino = get_next_ino();
89bddce5 641 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
642 inode->i_uid = current_fsuid();
643 inode->i_gid = current_fsgid();
600e1779 644 inode->i_op = &sockfs_inode_ops;
1da177e4 645
1da177e4
LT
646 return sock;
647}
f4a00aac 648EXPORT_SYMBOL(sock_alloc);
1da177e4 649
6d8c50dc 650static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4 651{
1ded5e5a
ED
652 const struct proto_ops *ops = READ_ONCE(sock->ops);
653
654 if (ops) {
655 struct module *owner = ops->owner;
1da177e4 656
6d8c50dc
CW
657 if (inode)
658 inode_lock(inode);
1ded5e5a 659 ops->release(sock);
ff7b11aa 660 sock->sk = NULL;
6d8c50dc
CW
661 if (inode)
662 inode_unlock(inode);
1da177e4
LT
663 sock->ops = NULL;
664 module_put(owner);
665 }
666
333f7909 667 if (sock->wq.fasync_list)
3410f22e 668 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 669
1da177e4
LT
670 if (!sock->file) {
671 iput(SOCK_INODE(sock));
672 return;
673 }
89bddce5 674 sock->file = NULL;
1da177e4 675}
6d8c50dc 676
9a8ad9ac
AL
677/**
678 * sock_release - close a socket
679 * @sock: socket to close
680 *
681 * The socket is released from the protocol stack if it has a release
682 * callback, and the inode is then released if the socket is bound to
683 * an inode not a file.
684 */
6d8c50dc
CW
685void sock_release(struct socket *sock)
686{
687 __sock_release(sock, NULL);
688}
c6d409cf 689EXPORT_SYMBOL(sock_release);
1da177e4 690
c14ac945 691void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 692{
140c55d4
ED
693 u8 flags = *tx_flags;
694
51eb7492 695 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
140c55d4
ED
696 flags |= SKBTX_HW_TSTAMP;
697
51eb7492
GE
698 /* PTP hardware clocks can provide a free running cycle counter
699 * as a time base for virtual clocks. Tell driver to use the
700 * free running cycle counter for timestamp if socket is bound
701 * to virtual clock.
702 */
703 if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
704 flags |= SKBTX_HW_TSTAMP_USE_CYCLES;
705 }
706
c14ac945 707 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
708 flags |= SKBTX_SW_TSTAMP;
709
c14ac945 710 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
711 flags |= SKBTX_SCHED_TSTAMP;
712
140c55d4 713 *tx_flags = flags;
20d49473 714}
67cc0d40 715EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 716
8c3c447b
PA
717INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
718 size_t));
a648a592
PA
719INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
720 size_t));
6e6eda44
YC
721
722static noinline void call_trace_sock_send_length(struct sock *sk, int ret,
723 int flags)
724{
725 trace_sock_send_length(sk, ret, 0);
726}
727
d8725c86 728static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 729{
1ded5e5a 730 int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->sendmsg, inet6_sendmsg,
a648a592
PA
731 inet_sendmsg, sock, msg,
732 msg_data_left(msg));
d8725c86 733 BUG_ON(ret == -EIOCBQUEUED);
6e6eda44
YC
734
735 if (trace_sock_send_length_enabled())
736 call_trace_sock_send_length(sock->sk, ret, 0);
d8725c86 737 return ret;
1da177e4
LT
738}
739
86a7e0b6
JR
/*
 * Run the security hook for the send; if it returns non-zero that value
 * is returned, otherwise the message is passed to sock_sendmsg_nosec().
 */
static int __sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
747
85806af0
RD
748/**
749 * sock_sendmsg - send a message through @sock
750 * @sock: socket
751 * @msg: message to send
752 *
753 * Sends @msg through @sock, passing through LSM.
754 * Returns the number of bytes sent, or an error code.
755 */
d8725c86 756int sock_sendmsg(struct socket *sock, struct msghdr *msg)
228e548e 757{
86a7e0b6
JR
758 struct sockaddr_storage *save_addr = (struct sockaddr_storage *)msg->msg_name;
759 struct sockaddr_storage address;
01b2885d 760 int save_len = msg->msg_namelen;
86a7e0b6 761 int ret;
228e548e 762
86a7e0b6
JR
763 if (msg->msg_name) {
764 memcpy(&address, msg->msg_name, msg->msg_namelen);
765 msg->msg_name = &address;
766 }
767
768 ret = __sock_sendmsg(sock, msg);
769 msg->msg_name = save_addr;
01b2885d 770 msg->msg_namelen = save_len;
86a7e0b6
JR
771
772 return ret;
0cf00c6f 773}
c6d409cf 774EXPORT_SYMBOL(sock_sendmsg);
1da177e4 775
8a3c245c
PT
776/**
777 * kernel_sendmsg - send a message through @sock (kernel-space)
778 * @sock: socket
779 * @msg: message header
780 * @vec: kernel vec
781 * @num: vec array length
782 * @size: total message data size
783 *
784 * Builds the message data with @vec and sends it through @sock.
785 * Returns the number of bytes sent, or an error code.
786 */
787
1da177e4
LT
788int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
789 struct kvec *vec, size_t num, size_t size)
790{
de4eda9d 791 iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
d8725c86 792 return sock_sendmsg(sock, msg);
1da177e4 793}
c6d409cf 794EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 795
8a3c245c
PT
796/**
797 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
798 * @sk: sock
799 * @msg: message header
800 * @vec: output s/g array
801 * @num: output s/g array length
802 * @size: total message data size
803 *
804 * Builds the message data with @vec and sends it through the socket of @sk.
805 * Returns the number of bytes sent, or an error code.
806 * Caller must hold @sk.
807 */
808
306b13eb
TH
809int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
810 struct kvec *vec, size_t num, size_t size)
811{
812 struct socket *sock = sk->sk_socket;
1ded5e5a 813 const struct proto_ops *ops = READ_ONCE(sock->ops);
306b13eb 814
1ded5e5a 815 if (!ops->sendmsg_locked)
db5980d8 816 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 817
de4eda9d 818 iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
306b13eb 819
1ded5e5a 820 return ops->sendmsg_locked(sk, msg, msg_data_left(msg));
306b13eb
TH
821}
822EXPORT_SYMBOL(kernel_sendmsg_locked);
823
8605330a
SHY
824static bool skb_is_err_queue(const struct sk_buff *skb)
825{
826 /* pkt_type of skbs enqueued on the error queue are set to
827 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
828 * in recvmsg, since skbs received on a local socket will never
829 * have a pkt_type of PACKET_OUTGOING.
830 */
831 return skb->pkt_type == PACKET_OUTGOING;
832}
833
b50a5c70
ML
834/* On transmit, software and hardware timestamps are returned independently.
835 * As the two skb clones share the hardware timestamp, which may be updated
836 * before the software timestamp is received, a hardware TX timestamp may be
837 * returned only if there is no software TX timestamp. Ignore false software
838 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 839 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
840 * hardware timestamp.
841 */
842static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
843{
844 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
845}
846
97dc7cd9
GE
847static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
848{
e3390b30 849 bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
97dc7cd9
GE
850 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
851 struct net_device *orig_dev;
852 ktime_t hwtstamp;
853
854 rcu_read_lock();
855 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
856 if (orig_dev) {
857 *if_index = orig_dev->ifindex;
858 hwtstamp = netdev_get_tstamp(orig_dev, shhwtstamps, cycles);
859 } else {
860 hwtstamp = shhwtstamps->hwtstamp;
861 }
862 rcu_read_unlock();
863
864 return hwtstamp;
865}
866
867static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb,
868 int if_index)
aad9c8c4
ML
869{
870 struct scm_ts_pktinfo ts_pktinfo;
871 struct net_device *orig_dev;
872
873 if (!skb_mac_header_was_set(skb))
874 return;
875
876 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
877
97dc7cd9
GE
878 if (!if_index) {
879 rcu_read_lock();
880 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
881 if (orig_dev)
882 if_index = orig_dev->ifindex;
883 rcu_read_unlock();
884 }
885 ts_pktinfo.if_index = if_index;
aad9c8c4
ML
886
887 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
888 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
889 sizeof(ts_pktinfo), &ts_pktinfo);
890}
891
92f37fd2
ED
892/*
893 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
894 */
895void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
896 struct sk_buff *skb)
897{
20d49473 898 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 899 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e 900 struct scm_timestamping_internal tss;
b50a5c70 901 int empty = 1, false_tstamp = 0;
20d49473
PO
902 struct skb_shared_hwtstamps *shhwtstamps =
903 skb_hwtstamps(skb);
97dc7cd9 904 int if_index;
007747a9 905 ktime_t hwtstamp;
e3390b30 906 u32 tsflags;
20d49473
PO
907
908 /* Race occurred between timestamp enabling and packet
909 receiving. Fill in the current time for now. */
b50a5c70 910 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 911 __net_timestamp(skb);
b50a5c70
ML
912 false_tstamp = 1;
913 }
20d49473
PO
914
915 if (need_software_tstamp) {
916 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
917 if (new_tstamp) {
918 struct __kernel_sock_timeval tv;
919
920 skb_get_new_timestamp(skb, &tv);
921 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
922 sizeof(tv), &tv);
923 } else {
924 struct __kernel_old_timeval tv;
925
926 skb_get_timestamp(skb, &tv);
927 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
928 sizeof(tv), &tv);
929 }
20d49473 930 } else {
887feae3
DD
931 if (new_tstamp) {
932 struct __kernel_timespec ts;
933
934 skb_get_new_timestampns(skb, &ts);
935 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
936 sizeof(ts), &ts);
937 } else {
df1b4ba9 938 struct __kernel_old_timespec ts;
887feae3
DD
939
940 skb_get_timestampns(skb, &ts);
941 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
942 sizeof(ts), &ts);
943 }
20d49473
PO
944 }
945 }
946
f24b9be5 947 memset(&tss, 0, sizeof(tss));
e3390b30
ED
948 tsflags = READ_ONCE(sk->sk_tsflags);
949 if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 950 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 951 empty = 0;
4d276eb6 952 if (shhwtstamps &&
e3390b30 953 (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
d7c08826 954 !skb_is_swtx_tstamp(skb, false_tstamp)) {
97dc7cd9
GE
955 if_index = 0;
956 if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
957 hwtstamp = get_timestamp(sk, skb, &if_index);
007747a9
ML
958 else
959 hwtstamp = shhwtstamps->hwtstamp;
d7c08826 960
e3390b30 961 if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
97dc7cd9 962 hwtstamp = ptp_convert_timestamp(&hwtstamp,
251cd405 963 READ_ONCE(sk->sk_bind_phc));
97dc7cd9 964
007747a9 965 if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
d7c08826
YL
966 empty = 0;
967
e3390b30 968 if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
d7c08826 969 !skb_is_err_queue(skb))
97dc7cd9 970 put_ts_pktinfo(msg, skb, if_index);
d7c08826 971 }
aad9c8c4 972 }
1c885808 973 if (!empty) {
9718475e
DD
974 if (sock_flag(sk, SOCK_TSTAMP_NEW))
975 put_cmsg_scm_timestamping64(msg, &tss);
976 else
977 put_cmsg_scm_timestamping(msg, &tss);
1c885808 978
8605330a 979 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 980 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
981 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
982 skb->len, skb->data);
983 }
92f37fd2 984}
7c81fd8b
ACM
985EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
986
eb6fba75 987#ifdef CONFIG_WIRELESS
6e3e939f
JB
988void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
989 struct sk_buff *skb)
990{
991 int ack;
992
993 if (!sock_flag(sk, SOCK_WIFI_STATUS))
994 return;
995 if (!skb->wifi_acked_valid)
996 return;
997
998 ack = skb->wifi_acked;
999
1000 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
1001}
1002EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
eb6fba75 1003#endif
6e3e939f 1004
11165f14 1005static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
1006 struct sk_buff *skb)
3b885787 1007{
744d5a3e 1008 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 1009 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 1010 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
1011}
1012
6fd1d51c
EM
1013static void sock_recv_mark(struct msghdr *msg, struct sock *sk,
1014 struct sk_buff *skb)
1015{
2558b803
ED
1016 if (sock_flag(sk, SOCK_RCVMARK) && skb) {
1017 /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */
1018 __u32 mark = skb->mark;
1019
1020 put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), &mark);
1021 }
6fd1d51c
EM
1022}
1023
/*
 * Run the generic receive-side control-message helpers for @skb, in
 * order: sock_recv_timestamp(), sock_recv_drops(), sock_recv_mark().
 */
void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
		       struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
	sock_recv_mark(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_cmsgs);
3b885787 1032
8c3c447b 1033INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
1034 size_t, int));
1035INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
1036 size_t, int));
6e6eda44
YC
1037
1038static noinline void call_trace_sock_recv_length(struct sock *sk, int ret, int flags)
1039{
1040 trace_sock_recv_length(sk, ret, flags);
1041}
1042
1b784140 1043static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 1044 int flags)
1da177e4 1045{
1ded5e5a
ED
1046 int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->recvmsg,
1047 inet6_recvmsg,
6e6eda44
YC
1048 inet_recvmsg, sock, msg,
1049 msg_data_left(msg), flags);
1050 if (trace_sock_recv_length_enabled())
1051 call_trace_sock_recv_length(sock->sk, ret, flags);
1052 return ret;
1da177e4
LT
1053}
1054
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Asks the LSM for permission first; if that is refused the LSM error
 *	is returned and the protocol is never called.  Otherwise returns
 *	what the protocol's receive path returns: the total number of bytes
 *	received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);

	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 1071
c1249c0a 1072/**
8a3c245c
PT
1073 * kernel_recvmsg - Receive a message from a socket (kernel space)
1074 * @sock: The socket to receive the message from
1075 * @msg: Received message
1076 * @vec: Input s/g array for message data
1077 * @num: Size of input s/g array
1078 * @size: Number of bytes to read
1079 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 1080 *
8a3c245c
PT
1081 * On return the msg structure contains the scatter/gather array passed in the
1082 * vec argument. The array is modified so that it consists of the unfilled
1083 * portion of the original array.
c1249c0a 1084 *
8a3c245c 1085 * The returned value is the total number of bytes received, or an error.
c1249c0a 1086 */
8a3c245c 1087
89bddce5
SH
1088int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
1089 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4 1090{
1f466e1f 1091 msg->msg_control_is_user = false;
de4eda9d 1092 iov_iter_kvec(&msg->msg_iter, ITER_DEST, vec, num, size);
1f466e1f 1093 return sock_recvmsg(sock, msg, flags);
1da177e4 1094}
c6d409cf 1095EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 1096
9c55e01c 1097static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 1098 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
1099 unsigned int flags)
1100{
1101 struct socket *sock = file->private_data;
1ded5e5a 1102 const struct proto_ops *ops;
9c55e01c 1103
1ded5e5a
ED
1104 ops = READ_ONCE(sock->ops);
1105 if (unlikely(!ops->splice_read))
67178fd0 1106 return copy_splice_read(file, ppos, pipe, len, flags);
997b37da 1107
1ded5e5a 1108 return ops->splice_read(sock, ppos, pipe, len, flags);
9c55e01c
JA
1109}
1110
2bfc6685
DH
1111static void sock_splice_eof(struct file *file)
1112{
1113 struct socket *sock = file->private_data;
1ded5e5a 1114 const struct proto_ops *ops;
2bfc6685 1115
1ded5e5a
ED
1116 ops = READ_ONCE(sock->ops);
1117 if (ops->splice_eof)
1118 ops->splice_eof(sock);
2bfc6685
DH
1119}
1120
8ae5e030 1121static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 1122{
6d652330
AV
1123 struct file *file = iocb->ki_filp;
1124 struct socket *sock = file->private_data;
0345f931 1125 struct msghdr msg = {.msg_iter = *to,
1126 .msg_iocb = iocb};
8ae5e030 1127 ssize_t res;
ce1d4d3e 1128
ebfcd895 1129 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1130 msg.msg_flags = MSG_DONTWAIT;
1131
1132 if (iocb->ki_pos != 0)
1da177e4 1133 return -ESPIPE;
027445c3 1134
66ee59af 1135 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
1136 return 0;
1137
2da62906 1138 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
1139 *to = msg.msg_iter;
1140 return res;
1da177e4
LT
1141}
1142
8ae5e030 1143static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 1144{
6d652330
AV
1145 struct file *file = iocb->ki_filp;
1146 struct socket *sock = file->private_data;
0345f931 1147 struct msghdr msg = {.msg_iter = *from,
1148 .msg_iocb = iocb};
8ae5e030 1149 ssize_t res;
1da177e4 1150
8ae5e030 1151 if (iocb->ki_pos != 0)
ce1d4d3e 1152 return -ESPIPE;
027445c3 1153
ebfcd895 1154 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1155 msg.msg_flags = MSG_DONTWAIT;
1156
6d652330
AV
1157 if (sock->type == SOCK_SEQPACKET)
1158 msg.msg_flags |= MSG_EOR;
1159
86a7e0b6 1160 res = __sock_sendmsg(sock, &msg);
8ae5e030
AV
1161 *from = msg.msg_iter;
1162 return res;
1da177e4
LT
1163}
1164
1da177e4
LT
1165/*
1166 * Atomic setting of ioctl hooks to avoid race
1167 * with module unload.
1168 */
1169
4a3e2f71 1170static DEFINE_MUTEX(br_ioctl_mutex);
ad2f99ae
AB
1171static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br,
1172 unsigned int cmd, struct ifreq *ifr,
1173 void __user *uarg);
1da177e4 1174
ad2f99ae
AB
1175void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
1176 unsigned int cmd, struct ifreq *ifr,
1177 void __user *uarg))
1da177e4 1178{
4a3e2f71 1179 mutex_lock(&br_ioctl_mutex);
1da177e4 1180 br_ioctl_hook = hook;
4a3e2f71 1181 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1182}
1183EXPORT_SYMBOL(brioctl_set);
1184
ad2f99ae
AB
1185int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
1186 struct ifreq *ifr, void __user *uarg)
1187{
1188 int err = -ENOPKG;
1189
1190 if (!br_ioctl_hook)
1191 request_module("bridge");
1192
1193 mutex_lock(&br_ioctl_mutex);
1194 if (br_ioctl_hook)
1195 err = br_ioctl_hook(net, br, cmd, ifr, uarg);
1196 mutex_unlock(&br_ioctl_mutex);
1197
1198 return err;
1199}
1200
4a3e2f71 1201static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1202static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1203
881d966b 1204void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1205{
4a3e2f71 1206 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1207 vlan_ioctl_hook = hook;
4a3e2f71 1208 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1209}
1210EXPORT_SYMBOL(vlan_ioctl_set);
1211
6b96018b 1212static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1213 unsigned int cmd, unsigned long arg)
6b96018b 1214{
1ded5e5a 1215 const struct proto_ops *ops = READ_ONCE(sock->ops);
876f0bf9
AB
1216 struct ifreq ifr;
1217 bool need_copyout;
6b96018b
AB
1218 int err;
1219 void __user *argp = (void __user *)arg;
a554bf96 1220 void __user *data;
6b96018b 1221
1ded5e5a 1222 err = ops->ioctl(sock, cmd, arg);
6b96018b
AB
1223
1224 /*
1225 * If this ioctl is unknown try to hand it down
1226 * to the NIC driver.
1227 */
36fd633e
AV
1228 if (err != -ENOIOCTLCMD)
1229 return err;
6b96018b 1230
29ce8f97
JK
1231 if (!is_socket_ioctl_cmd(cmd))
1232 return -ENOTTY;
1233
a554bf96 1234 if (get_user_ifreq(&ifr, &data, argp))
876f0bf9 1235 return -EFAULT;
a554bf96 1236 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
876f0bf9 1237 if (!err && need_copyout)
a554bf96 1238 if (put_user_ifreq(&ifr, argp))
44c02a2c 1239 return -EFAULT;
876f0bf9 1240
6b96018b
AB
1241 return err;
1242}
1243
1da177e4
LT
1244/*
1245 * With an ioctl, arg may well be a user mode pointer, but we don't know
1246 * what to do with it - that's up to the protocol still.
1247 */
1248
1249static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1250{
1ded5e5a 1251 const struct proto_ops *ops;
1da177e4 1252 struct socket *sock;
881d966b 1253 struct sock *sk;
1da177e4
LT
1254 void __user *argp = (void __user *)arg;
1255 int pid, err;
881d966b 1256 struct net *net;
1da177e4 1257
b69aee04 1258 sock = file->private_data;
1ded5e5a 1259 ops = READ_ONCE(sock->ops);
881d966b 1260 sk = sock->sk;
3b1e0a65 1261 net = sock_net(sk);
44c02a2c
AV
1262 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1263 struct ifreq ifr;
a554bf96 1264 void __user *data;
44c02a2c 1265 bool need_copyout;
a554bf96 1266 if (get_user_ifreq(&ifr, &data, argp))
44c02a2c 1267 return -EFAULT;
a554bf96 1268 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
44c02a2c 1269 if (!err && need_copyout)
a554bf96 1270 if (put_user_ifreq(&ifr, argp))
44c02a2c 1271 return -EFAULT;
1da177e4 1272 } else
3d23e349 1273#ifdef CONFIG_WEXT_CORE
1da177e4 1274 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1275 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1276 } else
3d23e349 1277#endif
89bddce5 1278 switch (cmd) {
1da177e4
LT
1279 case FIOSETOWN:
1280 case SIOCSPGRP:
1281 err = -EFAULT;
1282 if (get_user(pid, (int __user *)argp))
1283 break;
393cc3f5 1284 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1285 break;
1286 case FIOGETOWN:
1287 case SIOCGPGRP:
609d7fa9 1288 err = put_user(f_getown(sock->file),
89bddce5 1289 (int __user *)argp);
1da177e4
LT
1290 break;
1291 case SIOCGIFBR:
1292 case SIOCSIFBR:
1293 case SIOCBRADDBR:
1294 case SIOCBRDELBR:
ad2f99ae 1295 err = br_ioctl_call(net, NULL, cmd, NULL, argp);
1da177e4
LT
1296 break;
1297 case SIOCGIFVLAN:
1298 case SIOCSIFVLAN:
1299 err = -ENOPKG;
1300 if (!vlan_ioctl_hook)
1301 request_module("8021q");
1302
4a3e2f71 1303 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1304 if (vlan_ioctl_hook)
881d966b 1305 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1306 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1307 break;
c62cce2c
AV
1308 case SIOCGSKNS:
1309 err = -EPERM;
1310 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1311 break;
1312
1313 err = open_related_ns(&net->ns, get_net_ns);
1314 break;
0768e170
AB
1315 case SIOCGSTAMP_OLD:
1316 case SIOCGSTAMPNS_OLD:
1ded5e5a 1317 if (!ops->gettstamp) {
c7cbdbf2
AB
1318 err = -ENOIOCTLCMD;
1319 break;
1320 }
1ded5e5a
ED
1321 err = ops->gettstamp(sock, argp,
1322 cmd == SIOCGSTAMP_OLD,
1323 !IS_ENABLED(CONFIG_64BIT));
60747828 1324 break;
0768e170
AB
1325 case SIOCGSTAMP_NEW:
1326 case SIOCGSTAMPNS_NEW:
1ded5e5a 1327 if (!ops->gettstamp) {
0768e170
AB
1328 err = -ENOIOCTLCMD;
1329 break;
1330 }
1ded5e5a
ED
1331 err = ops->gettstamp(sock, argp,
1332 cmd == SIOCGSTAMP_NEW,
1333 false);
c7cbdbf2 1334 break;
876f0bf9
AB
1335
1336 case SIOCGIFCONF:
1337 err = dev_ifconf(net, argp);
1338 break;
1339
1da177e4 1340 default:
63ff03ab 1341 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1342 break;
89bddce5 1343 }
1da177e4
LT
1344 return err;
1345}
1346
8a3c245c
PT
1347/**
1348 * sock_create_lite - creates a socket
1349 * @family: protocol family (AF_INET, ...)
1350 * @type: communication type (SOCK_STREAM, ...)
1351 * @protocol: protocol (0, ...)
1352 * @res: new socket
1353 *
1354 * Creates a new socket and assigns it to @res, passing through LSM.
1355 * The new socket initialization is not complete, see kernel_accept().
1356 * Returns 0 or an error. On failure @res is set to %NULL.
1357 * This function internally uses GFP_KERNEL.
1358 */
1359
1da177e4
LT
1360int sock_create_lite(int family, int type, int protocol, struct socket **res)
1361{
1362 int err;
1363 struct socket *sock = NULL;
89bddce5 1364
1da177e4
LT
1365 err = security_socket_create(family, type, protocol, 1);
1366 if (err)
1367 goto out;
1368
1369 sock = sock_alloc();
1370 if (!sock) {
1371 err = -ENOMEM;
1372 goto out;
1373 }
1374
1da177e4 1375 sock->type = type;
7420ed23
VY
1376 err = security_socket_post_create(sock, family, type, protocol, 1);
1377 if (err)
1378 goto out_release;
1379
1da177e4
LT
1380out:
1381 *res = sock;
1382 return err;
7420ed23
VY
1383out_release:
1384 sock_release(sock);
1385 sock = NULL;
1386 goto out;
1da177e4 1387}
c6d409cf 1388EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1389
1390/* No kernel lock held - perfect */
ade994f4 1391static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1392{
3cafb376 1393 struct socket *sock = file->private_data;
1ded5e5a 1394 const struct proto_ops *ops = READ_ONCE(sock->ops);
a331de3b 1395 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1396
1ded5e5a 1397 if (!ops->poll)
e88958e6 1398 return 0;
f641f13b 1399
a331de3b
CH
1400 if (sk_can_busy_loop(sock->sk)) {
1401 /* poll once if requested by the syscall */
1402 if (events & POLL_BUSY_LOOP)
1403 sk_busy_loop(sock->sk, 1);
1404
1405 /* if this socket can poll_ll, tell the system call */
1406 flag = POLL_BUSY_LOOP;
1407 }
1408
1ded5e5a 1409 return ops->poll(file, sock, wait) | flag;
1da177e4
LT
1410}
1411
89bddce5 1412static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1413{
b69aee04 1414 struct socket *sock = file->private_data;
1da177e4 1415
1ded5e5a 1416 return READ_ONCE(sock->ops)->mmap(file, sock, vma);
1da177e4
LT
1417}
1418
/*
 * release file operation for sockets: release the socket embedded in
 * @inode.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);

	return 0;
}
1424
1425/*
1426 * Update the socket async list
1427 *
1428 * Fasync_list locking strategy.
1429 *
1430 * 1. fasync_list is modified only under process context socket lock
1431 * i.e. under semaphore.
1432 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1433 * or under socket lock
1da177e4
LT
1434 */
1435
1436static int sock_fasync(int fd, struct file *filp, int on)
1437{
989a2979
ED
1438 struct socket *sock = filp->private_data;
1439 struct sock *sk = sock->sk;
333f7909 1440 struct socket_wq *wq = &sock->wq;
1da177e4 1441
989a2979 1442 if (sk == NULL)
1da177e4 1443 return -EINVAL;
1da177e4
LT
1444
1445 lock_sock(sk);
eaefd110 1446 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1447
eaefd110 1448 if (!wq->fasync_list)
989a2979
ED
1449 sock_reset_flag(sk, SOCK_FASYNC);
1450 else
bcdce719 1451 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1452
989a2979 1453 release_sock(sk);
1da177e4
LT
1454 return 0;
1455}
1456
ceb5d58b 1457/* This function may be called only under rcu_lock */
1da177e4 1458
ceb5d58b 1459int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1460{
ceb5d58b 1461 if (!wq || !wq->fasync_list)
1da177e4 1462 return -1;
ceb5d58b 1463
89bddce5 1464 switch (how) {
8d8ad9d7 1465 case SOCK_WAKE_WAITD:
ceb5d58b 1466 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1467 break;
1468 goto call_kill;
8d8ad9d7 1469 case SOCK_WAKE_SPACE:
ceb5d58b 1470 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1471 break;
7c7ab580 1472 fallthrough;
8d8ad9d7 1473 case SOCK_WAKE_IO:
89bddce5 1474call_kill:
43815482 1475 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1476 break;
8d8ad9d7 1477 case SOCK_WAKE_URG:
43815482 1478 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1479 }
ceb5d58b 1480
1da177e4
LT
1481 return 0;
1482}
c6d409cf 1483EXPORT_SYMBOL(sock_wake_async);
1da177e4 1484
8a3c245c
PT
1485/**
1486 * __sock_create - creates a socket
1487 * @net: net namespace
1488 * @family: protocol family (AF_INET, ...)
1489 * @type: communication type (SOCK_STREAM, ...)
1490 * @protocol: protocol (0, ...)
1491 * @res: new socket
1492 * @kern: boolean for kernel space sockets
1493 *
1494 * Creates a new socket and assigns it to @res, passing through LSM.
1495 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1496 * be set to true if the socket resides in kernel space.
1497 * This function internally uses GFP_KERNEL.
1498 */
1499
721db93a 1500int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1501 struct socket **res, int kern)
1da177e4
LT
1502{
1503 int err;
1504 struct socket *sock;
55737fda 1505 const struct net_proto_family *pf;
1da177e4
LT
1506
1507 /*
89bddce5 1508 * Check protocol is in range
1da177e4
LT
1509 */
1510 if (family < 0 || family >= NPROTO)
1511 return -EAFNOSUPPORT;
1512 if (type < 0 || type >= SOCK_MAX)
1513 return -EINVAL;
1514
1515 /* Compatibility.
1516
1517 This uglymoron is moved from INET layer to here to avoid
1518 deadlock in module load.
1519 */
1520 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1521 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1522 current->comm);
1da177e4
LT
1523 family = PF_PACKET;
1524 }
1525
1526 err = security_socket_create(family, type, protocol, kern);
1527 if (err)
1528 return err;
89bddce5 1529
55737fda
SH
1530 /*
1531 * Allocate the socket and allow the family to set things up. if
1532 * the protocol is 0, the family is instructed to select an appropriate
1533 * default.
1534 */
1535 sock = sock_alloc();
1536 if (!sock) {
e87cc472 1537 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1538 return -ENFILE; /* Not exactly a match, but its the
1539 closest posix thing */
1540 }
1541
1542 sock->type = type;
1543
95a5afca 1544#ifdef CONFIG_MODULES
89bddce5
SH
1545 /* Attempt to load a protocol module if the find failed.
1546 *
1547 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1548 * requested real, full-featured networking support upon configuration.
1549 * Otherwise module support will break!
1550 */
190683a9 1551 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1552 request_module("net-pf-%d", family);
1da177e4
LT
1553#endif
1554
55737fda
SH
1555 rcu_read_lock();
1556 pf = rcu_dereference(net_families[family]);
1557 err = -EAFNOSUPPORT;
1558 if (!pf)
1559 goto out_release;
1da177e4
LT
1560
1561 /*
1562 * We will call the ->create function, that possibly is in a loadable
1563 * module, so we have to bump that loadable module refcnt first.
1564 */
55737fda 1565 if (!try_module_get(pf->owner))
1da177e4
LT
1566 goto out_release;
1567
55737fda
SH
1568 /* Now protected by module ref count */
1569 rcu_read_unlock();
1570
3f378b68 1571 err = pf->create(net, sock, protocol, kern);
55737fda 1572 if (err < 0)
1da177e4 1573 goto out_module_put;
a79af59e 1574
1da177e4
LT
1575 /*
1576 * Now to bump the refcnt of the [loadable] module that owns this
1577 * socket at sock_release time we decrement its refcnt.
1578 */
55737fda
SH
1579 if (!try_module_get(sock->ops->owner))
1580 goto out_module_busy;
1581
1da177e4
LT
1582 /*
1583 * Now that we're done with the ->create function, the [loadable]
1584 * module can have its refcnt decremented
1585 */
55737fda 1586 module_put(pf->owner);
7420ed23
VY
1587 err = security_socket_post_create(sock, family, type, protocol, kern);
1588 if (err)
3b185525 1589 goto out_sock_release;
55737fda 1590 *res = sock;
1da177e4 1591
55737fda
SH
1592 return 0;
1593
1594out_module_busy:
1595 err = -EAFNOSUPPORT;
1da177e4 1596out_module_put:
55737fda
SH
1597 sock->ops = NULL;
1598 module_put(pf->owner);
1599out_sock_release:
1da177e4 1600 sock_release(sock);
55737fda
SH
1601 return err;
1602
1603out_release:
1604 rcu_read_unlock();
1605 goto out_sock_release;
1da177e4 1606}
721db93a 1607EXPORT_SYMBOL(__sock_create);
1da177e4 1608
8a3c245c
PT
1609/**
1610 * sock_create - creates a socket
1611 * @family: protocol family (AF_INET, ...)
1612 * @type: communication type (SOCK_STREAM, ...)
1613 * @protocol: protocol (0, ...)
1614 * @res: new socket
1615 *
1616 * A wrapper around __sock_create().
1617 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1618 */
1619
1da177e4
LT
1620int sock_create(int family, int type, int protocol, struct socket **res)
1621{
1b8d7ae4 1622 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1623}
c6d409cf 1624EXPORT_SYMBOL(sock_create);
1da177e4 1625
8a3c245c
PT
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes @kern as 1.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1643
da214a47 1644static struct socket *__sys_socket_create(int family, int type, int protocol)
1da177e4 1645{
1da177e4 1646 struct socket *sock;
da214a47 1647 int retval;
a677a039 1648
e38b36f3
UD
1649 /* Check the SOCK_* constants for consistency. */
1650 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1651 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1652 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1653 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1654
da214a47
JA
1655 if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1656 return ERR_PTR(-EINVAL);
a677a039 1657 type &= SOCK_TYPE_MASK;
1da177e4 1658
da214a47
JA
1659 retval = sock_create(family, type, protocol, &sock);
1660 if (retval < 0)
1661 return ERR_PTR(retval);
1662
1663 return sock;
1664}
1665
1666struct file *__sys_socket_file(int family, int type, int protocol)
1667{
1668 struct socket *sock;
da214a47
JA
1669 int flags;
1670
1671 sock = __sys_socket_create(family, type, protocol);
1672 if (IS_ERR(sock))
1673 return ERR_CAST(sock);
1674
1675 flags = type & ~SOCK_TYPE_MASK;
aaca0bdc
UD
1676 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1677 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1678
649c15c7 1679 return sock_alloc_file(sock, flags, NULL);
da214a47
JA
1680}
1681
0dd061a6
GT
1682/* A hook for bpf progs to attach to and update socket protocol.
1683 *
1684 * A static noinline declaration here could cause the compiler to
1685 * optimize away the function. A global noinline declaration will
1686 * keep the definition, but may optimize away the callsite.
1687 * Therefore, __weak is needed to ensure that the call is still
1688 * emitted, by telling the compiler that we don't know what the
1689 * function might eventually be.
0dd061a6
GT
1690 */
1691
15fb6f2b 1692__bpf_hook_start();
0dd061a6
GT
1693
1694__weak noinline int update_socket_protocol(int family, int type, int protocol)
1695{
1696 return protocol;
1697}
1698
15fb6f2b 1699__bpf_hook_end();
0dd061a6 1700
da214a47
JA
1701int __sys_socket(int family, int type, int protocol)
1702{
1703 struct socket *sock;
1704 int flags;
1705
0dd061a6
GT
1706 sock = __sys_socket_create(family, type,
1707 update_socket_protocol(family, type, protocol));
da214a47
JA
1708 if (IS_ERR(sock))
1709 return PTR_ERR(sock);
1710
1711 flags = type & ~SOCK_TYPE_MASK;
1712 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1713 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1da177e4 1714
8e1611e2 1715 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1716}
1717
9d6a15c3
DB
1718SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1719{
1720 return __sys_socket(family, type, protocol);
1721}
1722
1da177e4
LT
1723/*
1724 * Create a pair of connected sockets.
1725 */
1726
6debc8d8 1727int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1728{
1729 struct socket *sock1, *sock2;
1730 int fd1, fd2, err;
db349509 1731 struct file *newfile1, *newfile2;
a677a039
UD
1732 int flags;
1733
1734 flags = type & ~SOCK_TYPE_MASK;
77d27200 1735 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1736 return -EINVAL;
1737 type &= SOCK_TYPE_MASK;
1da177e4 1738
aaca0bdc
UD
1739 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1740 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1741
016a266b
AV
1742 /*
1743 * reserve descriptors and make sure we won't fail
1744 * to return them to userland.
1745 */
1746 fd1 = get_unused_fd_flags(flags);
1747 if (unlikely(fd1 < 0))
1748 return fd1;
1749
1750 fd2 = get_unused_fd_flags(flags);
1751 if (unlikely(fd2 < 0)) {
1752 put_unused_fd(fd1);
1753 return fd2;
1754 }
1755
1756 err = put_user(fd1, &usockvec[0]);
1757 if (err)
1758 goto out;
1759
1760 err = put_user(fd2, &usockvec[1]);
1761 if (err)
1762 goto out;
1763
1da177e4
LT
1764 /*
1765 * Obtain the first socket and check if the underlying protocol
1766 * supports the socketpair call.
1767 */
1768
1769 err = sock_create(family, type, protocol, &sock1);
016a266b 1770 if (unlikely(err < 0))
1da177e4
LT
1771 goto out;
1772
1773 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1774 if (unlikely(err < 0)) {
1775 sock_release(sock1);
1776 goto out;
bf3c23d1 1777 }
d73aa286 1778
d47cd945
DH
1779 err = security_socket_socketpair(sock1, sock2);
1780 if (unlikely(err)) {
1781 sock_release(sock2);
1782 sock_release(sock1);
1783 goto out;
1784 }
1785
1ded5e5a 1786 err = READ_ONCE(sock1->ops)->socketpair(sock1, sock2);
016a266b
AV
1787 if (unlikely(err < 0)) {
1788 sock_release(sock2);
1789 sock_release(sock1);
1790 goto out;
28407630
AV
1791 }
1792
aab174f0 1793 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1794 if (IS_ERR(newfile1)) {
28407630 1795 err = PTR_ERR(newfile1);
016a266b
AV
1796 sock_release(sock2);
1797 goto out;
28407630
AV
1798 }
1799
aab174f0 1800 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1801 if (IS_ERR(newfile2)) {
1802 err = PTR_ERR(newfile2);
016a266b
AV
1803 fput(newfile1);
1804 goto out;
db349509
AV
1805 }
1806
157cf649 1807 audit_fd_pair(fd1, fd2);
d73aa286 1808
db349509
AV
1809 fd_install(fd1, newfile1);
1810 fd_install(fd2, newfile2);
d73aa286 1811 return 0;
1da177e4 1812
016a266b 1813out:
d73aa286 1814 put_unused_fd(fd2);
d73aa286 1815 put_unused_fd(fd1);
1da177e4
LT
1816 return err;
1817}
1818
6debc8d8
DB
1819SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1820 int __user *, usockvec)
1821{
1822 return __sys_socketpair(family, type, protocol, usockvec);
1823}
1824
1da177e4
LT
1825/*
1826 * Bind a name to a socket. Nothing much to do here since it's
1827 * the protocol's responsibility to handle the local address.
1828 *
1829 * We move the socket address to kernel space before we call
1830 * the protocol layer (having also checked the address is ok).
1831 */
1832
a87d35d8 1833int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1834{
1835 struct socket *sock;
230b1839 1836 struct sockaddr_storage address;
6cb153ca 1837 int err, fput_needed;
1da177e4 1838
89bddce5 1839 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1840 if (sock) {
43db362d 1841 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1842 if (!err) {
89bddce5 1843 err = security_socket_bind(sock,
230b1839 1844 (struct sockaddr *)&address,
89bddce5 1845 addrlen);
6cb153ca 1846 if (!err)
1ded5e5a 1847 err = READ_ONCE(sock->ops)->bind(sock,
89bddce5 1848 (struct sockaddr *)
230b1839 1849 &address, addrlen);
1da177e4 1850 }
6cb153ca 1851 fput_light(sock->file, fput_needed);
89bddce5 1852 }
1da177e4
LT
1853 return err;
1854}
1855
a87d35d8
DB
1856SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1857{
1858 return __sys_bind(fd, umyaddr, addrlen);
1859}
1860
1da177e4
LT
1861/*
1862 * Perform a listen. Basically, we allow the protocol to do anything
1863 * necessary for a listen, and if that works, we mark the socket as
1864 * ready for listening.
1865 */
1866
25e290ee 1867int __sys_listen(int fd, int backlog)
1da177e4
LT
1868{
1869 struct socket *sock;
6cb153ca 1870 int err, fput_needed;
b8e1f9b5 1871 int somaxconn;
89bddce5
SH
1872
1873 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1874 if (sock) {
3c9ba81d 1875 somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
95c96174 1876 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1877 backlog = somaxconn;
1da177e4
LT
1878
1879 err = security_socket_listen(sock, backlog);
6cb153ca 1880 if (!err)
1ded5e5a 1881 err = READ_ONCE(sock->ops)->listen(sock, backlog);
1da177e4 1882
6cb153ca 1883 fput_light(sock->file, fput_needed);
1da177e4
LT
1884 }
1885 return err;
1886}
1887
25e290ee
DB
1888SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1889{
1890 return __sys_listen(fd, backlog);
1891}
1892
d32f89da 1893struct file *do_accept(struct file *file, unsigned file_flags,
de2ea4b6 1894 struct sockaddr __user *upeer_sockaddr,
d32f89da 1895 int __user *upeer_addrlen, int flags)
1da177e4
LT
1896{
1897 struct socket *sock, *newsock;
39d8c1b6 1898 struct file *newfile;
d32f89da 1899 int err, len;
230b1839 1900 struct sockaddr_storage address;
1ded5e5a 1901 const struct proto_ops *ops;
1da177e4 1902
dba4a925 1903 sock = sock_from_file(file);
d32f89da
PB
1904 if (!sock)
1905 return ERR_PTR(-ENOTSOCK);
1da177e4 1906
c6d409cf
ED
1907 newsock = sock_alloc();
1908 if (!newsock)
d32f89da 1909 return ERR_PTR(-ENFILE);
1ded5e5a 1910 ops = READ_ONCE(sock->ops);
1da177e4
LT
1911
1912 newsock->type = sock->type;
1ded5e5a 1913 newsock->ops = ops;
1da177e4 1914
1da177e4
LT
1915 /*
1916 * We don't need try_module_get here, as the listening socket (sock)
1917 * has the protocol module (sock->ops->owner) held.
1918 */
1ded5e5a 1919 __module_get(ops->owner);
1da177e4 1920
aab174f0 1921 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
d32f89da
PB
1922 if (IS_ERR(newfile))
1923 return newfile;
39d8c1b6 1924
a79af59e
FF
1925 err = security_socket_accept(sock, newsock);
1926 if (err)
39d8c1b6 1927 goto out_fd;
a79af59e 1928
1ded5e5a 1929 err = ops->accept(sock, newsock, sock->file->f_flags | file_flags,
de2ea4b6 1930 false);
1da177e4 1931 if (err < 0)
39d8c1b6 1932 goto out_fd;
1da177e4
LT
1933
1934 if (upeer_sockaddr) {
1ded5e5a 1935 len = ops->getname(newsock, (struct sockaddr *)&address, 2);
9b2c45d4 1936 if (len < 0) {
1da177e4 1937 err = -ECONNABORTED;
39d8c1b6 1938 goto out_fd;
1da177e4 1939 }
43db362d 1940 err = move_addr_to_user(&address,
230b1839 1941 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1942 if (err < 0)
39d8c1b6 1943 goto out_fd;
1da177e4
LT
1944 }
1945
1946 /* File flags are not inherited via accept() unlike another OSes. */
d32f89da 1947 return newfile;
39d8c1b6 1948out_fd:
9606a216 1949 fput(newfile);
d32f89da
PB
1950 return ERR_PTR(err);
1951}
1952
c0424532
YD
1953static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr,
1954 int __user *upeer_addrlen, int flags)
d32f89da
PB
1955{
1956 struct file *newfile;
1957 int newfd;
1958
1959 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1960 return -EINVAL;
1961
1962 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1963 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
de2ea4b6 1964
c0424532 1965 newfd = get_unused_fd_flags(flags);
d32f89da
PB
1966 if (unlikely(newfd < 0))
1967 return newfd;
1968
c0424532 1969 newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen,
d32f89da
PB
1970 flags);
1971 if (IS_ERR(newfile)) {
1972 put_unused_fd(newfd);
1973 return PTR_ERR(newfile);
1974 }
1975 fd_install(newfd, newfile);
1976 return newfd;
de2ea4b6
JA
1977}
1978
1979/*
1980 * For accept, we attempt to create a new socket, set up the link
1981 * with the client, wake up the client, then return the new
1982 * connected fd. We collect the address of the connector in kernel
1983 * space and move it to user at the very end. This is unclean because
1984 * we open the socket then return an error.
1985 *
1986 * 1003.1g adds the ability for recvmsg() to query the connection-pending
1987 * status. We need to add that support in a way that's
1988 * clean when we restructure accept also.
1989 */
1990
1991int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1992 int __user *upeer_addrlen, int flags)
1993{
1994 int ret = -EBADF;
1995 struct fd f;
1996
1997 f = fdget(fd);
1998 if (f.file) {
c0424532
YD
1999 ret = __sys_accept4_file(f.file, upeer_sockaddr,
2000 upeer_addrlen, flags);
6b07edeb 2001 fdput(f);
de2ea4b6
JA
2002 }
2003
2004 return ret;
1da177e4
LT
2005}
2006
4541e805
DB
2007SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
2008 int __user *, upeer_addrlen, int, flags)
2009{
2010 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
2011}
2012
20f37034
HC
2013SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
2014 int __user *, upeer_addrlen)
aaca0bdc 2015{
4541e805 2016 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
2017}
2018
1da177e4
LT
2019/*
2020 * Attempt to connect to a socket with the server address. The address
2021 * is in user space so we verify it is OK and move it to kernel space.
2022 *
2023 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
2024 * break bindings
2025 *
2026 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
2027 * other SEQPACKET protocols that take time to connect() as it doesn't
2028 * include the -EINPROGRESS status for such sockets.
2029 */
2030
f499a021 2031int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 2032 int addrlen, int file_flags)
1da177e4
LT
2033{
2034 struct socket *sock;
bd3ded31 2035 int err;
1da177e4 2036
dba4a925
FR
2037 sock = sock_from_file(file);
2038 if (!sock) {
2039 err = -ENOTSOCK;
1da177e4 2040 goto out;
dba4a925 2041 }
1da177e4 2042
89bddce5 2043 err =
f499a021 2044 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 2045 if (err)
bd3ded31 2046 goto out;
1da177e4 2047
1ded5e5a
ED
2048 err = READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)address,
2049 addrlen, sock->file->f_flags | file_flags);
1da177e4
LT
2050out:
2051 return err;
2052}
2053
bd3ded31
JA
2054int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
2055{
2056 int ret = -EBADF;
2057 struct fd f;
2058
2059 f = fdget(fd);
2060 if (f.file) {
f499a021
JA
2061 struct sockaddr_storage address;
2062
2063 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
2064 if (!ret)
2065 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 2066 fdput(f);
bd3ded31
JA
2067 }
2068
2069 return ret;
2070}
2071
1387c2c2
DB
2072SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
2073 int, addrlen)
2074{
2075 return __sys_connect(fd, uservaddr, addrlen);
2076}
2077
1da177e4
LT
2078/*
2079 * Get the local address ('name') of a socket object. Move the obtained
2080 * name to user space.
2081 */
2082
8882a107
DB
2083int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
2084 int __user *usockaddr_len)
1da177e4
LT
2085{
2086 struct socket *sock;
230b1839 2087 struct sockaddr_storage address;
9b2c45d4 2088 int err, fput_needed;
89bddce5 2089
6cb153ca 2090 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
2091 if (!sock)
2092 goto out;
2093
2094 err = security_socket_getsockname(sock);
2095 if (err)
2096 goto out_put;
2097
1ded5e5a 2098 err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 0);
9b2c45d4 2099 if (err < 0)
1da177e4 2100 goto out_put;
e44ef1d4 2101 /* "err" is actually length in this case */
9b2c45d4 2102 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
2103
2104out_put:
6cb153ca 2105 fput_light(sock->file, fput_needed);
1da177e4
LT
2106out:
2107 return err;
2108}
2109
8882a107
DB
2110SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
2111 int __user *, usockaddr_len)
2112{
2113 return __sys_getsockname(fd, usockaddr, usockaddr_len);
2114}
2115
1da177e4
LT
2116/*
2117 * Get the remote address ('name') of a socket object. Move the obtained
2118 * name to user space.
2119 */
2120
b21c8f83
DB
2121int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
2122 int __user *usockaddr_len)
1da177e4
LT
2123{
2124 struct socket *sock;
230b1839 2125 struct sockaddr_storage address;
9b2c45d4 2126 int err, fput_needed;
1da177e4 2127
89bddce5
SH
2128 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2129 if (sock != NULL) {
1ded5e5a
ED
2130 const struct proto_ops *ops = READ_ONCE(sock->ops);
2131
1da177e4
LT
2132 err = security_socket_getpeername(sock);
2133 if (err) {
6cb153ca 2134 fput_light(sock->file, fput_needed);
1da177e4
LT
2135 return err;
2136 }
2137
1ded5e5a 2138 err = ops->getname(sock, (struct sockaddr *)&address, 1);
9b2c45d4
DV
2139 if (err >= 0)
2140 /* "err" is actually length in this case */
2141 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 2142 usockaddr_len);
6cb153ca 2143 fput_light(sock->file, fput_needed);
1da177e4
LT
2144 }
2145 return err;
2146}
2147
b21c8f83
DB
2148SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
2149 int __user *, usockaddr_len)
2150{
2151 return __sys_getpeername(fd, usockaddr, usockaddr_len);
2152}
2153
1da177e4
LT
2154/*
2155 * Send a datagram to a given address. We move the address into kernel
2156 * space and check the user space data area is readable before invoking
2157 * the protocol.
2158 */
211b634b
DB
2159int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
2160 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
2161{
2162 struct socket *sock;
230b1839 2163 struct sockaddr_storage address;
1da177e4
LT
2164 int err;
2165 struct msghdr msg;
6cb153ca 2166 int fput_needed;
6cb153ca 2167
9fd7874c 2168 err = import_ubuf(ITER_SOURCE, buff, len, &msg.msg_iter);
602bd0e9
AV
2169 if (unlikely(err))
2170 return err;
de0fa95c
PE
2171 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2172 if (!sock)
4387ff75 2173 goto out;
6cb153ca 2174
89bddce5 2175 msg.msg_name = NULL;
89bddce5
SH
2176 msg.msg_control = NULL;
2177 msg.msg_controllen = 0;
2178 msg.msg_namelen = 0;
7c701d92 2179 msg.msg_ubuf = NULL;
6cb153ca 2180 if (addr) {
43db362d 2181 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
2182 if (err < 0)
2183 goto out_put;
230b1839 2184 msg.msg_name = (struct sockaddr *)&address;
89bddce5 2185 msg.msg_namelen = addr_len;
1da177e4 2186 }
b841b901 2187 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
1da177e4
LT
2188 if (sock->file->f_flags & O_NONBLOCK)
2189 flags |= MSG_DONTWAIT;
2190 msg.msg_flags = flags;
86a7e0b6 2191 err = __sock_sendmsg(sock, &msg);
1da177e4 2192
89bddce5 2193out_put:
de0fa95c 2194 fput_light(sock->file, fput_needed);
4387ff75 2195out:
1da177e4
LT
2196 return err;
2197}
2198
211b634b
DB
2199SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2200 unsigned int, flags, struct sockaddr __user *, addr,
2201 int, addr_len)
2202{
2203 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2204}
2205
1da177e4 2206/*
89bddce5 2207 * Send a datagram down a socket.
1da177e4
LT
2208 */
2209
3e0fa65f 2210SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2211 unsigned int, flags)
1da177e4 2212{
211b634b 2213 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2214}
2215
2216/*
89bddce5 2217 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2218 * sender. We verify the buffers are writable and if needed move the
2219 * sender address from kernel to user space.
2220 */
7a09e1eb
DB
2221int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2222 struct sockaddr __user *addr, int __user *addr_len)
1da177e4 2223{
1228b34c
ED
2224 struct sockaddr_storage address;
2225 struct msghdr msg = {
2226 /* Save some cycles and don't copy the address if not needed */
2227 .msg_name = addr ? (struct sockaddr *)&address : NULL,
2228 };
1da177e4 2229 struct socket *sock;
89bddce5 2230 int err, err2;
6cb153ca
BL
2231 int fput_needed;
2232
9fd7874c 2233 err = import_ubuf(ITER_DEST, ubuf, size, &msg.msg_iter);
602bd0e9
AV
2234 if (unlikely(err))
2235 return err;
de0fa95c 2236 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2237 if (!sock)
de0fa95c 2238 goto out;
1da177e4 2239
1da177e4
LT
2240 if (sock->file->f_flags & O_NONBLOCK)
2241 flags |= MSG_DONTWAIT;
2da62906 2242 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2243
89bddce5 2244 if (err >= 0 && addr != NULL) {
43db362d 2245 err2 = move_addr_to_user(&address,
230b1839 2246 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2247 if (err2 < 0)
2248 err = err2;
1da177e4 2249 }
de0fa95c
PE
2250
2251 fput_light(sock->file, fput_needed);
4387ff75 2252out:
1da177e4
LT
2253 return err;
2254}
2255
7a09e1eb
DB
2256SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2257 unsigned int, flags, struct sockaddr __user *, addr,
2258 int __user *, addr_len)
2259{
2260 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2261}
2262
1da177e4 2263/*
89bddce5 2264 * Receive a datagram from a socket.
1da177e4
LT
2265 */
2266
b7c0ddf5
JG
2267SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2268 unsigned int, flags)
1da177e4 2269{
7a09e1eb 2270 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2271}
2272
83f0c10b
FW
2273static bool sock_use_custom_sol_socket(const struct socket *sock)
2274{
a5ef058d 2275 return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
83f0c10b
FW
2276}
2277
1406245c
BL
2278int do_sock_setsockopt(struct socket *sock, bool compat, int level,
2279 int optname, sockptr_t optval, int optlen)
1da177e4 2280{
1ded5e5a 2281 const struct proto_ops *ops;
0d01da6a 2282 char *kernel_optval = NULL;
1406245c 2283 int err;
1da177e4
LT
2284
2285 if (optlen < 0)
2286 return -EINVAL;
89bddce5 2287
4a367299
CH
2288 err = security_socket_setsockopt(sock, level, optname);
2289 if (err)
2290 goto out_put;
0d01da6a 2291
1406245c 2292 if (!compat)
55db9c0e 2293 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
3f31e0d1 2294 optval, &optlen,
55db9c0e 2295 &kernel_optval);
4a367299
CH
2296 if (err < 0)
2297 goto out_put;
2298 if (err > 0) {
2299 err = 0;
2300 goto out_put;
2301 }
0d01da6a 2302
a7b75c5a
CH
2303 if (kernel_optval)
2304 optval = KERNEL_SOCKPTR(kernel_optval);
1ded5e5a 2305 ops = READ_ONCE(sock->ops);
4a367299 2306 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2307 err = sock_setsockopt(sock, level, optname, optval, optlen);
1ded5e5a 2308 else if (unlikely(!ops->setsockopt))
a44d9e72 2309 err = -EOPNOTSUPP;
4a367299 2310 else
1ded5e5a 2311 err = ops->setsockopt(sock, level, optname, optval,
89bddce5 2312 optlen);
a7b75c5a 2313 kfree(kernel_optval);
4a367299 2314out_put:
1406245c
BL
2315 return err;
2316}
2317EXPORT_SYMBOL(do_sock_setsockopt);
2318
2319/* Set a socket option. Because we don't know the option lengths we have
2320 * to pass the user mode parameter for the protocols to sort out.
2321 */
2322int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
2323 int optlen)
2324{
2325 sockptr_t optval = USER_SOCKPTR(user_optval);
2326 bool compat = in_compat_syscall();
2327 int err, fput_needed;
2328 struct socket *sock;
2329
2330 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2331 if (!sock)
2332 return err;
2333
2334 err = do_sock_setsockopt(sock, compat, level, optname, optval, optlen);
2335
4a367299 2336 fput_light(sock->file, fput_needed);
1da177e4
LT
2337 return err;
2338}
2339
cc36dca0
DB
2340SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2341 char __user *, optval, int, optlen)
2342{
2343 return __sys_setsockopt(fd, level, optname, optval, optlen);
2344}
2345
9cacf81f
SF
2346INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2347 int optname));
2348
0b05b0cd
BL
2349int do_sock_getsockopt(struct socket *sock, bool compat, int level,
2350 int optname, sockptr_t optval, sockptr_t optlen)
1da177e4 2351{
ad4bf5f2 2352 int max_optlen __maybe_unused;
1ded5e5a 2353 const struct proto_ops *ops;
0b05b0cd 2354 int err;
d8a9b38f
CH
2355
2356 err = security_socket_getsockopt(sock, level, optname);
2357 if (err)
0b05b0cd 2358 return err;
1da177e4 2359
0b05b0cd 2360 if (!compat)
55db9c0e 2361 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2362
1ded5e5a 2363 ops = READ_ONCE(sock->ops);
0b05b0cd
BL
2364 if (level == SOL_SOCKET) {
2365 err = sk_getsockopt(sock->sk, level, optname, optval, optlen);
2366 } else if (unlikely(!ops->getsockopt)) {
a44d9e72 2367 err = -EOPNOTSUPP;
0b05b0cd
BL
2368 } else {
2369 if (WARN_ONCE(optval.is_kernel || optlen.is_kernel,
2370 "Invalid argument type"))
2371 return -EOPNOTSUPP;
2372
2373 err = ops->getsockopt(sock, level, optname, optval.user,
2374 optlen.user);
2375 }
0d01da6a 2376
0b05b0cd 2377 if (!compat)
55db9c0e
CH
2378 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2379 optval, optlen, max_optlen,
2380 err);
0b05b0cd
BL
2381
2382 return err;
2383}
2384EXPORT_SYMBOL(do_sock_getsockopt);
2385
1da177e4
LT
2386/*
2387 * Get a socket option. Because we don't know the option lengths we have
2388 * to pass a user mode parameter for the protocols to sort out.
2389 */
55db9c0e
CH
2390int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2391 int __user *optlen)
1da177e4 2392{
6cb153ca 2393 int err, fput_needed;
1da177e4 2394 struct socket *sock;
0b05b0cd 2395 bool compat;
1da177e4 2396
89bddce5 2397 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2398 if (!sock)
2399 return err;
2400
0b05b0cd
BL
2401 compat = in_compat_syscall();
2402 err = do_sock_getsockopt(sock, compat, level, optname,
2403 USER_SOCKPTR(optval), USER_SOCKPTR(optlen));
1da177e4 2404
d8a9b38f 2405 fput_light(sock->file, fput_needed);
1da177e4
LT
2406 return err;
2407}
2408
13a2d70e
DB
2409SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2410 char __user *, optval, int __user *, optlen)
2411{
2412 return __sys_getsockopt(fd, level, optname, optval, optlen);
2413}
2414
1da177e4
LT
2415/*
2416 * Shutdown a socket.
2417 */
2418
b713c195
JA
2419int __sys_shutdown_sock(struct socket *sock, int how)
2420{
2421 int err;
2422
2423 err = security_socket_shutdown(sock, how);
2424 if (!err)
1ded5e5a 2425 err = READ_ONCE(sock->ops)->shutdown(sock, how);
b713c195
JA
2426
2427 return err;
2428}
2429
005a1aea 2430int __sys_shutdown(int fd, int how)
1da177e4 2431{
6cb153ca 2432 int err, fput_needed;
1da177e4
LT
2433 struct socket *sock;
2434
89bddce5
SH
2435 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2436 if (sock != NULL) {
b713c195 2437 err = __sys_shutdown_sock(sock, how);
6cb153ca 2438 fput_light(sock->file, fput_needed);
1da177e4
LT
2439 }
2440 return err;
2441}
2442
005a1aea
DB
2443SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2444{
2445 return __sys_shutdown(fd, how);
2446}
2447
89bddce5 2448/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
2449 * fields which are the same type (int / unsigned) on our platforms.
2450 */
/*
 * COMPAT_MSG() yields the address of @member in the compat header when
 * MSG_CMSG_COMPAT is set in `flags`, and in the native header otherwise.
 * The expansion site must have `flags`, `msg` and `msg##_compat` in scope.
 */
#define COMPAT_MSG(msg, member)	\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2454
c71d8ebe
TH
2455struct used_address {
2456 struct sockaddr_storage name;
2457 unsigned int name_len;
2458};
2459
7fa875b8
DY
2460int __copy_msghdr(struct msghdr *kmsg,
2461 struct user_msghdr *msg,
2462 struct sockaddr __user **save_addr)
1661bf36 2463{
08adb7da
AV
2464 ssize_t err;
2465
1f466e1f 2466 kmsg->msg_control_is_user = true;
1228b34c 2467 kmsg->msg_get_inq = 0;
7fa875b8
DY
2468 kmsg->msg_control_user = msg->msg_control;
2469 kmsg->msg_controllen = msg->msg_controllen;
2470 kmsg->msg_flags = msg->msg_flags;
ffb07550 2471
7fa875b8
DY
2472 kmsg->msg_namelen = msg->msg_namelen;
2473 if (!msg->msg_name)
6a2a2b3a
AS
2474 kmsg->msg_namelen = 0;
2475
dbb490b9
ML
2476 if (kmsg->msg_namelen < 0)
2477 return -EINVAL;
2478
1661bf36 2479 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2480 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2481
2482 if (save_addr)
7fa875b8 2483 *save_addr = msg->msg_name;
08adb7da 2484
7fa875b8 2485 if (msg->msg_name && kmsg->msg_namelen) {
08adb7da 2486 if (!save_addr) {
7fa875b8 2487 err = move_addr_to_kernel(msg->msg_name,
864d9664 2488 kmsg->msg_namelen,
08adb7da
AV
2489 kmsg->msg_name);
2490 if (err < 0)
2491 return err;
2492 }
2493 } else {
2494 kmsg->msg_name = NULL;
2495 kmsg->msg_namelen = 0;
2496 }
2497
7fa875b8 2498 if (msg->msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2499 return -EMSGSIZE;
2500
0345f931 2501 kmsg->msg_iocb = NULL;
7c701d92 2502 kmsg->msg_ubuf = NULL;
0a384abf
JA
2503 return 0;
2504}
2505
2506static int copy_msghdr_from_user(struct msghdr *kmsg,
2507 struct user_msghdr __user *umsg,
2508 struct sockaddr __user **save_addr,
2509 struct iovec **iov)
2510{
2511 struct user_msghdr msg;
2512 ssize_t err;
2513
7fa875b8
DY
2514 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
2515 return -EFAULT;
2516
2517 err = __copy_msghdr(kmsg, &msg, save_addr);
0a384abf
JA
2518 if (err)
2519 return err;
0345f931 2520
de4eda9d 2521 err = import_iovec(save_addr ? ITER_DEST : ITER_SOURCE,
ffb07550 2522 msg.msg_iov, msg.msg_iovlen,
da184284 2523 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2524 return err < 0 ? err : 0;
1661bf36
DC
2525}
2526
4257c8ca
JA
2527static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2528 unsigned int flags, struct used_address *used_address,
2529 unsigned int allowed_msghdr_flags)
1da177e4 2530{
b9d717a7 2531 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2532 __aligned(sizeof(__kernel_size_t));
89bddce5 2533 /* 20 is size of ipv6_pktinfo */
1da177e4 2534 unsigned char *ctl_buf = ctl;
d8725c86 2535 int ctl_len;
08adb7da 2536 ssize_t err;
89bddce5 2537
1da177e4
LT
2538 err = -ENOBUFS;
2539
228e548e 2540 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2541 goto out;
28a94d8f 2542 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2543 ctl_len = msg_sys->msg_controllen;
1da177e4 2544 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2545 err =
228e548e 2546 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2547 sizeof(ctl));
1da177e4 2548 if (err)
4257c8ca 2549 goto out;
228e548e
AB
2550 ctl_buf = msg_sys->msg_control;
2551 ctl_len = msg_sys->msg_controllen;
1da177e4 2552 } else if (ctl_len) {
ac4340fc
DM
2553 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2554 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2555 if (ctl_len > sizeof(ctl)) {
1da177e4 2556 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2557 if (ctl_buf == NULL)
4257c8ca 2558 goto out;
1da177e4
LT
2559 }
2560 err = -EFAULT;
1f466e1f 2561 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2562 goto out_freectl;
228e548e 2563 msg_sys->msg_control = ctl_buf;
1f466e1f 2564 msg_sys->msg_control_is_user = false;
1da177e4 2565 }
b841b901 2566 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
228e548e 2567 msg_sys->msg_flags = flags;
1da177e4
LT
2568
2569 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2570 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2571 /*
2572 * If this is sendmmsg() and current destination address is same as
2573 * previously succeeded address, omit asking LSM's decision.
2574 * used_address->name_len is initialized to UINT_MAX so that the first
2575 * destination address never matches.
2576 */
bc909d9d
MD
2577 if (used_address && msg_sys->msg_name &&
2578 used_address->name_len == msg_sys->msg_namelen &&
2579 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2580 used_address->name_len)) {
d8725c86 2581 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2582 goto out_freectl;
2583 }
86a7e0b6 2584 err = __sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2585 /*
2586 * If this is sendmmsg() and sending to current destination address was
2587 * successful, remember it.
2588 */
2589 if (used_address && err >= 0) {
2590 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2591 if (msg_sys->msg_name)
2592 memcpy(&used_address->name, msg_sys->msg_name,
2593 used_address->name_len);
c71d8ebe 2594 }
1da177e4
LT
2595
2596out_freectl:
89bddce5 2597 if (ctl_buf != ctl)
1da177e4 2598 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2599out:
2600 return err;
2601}
2602
03b1230c
JA
2603int sendmsg_copy_msghdr(struct msghdr *msg,
2604 struct user_msghdr __user *umsg, unsigned flags,
2605 struct iovec **iov)
4257c8ca
JA
2606{
2607 int err;
2608
2609 if (flags & MSG_CMSG_COMPAT) {
2610 struct compat_msghdr __user *msg_compat;
2611
2612 msg_compat = (struct compat_msghdr __user *) umsg;
2613 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2614 } else {
2615 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2616 }
2617 if (err < 0)
2618 return err;
2619
2620 return 0;
2621}
2622
2623static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2624 struct msghdr *msg_sys, unsigned int flags,
2625 struct used_address *used_address,
2626 unsigned int allowed_msghdr_flags)
2627{
2628 struct sockaddr_storage address;
2629 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2630 ssize_t err;
2631
2632 msg_sys->msg_name = &address;
2633
2634 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2635 if (err < 0)
2636 return err;
2637
2638 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2639 allowed_msghdr_flags);
da184284 2640 kfree(iov);
228e548e
AB
2641 return err;
2642}
2643
2644/*
2645 * BSD sendmsg interface
2646 */
03b1230c 2647long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2648 unsigned int flags)
2649{
03b1230c 2650 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2651}
228e548e 2652
e1834a32
DB
2653long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2654 bool forbid_cmsg_compat)
228e548e
AB
2655{
2656 int fput_needed, err;
2657 struct msghdr msg_sys;
1be374a0
AL
2658 struct socket *sock;
2659
e1834a32
DB
2660 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2661 return -EINVAL;
2662
1be374a0 2663 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2664 if (!sock)
2665 goto out;
2666
28a94d8f 2667 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2668
6cb153ca 2669 fput_light(sock->file, fput_needed);
89bddce5 2670out:
1da177e4
LT
2671 return err;
2672}
2673
666547ff 2674SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2675{
e1834a32 2676 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2677}
2678
228e548e
AB
2679/*
2680 * Linux sendmmsg interface
2681 */
2682
2683int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2684 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2685{
2686 int fput_needed, err, datagrams;
2687 struct socket *sock;
2688 struct mmsghdr __user *entry;
2689 struct compat_mmsghdr __user *compat_entry;
2690 struct msghdr msg_sys;
c71d8ebe 2691 struct used_address used_address;
f092276d 2692 unsigned int oflags = flags;
228e548e 2693
e1834a32
DB
2694 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2695 return -EINVAL;
2696
98382f41
AB
2697 if (vlen > UIO_MAXIOV)
2698 vlen = UIO_MAXIOV;
228e548e
AB
2699
2700 datagrams = 0;
2701
2702 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2703 if (!sock)
2704 return err;
2705
c71d8ebe 2706 used_address.name_len = UINT_MAX;
228e548e
AB
2707 entry = mmsg;
2708 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2709 err = 0;
f092276d 2710 flags |= MSG_BATCH;
228e548e
AB
2711
2712 while (datagrams < vlen) {
f092276d
TH
2713 if (datagrams == vlen - 1)
2714 flags = oflags;
2715
228e548e 2716 if (MSG_CMSG_COMPAT & flags) {
666547ff 2717 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2718 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2719 if (err < 0)
2720 break;
2721 err = __put_user(err, &compat_entry->msg_len);
2722 ++compat_entry;
2723 } else {
a7526eb5 2724 err = ___sys_sendmsg(sock,
666547ff 2725 (struct user_msghdr __user *)entry,
28a94d8f 2726 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2727 if (err < 0)
2728 break;
2729 err = put_user(err, &entry->msg_len);
2730 ++entry;
2731 }
2732
2733 if (err)
2734 break;
2735 ++datagrams;
3023898b
SHY
2736 if (msg_data_left(&msg_sys))
2737 break;
a78cb84c 2738 cond_resched();
228e548e
AB
2739 }
2740
228e548e
AB
2741 fput_light(sock->file, fput_needed);
2742
728ffb86
AB
2743 /* We only return an error if no datagrams were able to be sent */
2744 if (datagrams != 0)
228e548e
AB
2745 return datagrams;
2746
228e548e
AB
2747 return err;
2748}
2749
2750SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2751 unsigned int, vlen, unsigned int, flags)
2752{
e1834a32 2753 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2754}
2755
03b1230c
JA
2756int recvmsg_copy_msghdr(struct msghdr *msg,
2757 struct user_msghdr __user *umsg, unsigned flags,
2758 struct sockaddr __user **uaddr,
2759 struct iovec **iov)
1da177e4 2760{
08adb7da 2761 ssize_t err;
1da177e4 2762
4257c8ca
JA
2763 if (MSG_CMSG_COMPAT & flags) {
2764 struct compat_msghdr __user *msg_compat;
1da177e4 2765
4257c8ca
JA
2766 msg_compat = (struct compat_msghdr __user *) umsg;
2767 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2768 } else {
2769 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2770 }
1da177e4 2771 if (err < 0)
da184284 2772 return err;
1da177e4 2773
4257c8ca
JA
2774 return 0;
2775}
2776
2777static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2778 struct user_msghdr __user *msg,
2779 struct sockaddr __user *uaddr,
2780 unsigned int flags, int nosec)
2781{
2782 struct compat_msghdr __user *msg_compat =
2783 (struct compat_msghdr __user *) msg;
2784 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2785 struct sockaddr_storage addr;
2786 unsigned long cmsg_ptr;
2787 int len;
2788 ssize_t err;
2789
2790 msg_sys->msg_name = &addr;
a2e27255
ACM
2791 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2792 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2793
f3d33426
HFS
2794 /* We assume all kernel code knows the size of sockaddr_storage */
2795 msg_sys->msg_namelen = 0;
2796
1da177e4
LT
2797 if (sock->file->f_flags & O_NONBLOCK)
2798 flags |= MSG_DONTWAIT;
1af66221
ED
2799
2800 if (unlikely(nosec))
2801 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2802 else
2803 err = sock_recvmsg(sock, msg_sys, flags);
2804
1da177e4 2805 if (err < 0)
4257c8ca 2806 goto out;
1da177e4
LT
2807 len = err;
2808
2809 if (uaddr != NULL) {
43db362d 2810 err = move_addr_to_user(&addr,
a2e27255 2811 msg_sys->msg_namelen, uaddr,
89bddce5 2812 uaddr_len);
1da177e4 2813 if (err < 0)
4257c8ca 2814 goto out;
1da177e4 2815 }
a2e27255 2816 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2817 COMPAT_FLAGS(msg));
1da177e4 2818 if (err)
4257c8ca 2819 goto out;
1da177e4 2820 if (MSG_CMSG_COMPAT & flags)
a2e27255 2821 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2822 &msg_compat->msg_controllen);
2823 else
a2e27255 2824 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2825 &msg->msg_controllen);
2826 if (err)
4257c8ca 2827 goto out;
1da177e4 2828 err = len;
4257c8ca
JA
2829out:
2830 return err;
2831}
2832
2833static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2834 struct msghdr *msg_sys, unsigned int flags, int nosec)
2835{
2836 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2837 /* user mode address pointers */
2838 struct sockaddr __user *uaddr;
2839 ssize_t err;
2840
2841 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2842 if (err < 0)
2843 return err;
1da177e4 2844
4257c8ca 2845 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2846 kfree(iov);
a2e27255
ACM
2847 return err;
2848}
2849
2850/*
2851 * BSD recvmsg interface
2852 */
2853
03b1230c
JA
2854long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2855 struct user_msghdr __user *umsg,
2856 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2857{
03b1230c 2858 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2859}
2860
e1834a32
DB
2861long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2862 bool forbid_cmsg_compat)
a2e27255
ACM
2863{
2864 int fput_needed, err;
2865 struct msghdr msg_sys;
1be374a0
AL
2866 struct socket *sock;
2867
e1834a32
DB
2868 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2869 return -EINVAL;
2870
1be374a0 2871 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2872 if (!sock)
2873 goto out;
2874
a7526eb5 2875 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2876
6cb153ca 2877 fput_light(sock->file, fput_needed);
1da177e4
LT
2878out:
2879 return err;
2880}
2881
666547ff 2882SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2883 unsigned int, flags)
2884{
e1834a32 2885 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2886}
2887
a2e27255
ACM
2888/*
2889 * Linux recvmmsg interface
2890 */
2891
e11d4284
AB
2892static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2893 unsigned int vlen, unsigned int flags,
2894 struct timespec64 *timeout)
a2e27255
ACM
2895{
2896 int fput_needed, err, datagrams;
2897 struct socket *sock;
2898 struct mmsghdr __user *entry;
d7256d0e 2899 struct compat_mmsghdr __user *compat_entry;
a2e27255 2900 struct msghdr msg_sys;
766b9f92
DD
2901 struct timespec64 end_time;
2902 struct timespec64 timeout64;
a2e27255
ACM
2903
2904 if (timeout &&
2905 poll_select_set_timeout(&end_time, timeout->tv_sec,
2906 timeout->tv_nsec))
2907 return -EINVAL;
2908
2909 datagrams = 0;
2910
2911 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2912 if (!sock)
2913 return err;
2914
7797dc41
SHY
2915 if (likely(!(flags & MSG_ERRQUEUE))) {
2916 err = sock_error(sock->sk);
2917 if (err) {
2918 datagrams = err;
2919 goto out_put;
2920 }
e623a9e9 2921 }
a2e27255
ACM
2922
2923 entry = mmsg;
d7256d0e 2924 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2925
2926 while (datagrams < vlen) {
2927 /*
2928 * No need to ask LSM for more than the first datagram.
2929 */
d7256d0e 2930 if (MSG_CMSG_COMPAT & flags) {
666547ff 2931 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2932 &msg_sys, flags & ~MSG_WAITFORONE,
2933 datagrams);
d7256d0e
JMG
2934 if (err < 0)
2935 break;
2936 err = __put_user(err, &compat_entry->msg_len);
2937 ++compat_entry;
2938 } else {
a7526eb5 2939 err = ___sys_recvmsg(sock,
666547ff 2940 (struct user_msghdr __user *)entry,
a7526eb5
AL
2941 &msg_sys, flags & ~MSG_WAITFORONE,
2942 datagrams);
d7256d0e
JMG
2943 if (err < 0)
2944 break;
2945 err = put_user(err, &entry->msg_len);
2946 ++entry;
2947 }
2948
a2e27255
ACM
2949 if (err)
2950 break;
a2e27255
ACM
2951 ++datagrams;
2952
71c5c159
BB
2953 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2954 if (flags & MSG_WAITFORONE)
2955 flags |= MSG_DONTWAIT;
2956
a2e27255 2957 if (timeout) {
766b9f92 2958 ktime_get_ts64(&timeout64);
c2e6c856 2959 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2960 if (timeout->tv_sec < 0) {
2961 timeout->tv_sec = timeout->tv_nsec = 0;
2962 break;
2963 }
2964
2965 /* Timeout, return less than vlen datagrams */
2966 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2967 break;
2968 }
2969
2970 /* Out of band data, return right away */
2971 if (msg_sys.msg_flags & MSG_OOB)
2972 break;
a78cb84c 2973 cond_resched();
a2e27255
ACM
2974 }
2975
a2e27255 2976 if (err == 0)
34b88a68
ACM
2977 goto out_put;
2978
2979 if (datagrams == 0) {
2980 datagrams = err;
2981 goto out_put;
2982 }
a2e27255 2983
34b88a68
ACM
2984 /*
2985 * We may return less entries than requested (vlen) if the
2986 * sock is non block and there aren't enough datagrams...
2987 */
2988 if (err != -EAGAIN) {
a2e27255 2989 /*
34b88a68
ACM
2990 * ... or if recvmsg returns an error after we
2991 * received some datagrams, where we record the
2992 * error to return on the next call or if the
2993 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2994 */
e05a5f51 2995 WRITE_ONCE(sock->sk->sk_err, -err);
a2e27255 2996 }
34b88a68
ACM
2997out_put:
2998 fput_light(sock->file, fput_needed);
a2e27255 2999
34b88a68 3000 return datagrams;
a2e27255
ACM
3001}
3002
e11d4284
AB
3003int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
3004 unsigned int vlen, unsigned int flags,
3005 struct __kernel_timespec __user *timeout,
3006 struct old_timespec32 __user *timeout32)
a2e27255
ACM
3007{
3008 int datagrams;
c2e6c856 3009 struct timespec64 timeout_sys;
a2e27255 3010
e11d4284
AB
3011 if (timeout && get_timespec64(&timeout_sys, timeout))
3012 return -EFAULT;
a2e27255 3013
e11d4284 3014 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
3015 return -EFAULT;
3016
e11d4284
AB
3017 if (!timeout && !timeout32)
3018 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
3019
3020 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 3021
e11d4284
AB
3022 if (datagrams <= 0)
3023 return datagrams;
3024
3025 if (timeout && put_timespec64(&timeout_sys, timeout))
3026 datagrams = -EFAULT;
3027
3028 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
3029 datagrams = -EFAULT;
3030
3031 return datagrams;
3032}
3033
1255e269
DB
3034SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
3035 unsigned int, vlen, unsigned int, flags,
c2e6c856 3036 struct __kernel_timespec __user *, timeout)
1255e269 3037{
e11d4284
AB
3038 if (flags & MSG_CMSG_COMPAT)
3039 return -EINVAL;
3040
3041 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
3042}
3043
3044#ifdef CONFIG_COMPAT_32BIT_TIME
3045SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
3046 unsigned int, vlen, unsigned int, flags,
3047 struct old_timespec32 __user *, timeout)
3048{
3049 if (flags & MSG_CMSG_COMPAT)
3050 return -EINVAL;
3051
3052 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
1255e269 3053}
e11d4284 3054#endif
1255e269 3055
a2e27255 3056#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call number.
 * The names in the comments follow the SYS_* numbering of the socketcall
 * multiplexer; entry 0 is not a valid call.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0: (unused) */
	AL(3),	/*  1: SYS_SOCKET */
	AL(3),	/*  2: SYS_BIND */
	AL(3),	/*  3: SYS_CONNECT */
	AL(2),	/*  4: SYS_LISTEN */
	AL(3),	/*  5: SYS_ACCEPT */
	AL(3),	/*  6: SYS_GETSOCKNAME */
	AL(3),	/*  7: SYS_GETPEERNAME */
	AL(4),	/*  8: SYS_SOCKETPAIR */
	AL(4),	/*  9: SYS_SEND */
	AL(4),	/* 10: SYS_RECV */
	AL(6),	/* 11: SYS_SENDTO */
	AL(6),	/* 12: SYS_RECVFROM */
	AL(2),	/* 13: SYS_SHUTDOWN */
	AL(5),	/* 14: SYS_SETSOCKOPT */
	AL(5),	/* 15: SYS_GETSOCKOPT */
	AL(3),	/* 16: SYS_SENDMSG */
	AL(3),	/* 17: SYS_RECVMSG */
	AL(4),	/* 18: SYS_ACCEPT4 */
	AL(5),	/* 19: SYS_RECVMMSG */
	AL(4),	/* 20: SYS_SENDMMSG */
};

#undef AL
3067
3068/*
89bddce5 3069 * System call vectors.
1da177e4
LT
3070 *
3071 * Argument checking cleaned up. Saved 20% in size.
3072 * This function doesn't need to set the kernel lock because
89bddce5 3073 * it is set by the callees.
1da177e4
LT
3074 */
3075
3e0fa65f 3076SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 3077{
2950fa9d 3078 unsigned long a[AUDITSC_ARGS];
89bddce5 3079 unsigned long a0, a1;
1da177e4 3080 int err;
47379052 3081 unsigned int len;
1da177e4 3082
228e548e 3083 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 3084 return -EINVAL;
c8e8cd57 3085 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 3086
47379052
AV
3087 len = nargs[call];
3088 if (len > sizeof(a))
3089 return -EINVAL;
3090
1da177e4 3091 /* copy_from_user should be SMP safe. */
47379052 3092 if (copy_from_user(a, args, len))
1da177e4 3093 return -EFAULT;
3ec3b2fb 3094
2950fa9d
CG
3095 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3096 if (err)
3097 return err;
3ec3b2fb 3098
89bddce5
SH
3099 a0 = a[0];
3100 a1 = a[1];
3101
3102 switch (call) {
3103 case SYS_SOCKET:
9d6a15c3 3104 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
3105 break;
3106 case SYS_BIND:
a87d35d8 3107 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
3108 break;
3109 case SYS_CONNECT:
1387c2c2 3110 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
3111 break;
3112 case SYS_LISTEN:
25e290ee 3113 err = __sys_listen(a0, a1);
89bddce5
SH
3114 break;
3115 case SYS_ACCEPT:
4541e805
DB
3116 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3117 (int __user *)a[2], 0);
89bddce5
SH
3118 break;
3119 case SYS_GETSOCKNAME:
3120 err =
8882a107
DB
3121 __sys_getsockname(a0, (struct sockaddr __user *)a1,
3122 (int __user *)a[2]);
89bddce5
SH
3123 break;
3124 case SYS_GETPEERNAME:
3125 err =
b21c8f83
DB
3126 __sys_getpeername(a0, (struct sockaddr __user *)a1,
3127 (int __user *)a[2]);
89bddce5
SH
3128 break;
3129 case SYS_SOCKETPAIR:
6debc8d8 3130 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
3131 break;
3132 case SYS_SEND:
f3bf896b
DB
3133 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
3134 NULL, 0);
89bddce5
SH
3135 break;
3136 case SYS_SENDTO:
211b634b
DB
3137 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
3138 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
3139 break;
3140 case SYS_RECV:
d27e9afc
DB
3141 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
3142 NULL, NULL);
89bddce5
SH
3143 break;
3144 case SYS_RECVFROM:
7a09e1eb
DB
3145 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
3146 (struct sockaddr __user *)a[4],
3147 (int __user *)a[5]);
89bddce5
SH
3148 break;
3149 case SYS_SHUTDOWN:
005a1aea 3150 err = __sys_shutdown(a0, a1);
89bddce5
SH
3151 break;
3152 case SYS_SETSOCKOPT:
cc36dca0
DB
3153 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
3154 a[4]);
89bddce5
SH
3155 break;
3156 case SYS_GETSOCKOPT:
3157 err =
13a2d70e
DB
3158 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
3159 (int __user *)a[4]);
89bddce5
SH
3160 break;
3161 case SYS_SENDMSG:
e1834a32
DB
3162 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
3163 a[2], true);
89bddce5 3164 break;
228e548e 3165 case SYS_SENDMMSG:
e1834a32
DB
3166 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
3167 a[3], true);
228e548e 3168 break;
89bddce5 3169 case SYS_RECVMSG:
e1834a32
DB
3170 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
3171 a[2], true);
89bddce5 3172 break;
a2e27255 3173 case SYS_RECVMMSG:
3ca47e95 3174 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
3175 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3176 a[2], a[3],
3177 (struct __kernel_timespec __user *)a[4],
3178 NULL);
3179 else
3180 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3181 a[2], a[3], NULL,
3182 (struct old_timespec32 __user *)a[4]);
a2e27255 3183 break;
de11defe 3184 case SYS_ACCEPT4:
4541e805
DB
3185 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3186 (int __user *)a[2], a[3]);
aaca0bdc 3187 break;
89bddce5
SH
3188 default:
3189 err = -EINVAL;
3190 break;
1da177e4
LT
3191 }
3192 return err;
3193}
3194
89bddce5 3195#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 3196
55737fda
SH
3197/**
3198 * sock_register - add a socket protocol handler
3199 * @ops: description of protocol
3200 *
1da177e4
LT
3201 * This function is called by a protocol handler that wants to
3202 * advertise its address family, and have it linked into the
e793c0f7 3203 * socket interface. The value ops->family corresponds to the
55737fda 3204 * socket system call protocol family.
1da177e4 3205 */
f0fd27d4 3206int sock_register(const struct net_proto_family *ops)
1da177e4
LT
3207{
3208 int err;
3209
3210 if (ops->family >= NPROTO) {
3410f22e 3211 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
3212 return -ENOBUFS;
3213 }
55737fda
SH
3214
3215 spin_lock(&net_family_lock);
190683a9
ED
3216 if (rcu_dereference_protected(net_families[ops->family],
3217 lockdep_is_held(&net_family_lock)))
55737fda
SH
3218 err = -EEXIST;
3219 else {
cf778b00 3220 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
3221 err = 0;
3222 }
55737fda
SH
3223 spin_unlock(&net_family_lock);
3224
fe0bdbde 3225 pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
1da177e4
LT
3226 return err;
3227}
c6d409cf 3228EXPORT_SYMBOL(sock_register);
1da177e4 3229
55737fda
SH
3230/**
3231 * sock_unregister - remove a protocol handler
3232 * @family: protocol family to remove
3233 *
1da177e4
LT
3234 * This function is called by a protocol handler that wants to
3235 * remove its address family, and have it unlinked from the
55737fda
SH
3236 * new socket creation.
3237 *
3238 * If protocol handler is a module, then it can use module reference
3239 * counts to protect against new references. If protocol handler is not
3240 * a module then it needs to provide its own protection in
3241 * the ops->create routine.
1da177e4 3242 */
f0fd27d4 3243void sock_unregister(int family)
1da177e4 3244{
f0fd27d4 3245 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3246
55737fda 3247 spin_lock(&net_family_lock);
a9b3cd7f 3248 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3249 spin_unlock(&net_family_lock);
3250
3251 synchronize_rcu();
3252
fe0bdbde 3253 pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
1da177e4 3254}
c6d409cf 3255EXPORT_SYMBOL(sock_unregister);
1da177e4 3256
bf2ae2e4
XL
3257bool sock_is_registered(int family)
3258{
66b51b0a 3259 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3260}
3261
77d76ea3 3262static int __init sock_init(void)
1da177e4 3263{
b3e19d92 3264 int err;
2ca794e5
EB
3265 /*
3266 * Initialize the network sysctl infrastructure.
3267 */
3268 err = net_sysctl_init();
3269 if (err)
3270 goto out;
b3e19d92 3271
1da177e4 3272 /*
89bddce5 3273 * Initialize skbuff SLAB cache
1da177e4
LT
3274 */
3275 skb_init();
1da177e4
LT
3276
3277 /*
89bddce5 3278 * Initialize the protocols module.
1da177e4
LT
3279 */
3280
3281 init_inodecache();
b3e19d92
NP
3282
3283 err = register_filesystem(&sock_fs_type);
3284 if (err)
47260ba9 3285 goto out;
1da177e4 3286 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3287 if (IS_ERR(sock_mnt)) {
3288 err = PTR_ERR(sock_mnt);
3289 goto out_mount;
3290 }
77d76ea3
AK
3291
3292 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3293 */
3294
3295#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3296 err = netfilter_init();
3297 if (err)
3298 goto out;
1da177e4 3299#endif
cbeb321a 3300
408eccce 3301 ptp_classifier_init();
c1f19b51 3302
b3e19d92
NP
3303out:
3304 return err;
3305
3306out_mount:
3307 unregister_filesystem(&sock_fs_type);
b3e19d92 3308 goto out;
1da177e4
LT
3309}
3310
77d76ea3
AK
3311core_initcall(sock_init); /* early initcall */
3312
1da177e4
LT
3313#ifdef CONFIG_PROC_FS
3314void socket_seq_show(struct seq_file *seq)
3315{
648845ab
TZ
3316 seq_printf(seq, "sockets: used %d\n",
3317 sock_inuse_get(seq->private));
1da177e4 3318}
89bddce5 3319#endif /* CONFIG_PROC_FS */
1da177e4 3320
29c49648
AB
3321/* Handle the fact that while struct ifreq has the same *layout* on
3322 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3323 * which are handled elsewhere, it still has different *size* due to
3324 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3325 * resulting in struct ifreq being 32 and 40 bytes respectively).
3326 * As a result, if the struct happens to be at the end of a page and
3327 * the next page isn't readable/writable, we get a fault. To prevent
3328 * that, copy back and forth to the full size.
3329 */
3330int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
7a229387 3331{
29c49648
AB
3332 if (in_compat_syscall()) {
3333 struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
7a229387 3334
29c49648
AB
3335 memset(ifr, 0, sizeof(*ifr));
3336 if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
3337 return -EFAULT;
7a229387 3338
29c49648
AB
3339 if (ifrdata)
3340 *ifrdata = compat_ptr(ifr32->ifr_data);
7a229387 3341
29c49648
AB
3342 return 0;
3343 }
7a229387 3344
29c49648 3345 if (copy_from_user(ifr, arg, sizeof(*ifr)))
7a229387
AB
3346 return -EFAULT;
3347
29c49648
AB
3348 if (ifrdata)
3349 *ifrdata = ifr->ifr_data;
3350
7a229387
AB
3351 return 0;
3352}
29c49648 3353EXPORT_SYMBOL(get_user_ifreq);
7a229387 3354
29c49648 3355int put_user_ifreq(struct ifreq *ifr, void __user *arg)
7a229387 3356{
29c49648 3357 size_t size = sizeof(*ifr);
7a229387 3358
29c49648
AB
3359 if (in_compat_syscall())
3360 size = sizeof(struct compat_ifreq);
7a229387 3361
29c49648 3362 if (copy_to_user(arg, ifr, size))
7a229387
AB
3363 return -EFAULT;
3364
3a7da39d 3365 return 0;
7a229387 3366}
29c49648 3367EXPORT_SYMBOL(put_user_ifreq);
7a229387 3368
89bbfc95 3369#ifdef CONFIG_COMPAT
7a50a240
AB
3370static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3371{
7a50a240 3372 compat_uptr_t uptr32;
44c02a2c
AV
3373 struct ifreq ifr;
3374 void __user *saved;
3375 int err;
7a50a240 3376
29c49648 3377 if (get_user_ifreq(&ifr, NULL, uifr32))
7a50a240
AB
3378 return -EFAULT;
3379
3380 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3381 return -EFAULT;
3382
44c02a2c
AV
3383 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3384 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3385
a554bf96 3386 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
44c02a2c
AV
3387 if (!err) {
3388 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
29c49648 3389 if (put_user_ifreq(&ifr, uifr32))
44c02a2c 3390 err = -EFAULT;
ccbd6a5a 3391 }
44c02a2c 3392 return err;
7a229387
AB
3393}
3394
590d4693
BH
3395/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3396static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3397 struct compat_ifreq __user *u_ifreq32)
7a229387 3398{
44c02a2c 3399 struct ifreq ifreq;
a554bf96 3400 void __user *data;
7a229387 3401
d0efb162
PC
3402 if (!is_socket_ioctl_cmd(cmd))
3403 return -ENOTTY;
a554bf96 3404 if (get_user_ifreq(&ifreq, &data, u_ifreq32))
7a229387 3405 return -EFAULT;
a554bf96 3406 ifreq.ifr_data = data;
7a229387 3407
a554bf96 3408 return dev_ioctl(net, cmd, &ifreq, data, NULL);
a2116ed2
AB
3409}
3410
6b96018b
AB
3411static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3412 unsigned int cmd, unsigned long arg)
3413{
3414 void __user *argp = compat_ptr(arg);
3415 struct sock *sk = sock->sk;
3416 struct net *net = sock_net(sk);
1ded5e5a 3417 const struct proto_ops *ops;
7a229387 3418
6b96018b 3419 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
88fc023f 3420 return sock_ioctl(file, cmd, (unsigned long)argp);
6b96018b
AB
3421
3422 switch (cmd) {
7a50a240
AB
3423 case SIOCWANDEV:
3424 return compat_siocwandev(net, argp);
0768e170
AB
3425 case SIOCGSTAMP_OLD:
3426 case SIOCGSTAMPNS_OLD:
1ded5e5a
ED
3427 ops = READ_ONCE(sock->ops);
3428 if (!ops->gettstamp)
c7cbdbf2 3429 return -ENOIOCTLCMD;
1ded5e5a
ED
3430 return ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
3431 !COMPAT_USE_64BIT_TIME);
c7cbdbf2 3432
dd98d289 3433 case SIOCETHTOOL:
590d4693
BH
3434 case SIOCBONDSLAVEINFOQUERY:
3435 case SIOCBONDINFOQUERY:
a2116ed2 3436 case SIOCSHWTSTAMP:
fd468c74 3437 case SIOCGHWTSTAMP:
590d4693 3438 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3439
3440 case FIOSETOWN:
3441 case SIOCSPGRP:
3442 case FIOGETOWN:
3443 case SIOCGPGRP:
3444 case SIOCBRADDBR:
3445 case SIOCBRDELBR:
3446 case SIOCGIFVLAN:
3447 case SIOCSIFVLAN:
c62cce2c 3448 case SIOCGSKNS:
0768e170
AB
3449 case SIOCGSTAMP_NEW:
3450 case SIOCGSTAMPNS_NEW:
876f0bf9 3451 case SIOCGIFCONF:
fd3a4590
RP
3452 case SIOCSIFBR:
3453 case SIOCGIFBR:
6b96018b
AB
3454 return sock_ioctl(file, cmd, arg);
3455
3456 case SIOCGIFFLAGS:
3457 case SIOCSIFFLAGS:
709566d7
AB
3458 case SIOCGIFMAP:
3459 case SIOCSIFMAP:
6b96018b
AB
3460 case SIOCGIFMETRIC:
3461 case SIOCSIFMETRIC:
3462 case SIOCGIFMTU:
3463 case SIOCSIFMTU:
3464 case SIOCGIFMEM:
3465 case SIOCSIFMEM:
3466 case SIOCGIFHWADDR:
3467 case SIOCSIFHWADDR:
3468 case SIOCADDMULTI:
3469 case SIOCDELMULTI:
3470 case SIOCGIFINDEX:
6b96018b
AB
3471 case SIOCGIFADDR:
3472 case SIOCSIFADDR:
3473 case SIOCSIFHWBROADCAST:
6b96018b 3474 case SIOCDIFADDR:
6b96018b
AB
3475 case SIOCGIFBRDADDR:
3476 case SIOCSIFBRDADDR:
3477 case SIOCGIFDSTADDR:
3478 case SIOCSIFDSTADDR:
3479 case SIOCGIFNETMASK:
3480 case SIOCSIFNETMASK:
3481 case SIOCSIFPFLAGS:
3482 case SIOCGIFPFLAGS:
3483 case SIOCGIFTXQLEN:
3484 case SIOCSIFTXQLEN:
3485 case SIOCBRADDIF:
3486 case SIOCBRDELIF:
c6c9fee3 3487 case SIOCGIFNAME:
9177efd3
AB
3488 case SIOCSIFNAME:
3489 case SIOCGMIIPHY:
3490 case SIOCGMIIREG:
3491 case SIOCSMIIREG:
f92d4fc9
AV
3492 case SIOCBONDENSLAVE:
3493 case SIOCBONDRELEASE:
3494 case SIOCBONDSETHWADDR:
3495 case SIOCBONDCHANGEACTIVE:
6b96018b
AB
3496 case SIOCSARP:
3497 case SIOCGARP:
3498 case SIOCDARP:
c7dc504e 3499 case SIOCOUTQ:
9d7bf41f 3500 case SIOCOUTQNSD:
6b96018b 3501 case SIOCATMARK:
63ff03ab 3502 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3503 }
3504
6b96018b
AB
3505 return -ENOIOCTLCMD;
3506}
7a229387 3507
95c96174 3508static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3509 unsigned long arg)
89bbfc95
SP
3510{
3511 struct socket *sock = file->private_data;
1ded5e5a 3512 const struct proto_ops *ops = READ_ONCE(sock->ops);
89bbfc95 3513 int ret = -ENOIOCTLCMD;
87de87d5
DM
3514 struct sock *sk;
3515 struct net *net;
3516
3517 sk = sock->sk;
3518 net = sock_net(sk);
89bbfc95 3519
1ded5e5a
ED
3520 if (ops->compat_ioctl)
3521 ret = ops->compat_ioctl(sock, cmd, arg);
89bbfc95 3522
87de87d5
DM
3523 if (ret == -ENOIOCTLCMD &&
3524 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3525 ret = compat_wext_handle_ioctl(net, cmd, arg);
3526
6b96018b
AB
3527 if (ret == -ENOIOCTLCMD)
3528 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3529
89bbfc95
SP
3530 return ret;
3531}
3532#endif
3533
8a3c245c
PT
3534/**
3535 * kernel_bind - bind an address to a socket (kernel space)
3536 * @sock: socket
3537 * @addr: address
3538 * @addrlen: length of address
3539 *
3540 * Returns 0 or an error.
3541 */
3542
ac5a488e
SS
3543int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3544{
c889a99a
JR
3545 struct sockaddr_storage address;
3546
3547 memcpy(&address, addr, addrlen);
3548
3549 return READ_ONCE(sock->ops)->bind(sock, (struct sockaddr *)&address,
3550 addrlen);
ac5a488e 3551}
c6d409cf 3552EXPORT_SYMBOL(kernel_bind);
ac5a488e 3553
8a3c245c
PT
3554/**
3555 * kernel_listen - move socket to listening state (kernel space)
3556 * @sock: socket
3557 * @backlog: pending connections queue size
3558 *
3559 * Returns 0 or an error.
3560 */
3561
ac5a488e
SS
3562int kernel_listen(struct socket *sock, int backlog)
3563{
1ded5e5a 3564 return READ_ONCE(sock->ops)->listen(sock, backlog);
ac5a488e 3565}
c6d409cf 3566EXPORT_SYMBOL(kernel_listen);
ac5a488e 3567
8a3c245c
PT
3568/**
3569 * kernel_accept - accept a connection (kernel space)
3570 * @sock: listening socket
3571 * @newsock: new connected socket
3572 * @flags: flags
3573 *
3574 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3575 * If it fails, @newsock is guaranteed to be %NULL.
3576 * Returns 0 or an error.
3577 */
3578
ac5a488e
SS
3579int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3580{
3581 struct sock *sk = sock->sk;
1ded5e5a 3582 const struct proto_ops *ops = READ_ONCE(sock->ops);
ac5a488e
SS
3583 int err;
3584
3585 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3586 newsock);
3587 if (err < 0)
3588 goto done;
3589
1ded5e5a 3590 err = ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3591 if (err < 0) {
3592 sock_release(*newsock);
fa8705b0 3593 *newsock = NULL;
ac5a488e
SS
3594 goto done;
3595 }
3596
1ded5e5a
ED
3597 (*newsock)->ops = ops;
3598 __module_get(ops->owner);
ac5a488e
SS
3599
3600done:
3601 return err;
3602}
c6d409cf 3603EXPORT_SYMBOL(kernel_accept);
ac5a488e 3604
8a3c245c
PT
3605/**
3606 * kernel_connect - connect a socket (kernel space)
3607 * @sock: socket
3608 * @addr: address
3609 * @addrlen: address length
3610 * @flags: flags (O_NONBLOCK, ...)
3611 *
f1dcffcc 3612 * For datagram sockets, @addr is the address to which datagrams are sent
8a3c245c
PT
3613 * by default, and the only address from which datagrams are received.
3614 * For stream sockets, attempts to connect to @addr.
3615 * Returns 0 or an error code.
3616 */
3617
ac5a488e 3618int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3619 int flags)
ac5a488e 3620{
0bdf3993
JR
3621 struct sockaddr_storage address;
3622
3623 memcpy(&address, addr, addrlen);
3624
3625 return READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)&address,
3626 addrlen, flags);
ac5a488e 3627}
c6d409cf 3628EXPORT_SYMBOL(kernel_connect);
ac5a488e 3629
8a3c245c
PT
3630/**
3631 * kernel_getsockname - get the address which the socket is bound (kernel space)
3632 * @sock: socket
3633 * @addr: address holder
3634 *
3635 * Fills the @addr pointer with the address which the socket is bound.
0fc95dec 3636 * Returns the length of the address in bytes or an error code.
8a3c245c
PT
3637 */
3638
9b2c45d4 3639int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3640{
1ded5e5a 3641 return READ_ONCE(sock->ops)->getname(sock, addr, 0);
ac5a488e 3642}
c6d409cf 3643EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3644
8a3c245c 3645/**
645f0897 3646 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3647 * @sock: socket
3648 * @addr: address holder
3649 *
3650 * Fills the @addr pointer with the address which the socket is connected.
0fc95dec 3651 * Returns the length of the address in bytes or an error code.
8a3c245c
PT
3652 */
3653
9b2c45d4 3654int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3655{
1ded5e5a 3656 return READ_ONCE(sock->ops)->getname(sock, addr, 1);
ac5a488e 3657}
c6d409cf 3658EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3659
8a3c245c 3660/**
645f0897 3661 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3662 * @sock: socket
3663 * @how: connection part
3664 *
3665 * Returns 0 or an error.
3666 */
3667
91cf45f0
TM
3668int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3669{
1ded5e5a 3670 return READ_ONCE(sock->ops)->shutdown(sock, how);
91cf45f0 3671}
91cf45f0 3672EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3673
8a3c245c
PT
3674/**
3675 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3676 * @sk: socket
3677 *
3678 * This routine returns the IP overhead imposed by a socket i.e.
3679 * the length of the underlying IP header, depending on whether
3680 * this is an IPv4 or IPv6 socket and the length from IP options turned
3681 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3682 */
8a3c245c 3683
113c3075
P
3684u32 kernel_sock_ip_overhead(struct sock *sk)
3685{
3686 struct inet_sock *inet;
3687 struct ip_options_rcu *opt;
3688 u32 overhead = 0;
113c3075
P
3689#if IS_ENABLED(CONFIG_IPV6)
3690 struct ipv6_pinfo *np;
3691 struct ipv6_txoptions *optv6 = NULL;
3692#endif /* IS_ENABLED(CONFIG_IPV6) */
3693
3694 if (!sk)
3695 return overhead;
3696
113c3075
P
3697 switch (sk->sk_family) {
3698 case AF_INET:
3699 inet = inet_sk(sk);
3700 overhead += sizeof(struct iphdr);
3701 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3702 sock_owned_by_user(sk));
113c3075
P
3703 if (opt)
3704 overhead += opt->opt.optlen;
3705 return overhead;
3706#if IS_ENABLED(CONFIG_IPV6)
3707 case AF_INET6:
3708 np = inet6_sk(sk);
3709 overhead += sizeof(struct ipv6hdr);
3710 if (np)
3711 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3712 sock_owned_by_user(sk));
113c3075
P
3713 if (optv6)
3714 overhead += (optv6->opt_flen + optv6->opt_nflen);
3715 return overhead;
3716#endif /* IS_ENABLED(CONFIG_IPV6) */
3717 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3718 return overhead;
3719 }
3720}
3721EXPORT_SYMBOL(kernel_sock_ip_overhead);