Merge tag 'probes-fixes-v6.7-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-block.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
aef2feda 55#include <linux/bpf-cgroup.h>
cc69837f 56#include <linux/ethtool.h>
1da177e4 57#include <linux/mm.h>
1da177e4
LT
58#include <linux/socket.h>
59#include <linux/file.h>
2dc334f1 60#include <linux/splice.h>
1da177e4
LT
61#include <linux/net.h>
62#include <linux/interrupt.h>
aaca0bdc 63#include <linux/thread_info.h>
55737fda 64#include <linux/rcupdate.h>
1da177e4
LT
65#include <linux/netdevice.h>
66#include <linux/proc_fs.h>
67#include <linux/seq_file.h>
4a3e2f71 68#include <linux/mutex.h>
1da177e4 69#include <linux/if_bridge.h>
20380731 70#include <linux/if_vlan.h>
408eccce 71#include <linux/ptp_classify.h>
1da177e4
LT
72#include <linux/init.h>
73#include <linux/poll.h>
74#include <linux/cache.h>
75#include <linux/module.h>
76#include <linux/highmem.h>
1da177e4 77#include <linux/mount.h>
fba9be49 78#include <linux/pseudo_fs.h>
1da177e4
LT
79#include <linux/security.h>
80#include <linux/syscalls.h>
81#include <linux/compat.h>
82#include <linux/kmod.h>
3ec3b2fb 83#include <linux/audit.h>
d86b5e0e 84#include <linux/wireless.h>
1b8d7ae4 85#include <linux/nsproxy.h>
1fd7317d 86#include <linux/magic.h>
5a0e3ad6 87#include <linux/slab.h>
600e1779 88#include <linux/xattr.h>
c8e8cd57 89#include <linux/nospec.h>
8c3c447b 90#include <linux/indirect_call_wrapper.h>
8e9fad0e 91#include <linux/io_uring.h>
1da177e4 92
7c0f6ba6 93#include <linux/uaccess.h>
1da177e4
LT
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
c7dc504e 106#include <linux/termios.h>
6b96018b 107#include <linux/sockios.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
d7c08826 110#include <linux/ptp_clock_kernel.h>
6e6eda44 111#include <trace/events/sock.h>
06021292 112
e0d1095a 113#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
114unsigned int sysctl_net_busy_read __read_mostly;
115unsigned int sysctl_net_busy_poll __read_mostly;
06021292 116#endif
6b96018b 117
8ae5e030
AV
118static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
119static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 120static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
121
122static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
123static __poll_t sock_poll(struct file *file,
124 struct poll_table_struct *wait);
89bddce5 125static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
126#ifdef CONFIG_COMPAT
127static long compat_sock_ioctl(struct file *file,
89bddce5 128 unsigned int cmd, unsigned long arg);
89bbfc95 129#endif
1da177e4 130static int sock_fasync(int fd, struct file *filp, int on);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
2bfc6685 134static void sock_splice_eof(struct file *file);
542d3065
AB
135
#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo() hook for socket files: hand the request over to the
 * protocol's own show_fdinfo() operation, if the protocol provides one.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;
	/* Take one snapshot of sock->ops; it is used for the test and the call. */
	const struct proto_ops *ops = READ_ONCE(sock->ops);

	if (!ops->show_fdinfo)
		return;

	ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
#endif
1da177e4 148
1da177e4
LT
/*
 *	Socket files have a set of 'special' operations as well as the generic
 *	file ones. These don't appear in the operation structures but are done
 *	directly via the socketcall() multiplexor.
 */

/*
 * File operations attached to socket-backed files. sock_alloc_file() passes
 * this table to alloc_file_pseudo(), and sock_from_file() recognises a socket
 * file by comparing file->f_op against its address.
 */
static const struct file_operations socket_file_ops = {
	.owner =	THIS_MODULE,
	.llseek =	no_llseek,
	.read_iter =	sock_read_iter,
	.write_iter =	sock_write_iter,
	.poll =		sock_poll,
	.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = compat_sock_ioctl,
#endif
	.uring_cmd =    io_uring_cmd_sock,
	.mmap =		sock_mmap,
	.release =	sock_close,
	.fasync =	sock_fasync,
	.splice_write = splice_to_socket,
	.splice_read =	sock_splice_read,
	.splice_eof =	sock_splice_eof,
	.show_fdinfo =	sock_show_fdinfo,
};
173
fe0bdbde
YD
/*
 * Printable protocol family names, indexed by PF_* constant. Families
 * without a designated initializer here are left as NULL entries.
 */
static const char * const pf_family_names[] = {
	[PF_UNSPEC]     = "PF_UNSPEC",
	[PF_UNIX]       = "PF_UNIX/PF_LOCAL",
	[PF_INET]       = "PF_INET",
	[PF_AX25]       = "PF_AX25",
	[PF_IPX]        = "PF_IPX",
	[PF_APPLETALK]  = "PF_APPLETALK",
	[PF_NETROM]     = "PF_NETROM",
	[PF_BRIDGE]     = "PF_BRIDGE",
	[PF_ATMPVC]     = "PF_ATMPVC",
	[PF_X25]        = "PF_X25",
	[PF_INET6]      = "PF_INET6",
	[PF_ROSE]       = "PF_ROSE",
	[PF_DECnet]     = "PF_DECnet",
	[PF_NETBEUI]    = "PF_NETBEUI",
	[PF_SECURITY]   = "PF_SECURITY",
	[PF_KEY]        = "PF_KEY",
	[PF_NETLINK]    = "PF_NETLINK/PF_ROUTE",
	[PF_PACKET]     = "PF_PACKET",
	[PF_ASH]        = "PF_ASH",
	[PF_ECONET]     = "PF_ECONET",
	[PF_ATMSVC]     = "PF_ATMSVC",
	[PF_RDS]        = "PF_RDS",
	[PF_SNA]        = "PF_SNA",
	[PF_IRDA]       = "PF_IRDA",
	[PF_PPPOX]      = "PF_PPPOX",
	[PF_WANPIPE]    = "PF_WANPIPE",
	[PF_LLC]        = "PF_LLC",
	[PF_IB]         = "PF_IB",
	[PF_MPLS]       = "PF_MPLS",
	[PF_CAN]        = "PF_CAN",
	[PF_TIPC]       = "PF_TIPC",
	[PF_BLUETOOTH]  = "PF_BLUETOOTH",
	[PF_IUCV]       = "PF_IUCV",
	[PF_RXRPC]      = "PF_RXRPC",
	[PF_ISDN]       = "PF_ISDN",
	[PF_PHONET]     = "PF_PHONET",
	[PF_IEEE802154] = "PF_IEEE802154",
	[PF_CAIF]       = "PF_CAIF",
	[PF_ALG]        = "PF_ALG",
	[PF_NFC]        = "PF_NFC",
	[PF_VSOCK]      = "PF_VSOCK",
	[PF_KCM]        = "PF_KCM",
	[PF_QIPCRTR]    = "PF_QIPCRTR",
	[PF_SMC]        = "PF_SMC",
	[PF_XDP]        = "PF_XDP",
	[PF_MCTP]       = "PF_MCTP",
};
222
1da177e4
LT
223/*
224 * The protocol list. Each protocol is registered in here.
225 */
226
1da177e4 227static DEFINE_SPINLOCK(net_family_lock);
190683a9 228static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 229
1da177e4 230/*
89bddce5
SH
231 * Support routines.
232 * Move socket addresses back and forth across the kernel/user
233 * divide and look after the messy bits.
1da177e4
LT
234 */
235
1da177e4
LT
236/**
237 * move_addr_to_kernel - copy a socket address into kernel space
238 * @uaddr: Address in user space
239 * @kaddr: Address in kernel space
240 * @ulen: Length in user space
241 *
242 * The address is copied into kernel space. If the provided address is
243 * too long an error code of -EINVAL is returned. If the copy gives
244 * invalid addresses -EFAULT is returned. On a success 0 is returned.
245 */
246
43db362d 247int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 248{
230b1839 249 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 250 return -EINVAL;
89bddce5 251 if (ulen == 0)
1da177e4 252 return 0;
89bddce5 253 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 254 return -EFAULT;
3ec3b2fb 255 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
256}
257
258/**
259 * move_addr_to_user - copy an address to user space
260 * @kaddr: kernel space address
261 * @klen: length of address in kernel
262 * @uaddr: user space address
263 * @ulen: pointer to user length field
264 *
265 * The value pointed to by ulen on entry is the buffer length available.
266 * This is overwritten with the buffer space used. -EINVAL is returned
267 * if an overlong buffer is specified or a negative buffer size. -EFAULT
268 * is returned if either the buffer or the length field are not
269 * accessible.
270 * After copying the data up to the limit the user specifies, the true
271 * length of the data is written over the length limit the user
272 * specified. Zero is returned for a success.
273 */
89bddce5 274
43db362d 275static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 276 void __user *uaddr, int __user *ulen)
1da177e4
LT
277{
278 int err;
279 int len;
280
68c6beb3 281 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
282 err = get_user(len, ulen);
283 if (err)
1da177e4 284 return err;
89bddce5
SH
285 if (len > klen)
286 len = klen;
68c6beb3 287 if (len < 0)
1da177e4 288 return -EINVAL;
89bddce5 289 if (len) {
d6fe3945
SG
290 if (audit_sockaddr(klen, kaddr))
291 return -ENOMEM;
89bddce5 292 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
293 return -EFAULT;
294 }
295 /*
89bddce5
SH
296 * "fromlen shall refer to the value before truncation.."
297 * 1003.1g
1da177e4
LT
298 */
299 return __put_user(klen, ulen);
300}
301
08009a76 302static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
303
304static struct inode *sock_alloc_inode(struct super_block *sb)
305{
306 struct socket_alloc *ei;
89bddce5 307
fd60b288 308 ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
309 if (!ei)
310 return NULL;
333f7909
AV
311 init_waitqueue_head(&ei->socket.wq.wait);
312 ei->socket.wq.fasync_list = NULL;
313 ei->socket.wq.flags = 0;
89bddce5 314
1da177e4
LT
315 ei->socket.state = SS_UNCONNECTED;
316 ei->socket.flags = 0;
317 ei->socket.ops = NULL;
318 ei->socket.sk = NULL;
319 ei->socket.file = NULL;
1da177e4
LT
320
321 return &ei->vfs_inode;
322}
323
6d7855c5 324static void sock_free_inode(struct inode *inode)
1da177e4 325{
43815482
ED
326 struct socket_alloc *ei;
327
328 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 329 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
330}
331
51cc5068 332static void init_once(void *foo)
1da177e4 333{
89bddce5 334 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 335
a35afb83 336 inode_init_once(&ei->vfs_inode);
1da177e4 337}
89bddce5 338
1e911632 339static void init_inodecache(void)
1da177e4
LT
340{
341 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
342 sizeof(struct socket_alloc),
343 0,
344 (SLAB_HWCACHE_ALIGN |
345 SLAB_RECLAIM_ACCOUNT |
5d097056 346 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 347 init_once);
1e911632 348 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
349}
350
/*
 * Superblock operations for sockfs: inodes are allocated and freed through
 * the socket inode cache, statfs uses the generic simple_statfs().
 */
static const struct super_operations sockfs_ops = {
	.alloc_inode	= sock_alloc_inode,
	.free_inode	= sock_free_inode,
	.statfs		= simple_statfs,
};
356
c23fbb6b
ED
357/*
358 * sockfs_dname() is called from d_path().
359 */
360static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
361{
0f60d288 362 return dynamic_dname(buffer, buflen, "socket:[%lu]",
c5ef6035 363 d_inode(dentry)->i_ino);
c23fbb6b
ED
364}
365
3ba13d17 366static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 367 .d_dname = sockfs_dname,
1da177e4
LT
368};
369
bba0bd31
AG
370static int sockfs_xattr_get(const struct xattr_handler *handler,
371 struct dentry *dentry, struct inode *inode,
372 const char *suffix, void *value, size_t size)
373{
374 if (value) {
375 if (dentry->d_name.len + 1 > size)
376 return -ERANGE;
377 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
378 }
379 return dentry->d_name.len + 1;
380}
381
382#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
383#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
384#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
385
386static const struct xattr_handler sockfs_xattr_handler = {
387 .name = XATTR_NAME_SOCKPROTONAME,
388 .get = sockfs_xattr_get,
389};
390
/*
 * ->set() for "security.*" xattrs on socket inodes. Nothing is stored here;
 * the handler unconditionally returns -EAGAIN.
 */
static int sockfs_security_xattr_set(const struct xattr_handler *handler,
				     struct mnt_idmap *idmap,
				     struct dentry *dentry, struct inode *inode,
				     const char *suffix, const void *value,
				     size_t size, int flags)
{
	/* Handled by LSM. */
	return -EAGAIN;
}

static const struct xattr_handler sockfs_security_xattr_handler = {
	.prefix = XATTR_SECURITY_PREFIX,
	.set = sockfs_security_xattr_set,
};

/* NULL-terminated handler table installed by sockfs_init_fs_context(). */
static const struct xattr_handler * const sockfs_xattr_handlers[] = {
	&sockfs_xattr_handler,
	&sockfs_security_xattr_handler,
	NULL
};
411
fba9be49 412static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 413{
fba9be49
DH
414 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
415 if (!ctx)
416 return -ENOMEM;
417 ctx->ops = &sockfs_ops;
418 ctx->dops = &sockfs_dentry_operations;
419 ctx->xattr = sockfs_xattr_handlers;
420 return 0;
c74a1cbb
AV
421}
422
423static struct vfsmount *sock_mnt __read_mostly;
424
425static struct file_system_type sock_fs_type = {
426 .name = "sockfs",
fba9be49 427 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
428 .kill_sb = kill_anon_super,
429};
430
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success they return the file descriptor,
 *	with the file struct implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we return
 *	from this function. We rely on the fact that we no longer refer
 *	to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY BE not valid!
 *	This race condition is unavoidable
 *	with shared fd spaces; we cannot solve it inside the kernel,
 *	but we still take care of internal coherence.
 */
447
8a3c245c
PT
448/**
449 * sock_alloc_file - Bind a &socket to a &file
450 * @sock: socket
451 * @flags: file status flags
452 * @dname: protocol name
453 *
454 * Returns the &file bound with @sock, implicitly storing it
455 * in sock->file. If dname is %NULL, sets to "".
649c15c7
TLSC
456 *
457 * On failure @sock is released, and an ERR pointer is returned.
458 *
8a3c245c
PT
459 * This function uses GFP_KERNEL internally.
460 */
461
aab174f0 462struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 463{
7cbe66b6 464 struct file *file;
1da177e4 465
d93aa9d8
AV
466 if (!dname)
467 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 468
d93aa9d8
AV
469 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
470 O_RDWR | (flags & O_NONBLOCK),
471 &socket_file_ops);
b5ffe634 472 if (IS_ERR(file)) {
8e1611e2 473 sock_release(sock);
39b65252 474 return file;
cc3808f8
AV
475 }
476
fe34db06 477 file->f_mode |= FMODE_NOWAIT;
cc3808f8 478 sock->file = file;
39d8c1b6 479 file->private_data = sock;
d8e464ec 480 stream_open(SOCK_INODE(sock), file);
28407630 481 return file;
39d8c1b6 482}
56b31d1c 483EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 484
56b31d1c 485static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
486{
487 struct file *newfile;
28407630 488 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
489 if (unlikely(fd < 0)) {
490 sock_release(sock);
28407630 491 return fd;
ce4bb04c 492 }
39d8c1b6 493
aab174f0 494 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 495 if (!IS_ERR(newfile)) {
39d8c1b6 496 fd_install(fd, newfile);
28407630
AV
497 return fd;
498 }
7cbe66b6 499
28407630
AV
500 put_unused_fd(fd);
501 return PTR_ERR(newfile);
1da177e4
LT
502}
503
8a3c245c
PT
504/**
505 * sock_from_file - Return the &socket bounded to @file.
506 * @file: file
8a3c245c 507 *
dba4a925 508 * On failure returns %NULL.
8a3c245c
PT
509 */
510
dba4a925 511struct socket *sock_from_file(struct file *file)
6cb153ca 512{
6cb153ca 513 if (file->f_op == &socket_file_ops)
da214a47 514 return file->private_data; /* set in sock_alloc_file */
6cb153ca 515
23bb80d2 516 return NULL;
6cb153ca 517}
406a3c63 518EXPORT_SYMBOL(sock_from_file);
6cb153ca 519
1da177e4 520/**
c6d409cf 521 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
522 * @fd: file handle
523 * @err: pointer to an error code return
524 *
525 * The file handle passed in is locked and the socket it is bound
241c4667 526 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
527 * with a negative errno code and NULL is returned. The function checks
528 * for both invalid handles and passing a handle which is not a socket.
529 *
530 * On a success the socket object pointer is returned.
531 */
532
533struct socket *sockfd_lookup(int fd, int *err)
534{
535 struct file *file;
1da177e4
LT
536 struct socket *sock;
537
89bddce5
SH
538 file = fget(fd);
539 if (!file) {
1da177e4
LT
540 *err = -EBADF;
541 return NULL;
542 }
89bddce5 543
dba4a925
FR
544 sock = sock_from_file(file);
545 if (!sock) {
546 *err = -ENOTSOCK;
1da177e4 547 fput(file);
dba4a925 548 }
6cb153ca
BL
549 return sock;
550}
c6d409cf 551EXPORT_SYMBOL(sockfd_lookup);
1da177e4 552
6cb153ca
BL
553static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
554{
00e188ef 555 struct fd f = fdget(fd);
6cb153ca
BL
556 struct socket *sock;
557
3672558c 558 *err = -EBADF;
00e188ef 559 if (f.file) {
dba4a925 560 sock = sock_from_file(f.file);
00e188ef 561 if (likely(sock)) {
ce787a5a 562 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 563 return sock;
00e188ef 564 }
dba4a925 565 *err = -ENOTSOCK;
00e188ef 566 fdput(f);
1da177e4 567 }
6cb153ca 568 return NULL;
1da177e4
LT
569}
570
600e1779
MY
571static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
572 size_t size)
573{
574 ssize_t len;
575 ssize_t used = 0;
576
c5ef6035 577 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
578 if (len < 0)
579 return len;
580 used += len;
581 if (buffer) {
582 if (size < used)
583 return -ERANGE;
584 buffer += len;
585 }
586
587 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
588 used += len;
589 if (buffer) {
590 if (size < used)
591 return -ERANGE;
592 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
593 buffer += len;
594 }
595
596 return used;
597}
598
c1632a0f 599static int sockfs_setattr(struct mnt_idmap *idmap,
549c7297 600 struct dentry *dentry, struct iattr *iattr)
86741ec2 601{
c1632a0f 602 int err = simple_setattr(&nop_mnt_idmap, dentry, iattr);
86741ec2 603
e1a3a60a 604 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
605 struct socket *sock = SOCKET_I(d_inode(dentry));
606
6d8c50dc
CW
607 if (sock->sk)
608 sock->sk->sk_uid = iattr->ia_uid;
609 else
610 err = -ENOENT;
86741ec2
LC
611 }
612
613 return err;
614}
615
600e1779 616static const struct inode_operations sockfs_inode_ops = {
600e1779 617 .listxattr = sockfs_listxattr,
86741ec2 618 .setattr = sockfs_setattr,
600e1779
MY
619};
620
1da177e4 621/**
8a3c245c 622 * sock_alloc - allocate a socket
89bddce5 623 *
1da177e4
LT
624 * Allocate a new inode and socket object. The two are bound together
625 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 626 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
627 */
628
f4a00aac 629struct socket *sock_alloc(void)
1da177e4 630{
89bddce5
SH
631 struct inode *inode;
632 struct socket *sock;
1da177e4 633
a209dfc7 634 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
635 if (!inode)
636 return NULL;
637
638 sock = SOCKET_I(inode);
639
85fe4025 640 inode->i_ino = get_next_ino();
89bddce5 641 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
642 inode->i_uid = current_fsuid();
643 inode->i_gid = current_fsgid();
600e1779 644 inode->i_op = &sockfs_inode_ops;
1da177e4 645
1da177e4
LT
646 return sock;
647}
f4a00aac 648EXPORT_SYMBOL(sock_alloc);
1da177e4 649
6d8c50dc 650static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4 651{
1ded5e5a
ED
652 const struct proto_ops *ops = READ_ONCE(sock->ops);
653
654 if (ops) {
655 struct module *owner = ops->owner;
1da177e4 656
6d8c50dc
CW
657 if (inode)
658 inode_lock(inode);
1ded5e5a 659 ops->release(sock);
ff7b11aa 660 sock->sk = NULL;
6d8c50dc
CW
661 if (inode)
662 inode_unlock(inode);
1da177e4
LT
663 sock->ops = NULL;
664 module_put(owner);
665 }
666
333f7909 667 if (sock->wq.fasync_list)
3410f22e 668 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 669
1da177e4
LT
670 if (!sock->file) {
671 iput(SOCK_INODE(sock));
672 return;
673 }
89bddce5 674 sock->file = NULL;
1da177e4 675}
6d8c50dc 676
9a8ad9ac
AL
/**
 *	sock_release - close a socket
 *	@sock: socket to close
 *
 *	The socket is released from the protocol stack if it has a release
 *	callback, and the inode is then released if the socket is bound to
 *	an inode not a file.
 *
 *	This is __sock_release() called without an inode, so the inode lock
 *	is not taken around the protocol's release callback.
 */
void sock_release(struct socket *sock)
{
	__sock_release(sock, NULL);
}
EXPORT_SYMBOL(sock_release);
1da177e4 690
c14ac945 691void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 692{
140c55d4
ED
693 u8 flags = *tx_flags;
694
51eb7492 695 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
140c55d4
ED
696 flags |= SKBTX_HW_TSTAMP;
697
51eb7492
GE
698 /* PTP hardware clocks can provide a free running cycle counter
699 * as a time base for virtual clocks. Tell driver to use the
700 * free running cycle counter for timestamp if socket is bound
701 * to virtual clock.
702 */
703 if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
704 flags |= SKBTX_HW_TSTAMP_USE_CYCLES;
705 }
706
c14ac945 707 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
708 flags |= SKBTX_SW_TSTAMP;
709
c14ac945 710 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
711 flags |= SKBTX_SCHED_TSTAMP;
712
140c55d4 713 *tx_flags = flags;
20d49473 714}
67cc0d40 715EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 716
8c3c447b
PA
717INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
718 size_t));
a648a592
PA
719INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
720 size_t));
6e6eda44
YC
721
722static noinline void call_trace_sock_send_length(struct sock *sk, int ret,
723 int flags)
724{
725 trace_sock_send_length(sk, ret, 0);
726}
727
d8725c86 728static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 729{
1ded5e5a 730 int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->sendmsg, inet6_sendmsg,
a648a592
PA
731 inet_sendmsg, sock, msg,
732 msg_data_left(msg));
d8725c86 733 BUG_ON(ret == -EIOCBQUEUED);
6e6eda44
YC
734
735 if (trace_sock_send_length_enabled())
736 call_trace_sock_send_length(sock->sk, ret, 0);
d8725c86 737 return ret;
1da177e4
LT
738}
739
86a7e0b6
JR
/*
 * Run the security hook for sendmsg and, if it allows the operation, send
 * @msg. Returns the hook's error, or the result of sock_sendmsg_nosec().
 */
static int __sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
747
85806af0
RD
748/**
749 * sock_sendmsg - send a message through @sock
750 * @sock: socket
751 * @msg: message to send
752 *
753 * Sends @msg through @sock, passing through LSM.
754 * Returns the number of bytes sent, or an error code.
755 */
d8725c86 756int sock_sendmsg(struct socket *sock, struct msghdr *msg)
228e548e 757{
86a7e0b6
JR
758 struct sockaddr_storage *save_addr = (struct sockaddr_storage *)msg->msg_name;
759 struct sockaddr_storage address;
760 int ret;
228e548e 761
86a7e0b6
JR
762 if (msg->msg_name) {
763 memcpy(&address, msg->msg_name, msg->msg_namelen);
764 msg->msg_name = &address;
765 }
766
767 ret = __sock_sendmsg(sock, msg);
768 msg->msg_name = save_addr;
769
770 return ret;
0cf00c6f 771}
c6d409cf 772EXPORT_SYMBOL(sock_sendmsg);
1da177e4 773
8a3c245c
PT
/**
 *	kernel_sendmsg - send a message through @sock (kernel-space)
 *	@sock: socket
 *	@msg: message header
 *	@vec: kernel vec
 *	@num: vec array length
 *	@size: total message data size
 *
 *	Builds the message data with @vec and sends it through @sock.
 *	msg->msg_iter is (re)initialised here as a kvec source iterator over
 *	@vec, so any iterator previously set up in @msg is overwritten.
 *	Returns the number of bytes sent, or an error code.
 */

int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
		   struct kvec *vec, size_t num, size_t size)
{
	iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
	return sock_sendmsg(sock, msg);
}
EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 793
8a3c245c
PT
794/**
795 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
796 * @sk: sock
797 * @msg: message header
798 * @vec: output s/g array
799 * @num: output s/g array length
800 * @size: total message data size
801 *
802 * Builds the message data with @vec and sends it through @sock.
803 * Returns the number of bytes sent, or an error code.
804 * Caller must hold @sk.
805 */
806
306b13eb
TH
807int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
808 struct kvec *vec, size_t num, size_t size)
809{
810 struct socket *sock = sk->sk_socket;
1ded5e5a 811 const struct proto_ops *ops = READ_ONCE(sock->ops);
306b13eb 812
1ded5e5a 813 if (!ops->sendmsg_locked)
db5980d8 814 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 815
de4eda9d 816 iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
306b13eb 817
1ded5e5a 818 return ops->sendmsg_locked(sk, msg, msg_data_left(msg));
306b13eb
TH
819}
820EXPORT_SYMBOL(kernel_sendmsg_locked);
821
8605330a
SHY
/* Tell whether @skb was taken from a socket error queue. */
static bool skb_is_err_queue(const struct sk_buff *skb)
{
	/* pkt_type of skbs enqueued on the error queue are set to
	 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
	 * in recvmsg, since skbs received on a local socket will never
	 * have a pkt_type of PACKET_OUTGOING.
	 */
	return skb->pkt_type == PACKET_OUTGOING;
}
831
b50a5c70
ML
832/* On transmit, software and hardware timestamps are returned independently.
833 * As the two skb clones share the hardware timestamp, which may be updated
834 * before the software timestamp is received, a hardware TX timestamp may be
835 * returned only if there is no software TX timestamp. Ignore false software
836 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 837 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
838 * hardware timestamp.
839 */
840static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
841{
842 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
843}
844
97dc7cd9
GE
845static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
846{
e3390b30 847 bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
97dc7cd9
GE
848 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
849 struct net_device *orig_dev;
850 ktime_t hwtstamp;
851
852 rcu_read_lock();
853 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
854 if (orig_dev) {
855 *if_index = orig_dev->ifindex;
856 hwtstamp = netdev_get_tstamp(orig_dev, shhwtstamps, cycles);
857 } else {
858 hwtstamp = shhwtstamps->hwtstamp;
859 }
860 rcu_read_unlock();
861
862 return hwtstamp;
863}
864
865static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb,
866 int if_index)
aad9c8c4
ML
867{
868 struct scm_ts_pktinfo ts_pktinfo;
869 struct net_device *orig_dev;
870
871 if (!skb_mac_header_was_set(skb))
872 return;
873
874 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
875
97dc7cd9
GE
876 if (!if_index) {
877 rcu_read_lock();
878 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
879 if (orig_dev)
880 if_index = orig_dev->ifindex;
881 rcu_read_unlock();
882 }
883 ts_pktinfo.if_index = if_index;
aad9c8c4
ML
884
885 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
886 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
887 sizeof(ts_pktinfo), &ts_pktinfo);
888}
889
92f37fd2
ED
/*
 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
 *
 * Emits the timestamp control messages for @skb into @msg:
 *  - the plain receive timestamp (SO_TIMESTAMP / SO_TIMESTAMPNS, in the
 *    old or new layout) when SOCK_RCVTSTAMP is set on the socket;
 *  - an scm_timestamping record with the software timestamp in ts[0]
 *    and/or the hardware timestamp in ts[2], depending on sk_tsflags;
 *  - optionally SCM_TIMESTAMPING_PKTINFO and SCM_TIMESTAMPING_OPT_STATS.
 */
void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
	struct scm_timestamping_internal tss;
	/* empty: no timestamp stored in tss yet; false_tstamp: skb->tstamp
	 * was made up below rather than taken at receive time.
	 */
	int empty = 1, false_tstamp = 0;
	struct skb_shared_hwtstamps *shhwtstamps =
		skb_hwtstamps(skb);
	int if_index;
	ktime_t hwtstamp;
	u32 tsflags;

	/* Race occurred between timestamp enabling and packet
	   receiving.  Fill in the current time for now. */
	if (need_software_tstamp && skb->tstamp == 0) {
		__net_timestamp(skb);
		false_tstamp = 1;
	}

	/* Plain receive timestamp: microsecond (timeval) resolution unless
	 * SOCK_RCVTSTAMPNS asks for nanoseconds (timespec), each in the old
	 * or new structure layout depending on SOCK_TSTAMP_NEW.
	 */
	if (need_software_tstamp) {
		if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
			if (new_tstamp) {
				struct __kernel_sock_timeval tv;

				skb_get_new_timestamp(skb, &tv);
				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
					 sizeof(tv), &tv);
			} else {
				struct __kernel_old_timeval tv;

				skb_get_timestamp(skb, &tv);
				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
					 sizeof(tv), &tv);
			}
		} else {
			if (new_tstamp) {
				struct __kernel_timespec ts;

				skb_get_new_timestampns(skb, &ts);
				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
					 sizeof(ts), &ts);
			} else {
				struct __kernel_old_timespec ts;

				skb_get_timestampns(skb, &ts);
				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
					 sizeof(ts), &ts);
			}
		}
	}

	memset(&tss, 0, sizeof(tss));
	/* Snapshot the flags once; all tests below use the same value. */
	tsflags = READ_ONCE(sk->sk_tsflags);
	/* Slot 0: software timestamp. */
	if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
	    ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
		empty = 0;
	/* Slot 2: raw hardware timestamp, skipped when the skb carries a
	 * genuine software TX timestamp (see skb_is_swtx_tstamp()).
	 */
	if (shhwtstamps &&
	    (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
	    !skb_is_swtx_tstamp(skb, false_tstamp)) {
		/* 0 lets put_ts_pktinfo() look the interface up itself. */
		if_index = 0;
		if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
			hwtstamp = get_timestamp(sk, skb, &if_index);
		else
			hwtstamp = shhwtstamps->hwtstamp;

		/* Socket bound to a PHC: convert via ptp_convert_timestamp(). */
		if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
			hwtstamp = ptp_convert_timestamp(&hwtstamp,
							 READ_ONCE(sk->sk_bind_phc));

		if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
			empty = 0;

			/* Packet info is not reported for error-queue skbs. */
			if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
			    !skb_is_err_queue(skb))
				put_ts_pktinfo(msg, skb, if_index);
		}
	}
	if (!empty) {
		if (sock_flag(sk, SOCK_TSTAMP_NEW))
			put_cmsg_scm_timestamping64(msg, &tss);
		else
			put_cmsg_scm_timestamping(msg, &tss);

		/* For error-queue skbs flagged opt_stats, the skb payload
		 * itself is passed up as the OPT_STATS control message.
		 */
		if (skb_is_err_queue(skb) && skb->len &&
		    SKB_EXT_ERR(skb)->opt_stats)
			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
				 skb->len, skb->data);
	}
}
EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
984
#ifdef CONFIG_WIRELESS
/*
 * Hand the Wi-Fi ACK status carried by @skb to the receiver as an
 * SCM_WIFI_STATUS control message on @msg.
 *
 * Nothing is emitted unless @sk has SOCK_WIFI_STATUS set and the skb
 * marks its ACK indication as valid.
 */
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	int acked;

	if (!sock_flag(sk, SOCK_WIFI_STATUS) || !skb->wifi_acked_valid)
		return;

	/* Copy into a local int: the cmsg payload is sizeof(int) bytes. */
	acked = skb->wifi_acked;
	put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(acked), &acked);
}
EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
#endif
6e3e939f 1002
11165f14 1003static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
1004 struct sk_buff *skb)
3b885787 1005{
744d5a3e 1006 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 1007 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 1008 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
1009}
1010
6fd1d51c
EM
1011static void sock_recv_mark(struct msghdr *msg, struct sock *sk,
1012 struct sk_buff *skb)
1013{
2558b803
ED
1014 if (sock_flag(sk, SOCK_RCVMARK) && skb) {
1015 /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */
1016 __u32 mark = skb->mark;
1017
1018 put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), &mark);
1019 }
6fd1d51c
EM
1020}
1021
/*
 * Attach the generic per-packet control messages to @msg, in this order:
 * timestamps, receive-queue drop count, packet mark.  Each helper decides
 * for itself whether the socket asked for its message.
 */
void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
		       struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
	sock_recv_mark(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_cmsgs);
3b885787 1030
8c3c447b 1031INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
1032 size_t, int));
1033INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
1034 size_t, int));
6e6eda44
YC
1035
1036static noinline void call_trace_sock_recv_length(struct sock *sk, int ret, int flags)
1037{
1038 trace_sock_recv_length(sk, ret, flags);
1039}
1040
1b784140 1041static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 1042 int flags)
1da177e4 1043{
1ded5e5a
ED
1044 int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->recvmsg,
1045 inet6_recvmsg,
6e6eda44
YC
1046 inet_recvmsg, sock, msg,
1047 msg_data_left(msg), flags);
1048 if (trace_sock_recv_length_enabled())
1049 call_trace_sock_recv_length(sock->sk, ret, flags);
1050 return ret;
1da177e4
LT
1051}
1052
85806af0
RD
1053/**
1054 * sock_recvmsg - receive a message from @sock
1055 * @sock: socket
1056 * @msg: message to receive
1057 * @flags: message flags
1058 *
1059 * Receives @msg from @sock, passing through LSM. Returns the total number
1060 * of bytes received, or an error.
1061 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	/* The security hook runs first; a non-zero result is returned as-is. */
	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 1069
c1249c0a 1070/**
8a3c245c
PT
1071 * kernel_recvmsg - Receive a message from a socket (kernel space)
1072 * @sock: The socket to receive the message from
1073 * @msg: Received message
1074 * @vec: Input s/g array for message data
1075 * @num: Size of input s/g array
1076 * @size: Number of bytes to read
1077 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 1078 *
8a3c245c
PT
1079 * On return the msg structure contains the scatter/gather array passed in the
1080 * vec argument. The array is modified so that it consists of the unfilled
1081 * portion of the original array.
c1249c0a 1082 *
8a3c245c 1083 * The returned value is the total number of bytes received, or an error.
c1249c0a 1084 */
8a3c245c 1085
89bddce5
SH
1086int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
1087 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4 1088{
1f466e1f 1089 msg->msg_control_is_user = false;
de4eda9d 1090 iov_iter_kvec(&msg->msg_iter, ITER_DEST, vec, num, size);
1f466e1f 1091 return sock_recvmsg(sock, msg, flags);
1da177e4 1092}
c6d409cf 1093EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 1094
9c55e01c 1095static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 1096 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
1097 unsigned int flags)
1098{
1099 struct socket *sock = file->private_data;
1ded5e5a 1100 const struct proto_ops *ops;
9c55e01c 1101
1ded5e5a
ED
1102 ops = READ_ONCE(sock->ops);
1103 if (unlikely(!ops->splice_read))
67178fd0 1104 return copy_splice_read(file, ppos, pipe, len, flags);
997b37da 1105
1ded5e5a 1106 return ops->splice_read(sock, ppos, pipe, len, flags);
9c55e01c
JA
1107}
1108
2bfc6685
DH
1109static void sock_splice_eof(struct file *file)
1110{
1111 struct socket *sock = file->private_data;
1ded5e5a 1112 const struct proto_ops *ops;
2bfc6685 1113
1ded5e5a
ED
1114 ops = READ_ONCE(sock->ops);
1115 if (ops->splice_eof)
1116 ops->splice_eof(sock);
2bfc6685
DH
1117}
1118
8ae5e030 1119static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 1120{
6d652330
AV
1121 struct file *file = iocb->ki_filp;
1122 struct socket *sock = file->private_data;
0345f931 1123 struct msghdr msg = {.msg_iter = *to,
1124 .msg_iocb = iocb};
8ae5e030 1125 ssize_t res;
ce1d4d3e 1126
ebfcd895 1127 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1128 msg.msg_flags = MSG_DONTWAIT;
1129
1130 if (iocb->ki_pos != 0)
1da177e4 1131 return -ESPIPE;
027445c3 1132
66ee59af 1133 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
1134 return 0;
1135
2da62906 1136 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
1137 *to = msg.msg_iter;
1138 return res;
1da177e4
LT
1139}
1140
8ae5e030 1141static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 1142{
6d652330
AV
1143 struct file *file = iocb->ki_filp;
1144 struct socket *sock = file->private_data;
0345f931 1145 struct msghdr msg = {.msg_iter = *from,
1146 .msg_iocb = iocb};
8ae5e030 1147 ssize_t res;
1da177e4 1148
8ae5e030 1149 if (iocb->ki_pos != 0)
ce1d4d3e 1150 return -ESPIPE;
027445c3 1151
ebfcd895 1152 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1153 msg.msg_flags = MSG_DONTWAIT;
1154
6d652330
AV
1155 if (sock->type == SOCK_SEQPACKET)
1156 msg.msg_flags |= MSG_EOR;
1157
86a7e0b6 1158 res = __sock_sendmsg(sock, &msg);
8ae5e030
AV
1159 *from = msg.msg_iter;
1160 return res;
1da177e4
LT
1161}
1162
1da177e4
LT
1163/*
1164 * Atomic setting of ioctl hooks to avoid race
1165 * with module unload.
1166 */
1167
4a3e2f71 1168static DEFINE_MUTEX(br_ioctl_mutex);
ad2f99ae
AB
1169static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br,
1170 unsigned int cmd, struct ifreq *ifr,
1171 void __user *uarg);
1da177e4 1172
ad2f99ae
AB
1173void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
1174 unsigned int cmd, struct ifreq *ifr,
1175 void __user *uarg))
1da177e4 1176{
4a3e2f71 1177 mutex_lock(&br_ioctl_mutex);
1da177e4 1178 br_ioctl_hook = hook;
4a3e2f71 1179 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1180}
1181EXPORT_SYMBOL(brioctl_set);
1182
ad2f99ae
AB
1183int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
1184 struct ifreq *ifr, void __user *uarg)
1185{
1186 int err = -ENOPKG;
1187
1188 if (!br_ioctl_hook)
1189 request_module("bridge");
1190
1191 mutex_lock(&br_ioctl_mutex);
1192 if (br_ioctl_hook)
1193 err = br_ioctl_hook(net, br, cmd, ifr, uarg);
1194 mutex_unlock(&br_ioctl_mutex);
1195
1196 return err;
1197}
1198
4a3e2f71 1199static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1200static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1201
881d966b 1202void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1203{
4a3e2f71 1204 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1205 vlan_ioctl_hook = hook;
4a3e2f71 1206 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1207}
1208EXPORT_SYMBOL(vlan_ioctl_set);
1209
6b96018b 1210static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1211 unsigned int cmd, unsigned long arg)
6b96018b 1212{
1ded5e5a 1213 const struct proto_ops *ops = READ_ONCE(sock->ops);
876f0bf9
AB
1214 struct ifreq ifr;
1215 bool need_copyout;
6b96018b
AB
1216 int err;
1217 void __user *argp = (void __user *)arg;
a554bf96 1218 void __user *data;
6b96018b 1219
1ded5e5a 1220 err = ops->ioctl(sock, cmd, arg);
6b96018b
AB
1221
1222 /*
1223 * If this ioctl is unknown try to hand it down
1224 * to the NIC driver.
1225 */
36fd633e
AV
1226 if (err != -ENOIOCTLCMD)
1227 return err;
6b96018b 1228
29ce8f97
JK
1229 if (!is_socket_ioctl_cmd(cmd))
1230 return -ENOTTY;
1231
a554bf96 1232 if (get_user_ifreq(&ifr, &data, argp))
876f0bf9 1233 return -EFAULT;
a554bf96 1234 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
876f0bf9 1235 if (!err && need_copyout)
a554bf96 1236 if (put_user_ifreq(&ifr, argp))
44c02a2c 1237 return -EFAULT;
876f0bf9 1238
6b96018b
AB
1239 return err;
1240}
1241
1da177e4
LT
1242/*
1243 * With an ioctl, arg may well be a user mode pointer, but we don't know
1244 * what to do with it - that's up to the protocol still.
1245 */
1246
1247static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1248{
1ded5e5a 1249 const struct proto_ops *ops;
1da177e4 1250 struct socket *sock;
881d966b 1251 struct sock *sk;
1da177e4
LT
1252 void __user *argp = (void __user *)arg;
1253 int pid, err;
881d966b 1254 struct net *net;
1da177e4 1255
b69aee04 1256 sock = file->private_data;
1ded5e5a 1257 ops = READ_ONCE(sock->ops);
881d966b 1258 sk = sock->sk;
3b1e0a65 1259 net = sock_net(sk);
44c02a2c
AV
1260 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1261 struct ifreq ifr;
a554bf96 1262 void __user *data;
44c02a2c 1263 bool need_copyout;
a554bf96 1264 if (get_user_ifreq(&ifr, &data, argp))
44c02a2c 1265 return -EFAULT;
a554bf96 1266 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
44c02a2c 1267 if (!err && need_copyout)
a554bf96 1268 if (put_user_ifreq(&ifr, argp))
44c02a2c 1269 return -EFAULT;
1da177e4 1270 } else
3d23e349 1271#ifdef CONFIG_WEXT_CORE
1da177e4 1272 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1273 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1274 } else
3d23e349 1275#endif
89bddce5 1276 switch (cmd) {
1da177e4
LT
1277 case FIOSETOWN:
1278 case SIOCSPGRP:
1279 err = -EFAULT;
1280 if (get_user(pid, (int __user *)argp))
1281 break;
393cc3f5 1282 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1283 break;
1284 case FIOGETOWN:
1285 case SIOCGPGRP:
609d7fa9 1286 err = put_user(f_getown(sock->file),
89bddce5 1287 (int __user *)argp);
1da177e4
LT
1288 break;
1289 case SIOCGIFBR:
1290 case SIOCSIFBR:
1291 case SIOCBRADDBR:
1292 case SIOCBRDELBR:
ad2f99ae 1293 err = br_ioctl_call(net, NULL, cmd, NULL, argp);
1da177e4
LT
1294 break;
1295 case SIOCGIFVLAN:
1296 case SIOCSIFVLAN:
1297 err = -ENOPKG;
1298 if (!vlan_ioctl_hook)
1299 request_module("8021q");
1300
4a3e2f71 1301 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1302 if (vlan_ioctl_hook)
881d966b 1303 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1304 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1305 break;
c62cce2c
AV
1306 case SIOCGSKNS:
1307 err = -EPERM;
1308 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1309 break;
1310
1311 err = open_related_ns(&net->ns, get_net_ns);
1312 break;
0768e170
AB
1313 case SIOCGSTAMP_OLD:
1314 case SIOCGSTAMPNS_OLD:
1ded5e5a 1315 if (!ops->gettstamp) {
c7cbdbf2
AB
1316 err = -ENOIOCTLCMD;
1317 break;
1318 }
1ded5e5a
ED
1319 err = ops->gettstamp(sock, argp,
1320 cmd == SIOCGSTAMP_OLD,
1321 !IS_ENABLED(CONFIG_64BIT));
60747828 1322 break;
0768e170
AB
1323 case SIOCGSTAMP_NEW:
1324 case SIOCGSTAMPNS_NEW:
1ded5e5a 1325 if (!ops->gettstamp) {
0768e170
AB
1326 err = -ENOIOCTLCMD;
1327 break;
1328 }
1ded5e5a
ED
1329 err = ops->gettstamp(sock, argp,
1330 cmd == SIOCGSTAMP_NEW,
1331 false);
c7cbdbf2 1332 break;
876f0bf9
AB
1333
1334 case SIOCGIFCONF:
1335 err = dev_ifconf(net, argp);
1336 break;
1337
1da177e4 1338 default:
63ff03ab 1339 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1340 break;
89bddce5 1341 }
1da177e4
LT
1342 return err;
1343}
1344
8a3c245c
PT
1345/**
1346 * sock_create_lite - creates a socket
1347 * @family: protocol family (AF_INET, ...)
1348 * @type: communication type (SOCK_STREAM, ...)
1349 * @protocol: protocol (0, ...)
1350 * @res: new socket
1351 *
1352 * Creates a new socket and assigns it to @res, passing through LSM.
1353 * The new socket initialization is not complete, see kernel_accept().
1354 * Returns 0 or an error. On failure @res is set to %NULL.
1355 * This function internally uses GFP_KERNEL.
1356 */
1357
1da177e4
LT
1358int sock_create_lite(int family, int type, int protocol, struct socket **res)
1359{
1360 int err;
1361 struct socket *sock = NULL;
89bddce5 1362
1da177e4
LT
1363 err = security_socket_create(family, type, protocol, 1);
1364 if (err)
1365 goto out;
1366
1367 sock = sock_alloc();
1368 if (!sock) {
1369 err = -ENOMEM;
1370 goto out;
1371 }
1372
1da177e4 1373 sock->type = type;
7420ed23
VY
1374 err = security_socket_post_create(sock, family, type, protocol, 1);
1375 if (err)
1376 goto out_release;
1377
1da177e4
LT
1378out:
1379 *res = sock;
1380 return err;
7420ed23
VY
1381out_release:
1382 sock_release(sock);
1383 sock = NULL;
1384 goto out;
1da177e4 1385}
c6d409cf 1386EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1387
1388/* No kernel lock held - perfect */
ade994f4 1389static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1390{
3cafb376 1391 struct socket *sock = file->private_data;
1ded5e5a 1392 const struct proto_ops *ops = READ_ONCE(sock->ops);
a331de3b 1393 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1394
1ded5e5a 1395 if (!ops->poll)
e88958e6 1396 return 0;
f641f13b 1397
a331de3b
CH
1398 if (sk_can_busy_loop(sock->sk)) {
1399 /* poll once if requested by the syscall */
1400 if (events & POLL_BUSY_LOOP)
1401 sk_busy_loop(sock->sk, 1);
1402
1403 /* if this socket can poll_ll, tell the system call */
1404 flag = POLL_BUSY_LOOP;
1405 }
1406
1ded5e5a 1407 return ops->poll(file, sock, wait) | flag;
1da177e4
LT
1408}
1409
89bddce5 1410static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1411{
b69aee04 1412 struct socket *sock = file->private_data;
1da177e4 1413
1ded5e5a 1414 return READ_ONCE(sock->ops)->mmap(file, sock, vma);
1da177e4
LT
1415}
1416
/*
 * release handler for socket files: releases the socket embedded in
 * @inode.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1422
1423/*
1424 * Update the socket async list
1425 *
1426 * Fasync_list locking strategy.
1427 *
1428 * 1. fasync_list is modified only under process context socket lock
1429 * i.e. under semaphore.
1430 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1431 * or under socket lock
1da177e4
LT
1432 */
1433
1434static int sock_fasync(int fd, struct file *filp, int on)
1435{
989a2979
ED
1436 struct socket *sock = filp->private_data;
1437 struct sock *sk = sock->sk;
333f7909 1438 struct socket_wq *wq = &sock->wq;
1da177e4 1439
989a2979 1440 if (sk == NULL)
1da177e4 1441 return -EINVAL;
1da177e4
LT
1442
1443 lock_sock(sk);
eaefd110 1444 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1445
eaefd110 1446 if (!wq->fasync_list)
989a2979
ED
1447 sock_reset_flag(sk, SOCK_FASYNC);
1448 else
bcdce719 1449 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1450
989a2979 1451 release_sock(sk);
1da177e4
LT
1452 return 0;
1453}
1454
ceb5d58b 1455/* This function may be called only under rcu_lock */
1da177e4 1456
ceb5d58b 1457int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1458{
ceb5d58b 1459 if (!wq || !wq->fasync_list)
1da177e4 1460 return -1;
ceb5d58b 1461
89bddce5 1462 switch (how) {
8d8ad9d7 1463 case SOCK_WAKE_WAITD:
ceb5d58b 1464 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1465 break;
1466 goto call_kill;
8d8ad9d7 1467 case SOCK_WAKE_SPACE:
ceb5d58b 1468 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1469 break;
7c7ab580 1470 fallthrough;
8d8ad9d7 1471 case SOCK_WAKE_IO:
89bddce5 1472call_kill:
43815482 1473 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1474 break;
8d8ad9d7 1475 case SOCK_WAKE_URG:
43815482 1476 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1477 }
ceb5d58b 1478
1da177e4
LT
1479 return 0;
1480}
c6d409cf 1481EXPORT_SYMBOL(sock_wake_async);
1da177e4 1482
8a3c245c
PT
1483/**
1484 * __sock_create - creates a socket
1485 * @net: net namespace
1486 * @family: protocol family (AF_INET, ...)
1487 * @type: communication type (SOCK_STREAM, ...)
1488 * @protocol: protocol (0, ...)
1489 * @res: new socket
1490 * @kern: boolean for kernel space sockets
1491 *
1492 * Creates a new socket and assigns it to @res, passing through LSM.
1493 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1494 * be set to true if the socket resides in kernel space.
1495 * This function internally uses GFP_KERNEL.
1496 */
1497
721db93a 1498int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1499 struct socket **res, int kern)
1da177e4
LT
1500{
1501 int err;
1502 struct socket *sock;
55737fda 1503 const struct net_proto_family *pf;
1da177e4
LT
1504
1505 /*
89bddce5 1506 * Check protocol is in range
1da177e4
LT
1507 */
1508 if (family < 0 || family >= NPROTO)
1509 return -EAFNOSUPPORT;
1510 if (type < 0 || type >= SOCK_MAX)
1511 return -EINVAL;
1512
1513 /* Compatibility.
1514
1515 This uglymoron is moved from INET layer to here to avoid
1516 deadlock in module load.
1517 */
1518 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1519 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1520 current->comm);
1da177e4
LT
1521 family = PF_PACKET;
1522 }
1523
1524 err = security_socket_create(family, type, protocol, kern);
1525 if (err)
1526 return err;
89bddce5 1527
55737fda
SH
1528 /*
1529 * Allocate the socket and allow the family to set things up. if
1530 * the protocol is 0, the family is instructed to select an appropriate
1531 * default.
1532 */
1533 sock = sock_alloc();
1534 if (!sock) {
e87cc472 1535 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1536 return -ENFILE; /* Not exactly a match, but its the
1537 closest posix thing */
1538 }
1539
1540 sock->type = type;
1541
95a5afca 1542#ifdef CONFIG_MODULES
89bddce5
SH
1543 /* Attempt to load a protocol module if the find failed.
1544 *
1545 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1546 * requested real, full-featured networking support upon configuration.
1547 * Otherwise module support will break!
1548 */
190683a9 1549 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1550 request_module("net-pf-%d", family);
1da177e4
LT
1551#endif
1552
55737fda
SH
1553 rcu_read_lock();
1554 pf = rcu_dereference(net_families[family]);
1555 err = -EAFNOSUPPORT;
1556 if (!pf)
1557 goto out_release;
1da177e4
LT
1558
1559 /*
1560 * We will call the ->create function, that possibly is in a loadable
1561 * module, so we have to bump that loadable module refcnt first.
1562 */
55737fda 1563 if (!try_module_get(pf->owner))
1da177e4
LT
1564 goto out_release;
1565
55737fda
SH
1566 /* Now protected by module ref count */
1567 rcu_read_unlock();
1568
3f378b68 1569 err = pf->create(net, sock, protocol, kern);
55737fda 1570 if (err < 0)
1da177e4 1571 goto out_module_put;
a79af59e 1572
1da177e4
LT
1573 /*
1574 * Now to bump the refcnt of the [loadable] module that owns this
1575 * socket at sock_release time we decrement its refcnt.
1576 */
55737fda
SH
1577 if (!try_module_get(sock->ops->owner))
1578 goto out_module_busy;
1579
1da177e4
LT
1580 /*
1581 * Now that we're done with the ->create function, the [loadable]
1582 * module can have its refcnt decremented
1583 */
55737fda 1584 module_put(pf->owner);
7420ed23
VY
1585 err = security_socket_post_create(sock, family, type, protocol, kern);
1586 if (err)
3b185525 1587 goto out_sock_release;
55737fda 1588 *res = sock;
1da177e4 1589
55737fda
SH
1590 return 0;
1591
1592out_module_busy:
1593 err = -EAFNOSUPPORT;
1da177e4 1594out_module_put:
55737fda
SH
1595 sock->ops = NULL;
1596 module_put(pf->owner);
1597out_sock_release:
1da177e4 1598 sock_release(sock);
55737fda
SH
1599 return err;
1600
1601out_release:
1602 rcu_read_unlock();
1603 goto out_sock_release;
1da177e4 1604}
721db93a 1605EXPORT_SYMBOL(__sock_create);
1da177e4 1606
8a3c245c
PT
1607/**
1608 * sock_create - creates a socket
1609 * @family: protocol family (AF_INET, ...)
1610 * @type: communication type (SOCK_STREAM, ...)
1611 * @protocol: protocol (0, ...)
1612 * @res: new socket
1613 *
1614 * A wrapper around __sock_create().
1615 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1616 */
1617
1da177e4
LT
1618int sock_create(int family, int type, int protocol, struct socket **res)
1619{
1b8d7ae4 1620 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1621}
c6d409cf 1622EXPORT_SYMBOL(sock_create);
1da177e4 1623
8a3c245c
PT
1624/**
1625 * sock_create_kern - creates a socket (kernel space)
1626 * @net: net namespace
1627 * @family: protocol family (AF_INET, ...)
1628 * @type: communication type (SOCK_STREAM, ...)
1629 * @protocol: protocol (0, ...)
1630 * @res: new socket
1631 *
1632 * A wrapper around __sock_create().
1633 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1634 */
1635
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	/* Same as sock_create() but with an explicit @net and kern == 1. */
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1641
da214a47 1642static struct socket *__sys_socket_create(int family, int type, int protocol)
1da177e4 1643{
1da177e4 1644 struct socket *sock;
da214a47 1645 int retval;
a677a039 1646
e38b36f3
UD
1647 /* Check the SOCK_* constants for consistency. */
1648 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1649 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1650 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1651 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1652
da214a47
JA
1653 if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1654 return ERR_PTR(-EINVAL);
a677a039 1655 type &= SOCK_TYPE_MASK;
1da177e4 1656
da214a47
JA
1657 retval = sock_create(family, type, protocol, &sock);
1658 if (retval < 0)
1659 return ERR_PTR(retval);
1660
1661 return sock;
1662}
1663
1664struct file *__sys_socket_file(int family, int type, int protocol)
1665{
1666 struct socket *sock;
da214a47
JA
1667 int flags;
1668
1669 sock = __sys_socket_create(family, type, protocol);
1670 if (IS_ERR(sock))
1671 return ERR_CAST(sock);
1672
1673 flags = type & ~SOCK_TYPE_MASK;
aaca0bdc
UD
1674 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1675 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1676
649c15c7 1677 return sock_alloc_file(sock, flags, NULL);
da214a47
JA
1678}
1679
0dd061a6
GT
1680/* A hook for bpf progs to attach to and update socket protocol.
1681 *
1682 * A static noinline declaration here could cause the compiler to
1683 * optimize away the function. A global noinline declaration will
1684 * keep the definition, but may optimize away the callsite.
1685 * Therefore, __weak is needed to ensure that the call is still
1686 * emitted, by telling the compiler that we don't know what the
1687 * function might eventually be.
0dd061a6
GT
1688 */
1689
15fb6f2b 1690__bpf_hook_start();
0dd061a6
GT
1691
1692__weak noinline int update_socket_protocol(int family, int type, int protocol)
1693{
1694 return protocol;
1695}
1696
15fb6f2b 1697__bpf_hook_end();
0dd061a6 1698
da214a47
JA
1699int __sys_socket(int family, int type, int protocol)
1700{
1701 struct socket *sock;
1702 int flags;
1703
0dd061a6
GT
1704 sock = __sys_socket_create(family, type,
1705 update_socket_protocol(family, type, protocol));
da214a47
JA
1706 if (IS_ERR(sock))
1707 return PTR_ERR(sock);
1708
1709 flags = type & ~SOCK_TYPE_MASK;
1710 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1711 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1da177e4 1712
8e1611e2 1713 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1714}
1715
9d6a15c3
DB
1716SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1717{
1718 return __sys_socket(family, type, protocol);
1719}
1720
1da177e4
LT
1721/*
1722 * Create a pair of connected sockets.
1723 */
1724
6debc8d8 1725int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1726{
1727 struct socket *sock1, *sock2;
1728 int fd1, fd2, err;
db349509 1729 struct file *newfile1, *newfile2;
a677a039
UD
1730 int flags;
1731
1732 flags = type & ~SOCK_TYPE_MASK;
77d27200 1733 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1734 return -EINVAL;
1735 type &= SOCK_TYPE_MASK;
1da177e4 1736
aaca0bdc
UD
1737 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1738 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1739
016a266b
AV
1740 /*
1741 * reserve descriptors and make sure we won't fail
1742 * to return them to userland.
1743 */
1744 fd1 = get_unused_fd_flags(flags);
1745 if (unlikely(fd1 < 0))
1746 return fd1;
1747
1748 fd2 = get_unused_fd_flags(flags);
1749 if (unlikely(fd2 < 0)) {
1750 put_unused_fd(fd1);
1751 return fd2;
1752 }
1753
1754 err = put_user(fd1, &usockvec[0]);
1755 if (err)
1756 goto out;
1757
1758 err = put_user(fd2, &usockvec[1]);
1759 if (err)
1760 goto out;
1761
1da177e4
LT
1762 /*
1763 * Obtain the first socket and check if the underlying protocol
1764 * supports the socketpair call.
1765 */
1766
1767 err = sock_create(family, type, protocol, &sock1);
016a266b 1768 if (unlikely(err < 0))
1da177e4
LT
1769 goto out;
1770
1771 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1772 if (unlikely(err < 0)) {
1773 sock_release(sock1);
1774 goto out;
bf3c23d1 1775 }
d73aa286 1776
d47cd945
DH
1777 err = security_socket_socketpair(sock1, sock2);
1778 if (unlikely(err)) {
1779 sock_release(sock2);
1780 sock_release(sock1);
1781 goto out;
1782 }
1783
1ded5e5a 1784 err = READ_ONCE(sock1->ops)->socketpair(sock1, sock2);
016a266b
AV
1785 if (unlikely(err < 0)) {
1786 sock_release(sock2);
1787 sock_release(sock1);
1788 goto out;
28407630
AV
1789 }
1790
aab174f0 1791 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1792 if (IS_ERR(newfile1)) {
28407630 1793 err = PTR_ERR(newfile1);
016a266b
AV
1794 sock_release(sock2);
1795 goto out;
28407630
AV
1796 }
1797
aab174f0 1798 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1799 if (IS_ERR(newfile2)) {
1800 err = PTR_ERR(newfile2);
016a266b
AV
1801 fput(newfile1);
1802 goto out;
db349509
AV
1803 }
1804
157cf649 1805 audit_fd_pair(fd1, fd2);
d73aa286 1806
db349509
AV
1807 fd_install(fd1, newfile1);
1808 fd_install(fd2, newfile2);
d73aa286 1809 return 0;
1da177e4 1810
016a266b 1811out:
d73aa286 1812 put_unused_fd(fd2);
d73aa286 1813 put_unused_fd(fd1);
1da177e4
LT
1814 return err;
1815}
1816
6debc8d8
DB
1817SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1818 int __user *, usockvec)
1819{
1820 return __sys_socketpair(family, type, protocol, usockvec);
1821}
1822
1da177e4
LT
1823/*
1824 * Bind a name to a socket. Nothing much to do here since it's
1825 * the protocol's responsibility to handle the local address.
1826 *
1827 * We move the socket address to kernel space before we call
1828 * the protocol layer (having also checked the address is ok).
1829 */
1830
a87d35d8 1831int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1832{
1833 struct socket *sock;
230b1839 1834 struct sockaddr_storage address;
6cb153ca 1835 int err, fput_needed;
1da177e4 1836
89bddce5 1837 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1838 if (sock) {
43db362d 1839 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1840 if (!err) {
89bddce5 1841 err = security_socket_bind(sock,
230b1839 1842 (struct sockaddr *)&address,
89bddce5 1843 addrlen);
6cb153ca 1844 if (!err)
1ded5e5a 1845 err = READ_ONCE(sock->ops)->bind(sock,
89bddce5 1846 (struct sockaddr *)
230b1839 1847 &address, addrlen);
1da177e4 1848 }
6cb153ca 1849 fput_light(sock->file, fput_needed);
89bddce5 1850 }
1da177e4
LT
1851 return err;
1852}
1853
a87d35d8
DB
1854SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1855{
1856 return __sys_bind(fd, umyaddr, addrlen);
1857}
1858
1da177e4
LT
1859/*
1860 * Perform a listen. Basically, we allow the protocol to do anything
1861 * necessary for a listen, and if that works, we mark the socket as
1862 * ready for listening.
1863 */
1864
25e290ee 1865int __sys_listen(int fd, int backlog)
1da177e4
LT
1866{
1867 struct socket *sock;
6cb153ca 1868 int err, fput_needed;
b8e1f9b5 1869 int somaxconn;
89bddce5
SH
1870
1871 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1872 if (sock) {
3c9ba81d 1873 somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
95c96174 1874 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1875 backlog = somaxconn;
1da177e4
LT
1876
1877 err = security_socket_listen(sock, backlog);
6cb153ca 1878 if (!err)
1ded5e5a 1879 err = READ_ONCE(sock->ops)->listen(sock, backlog);
1da177e4 1880
6cb153ca 1881 fput_light(sock->file, fput_needed);
1da177e4
LT
1882 }
1883 return err;
1884}
1885
25e290ee
DB
1886SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1887{
1888 return __sys_listen(fd, backlog);
1889}
1890
d32f89da 1891struct file *do_accept(struct file *file, unsigned file_flags,
de2ea4b6 1892 struct sockaddr __user *upeer_sockaddr,
d32f89da 1893 int __user *upeer_addrlen, int flags)
1da177e4
LT
1894{
1895 struct socket *sock, *newsock;
39d8c1b6 1896 struct file *newfile;
d32f89da 1897 int err, len;
230b1839 1898 struct sockaddr_storage address;
1ded5e5a 1899 const struct proto_ops *ops;
1da177e4 1900
dba4a925 1901 sock = sock_from_file(file);
d32f89da
PB
1902 if (!sock)
1903 return ERR_PTR(-ENOTSOCK);
1da177e4 1904
c6d409cf
ED
1905 newsock = sock_alloc();
1906 if (!newsock)
d32f89da 1907 return ERR_PTR(-ENFILE);
1ded5e5a 1908 ops = READ_ONCE(sock->ops);
1da177e4
LT
1909
1910 newsock->type = sock->type;
1ded5e5a 1911 newsock->ops = ops;
1da177e4 1912
1da177e4
LT
1913 /*
1914 * We don't need try_module_get here, as the listening socket (sock)
1915 * has the protocol module (sock->ops->owner) held.
1916 */
1ded5e5a 1917 __module_get(ops->owner);
1da177e4 1918
aab174f0 1919 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
d32f89da
PB
1920 if (IS_ERR(newfile))
1921 return newfile;
39d8c1b6 1922
a79af59e
FF
1923 err = security_socket_accept(sock, newsock);
1924 if (err)
39d8c1b6 1925 goto out_fd;
a79af59e 1926
1ded5e5a 1927 err = ops->accept(sock, newsock, sock->file->f_flags | file_flags,
de2ea4b6 1928 false);
1da177e4 1929 if (err < 0)
39d8c1b6 1930 goto out_fd;
1da177e4
LT
1931
1932 if (upeer_sockaddr) {
1ded5e5a 1933 len = ops->getname(newsock, (struct sockaddr *)&address, 2);
9b2c45d4 1934 if (len < 0) {
1da177e4 1935 err = -ECONNABORTED;
39d8c1b6 1936 goto out_fd;
1da177e4 1937 }
43db362d 1938 err = move_addr_to_user(&address,
230b1839 1939 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1940 if (err < 0)
39d8c1b6 1941 goto out_fd;
1da177e4
LT
1942 }
1943
1944 /* File flags are not inherited via accept() unlike another OSes. */
d32f89da 1945 return newfile;
39d8c1b6 1946out_fd:
9606a216 1947 fput(newfile);
d32f89da
PB
1948 return ERR_PTR(err);
1949}
1950
c0424532
YD
1951static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr,
1952 int __user *upeer_addrlen, int flags)
d32f89da
PB
1953{
1954 struct file *newfile;
1955 int newfd;
1956
1957 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1958 return -EINVAL;
1959
1960 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1961 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
de2ea4b6 1962
c0424532 1963 newfd = get_unused_fd_flags(flags);
d32f89da
PB
1964 if (unlikely(newfd < 0))
1965 return newfd;
1966
c0424532 1967 newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen,
d32f89da
PB
1968 flags);
1969 if (IS_ERR(newfile)) {
1970 put_unused_fd(newfd);
1971 return PTR_ERR(newfile);
1972 }
1973 fd_install(newfd, newfile);
1974 return newfd;
de2ea4b6
JA
1975}
1976
1977/*
1978 * For accept, we attempt to create a new socket, set up the link
1979 * with the client, wake up the client, then return the new
1980 * connected fd. We collect the address of the connector in kernel
1981 * space and move it to user at the very end. This is unclean because
1982 * we open the socket then return an error.
1983 *
1984 * 1003.1g adds the ability to recvmsg() to query connection pending
1985 * status to recvmsg. We need to add that support in a way thats
1986 * clean when we restructure accept also.
1987 */
1988
1989int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1990 int __user *upeer_addrlen, int flags)
1991{
1992 int ret = -EBADF;
1993 struct fd f;
1994
1995 f = fdget(fd);
1996 if (f.file) {
c0424532
YD
1997 ret = __sys_accept4_file(f.file, upeer_sockaddr,
1998 upeer_addrlen, flags);
6b07edeb 1999 fdput(f);
de2ea4b6
JA
2000 }
2001
2002 return ret;
1da177e4
LT
2003}
2004
4541e805
DB
2005SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
2006 int __user *, upeer_addrlen, int, flags)
2007{
2008 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
2009}
2010
20f37034
HC
2011SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
2012 int __user *, upeer_addrlen)
aaca0bdc 2013{
4541e805 2014 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
2015}
2016
1da177e4
LT
2017/*
2018 * Attempt to connect to a socket with the server address. The address
2019 * is in user space so we verify it is OK and move it to kernel space.
2020 *
2021 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
2022 * break bindings
2023 *
2024 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
2025 * other SEQPACKET protocols that take time to connect() as it doesn't
2026 * include the -EINPROGRESS status for such sockets.
2027 */
2028
f499a021 2029int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 2030 int addrlen, int file_flags)
1da177e4
LT
2031{
2032 struct socket *sock;
bd3ded31 2033 int err;
1da177e4 2034
dba4a925
FR
2035 sock = sock_from_file(file);
2036 if (!sock) {
2037 err = -ENOTSOCK;
1da177e4 2038 goto out;
dba4a925 2039 }
1da177e4 2040
89bddce5 2041 err =
f499a021 2042 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 2043 if (err)
bd3ded31 2044 goto out;
1da177e4 2045
1ded5e5a
ED
2046 err = READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)address,
2047 addrlen, sock->file->f_flags | file_flags);
1da177e4
LT
2048out:
2049 return err;
2050}
2051
bd3ded31
JA
2052int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
2053{
2054 int ret = -EBADF;
2055 struct fd f;
2056
2057 f = fdget(fd);
2058 if (f.file) {
f499a021
JA
2059 struct sockaddr_storage address;
2060
2061 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
2062 if (!ret)
2063 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 2064 fdput(f);
bd3ded31
JA
2065 }
2066
2067 return ret;
2068}
2069
1387c2c2
DB
2070SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
2071 int, addrlen)
2072{
2073 return __sys_connect(fd, uservaddr, addrlen);
2074}
2075
1da177e4
LT
2076/*
2077 * Get the local address ('name') of a socket object. Move the obtained
2078 * name to user space.
2079 */
2080
8882a107
DB
2081int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
2082 int __user *usockaddr_len)
1da177e4
LT
2083{
2084 struct socket *sock;
230b1839 2085 struct sockaddr_storage address;
9b2c45d4 2086 int err, fput_needed;
89bddce5 2087
6cb153ca 2088 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
2089 if (!sock)
2090 goto out;
2091
2092 err = security_socket_getsockname(sock);
2093 if (err)
2094 goto out_put;
2095
1ded5e5a 2096 err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 0);
9b2c45d4 2097 if (err < 0)
1da177e4 2098 goto out_put;
e44ef1d4 2099 /* "err" is actually length in this case */
9b2c45d4 2100 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
2101
2102out_put:
6cb153ca 2103 fput_light(sock->file, fput_needed);
1da177e4
LT
2104out:
2105 return err;
2106}
2107
8882a107
DB
2108SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
2109 int __user *, usockaddr_len)
2110{
2111 return __sys_getsockname(fd, usockaddr, usockaddr_len);
2112}
2113
1da177e4
LT
2114/*
2115 * Get the remote address ('name') of a socket object. Move the obtained
2116 * name to user space.
2117 */
2118
b21c8f83
DB
2119int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
2120 int __user *usockaddr_len)
1da177e4
LT
2121{
2122 struct socket *sock;
230b1839 2123 struct sockaddr_storage address;
9b2c45d4 2124 int err, fput_needed;
1da177e4 2125
89bddce5
SH
2126 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2127 if (sock != NULL) {
1ded5e5a
ED
2128 const struct proto_ops *ops = READ_ONCE(sock->ops);
2129
1da177e4
LT
2130 err = security_socket_getpeername(sock);
2131 if (err) {
6cb153ca 2132 fput_light(sock->file, fput_needed);
1da177e4
LT
2133 return err;
2134 }
2135
1ded5e5a 2136 err = ops->getname(sock, (struct sockaddr *)&address, 1);
9b2c45d4
DV
2137 if (err >= 0)
2138 /* "err" is actually length in this case */
2139 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 2140 usockaddr_len);
6cb153ca 2141 fput_light(sock->file, fput_needed);
1da177e4
LT
2142 }
2143 return err;
2144}
2145
b21c8f83
DB
2146SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
2147 int __user *, usockaddr_len)
2148{
2149 return __sys_getpeername(fd, usockaddr, usockaddr_len);
2150}
2151
1da177e4
LT
2152/*
2153 * Send a datagram to a given address. We move the address into kernel
2154 * space and check the user space data area is readable before invoking
2155 * the protocol.
2156 */
211b634b
DB
2157int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
2158 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
2159{
2160 struct socket *sock;
230b1839 2161 struct sockaddr_storage address;
1da177e4
LT
2162 int err;
2163 struct msghdr msg;
2164 struct iovec iov;
6cb153ca 2165 int fput_needed;
6cb153ca 2166
de4eda9d 2167 err = import_single_range(ITER_SOURCE, buff, len, &iov, &msg.msg_iter);
602bd0e9
AV
2168 if (unlikely(err))
2169 return err;
de0fa95c
PE
2170 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2171 if (!sock)
4387ff75 2172 goto out;
6cb153ca 2173
89bddce5 2174 msg.msg_name = NULL;
89bddce5
SH
2175 msg.msg_control = NULL;
2176 msg.msg_controllen = 0;
2177 msg.msg_namelen = 0;
7c701d92 2178 msg.msg_ubuf = NULL;
6cb153ca 2179 if (addr) {
43db362d 2180 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
2181 if (err < 0)
2182 goto out_put;
230b1839 2183 msg.msg_name = (struct sockaddr *)&address;
89bddce5 2184 msg.msg_namelen = addr_len;
1da177e4 2185 }
b841b901 2186 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
1da177e4
LT
2187 if (sock->file->f_flags & O_NONBLOCK)
2188 flags |= MSG_DONTWAIT;
2189 msg.msg_flags = flags;
86a7e0b6 2190 err = __sock_sendmsg(sock, &msg);
1da177e4 2191
89bddce5 2192out_put:
de0fa95c 2193 fput_light(sock->file, fput_needed);
4387ff75 2194out:
1da177e4
LT
2195 return err;
2196}
2197
211b634b
DB
2198SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2199 unsigned int, flags, struct sockaddr __user *, addr,
2200 int, addr_len)
2201{
2202 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2203}
2204
1da177e4 2205/*
89bddce5 2206 * Send a datagram down a socket.
1da177e4
LT
2207 */
2208
3e0fa65f 2209SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2210 unsigned int, flags)
1da177e4 2211{
211b634b 2212 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2213}
2214
2215/*
89bddce5 2216 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2217 * sender. We verify the buffers are writable and if needed move the
2218 * sender address from kernel to user space.
2219 */
7a09e1eb
DB
2220int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2221 struct sockaddr __user *addr, int __user *addr_len)
1da177e4 2222{
1228b34c
ED
2223 struct sockaddr_storage address;
2224 struct msghdr msg = {
2225 /* Save some cycles and don't copy the address if not needed */
2226 .msg_name = addr ? (struct sockaddr *)&address : NULL,
2227 };
1da177e4
LT
2228 struct socket *sock;
2229 struct iovec iov;
89bddce5 2230 int err, err2;
6cb153ca
BL
2231 int fput_needed;
2232
de4eda9d 2233 err = import_single_range(ITER_DEST, ubuf, size, &iov, &msg.msg_iter);
602bd0e9
AV
2234 if (unlikely(err))
2235 return err;
de0fa95c 2236 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2237 if (!sock)
de0fa95c 2238 goto out;
1da177e4 2239
1da177e4
LT
2240 if (sock->file->f_flags & O_NONBLOCK)
2241 flags |= MSG_DONTWAIT;
2da62906 2242 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2243
89bddce5 2244 if (err >= 0 && addr != NULL) {
43db362d 2245 err2 = move_addr_to_user(&address,
230b1839 2246 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2247 if (err2 < 0)
2248 err = err2;
1da177e4 2249 }
de0fa95c
PE
2250
2251 fput_light(sock->file, fput_needed);
4387ff75 2252out:
1da177e4
LT
2253 return err;
2254}
2255
7a09e1eb
DB
2256SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2257 unsigned int, flags, struct sockaddr __user *, addr,
2258 int __user *, addr_len)
2259{
2260 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2261}
2262
1da177e4 2263/*
89bddce5 2264 * Receive a datagram from a socket.
1da177e4
LT
2265 */
2266
b7c0ddf5
JG
2267SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2268 unsigned int, flags)
1da177e4 2269{
7a09e1eb 2270 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2271}
2272
83f0c10b
FW
2273static bool sock_use_custom_sol_socket(const struct socket *sock)
2274{
a5ef058d 2275 return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
83f0c10b
FW
2276}
2277
1406245c
BL
2278int do_sock_setsockopt(struct socket *sock, bool compat, int level,
2279 int optname, sockptr_t optval, int optlen)
1da177e4 2280{
1ded5e5a 2281 const struct proto_ops *ops;
0d01da6a 2282 char *kernel_optval = NULL;
1406245c 2283 int err;
1da177e4
LT
2284
2285 if (optlen < 0)
2286 return -EINVAL;
89bddce5 2287
4a367299
CH
2288 err = security_socket_setsockopt(sock, level, optname);
2289 if (err)
2290 goto out_put;
0d01da6a 2291
1406245c 2292 if (!compat)
55db9c0e 2293 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
3f31e0d1 2294 optval, &optlen,
55db9c0e 2295 &kernel_optval);
4a367299
CH
2296 if (err < 0)
2297 goto out_put;
2298 if (err > 0) {
2299 err = 0;
2300 goto out_put;
2301 }
0d01da6a 2302
a7b75c5a
CH
2303 if (kernel_optval)
2304 optval = KERNEL_SOCKPTR(kernel_optval);
1ded5e5a 2305 ops = READ_ONCE(sock->ops);
4a367299 2306 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2307 err = sock_setsockopt(sock, level, optname, optval, optlen);
1ded5e5a 2308 else if (unlikely(!ops->setsockopt))
a44d9e72 2309 err = -EOPNOTSUPP;
4a367299 2310 else
1ded5e5a 2311 err = ops->setsockopt(sock, level, optname, optval,
89bddce5 2312 optlen);
a7b75c5a 2313 kfree(kernel_optval);
4a367299 2314out_put:
1406245c
BL
2315 return err;
2316}
2317EXPORT_SYMBOL(do_sock_setsockopt);
2318
2319/* Set a socket option. Because we don't know the option lengths we have
2320 * to pass the user mode parameter for the protocols to sort out.
2321 */
2322int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
2323 int optlen)
2324{
2325 sockptr_t optval = USER_SOCKPTR(user_optval);
2326 bool compat = in_compat_syscall();
2327 int err, fput_needed;
2328 struct socket *sock;
2329
2330 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2331 if (!sock)
2332 return err;
2333
2334 err = do_sock_setsockopt(sock, compat, level, optname, optval, optlen);
2335
4a367299 2336 fput_light(sock->file, fput_needed);
1da177e4
LT
2337 return err;
2338}
2339
cc36dca0
DB
2340SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2341 char __user *, optval, int, optlen)
2342{
2343 return __sys_setsockopt(fd, level, optname, optval, optlen);
2344}
2345
9cacf81f
SF
2346INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2347 int optname));
2348
0b05b0cd
BL
2349int do_sock_getsockopt(struct socket *sock, bool compat, int level,
2350 int optname, sockptr_t optval, sockptr_t optlen)
1da177e4 2351{
ad4bf5f2 2352 int max_optlen __maybe_unused;
1ded5e5a 2353 const struct proto_ops *ops;
0b05b0cd 2354 int err;
d8a9b38f
CH
2355
2356 err = security_socket_getsockopt(sock, level, optname);
2357 if (err)
0b05b0cd 2358 return err;
1da177e4 2359
0b05b0cd 2360 if (!compat)
55db9c0e 2361 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2362
1ded5e5a 2363 ops = READ_ONCE(sock->ops);
0b05b0cd
BL
2364 if (level == SOL_SOCKET) {
2365 err = sk_getsockopt(sock->sk, level, optname, optval, optlen);
2366 } else if (unlikely(!ops->getsockopt)) {
a44d9e72 2367 err = -EOPNOTSUPP;
0b05b0cd
BL
2368 } else {
2369 if (WARN_ONCE(optval.is_kernel || optlen.is_kernel,
2370 "Invalid argument type"))
2371 return -EOPNOTSUPP;
2372
2373 err = ops->getsockopt(sock, level, optname, optval.user,
2374 optlen.user);
2375 }
0d01da6a 2376
0b05b0cd 2377 if (!compat)
55db9c0e
CH
2378 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2379 optval, optlen, max_optlen,
2380 err);
0b05b0cd
BL
2381
2382 return err;
2383}
2384EXPORT_SYMBOL(do_sock_getsockopt);
2385
1da177e4
LT
2386/*
2387 * Get a socket option. Because we don't know the option lengths we have
2388 * to pass a user mode parameter for the protocols to sort out.
2389 */
55db9c0e
CH
2390int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2391 int __user *optlen)
1da177e4 2392{
6cb153ca 2393 int err, fput_needed;
1da177e4 2394 struct socket *sock;
0b05b0cd 2395 bool compat;
1da177e4 2396
89bddce5 2397 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2398 if (!sock)
2399 return err;
2400
0b05b0cd
BL
2401 compat = in_compat_syscall();
2402 err = do_sock_getsockopt(sock, compat, level, optname,
2403 USER_SOCKPTR(optval), USER_SOCKPTR(optlen));
1da177e4 2404
d8a9b38f 2405 fput_light(sock->file, fput_needed);
1da177e4
LT
2406 return err;
2407}
2408
13a2d70e
DB
2409SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2410 char __user *, optval, int __user *, optlen)
2411{
2412 return __sys_getsockopt(fd, level, optname, optval, optlen);
2413}
2414
1da177e4
LT
2415/*
2416 * Shutdown a socket.
2417 */
2418
b713c195
JA
2419int __sys_shutdown_sock(struct socket *sock, int how)
2420{
2421 int err;
2422
2423 err = security_socket_shutdown(sock, how);
2424 if (!err)
1ded5e5a 2425 err = READ_ONCE(sock->ops)->shutdown(sock, how);
b713c195
JA
2426
2427 return err;
2428}
2429
005a1aea 2430int __sys_shutdown(int fd, int how)
1da177e4 2431{
6cb153ca 2432 int err, fput_needed;
1da177e4
LT
2433 struct socket *sock;
2434
89bddce5
SH
2435 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2436 if (sock != NULL) {
b713c195 2437 err = __sys_shutdown_sock(sock, how);
6cb153ca 2438 fput_light(sock->file, fput_needed);
1da177e4
LT
2439 }
2440 return err;
2441}
2442
005a1aea
DB
2443SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2444{
2445 return __sys_shutdown(fd, how);
2446}
2447
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * NOTE: these macros are not self-contained.  COMPAT_MSG(msg, ...) expands
 * to references to a variable named "flags" and to "<msg>_compat", so both
 * must be in scope wherever the macros are used.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2454
c71d8ebe
TH
2455struct used_address {
2456 struct sockaddr_storage name;
2457 unsigned int name_len;
2458};
2459
7fa875b8
DY
2460int __copy_msghdr(struct msghdr *kmsg,
2461 struct user_msghdr *msg,
2462 struct sockaddr __user **save_addr)
1661bf36 2463{
08adb7da
AV
2464 ssize_t err;
2465
1f466e1f 2466 kmsg->msg_control_is_user = true;
1228b34c 2467 kmsg->msg_get_inq = 0;
7fa875b8
DY
2468 kmsg->msg_control_user = msg->msg_control;
2469 kmsg->msg_controllen = msg->msg_controllen;
2470 kmsg->msg_flags = msg->msg_flags;
ffb07550 2471
7fa875b8
DY
2472 kmsg->msg_namelen = msg->msg_namelen;
2473 if (!msg->msg_name)
6a2a2b3a
AS
2474 kmsg->msg_namelen = 0;
2475
dbb490b9
ML
2476 if (kmsg->msg_namelen < 0)
2477 return -EINVAL;
2478
1661bf36 2479 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2480 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2481
2482 if (save_addr)
7fa875b8 2483 *save_addr = msg->msg_name;
08adb7da 2484
7fa875b8 2485 if (msg->msg_name && kmsg->msg_namelen) {
08adb7da 2486 if (!save_addr) {
7fa875b8 2487 err = move_addr_to_kernel(msg->msg_name,
864d9664 2488 kmsg->msg_namelen,
08adb7da
AV
2489 kmsg->msg_name);
2490 if (err < 0)
2491 return err;
2492 }
2493 } else {
2494 kmsg->msg_name = NULL;
2495 kmsg->msg_namelen = 0;
2496 }
2497
7fa875b8 2498 if (msg->msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2499 return -EMSGSIZE;
2500
0345f931 2501 kmsg->msg_iocb = NULL;
7c701d92 2502 kmsg->msg_ubuf = NULL;
0a384abf
JA
2503 return 0;
2504}
2505
2506static int copy_msghdr_from_user(struct msghdr *kmsg,
2507 struct user_msghdr __user *umsg,
2508 struct sockaddr __user **save_addr,
2509 struct iovec **iov)
2510{
2511 struct user_msghdr msg;
2512 ssize_t err;
2513
7fa875b8
DY
2514 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
2515 return -EFAULT;
2516
2517 err = __copy_msghdr(kmsg, &msg, save_addr);
0a384abf
JA
2518 if (err)
2519 return err;
0345f931 2520
de4eda9d 2521 err = import_iovec(save_addr ? ITER_DEST : ITER_SOURCE,
ffb07550 2522 msg.msg_iov, msg.msg_iovlen,
da184284 2523 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2524 return err < 0 ? err : 0;
1661bf36
DC
2525}
2526
4257c8ca
JA
2527static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2528 unsigned int flags, struct used_address *used_address,
2529 unsigned int allowed_msghdr_flags)
1da177e4 2530{
b9d717a7 2531 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2532 __aligned(sizeof(__kernel_size_t));
89bddce5 2533 /* 20 is size of ipv6_pktinfo */
1da177e4 2534 unsigned char *ctl_buf = ctl;
d8725c86 2535 int ctl_len;
08adb7da 2536 ssize_t err;
89bddce5 2537
1da177e4
LT
2538 err = -ENOBUFS;
2539
228e548e 2540 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2541 goto out;
28a94d8f 2542 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2543 ctl_len = msg_sys->msg_controllen;
1da177e4 2544 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2545 err =
228e548e 2546 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2547 sizeof(ctl));
1da177e4 2548 if (err)
4257c8ca 2549 goto out;
228e548e
AB
2550 ctl_buf = msg_sys->msg_control;
2551 ctl_len = msg_sys->msg_controllen;
1da177e4 2552 } else if (ctl_len) {
ac4340fc
DM
2553 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2554 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2555 if (ctl_len > sizeof(ctl)) {
1da177e4 2556 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2557 if (ctl_buf == NULL)
4257c8ca 2558 goto out;
1da177e4
LT
2559 }
2560 err = -EFAULT;
1f466e1f 2561 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2562 goto out_freectl;
228e548e 2563 msg_sys->msg_control = ctl_buf;
1f466e1f 2564 msg_sys->msg_control_is_user = false;
1da177e4 2565 }
b841b901 2566 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
228e548e 2567 msg_sys->msg_flags = flags;
1da177e4
LT
2568
2569 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2570 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2571 /*
2572 * If this is sendmmsg() and current destination address is same as
2573 * previously succeeded address, omit asking LSM's decision.
2574 * used_address->name_len is initialized to UINT_MAX so that the first
2575 * destination address never matches.
2576 */
bc909d9d
MD
2577 if (used_address && msg_sys->msg_name &&
2578 used_address->name_len == msg_sys->msg_namelen &&
2579 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2580 used_address->name_len)) {
d8725c86 2581 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2582 goto out_freectl;
2583 }
86a7e0b6 2584 err = __sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2585 /*
2586 * If this is sendmmsg() and sending to current destination address was
2587 * successful, remember it.
2588 */
2589 if (used_address && err >= 0) {
2590 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2591 if (msg_sys->msg_name)
2592 memcpy(&used_address->name, msg_sys->msg_name,
2593 used_address->name_len);
c71d8ebe 2594 }
1da177e4
LT
2595
2596out_freectl:
89bddce5 2597 if (ctl_buf != ctl)
1da177e4 2598 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2599out:
2600 return err;
2601}
2602
03b1230c
JA
2603int sendmsg_copy_msghdr(struct msghdr *msg,
2604 struct user_msghdr __user *umsg, unsigned flags,
2605 struct iovec **iov)
4257c8ca
JA
2606{
2607 int err;
2608
2609 if (flags & MSG_CMSG_COMPAT) {
2610 struct compat_msghdr __user *msg_compat;
2611
2612 msg_compat = (struct compat_msghdr __user *) umsg;
2613 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2614 } else {
2615 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2616 }
2617 if (err < 0)
2618 return err;
2619
2620 return 0;
2621}
2622
2623static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2624 struct msghdr *msg_sys, unsigned int flags,
2625 struct used_address *used_address,
2626 unsigned int allowed_msghdr_flags)
2627{
2628 struct sockaddr_storage address;
2629 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2630 ssize_t err;
2631
2632 msg_sys->msg_name = &address;
2633
2634 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2635 if (err < 0)
2636 return err;
2637
2638 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2639 allowed_msghdr_flags);
da184284 2640 kfree(iov);
228e548e
AB
2641 return err;
2642}
2643
2644/*
2645 * BSD sendmsg interface
2646 */
03b1230c 2647long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2648 unsigned int flags)
2649{
03b1230c 2650 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2651}
228e548e 2652
e1834a32
DB
2653long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2654 bool forbid_cmsg_compat)
228e548e
AB
2655{
2656 int fput_needed, err;
2657 struct msghdr msg_sys;
1be374a0
AL
2658 struct socket *sock;
2659
e1834a32
DB
2660 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2661 return -EINVAL;
2662
1be374a0 2663 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2664 if (!sock)
2665 goto out;
2666
28a94d8f 2667 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2668
6cb153ca 2669 fput_light(sock->file, fput_needed);
89bddce5 2670out:
1da177e4
LT
2671 return err;
2672}
2673
666547ff 2674SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2675{
e1834a32 2676 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2677}
2678
228e548e
AB
2679/*
2680 * Linux sendmmsg interface
2681 */
2682
2683int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2684 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2685{
2686 int fput_needed, err, datagrams;
2687 struct socket *sock;
2688 struct mmsghdr __user *entry;
2689 struct compat_mmsghdr __user *compat_entry;
2690 struct msghdr msg_sys;
c71d8ebe 2691 struct used_address used_address;
f092276d 2692 unsigned int oflags = flags;
228e548e 2693
e1834a32
DB
2694 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2695 return -EINVAL;
2696
98382f41
AB
2697 if (vlen > UIO_MAXIOV)
2698 vlen = UIO_MAXIOV;
228e548e
AB
2699
2700 datagrams = 0;
2701
2702 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2703 if (!sock)
2704 return err;
2705
c71d8ebe 2706 used_address.name_len = UINT_MAX;
228e548e
AB
2707 entry = mmsg;
2708 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2709 err = 0;
f092276d 2710 flags |= MSG_BATCH;
228e548e
AB
2711
2712 while (datagrams < vlen) {
f092276d
TH
2713 if (datagrams == vlen - 1)
2714 flags = oflags;
2715
228e548e 2716 if (MSG_CMSG_COMPAT & flags) {
666547ff 2717 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2718 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2719 if (err < 0)
2720 break;
2721 err = __put_user(err, &compat_entry->msg_len);
2722 ++compat_entry;
2723 } else {
a7526eb5 2724 err = ___sys_sendmsg(sock,
666547ff 2725 (struct user_msghdr __user *)entry,
28a94d8f 2726 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2727 if (err < 0)
2728 break;
2729 err = put_user(err, &entry->msg_len);
2730 ++entry;
2731 }
2732
2733 if (err)
2734 break;
2735 ++datagrams;
3023898b
SHY
2736 if (msg_data_left(&msg_sys))
2737 break;
a78cb84c 2738 cond_resched();
228e548e
AB
2739 }
2740
228e548e
AB
2741 fput_light(sock->file, fput_needed);
2742
728ffb86
AB
2743 /* We only return an error if no datagrams were able to be sent */
2744 if (datagrams != 0)
228e548e
AB
2745 return datagrams;
2746
228e548e
AB
2747 return err;
2748}
2749
2750SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2751 unsigned int, vlen, unsigned int, flags)
2752{
e1834a32 2753 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2754}
2755
03b1230c
JA
2756int recvmsg_copy_msghdr(struct msghdr *msg,
2757 struct user_msghdr __user *umsg, unsigned flags,
2758 struct sockaddr __user **uaddr,
2759 struct iovec **iov)
1da177e4 2760{
08adb7da 2761 ssize_t err;
1da177e4 2762
4257c8ca
JA
2763 if (MSG_CMSG_COMPAT & flags) {
2764 struct compat_msghdr __user *msg_compat;
1da177e4 2765
4257c8ca
JA
2766 msg_compat = (struct compat_msghdr __user *) umsg;
2767 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2768 } else {
2769 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2770 }
1da177e4 2771 if (err < 0)
da184284 2772 return err;
1da177e4 2773
4257c8ca
JA
2774 return 0;
2775}
2776
2777static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2778 struct user_msghdr __user *msg,
2779 struct sockaddr __user *uaddr,
2780 unsigned int flags, int nosec)
2781{
2782 struct compat_msghdr __user *msg_compat =
2783 (struct compat_msghdr __user *) msg;
2784 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2785 struct sockaddr_storage addr;
2786 unsigned long cmsg_ptr;
2787 int len;
2788 ssize_t err;
2789
2790 msg_sys->msg_name = &addr;
a2e27255
ACM
2791 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2792 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2793
f3d33426
HFS
2794 /* We assume all kernel code knows the size of sockaddr_storage */
2795 msg_sys->msg_namelen = 0;
2796
1da177e4
LT
2797 if (sock->file->f_flags & O_NONBLOCK)
2798 flags |= MSG_DONTWAIT;
1af66221
ED
2799
2800 if (unlikely(nosec))
2801 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2802 else
2803 err = sock_recvmsg(sock, msg_sys, flags);
2804
1da177e4 2805 if (err < 0)
4257c8ca 2806 goto out;
1da177e4
LT
2807 len = err;
2808
2809 if (uaddr != NULL) {
43db362d 2810 err = move_addr_to_user(&addr,
a2e27255 2811 msg_sys->msg_namelen, uaddr,
89bddce5 2812 uaddr_len);
1da177e4 2813 if (err < 0)
4257c8ca 2814 goto out;
1da177e4 2815 }
a2e27255 2816 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2817 COMPAT_FLAGS(msg));
1da177e4 2818 if (err)
4257c8ca 2819 goto out;
1da177e4 2820 if (MSG_CMSG_COMPAT & flags)
a2e27255 2821 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2822 &msg_compat->msg_controllen);
2823 else
a2e27255 2824 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2825 &msg->msg_controllen);
2826 if (err)
4257c8ca 2827 goto out;
1da177e4 2828 err = len;
4257c8ca
JA
2829out:
2830 return err;
2831}
2832
2833static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2834 struct msghdr *msg_sys, unsigned int flags, int nosec)
2835{
2836 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2837 /* user mode address pointers */
2838 struct sockaddr __user *uaddr;
2839 ssize_t err;
2840
2841 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2842 if (err < 0)
2843 return err;
1da177e4 2844
4257c8ca 2845 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2846 kfree(iov);
a2e27255
ACM
2847 return err;
2848}
2849
2850/*
2851 * BSD recvmsg interface
2852 */
2853
03b1230c
JA
2854long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2855 struct user_msghdr __user *umsg,
2856 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2857{
03b1230c 2858 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2859}
2860
e1834a32
DB
2861long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2862 bool forbid_cmsg_compat)
a2e27255
ACM
2863{
2864 int fput_needed, err;
2865 struct msghdr msg_sys;
1be374a0
AL
2866 struct socket *sock;
2867
e1834a32
DB
2868 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2869 return -EINVAL;
2870
1be374a0 2871 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2872 if (!sock)
2873 goto out;
2874
a7526eb5 2875 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2876
6cb153ca 2877 fput_light(sock->file, fput_needed);
1da177e4
LT
2878out:
2879 return err;
2880}
2881
666547ff 2882SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2883 unsigned int, flags)
2884{
e1834a32 2885 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2886}
2887
a2e27255
ACM
2888/*
2889 * Linux recvmmsg interface
2890 */
2891
e11d4284
AB
2892static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2893 unsigned int vlen, unsigned int flags,
2894 struct timespec64 *timeout)
a2e27255
ACM
2895{
2896 int fput_needed, err, datagrams;
2897 struct socket *sock;
2898 struct mmsghdr __user *entry;
d7256d0e 2899 struct compat_mmsghdr __user *compat_entry;
a2e27255 2900 struct msghdr msg_sys;
766b9f92
DD
2901 struct timespec64 end_time;
2902 struct timespec64 timeout64;
a2e27255
ACM
2903
2904 if (timeout &&
2905 poll_select_set_timeout(&end_time, timeout->tv_sec,
2906 timeout->tv_nsec))
2907 return -EINVAL;
2908
2909 datagrams = 0;
2910
2911 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2912 if (!sock)
2913 return err;
2914
7797dc41
SHY
2915 if (likely(!(flags & MSG_ERRQUEUE))) {
2916 err = sock_error(sock->sk);
2917 if (err) {
2918 datagrams = err;
2919 goto out_put;
2920 }
e623a9e9 2921 }
a2e27255
ACM
2922
2923 entry = mmsg;
d7256d0e 2924 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2925
2926 while (datagrams < vlen) {
2927 /*
2928 * No need to ask LSM for more than the first datagram.
2929 */
d7256d0e 2930 if (MSG_CMSG_COMPAT & flags) {
666547ff 2931 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2932 &msg_sys, flags & ~MSG_WAITFORONE,
2933 datagrams);
d7256d0e
JMG
2934 if (err < 0)
2935 break;
2936 err = __put_user(err, &compat_entry->msg_len);
2937 ++compat_entry;
2938 } else {
a7526eb5 2939 err = ___sys_recvmsg(sock,
666547ff 2940 (struct user_msghdr __user *)entry,
a7526eb5
AL
2941 &msg_sys, flags & ~MSG_WAITFORONE,
2942 datagrams);
d7256d0e
JMG
2943 if (err < 0)
2944 break;
2945 err = put_user(err, &entry->msg_len);
2946 ++entry;
2947 }
2948
a2e27255
ACM
2949 if (err)
2950 break;
a2e27255
ACM
2951 ++datagrams;
2952
71c5c159
BB
2953 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2954 if (flags & MSG_WAITFORONE)
2955 flags |= MSG_DONTWAIT;
2956
a2e27255 2957 if (timeout) {
766b9f92 2958 ktime_get_ts64(&timeout64);
c2e6c856 2959 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2960 if (timeout->tv_sec < 0) {
2961 timeout->tv_sec = timeout->tv_nsec = 0;
2962 break;
2963 }
2964
2965 /* Timeout, return less than vlen datagrams */
2966 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2967 break;
2968 }
2969
2970 /* Out of band data, return right away */
2971 if (msg_sys.msg_flags & MSG_OOB)
2972 break;
a78cb84c 2973 cond_resched();
a2e27255
ACM
2974 }
2975
a2e27255 2976 if (err == 0)
34b88a68
ACM
2977 goto out_put;
2978
2979 if (datagrams == 0) {
2980 datagrams = err;
2981 goto out_put;
2982 }
a2e27255 2983
34b88a68
ACM
2984 /*
2985 * We may return less entries than requested (vlen) if the
2986 * sock is non block and there aren't enough datagrams...
2987 */
2988 if (err != -EAGAIN) {
a2e27255 2989 /*
34b88a68
ACM
2990 * ... or if recvmsg returns an error after we
2991 * received some datagrams, where we record the
2992 * error to return on the next call or if the
2993 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2994 */
e05a5f51 2995 WRITE_ONCE(sock->sk->sk_err, -err);
a2e27255 2996 }
34b88a68
ACM
2997out_put:
2998 fput_light(sock->file, fput_needed);
a2e27255 2999
34b88a68 3000 return datagrams;
a2e27255
ACM
3001}
3002
e11d4284
AB
3003int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
3004 unsigned int vlen, unsigned int flags,
3005 struct __kernel_timespec __user *timeout,
3006 struct old_timespec32 __user *timeout32)
a2e27255
ACM
3007{
3008 int datagrams;
c2e6c856 3009 struct timespec64 timeout_sys;
a2e27255 3010
e11d4284
AB
3011 if (timeout && get_timespec64(&timeout_sys, timeout))
3012 return -EFAULT;
a2e27255 3013
e11d4284 3014 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
3015 return -EFAULT;
3016
e11d4284
AB
3017 if (!timeout && !timeout32)
3018 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
3019
3020 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 3021
e11d4284
AB
3022 if (datagrams <= 0)
3023 return datagrams;
3024
3025 if (timeout && put_timespec64(&timeout_sys, timeout))
3026 datagrams = -EFAULT;
3027
3028 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
3029 datagrams = -EFAULT;
3030
3031 return datagrams;
3032}
3033
1255e269
DB
3034SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
3035 unsigned int, vlen, unsigned int, flags,
c2e6c856 3036 struct __kernel_timespec __user *, timeout)
1255e269 3037{
e11d4284
AB
3038 if (flags & MSG_CMSG_COMPAT)
3039 return -EINVAL;
3040
3041 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
3042}
3043
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg variant whose timeout is a struct old_timespec32.
 * A request carrying MSG_CMSG_COMPAT in @flags is refused with -EINVAL.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (!(flags & MSG_CMSG_COMPAT))
		return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);

	return -EINVAL;
}
#endif
1255e269 3055
a2e27255 3056#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Size in bytes of the argument block of each sys_socketcall sub-call,
 * indexed by call number.  Every argument occupies one unsigned long;
 * slot 0 is unused.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),				/* 0 */
	AL(3), AL(3), AL(3), AL(2),	/* 1 - 4 */
	AL(3), AL(3), AL(3), AL(4),	/* 5 - 8 */
	AL(4), AL(4), AL(6), AL(6),	/* 9 - 12 */
	AL(2), AL(5), AL(5), AL(3),	/* 13 - 16 */
	AL(3), AL(4), AL(5), AL(4)	/* 17 - 20 */
};

#undef AL
3067
3068/*
89bddce5 3069 * System call vectors.
1da177e4
LT
3070 *
3071 * Argument checking cleaned up. Saved 20% in size.
3072 * This function doesn't need to set the kernel lock because
89bddce5 3073 * it is set by the callees.
1da177e4
LT
3074 */
3075
3e0fa65f 3076SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 3077{
2950fa9d 3078 unsigned long a[AUDITSC_ARGS];
89bddce5 3079 unsigned long a0, a1;
1da177e4 3080 int err;
47379052 3081 unsigned int len;
1da177e4 3082
228e548e 3083 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 3084 return -EINVAL;
c8e8cd57 3085 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 3086
47379052
AV
3087 len = nargs[call];
3088 if (len > sizeof(a))
3089 return -EINVAL;
3090
1da177e4 3091 /* copy_from_user should be SMP safe. */
47379052 3092 if (copy_from_user(a, args, len))
1da177e4 3093 return -EFAULT;
3ec3b2fb 3094
2950fa9d
CG
3095 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3096 if (err)
3097 return err;
3ec3b2fb 3098
89bddce5
SH
3099 a0 = a[0];
3100 a1 = a[1];
3101
3102 switch (call) {
3103 case SYS_SOCKET:
9d6a15c3 3104 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
3105 break;
3106 case SYS_BIND:
a87d35d8 3107 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
3108 break;
3109 case SYS_CONNECT:
1387c2c2 3110 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
3111 break;
3112 case SYS_LISTEN:
25e290ee 3113 err = __sys_listen(a0, a1);
89bddce5
SH
3114 break;
3115 case SYS_ACCEPT:
4541e805
DB
3116 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3117 (int __user *)a[2], 0);
89bddce5
SH
3118 break;
3119 case SYS_GETSOCKNAME:
3120 err =
8882a107
DB
3121 __sys_getsockname(a0, (struct sockaddr __user *)a1,
3122 (int __user *)a[2]);
89bddce5
SH
3123 break;
3124 case SYS_GETPEERNAME:
3125 err =
b21c8f83
DB
3126 __sys_getpeername(a0, (struct sockaddr __user *)a1,
3127 (int __user *)a[2]);
89bddce5
SH
3128 break;
3129 case SYS_SOCKETPAIR:
6debc8d8 3130 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
3131 break;
3132 case SYS_SEND:
f3bf896b
DB
3133 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
3134 NULL, 0);
89bddce5
SH
3135 break;
3136 case SYS_SENDTO:
211b634b
DB
3137 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
3138 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
3139 break;
3140 case SYS_RECV:
d27e9afc
DB
3141 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
3142 NULL, NULL);
89bddce5
SH
3143 break;
3144 case SYS_RECVFROM:
7a09e1eb
DB
3145 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
3146 (struct sockaddr __user *)a[4],
3147 (int __user *)a[5]);
89bddce5
SH
3148 break;
3149 case SYS_SHUTDOWN:
005a1aea 3150 err = __sys_shutdown(a0, a1);
89bddce5
SH
3151 break;
3152 case SYS_SETSOCKOPT:
cc36dca0
DB
3153 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
3154 a[4]);
89bddce5
SH
3155 break;
3156 case SYS_GETSOCKOPT:
3157 err =
13a2d70e
DB
3158 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
3159 (int __user *)a[4]);
89bddce5
SH
3160 break;
3161 case SYS_SENDMSG:
e1834a32
DB
3162 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
3163 a[2], true);
89bddce5 3164 break;
228e548e 3165 case SYS_SENDMMSG:
e1834a32
DB
3166 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
3167 a[3], true);
228e548e 3168 break;
89bddce5 3169 case SYS_RECVMSG:
e1834a32
DB
3170 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
3171 a[2], true);
89bddce5 3172 break;
a2e27255 3173 case SYS_RECVMMSG:
3ca47e95 3174 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
3175 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3176 a[2], a[3],
3177 (struct __kernel_timespec __user *)a[4],
3178 NULL);
3179 else
3180 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3181 a[2], a[3], NULL,
3182 (struct old_timespec32 __user *)a[4]);
a2e27255 3183 break;
de11defe 3184 case SYS_ACCEPT4:
4541e805
DB
3185 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3186 (int __user *)a[2], a[3]);
aaca0bdc 3187 break;
89bddce5
SH
3188 default:
3189 err = -EINVAL;
3190 break;
1da177e4
LT
3191 }
3192 return err;
3193}
3194
89bddce5 3195#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 3196
55737fda
SH
3197/**
3198 * sock_register - add a socket protocol handler
3199 * @ops: description of protocol
3200 *
1da177e4
LT
3201 * This function is called by a protocol handler that wants to
3202 * advertise its address family, and have it linked into the
e793c0f7 3203 * socket interface. The value ops->family corresponds to the
55737fda 3204 * socket system call protocol family.
1da177e4 3205 */
f0fd27d4 3206int sock_register(const struct net_proto_family *ops)
1da177e4
LT
3207{
3208 int err;
3209
3210 if (ops->family >= NPROTO) {
3410f22e 3211 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
3212 return -ENOBUFS;
3213 }
55737fda
SH
3214
3215 spin_lock(&net_family_lock);
190683a9
ED
3216 if (rcu_dereference_protected(net_families[ops->family],
3217 lockdep_is_held(&net_family_lock)))
55737fda
SH
3218 err = -EEXIST;
3219 else {
cf778b00 3220 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
3221 err = 0;
3222 }
55737fda
SH
3223 spin_unlock(&net_family_lock);
3224
fe0bdbde 3225 pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
1da177e4
LT
3226 return err;
3227}
c6d409cf 3228EXPORT_SYMBOL(sock_register);
1da177e4 3229
55737fda
SH
3230/**
3231 * sock_unregister - remove a protocol handler
3232 * @family: protocol family to remove
3233 *
1da177e4
LT
3234 * This function is called by a protocol handler that wants to
3235 * remove its address family, and have it unlinked from the
55737fda
SH
3236 * new socket creation.
3237 *
3238 * If protocol handler is a module, then it can use module reference
3239 * counts to protect against new references. If protocol handler is not
3240 * a module then it needs to provide its own protection in
3241 * the ops->create routine.
1da177e4 3242 */
f0fd27d4 3243void sock_unregister(int family)
1da177e4 3244{
f0fd27d4 3245 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3246
55737fda 3247 spin_lock(&net_family_lock);
a9b3cd7f 3248 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3249 spin_unlock(&net_family_lock);
3250
3251 synchronize_rcu();
3252
fe0bdbde 3253 pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
1da177e4 3254}
c6d409cf 3255EXPORT_SYMBOL(sock_unregister);
1da177e4 3256
bf2ae2e4
XL
3257bool sock_is_registered(int family)
3258{
66b51b0a 3259 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3260}
3261
77d76ea3 3262static int __init sock_init(void)
1da177e4 3263{
b3e19d92 3264 int err;
2ca794e5
EB
3265 /*
3266 * Initialize the network sysctl infrastructure.
3267 */
3268 err = net_sysctl_init();
3269 if (err)
3270 goto out;
b3e19d92 3271
1da177e4 3272 /*
89bddce5 3273 * Initialize skbuff SLAB cache
1da177e4
LT
3274 */
3275 skb_init();
1da177e4
LT
3276
3277 /*
89bddce5 3278 * Initialize the protocols module.
1da177e4
LT
3279 */
3280
3281 init_inodecache();
b3e19d92
NP
3282
3283 err = register_filesystem(&sock_fs_type);
3284 if (err)
47260ba9 3285 goto out;
1da177e4 3286 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3287 if (IS_ERR(sock_mnt)) {
3288 err = PTR_ERR(sock_mnt);
3289 goto out_mount;
3290 }
77d76ea3
AK
3291
3292 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3293 */
3294
3295#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3296 err = netfilter_init();
3297 if (err)
3298 goto out;
1da177e4 3299#endif
cbeb321a 3300
408eccce 3301 ptp_classifier_init();
c1f19b51 3302
b3e19d92
NP
3303out:
3304 return err;
3305
3306out_mount:
3307 unregister_filesystem(&sock_fs_type);
b3e19d92 3308 goto out;
1da177e4
LT
3309}
3310
77d76ea3
AK
3311core_initcall(sock_init); /* early initcall */
3312
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Emit the "sockets: used N" line into @seq.  The count comes from
 * sock_inuse_get() called on the seq_file's private pointer.
 */
void socket_seq_show(struct seq_file *seq)
{
	int in_use = sock_inuse_get(seq->private);

	seq_printf(seq, "sockets: used %d\n", in_use);
}
#endif /* CONFIG_PROC_FS */
1da177e4 3320
29c49648
AB
3321/* Handle the fact that while struct ifreq has the same *layout* on
3322 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3323 * which are handled elsewhere, it still has different *size* due to
3324 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3325 * resulting in struct ifreq being 32 and 40 bytes respectively).
3326 * As a result, if the struct happens to be at the end of a page and
3327 * the next page isn't readable/writable, we get a fault. To prevent
3328 * that, copy back and forth to the full size.
3329 */
3330int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
7a229387 3331{
29c49648
AB
3332 if (in_compat_syscall()) {
3333 struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
7a229387 3334
29c49648
AB
3335 memset(ifr, 0, sizeof(*ifr));
3336 if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
3337 return -EFAULT;
7a229387 3338
29c49648
AB
3339 if (ifrdata)
3340 *ifrdata = compat_ptr(ifr32->ifr_data);
7a229387 3341
29c49648
AB
3342 return 0;
3343 }
7a229387 3344
29c49648 3345 if (copy_from_user(ifr, arg, sizeof(*ifr)))
7a229387
AB
3346 return -EFAULT;
3347
29c49648
AB
3348 if (ifrdata)
3349 *ifrdata = ifr->ifr_data;
3350
7a229387
AB
3351 return 0;
3352}
29c49648 3353EXPORT_SYMBOL(get_user_ifreq);
7a229387 3354
29c49648 3355int put_user_ifreq(struct ifreq *ifr, void __user *arg)
7a229387 3356{
29c49648 3357 size_t size = sizeof(*ifr);
7a229387 3358
29c49648
AB
3359 if (in_compat_syscall())
3360 size = sizeof(struct compat_ifreq);
7a229387 3361
29c49648 3362 if (copy_to_user(arg, ifr, size))
7a229387
AB
3363 return -EFAULT;
3364
3a7da39d 3365 return 0;
7a229387 3366}
29c49648 3367EXPORT_SYMBOL(put_user_ifreq);
7a229387 3368
89bbfc95 3369#ifdef CONFIG_COMPAT
7a50a240
AB
3370static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3371{
7a50a240 3372 compat_uptr_t uptr32;
44c02a2c
AV
3373 struct ifreq ifr;
3374 void __user *saved;
3375 int err;
7a50a240 3376
29c49648 3377 if (get_user_ifreq(&ifr, NULL, uifr32))
7a50a240
AB
3378 return -EFAULT;
3379
3380 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3381 return -EFAULT;
3382
44c02a2c
AV
3383 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3384 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3385
a554bf96 3386 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
44c02a2c
AV
3387 if (!err) {
3388 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
29c49648 3389 if (put_user_ifreq(&ifr, uifr32))
44c02a2c 3390 err = -EFAULT;
ccbd6a5a 3391 }
44c02a2c 3392 return err;
7a229387
AB
3393}
3394
590d4693
BH
3395/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3396static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3397 struct compat_ifreq __user *u_ifreq32)
7a229387 3398{
44c02a2c 3399 struct ifreq ifreq;
a554bf96 3400 void __user *data;
7a229387 3401
d0efb162
PC
3402 if (!is_socket_ioctl_cmd(cmd))
3403 return -ENOTTY;
a554bf96 3404 if (get_user_ifreq(&ifreq, &data, u_ifreq32))
7a229387 3405 return -EFAULT;
a554bf96 3406 ifreq.ifr_data = data;
7a229387 3407
a554bf96 3408 return dev_ioctl(net, cmd, &ifreq, data, NULL);
a2116ed2
AB
3409}
3410
6b96018b
AB
3411static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3412 unsigned int cmd, unsigned long arg)
3413{
3414 void __user *argp = compat_ptr(arg);
3415 struct sock *sk = sock->sk;
3416 struct net *net = sock_net(sk);
1ded5e5a 3417 const struct proto_ops *ops;
7a229387 3418
6b96018b 3419 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
88fc023f 3420 return sock_ioctl(file, cmd, (unsigned long)argp);
6b96018b
AB
3421
3422 switch (cmd) {
7a50a240
AB
3423 case SIOCWANDEV:
3424 return compat_siocwandev(net, argp);
0768e170
AB
3425 case SIOCGSTAMP_OLD:
3426 case SIOCGSTAMPNS_OLD:
1ded5e5a
ED
3427 ops = READ_ONCE(sock->ops);
3428 if (!ops->gettstamp)
c7cbdbf2 3429 return -ENOIOCTLCMD;
1ded5e5a
ED
3430 return ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
3431 !COMPAT_USE_64BIT_TIME);
c7cbdbf2 3432
dd98d289 3433 case SIOCETHTOOL:
590d4693
BH
3434 case SIOCBONDSLAVEINFOQUERY:
3435 case SIOCBONDINFOQUERY:
a2116ed2 3436 case SIOCSHWTSTAMP:
fd468c74 3437 case SIOCGHWTSTAMP:
590d4693 3438 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3439
3440 case FIOSETOWN:
3441 case SIOCSPGRP:
3442 case FIOGETOWN:
3443 case SIOCGPGRP:
3444 case SIOCBRADDBR:
3445 case SIOCBRDELBR:
3446 case SIOCGIFVLAN:
3447 case SIOCSIFVLAN:
c62cce2c 3448 case SIOCGSKNS:
0768e170
AB
3449 case SIOCGSTAMP_NEW:
3450 case SIOCGSTAMPNS_NEW:
876f0bf9 3451 case SIOCGIFCONF:
fd3a4590
RP
3452 case SIOCSIFBR:
3453 case SIOCGIFBR:
6b96018b
AB
3454 return sock_ioctl(file, cmd, arg);
3455
3456 case SIOCGIFFLAGS:
3457 case SIOCSIFFLAGS:
709566d7
AB
3458 case SIOCGIFMAP:
3459 case SIOCSIFMAP:
6b96018b
AB
3460 case SIOCGIFMETRIC:
3461 case SIOCSIFMETRIC:
3462 case SIOCGIFMTU:
3463 case SIOCSIFMTU:
3464 case SIOCGIFMEM:
3465 case SIOCSIFMEM:
3466 case SIOCGIFHWADDR:
3467 case SIOCSIFHWADDR:
3468 case SIOCADDMULTI:
3469 case SIOCDELMULTI:
3470 case SIOCGIFINDEX:
6b96018b
AB
3471 case SIOCGIFADDR:
3472 case SIOCSIFADDR:
3473 case SIOCSIFHWBROADCAST:
6b96018b 3474 case SIOCDIFADDR:
6b96018b
AB
3475 case SIOCGIFBRDADDR:
3476 case SIOCSIFBRDADDR:
3477 case SIOCGIFDSTADDR:
3478 case SIOCSIFDSTADDR:
3479 case SIOCGIFNETMASK:
3480 case SIOCSIFNETMASK:
3481 case SIOCSIFPFLAGS:
3482 case SIOCGIFPFLAGS:
3483 case SIOCGIFTXQLEN:
3484 case SIOCSIFTXQLEN:
3485 case SIOCBRADDIF:
3486 case SIOCBRDELIF:
c6c9fee3 3487 case SIOCGIFNAME:
9177efd3
AB
3488 case SIOCSIFNAME:
3489 case SIOCGMIIPHY:
3490 case SIOCGMIIREG:
3491 case SIOCSMIIREG:
f92d4fc9
AV
3492 case SIOCBONDENSLAVE:
3493 case SIOCBONDRELEASE:
3494 case SIOCBONDSETHWADDR:
3495 case SIOCBONDCHANGEACTIVE:
6b96018b
AB
3496 case SIOCSARP:
3497 case SIOCGARP:
3498 case SIOCDARP:
c7dc504e 3499 case SIOCOUTQ:
9d7bf41f 3500 case SIOCOUTQNSD:
6b96018b 3501 case SIOCATMARK:
63ff03ab 3502 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3503 }
3504
6b96018b
AB
3505 return -ENOIOCTLCMD;
3506}
7a229387 3507
95c96174 3508static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3509 unsigned long arg)
89bbfc95
SP
3510{
3511 struct socket *sock = file->private_data;
1ded5e5a 3512 const struct proto_ops *ops = READ_ONCE(sock->ops);
89bbfc95 3513 int ret = -ENOIOCTLCMD;
87de87d5
DM
3514 struct sock *sk;
3515 struct net *net;
3516
3517 sk = sock->sk;
3518 net = sock_net(sk);
89bbfc95 3519
1ded5e5a
ED
3520 if (ops->compat_ioctl)
3521 ret = ops->compat_ioctl(sock, cmd, arg);
89bbfc95 3522
87de87d5
DM
3523 if (ret == -ENOIOCTLCMD &&
3524 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3525 ret = compat_wext_handle_ioctl(net, cmd, arg);
3526
6b96018b
AB
3527 if (ret == -ENOIOCTLCMD)
3528 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3529
89bbfc95
SP
3530 return ret;
3531}
3532#endif
3533
8a3c245c
PT
3534/**
3535 * kernel_bind - bind an address to a socket (kernel space)
3536 * @sock: socket
3537 * @addr: address
3538 * @addrlen: length of address
3539 *
3540 * Returns 0 or an error.
3541 */
3542
ac5a488e
SS
3543int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3544{
c889a99a
JR
3545 struct sockaddr_storage address;
3546
3547 memcpy(&address, addr, addrlen);
3548
3549 return READ_ONCE(sock->ops)->bind(sock, (struct sockaddr *)&address,
3550 addrlen);
ac5a488e 3551}
c6d409cf 3552EXPORT_SYMBOL(kernel_bind);
ac5a488e 3553
8a3c245c
PT
3554/**
3555 * kernel_listen - move socket to listening state (kernel space)
3556 * @sock: socket
3557 * @backlog: pending connections queue size
3558 *
3559 * Returns 0 or an error.
3560 */
3561
ac5a488e
SS
3562int kernel_listen(struct socket *sock, int backlog)
3563{
1ded5e5a 3564 return READ_ONCE(sock->ops)->listen(sock, backlog);
ac5a488e 3565}
c6d409cf 3566EXPORT_SYMBOL(kernel_listen);
ac5a488e 3567
8a3c245c
PT
3568/**
3569 * kernel_accept - accept a connection (kernel space)
3570 * @sock: listening socket
3571 * @newsock: new connected socket
3572 * @flags: flags
3573 *
3574 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3575 * If it fails, @newsock is guaranteed to be %NULL.
3576 * Returns 0 or an error.
3577 */
3578
ac5a488e
SS
3579int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3580{
3581 struct sock *sk = sock->sk;
1ded5e5a 3582 const struct proto_ops *ops = READ_ONCE(sock->ops);
ac5a488e
SS
3583 int err;
3584
3585 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3586 newsock);
3587 if (err < 0)
3588 goto done;
3589
1ded5e5a 3590 err = ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3591 if (err < 0) {
3592 sock_release(*newsock);
fa8705b0 3593 *newsock = NULL;
ac5a488e
SS
3594 goto done;
3595 }
3596
1ded5e5a
ED
3597 (*newsock)->ops = ops;
3598 __module_get(ops->owner);
ac5a488e
SS
3599
3600done:
3601 return err;
3602}
c6d409cf 3603EXPORT_SYMBOL(kernel_accept);
ac5a488e 3604
8a3c245c
PT
3605/**
3606 * kernel_connect - connect a socket (kernel space)
3607 * @sock: socket
3608 * @addr: address
3609 * @addrlen: address length
3610 * @flags: flags (O_NONBLOCK, ...)
3611 *
f1dcffcc 3612 * For datagram sockets, @addr is the address to which datagrams are sent
8a3c245c
PT
3613 * by default, and the only address from which datagrams are received.
3614 * For stream sockets, attempts to connect to @addr.
3615 * Returns 0 or an error code.
3616 */
3617
ac5a488e 3618int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3619 int flags)
ac5a488e 3620{
0bdf3993
JR
3621 struct sockaddr_storage address;
3622
3623 memcpy(&address, addr, addrlen);
3624
3625 return READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)&address,
3626 addrlen, flags);
ac5a488e 3627}
c6d409cf 3628EXPORT_SYMBOL(kernel_connect);
ac5a488e 3629
8a3c245c
PT
3630/**
3631 * kernel_getsockname - get the address which the socket is bound (kernel space)
3632 * @sock: socket
3633 * @addr: address holder
3634 *
3635 * Fills the @addr pointer with the address which the socket is bound.
0fc95dec 3636 * Returns the length of the address in bytes or an error code.
8a3c245c
PT
3637 */
3638
9b2c45d4 3639int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3640{
1ded5e5a 3641 return READ_ONCE(sock->ops)->getname(sock, addr, 0);
ac5a488e 3642}
c6d409cf 3643EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3644
8a3c245c 3645/**
645f0897 3646 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3647 * @sock: socket
3648 * @addr: address holder
3649 *
3650 * Fills the @addr pointer with the address which the socket is connected.
0fc95dec 3651 * Returns the length of the address in bytes or an error code.
8a3c245c
PT
3652 */
3653
9b2c45d4 3654int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3655{
1ded5e5a 3656 return READ_ONCE(sock->ops)->getname(sock, addr, 1);
ac5a488e 3657}
c6d409cf 3658EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3659
8a3c245c 3660/**
645f0897 3661 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3662 * @sock: socket
3663 * @how: connection part
3664 *
3665 * Returns 0 or an error.
3666 */
3667
91cf45f0
TM
3668int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3669{
1ded5e5a 3670 return READ_ONCE(sock->ops)->shutdown(sock, how);
91cf45f0 3671}
91cf45f0 3672EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3673
8a3c245c
PT
3674/**
3675 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3676 * @sk: socket
3677 *
3678 * This routine returns the IP overhead imposed by a socket i.e.
3679 * the length of the underlying IP header, depending on whether
3680 * this is an IPv4 or IPv6 socket and the length from IP options turned
3681 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3682 */
8a3c245c 3683
113c3075
P
3684u32 kernel_sock_ip_overhead(struct sock *sk)
3685{
3686 struct inet_sock *inet;
3687 struct ip_options_rcu *opt;
3688 u32 overhead = 0;
113c3075
P
3689#if IS_ENABLED(CONFIG_IPV6)
3690 struct ipv6_pinfo *np;
3691 struct ipv6_txoptions *optv6 = NULL;
3692#endif /* IS_ENABLED(CONFIG_IPV6) */
3693
3694 if (!sk)
3695 return overhead;
3696
113c3075
P
3697 switch (sk->sk_family) {
3698 case AF_INET:
3699 inet = inet_sk(sk);
3700 overhead += sizeof(struct iphdr);
3701 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3702 sock_owned_by_user(sk));
113c3075
P
3703 if (opt)
3704 overhead += opt->opt.optlen;
3705 return overhead;
3706#if IS_ENABLED(CONFIG_IPV6)
3707 case AF_INET6:
3708 np = inet6_sk(sk);
3709 overhead += sizeof(struct ipv6hdr);
3710 if (np)
3711 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3712 sock_owned_by_user(sk));
113c3075
P
3713 if (optv6)
3714 overhead += (optv6->opt_flen + optv6->opt_nflen);
3715 return overhead;
3716#endif /* IS_ENABLED(CONFIG_IPV6) */
3717 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3718 return overhead;
3719 }
3720}
3721EXPORT_SYMBOL(kernel_sock_ip_overhead);