Merge tag 'drm-fixes-2024-06-01' of https://gitlab.freedesktop.org/drm/kernel
[linux-2.6-block.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
aef2feda 55#include <linux/bpf-cgroup.h>
cc69837f 56#include <linux/ethtool.h>
1da177e4 57#include <linux/mm.h>
1da177e4
LT
58#include <linux/socket.h>
59#include <linux/file.h>
2dc334f1 60#include <linux/splice.h>
1da177e4
LT
61#include <linux/net.h>
62#include <linux/interrupt.h>
aaca0bdc 63#include <linux/thread_info.h>
55737fda 64#include <linux/rcupdate.h>
1da177e4
LT
65#include <linux/netdevice.h>
66#include <linux/proc_fs.h>
67#include <linux/seq_file.h>
4a3e2f71 68#include <linux/mutex.h>
1da177e4 69#include <linux/if_bridge.h>
20380731 70#include <linux/if_vlan.h>
408eccce 71#include <linux/ptp_classify.h>
1da177e4
LT
72#include <linux/init.h>
73#include <linux/poll.h>
74#include <linux/cache.h>
75#include <linux/module.h>
76#include <linux/highmem.h>
1da177e4 77#include <linux/mount.h>
fba9be49 78#include <linux/pseudo_fs.h>
1da177e4
LT
79#include <linux/security.h>
80#include <linux/syscalls.h>
81#include <linux/compat.h>
82#include <linux/kmod.h>
3ec3b2fb 83#include <linux/audit.h>
d86b5e0e 84#include <linux/wireless.h>
1b8d7ae4 85#include <linux/nsproxy.h>
1fd7317d 86#include <linux/magic.h>
5a0e3ad6 87#include <linux/slab.h>
600e1779 88#include <linux/xattr.h>
c8e8cd57 89#include <linux/nospec.h>
8c3c447b 90#include <linux/indirect_call_wrapper.h>
8c9a6f54 91#include <linux/io_uring/net.h>
1da177e4 92
7c0f6ba6 93#include <linux/uaccess.h>
1da177e4
LT
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
c7dc504e 106#include <linux/termios.h>
6b96018b 107#include <linux/sockios.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
d7c08826 110#include <linux/ptp_clock_kernel.h>
6e6eda44 111#include <trace/events/sock.h>
06021292 112
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll tunables; only built when CONFIG_NET_RX_BUSY_POLL is enabled. */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
6b96018b 117
8ae5e030
AV
118static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
119static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 120static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
121
122static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
123static __poll_t sock_poll(struct file *file,
124 struct poll_table_struct *wait);
89bddce5 125static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
126#ifdef CONFIG_COMPAT
127static long compat_sock_ioctl(struct file *file,
89bddce5 128 unsigned int cmd, unsigned long arg);
89bbfc95 129#endif
1da177e4 130static int sock_fasync(int fd, struct file *filp, int on);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
2bfc6685 134static void sock_splice_eof(struct file *file);
542d3065
AB
135
#ifdef CONFIG_PROC_FS
/*
 * fdinfo hook for socket files: hand the request to the protocol's
 * show_fdinfo callback when the protocol supplies one, otherwise do nothing.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;
	const struct proto_ops *ops = READ_ONCE(sock->ops);

	if (!ops->show_fdinfo)
		return;

	ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
#endif
1da177e4 148
1da177e4
LT
149/*
150 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
151 * in the operation structures but are done directly via the socketcall() multiplexor.
152 */
153
da7071d7 154static const struct file_operations socket_file_ops = {
1da177e4
LT
155 .owner = THIS_MODULE,
156 .llseek = no_llseek,
8ae5e030
AV
157 .read_iter = sock_read_iter,
158 .write_iter = sock_write_iter,
1da177e4
LT
159 .poll = sock_poll,
160 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
161#ifdef CONFIG_COMPAT
162 .compat_ioctl = compat_sock_ioctl,
163#endif
8e9fad0e 164 .uring_cmd = io_uring_cmd_sock,
1da177e4 165 .mmap = sock_mmap,
1da177e4
LT
166 .release = sock_close,
167 .fasync = sock_fasync,
2dc334f1 168 .splice_write = splice_to_socket,
9c55e01c 169 .splice_read = sock_splice_read,
2bfc6685 170 .splice_eof = sock_splice_eof,
b4653342 171 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
172};
173
fe0bdbde
YD
174static const char * const pf_family_names[] = {
175 [PF_UNSPEC] = "PF_UNSPEC",
176 [PF_UNIX] = "PF_UNIX/PF_LOCAL",
177 [PF_INET] = "PF_INET",
178 [PF_AX25] = "PF_AX25",
179 [PF_IPX] = "PF_IPX",
180 [PF_APPLETALK] = "PF_APPLETALK",
181 [PF_NETROM] = "PF_NETROM",
182 [PF_BRIDGE] = "PF_BRIDGE",
183 [PF_ATMPVC] = "PF_ATMPVC",
184 [PF_X25] = "PF_X25",
185 [PF_INET6] = "PF_INET6",
186 [PF_ROSE] = "PF_ROSE",
187 [PF_DECnet] = "PF_DECnet",
188 [PF_NETBEUI] = "PF_NETBEUI",
189 [PF_SECURITY] = "PF_SECURITY",
190 [PF_KEY] = "PF_KEY",
191 [PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
192 [PF_PACKET] = "PF_PACKET",
193 [PF_ASH] = "PF_ASH",
194 [PF_ECONET] = "PF_ECONET",
195 [PF_ATMSVC] = "PF_ATMSVC",
196 [PF_RDS] = "PF_RDS",
197 [PF_SNA] = "PF_SNA",
198 [PF_IRDA] = "PF_IRDA",
199 [PF_PPPOX] = "PF_PPPOX",
200 [PF_WANPIPE] = "PF_WANPIPE",
201 [PF_LLC] = "PF_LLC",
202 [PF_IB] = "PF_IB",
203 [PF_MPLS] = "PF_MPLS",
204 [PF_CAN] = "PF_CAN",
205 [PF_TIPC] = "PF_TIPC",
206 [PF_BLUETOOTH] = "PF_BLUETOOTH",
207 [PF_IUCV] = "PF_IUCV",
208 [PF_RXRPC] = "PF_RXRPC",
209 [PF_ISDN] = "PF_ISDN",
210 [PF_PHONET] = "PF_PHONET",
211 [PF_IEEE802154] = "PF_IEEE802154",
212 [PF_CAIF] = "PF_CAIF",
213 [PF_ALG] = "PF_ALG",
214 [PF_NFC] = "PF_NFC",
215 [PF_VSOCK] = "PF_VSOCK",
216 [PF_KCM] = "PF_KCM",
217 [PF_QIPCRTR] = "PF_QIPCRTR",
218 [PF_SMC] = "PF_SMC",
219 [PF_XDP] = "PF_XDP",
bc49d816 220 [PF_MCTP] = "PF_MCTP",
fe0bdbde
YD
221};
222
1da177e4
LT
223/*
224 * The protocol list. Each protocol is registered in here.
225 */
226
1da177e4 227static DEFINE_SPINLOCK(net_family_lock);
190683a9 228static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 229
1da177e4 230/*
89bddce5
SH
231 * Support routines.
232 * Move socket addresses back and forth across the kernel/user
233 * divide and look after the messy bits.
1da177e4
LT
234 */
235
1da177e4
LT
236/**
237 * move_addr_to_kernel - copy a socket address into kernel space
238 * @uaddr: Address in user space
239 * @kaddr: Address in kernel space
240 * @ulen: Length in user space
241 *
242 * The address is copied into kernel space. If the provided address is
243 * too long an error code of -EINVAL is returned. If the copy gives
244 * invalid addresses -EFAULT is returned. On a success 0 is returned.
245 */
246
43db362d 247int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 248{
230b1839 249 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 250 return -EINVAL;
89bddce5 251 if (ulen == 0)
1da177e4 252 return 0;
89bddce5 253 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 254 return -EFAULT;
3ec3b2fb 255 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
256}
257
258/**
259 * move_addr_to_user - copy an address to user space
260 * @kaddr: kernel space address
261 * @klen: length of address in kernel
262 * @uaddr: user space address
263 * @ulen: pointer to user length field
264 *
265 * The value pointed to by ulen on entry is the buffer length available.
266 * This is overwritten with the buffer space used. -EINVAL is returned
267 * if an overlong buffer is specified or a negative buffer size. -EFAULT
268 * is returned if either the buffer or the length field are not
269 * accessible.
270 * After copying the data up to the limit the user specifies, the true
271 * length of the data is written over the length limit the user
272 * specified. Zero is returned for a success.
273 */
89bddce5 274
43db362d 275static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 276 void __user *uaddr, int __user *ulen)
1da177e4
LT
277{
278 int err;
279 int len;
280
68c6beb3 281 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
282 err = get_user(len, ulen);
283 if (err)
1da177e4 284 return err;
89bddce5
SH
285 if (len > klen)
286 len = klen;
68c6beb3 287 if (len < 0)
1da177e4 288 return -EINVAL;
89bddce5 289 if (len) {
d6fe3945
SG
290 if (audit_sockaddr(klen, kaddr))
291 return -ENOMEM;
89bddce5 292 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
293 return -EFAULT;
294 }
295 /*
89bddce5
SH
296 * "fromlen shall refer to the value before truncation.."
297 * 1003.1g
1da177e4
LT
298 */
299 return __put_user(klen, ulen);
300}
301
08009a76 302static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
303
304static struct inode *sock_alloc_inode(struct super_block *sb)
305{
306 struct socket_alloc *ei;
89bddce5 307
fd60b288 308 ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
309 if (!ei)
310 return NULL;
333f7909
AV
311 init_waitqueue_head(&ei->socket.wq.wait);
312 ei->socket.wq.fasync_list = NULL;
313 ei->socket.wq.flags = 0;
89bddce5 314
1da177e4
LT
315 ei->socket.state = SS_UNCONNECTED;
316 ei->socket.flags = 0;
317 ei->socket.ops = NULL;
318 ei->socket.sk = NULL;
319 ei->socket.file = NULL;
1da177e4
LT
320
321 return &ei->vfs_inode;
322}
323
6d7855c5 324static void sock_free_inode(struct inode *inode)
1da177e4 325{
43815482
ED
326 struct socket_alloc *ei;
327
328 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 329 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
330}
331
51cc5068 332static void init_once(void *foo)
1da177e4 333{
89bddce5 334 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 335
a35afb83 336 inode_init_once(&ei->vfs_inode);
1da177e4 337}
89bddce5 338
1e911632 339static void init_inodecache(void)
1da177e4
LT
340{
341 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
342 sizeof(struct socket_alloc),
343 0,
344 (SLAB_HWCACHE_ALIGN |
345 SLAB_RECLAIM_ACCOUNT |
d4f01c5e 346 SLAB_ACCOUNT),
20c2df83 347 init_once);
1e911632 348 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
349}
350
b87221de 351static const struct super_operations sockfs_ops = {
c6d409cf 352 .alloc_inode = sock_alloc_inode,
6d7855c5 353 .free_inode = sock_free_inode,
c6d409cf 354 .statfs = simple_statfs,
1da177e4
LT
355};
356
c23fbb6b
ED
357/*
358 * sockfs_dname() is called from d_path().
359 */
360static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
361{
0f60d288 362 return dynamic_dname(buffer, buflen, "socket:[%lu]",
c5ef6035 363 d_inode(dentry)->i_ino);
c23fbb6b
ED
364}
365
3ba13d17 366static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 367 .d_dname = sockfs_dname,
1da177e4
LT
368};
369
bba0bd31
AG
370static int sockfs_xattr_get(const struct xattr_handler *handler,
371 struct dentry *dentry, struct inode *inode,
372 const char *suffix, void *value, size_t size)
373{
374 if (value) {
375 if (dentry->d_name.len + 1 > size)
376 return -ERANGE;
377 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
378 }
379 return dentry->d_name.len + 1;
380}
381
382#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
383#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
384#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
385
386static const struct xattr_handler sockfs_xattr_handler = {
387 .name = XATTR_NAME_SOCKPROTONAME,
388 .get = sockfs_xattr_get,
389};
390
4a590153 391static int sockfs_security_xattr_set(const struct xattr_handler *handler,
39f60c1c 392 struct mnt_idmap *idmap,
4a590153
AG
393 struct dentry *dentry, struct inode *inode,
394 const char *suffix, const void *value,
395 size_t size, int flags)
396{
397 /* Handled by LSM. */
398 return -EAGAIN;
399}
400
401static const struct xattr_handler sockfs_security_xattr_handler = {
402 .prefix = XATTR_SECURITY_PREFIX,
403 .set = sockfs_security_xattr_set,
404};
405
295d3c44 406static const struct xattr_handler * const sockfs_xattr_handlers[] = {
bba0bd31 407 &sockfs_xattr_handler,
4a590153 408 &sockfs_security_xattr_handler,
bba0bd31
AG
409 NULL
410};
411
fba9be49 412static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 413{
fba9be49
DH
414 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
415 if (!ctx)
416 return -ENOMEM;
417 ctx->ops = &sockfs_ops;
418 ctx->dops = &sockfs_dentry_operations;
419 ctx->xattr = sockfs_xattr_handlers;
420 return 0;
c74a1cbb
AV
421}
422
423static struct vfsmount *sock_mnt __read_mostly;
424
425static struct file_system_type sock_fs_type = {
426 .name = "sockfs",
fba9be49 427 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
428 .kill_sb = kill_anon_super,
429};
430
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them into the fd
 *	space of the current process. On success the file descriptor is
 *	returned and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we no longer
 *	refer to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY not be valid!
 *	This race condition is unavoidable with shared fd spaces;
 *	we cannot solve it inside the kernel,
 *	but we still take care of internal coherence.
 */
447
8a3c245c
PT
448/**
449 * sock_alloc_file - Bind a &socket to a &file
450 * @sock: socket
451 * @flags: file status flags
452 * @dname: protocol name
453 *
454 * Returns the &file bound with @sock, implicitly storing it
455 * in sock->file. If dname is %NULL, sets to "".
649c15c7
TLSC
456 *
457 * On failure @sock is released, and an ERR pointer is returned.
458 *
8a3c245c
PT
459 * This function uses GFP_KERNEL internally.
460 */
461
aab174f0 462struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 463{
7cbe66b6 464 struct file *file;
1da177e4 465
d93aa9d8
AV
466 if (!dname)
467 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 468
d93aa9d8
AV
469 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
470 O_RDWR | (flags & O_NONBLOCK),
471 &socket_file_ops);
b5ffe634 472 if (IS_ERR(file)) {
8e1611e2 473 sock_release(sock);
39b65252 474 return file;
cc3808f8
AV
475 }
476
fe34db06 477 file->f_mode |= FMODE_NOWAIT;
cc3808f8 478 sock->file = file;
39d8c1b6 479 file->private_data = sock;
d8e464ec 480 stream_open(SOCK_INODE(sock), file);
28407630 481 return file;
39d8c1b6 482}
56b31d1c 483EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 484
56b31d1c 485static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
486{
487 struct file *newfile;
28407630 488 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
489 if (unlikely(fd < 0)) {
490 sock_release(sock);
28407630 491 return fd;
ce4bb04c 492 }
39d8c1b6 493
aab174f0 494 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 495 if (!IS_ERR(newfile)) {
39d8c1b6 496 fd_install(fd, newfile);
28407630
AV
497 return fd;
498 }
7cbe66b6 499
28407630
AV
500 put_unused_fd(fd);
501 return PTR_ERR(newfile);
1da177e4
LT
502}
503
8a3c245c
PT
504/**
505 * sock_from_file - Return the &socket bounded to @file.
506 * @file: file
8a3c245c 507 *
dba4a925 508 * On failure returns %NULL.
8a3c245c
PT
509 */
510
dba4a925 511struct socket *sock_from_file(struct file *file)
6cb153ca 512{
6cb153ca 513 if (file->f_op == &socket_file_ops)
da214a47 514 return file->private_data; /* set in sock_alloc_file */
6cb153ca 515
23bb80d2 516 return NULL;
6cb153ca 517}
406a3c63 518EXPORT_SYMBOL(sock_from_file);
6cb153ca 519
1da177e4 520/**
c6d409cf 521 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
522 * @fd: file handle
523 * @err: pointer to an error code return
524 *
525 * The file handle passed in is locked and the socket it is bound
241c4667 526 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
527 * with a negative errno code and NULL is returned. The function checks
528 * for both invalid handles and passing a handle which is not a socket.
529 *
530 * On a success the socket object pointer is returned.
531 */
532
533struct socket *sockfd_lookup(int fd, int *err)
534{
535 struct file *file;
1da177e4
LT
536 struct socket *sock;
537
89bddce5
SH
538 file = fget(fd);
539 if (!file) {
1da177e4
LT
540 *err = -EBADF;
541 return NULL;
542 }
89bddce5 543
dba4a925
FR
544 sock = sock_from_file(file);
545 if (!sock) {
546 *err = -ENOTSOCK;
1da177e4 547 fput(file);
dba4a925 548 }
6cb153ca
BL
549 return sock;
550}
c6d409cf 551EXPORT_SYMBOL(sockfd_lookup);
1da177e4 552
6cb153ca
BL
553static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
554{
00e188ef 555 struct fd f = fdget(fd);
6cb153ca
BL
556 struct socket *sock;
557
3672558c 558 *err = -EBADF;
00e188ef 559 if (f.file) {
dba4a925 560 sock = sock_from_file(f.file);
00e188ef 561 if (likely(sock)) {
ce787a5a 562 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 563 return sock;
00e188ef 564 }
dba4a925 565 *err = -ENOTSOCK;
00e188ef 566 fdput(f);
1da177e4 567 }
6cb153ca 568 return NULL;
1da177e4
LT
569}
570
600e1779
MY
571static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
572 size_t size)
573{
574 ssize_t len;
575 ssize_t used = 0;
576
c5ef6035 577 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
578 if (len < 0)
579 return len;
580 used += len;
581 if (buffer) {
582 if (size < used)
583 return -ERANGE;
584 buffer += len;
585 }
586
587 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
588 used += len;
589 if (buffer) {
590 if (size < used)
591 return -ERANGE;
592 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
593 buffer += len;
594 }
595
596 return used;
597}
598
c1632a0f 599static int sockfs_setattr(struct mnt_idmap *idmap,
549c7297 600 struct dentry *dentry, struct iattr *iattr)
86741ec2 601{
c1632a0f 602 int err = simple_setattr(&nop_mnt_idmap, dentry, iattr);
86741ec2 603
e1a3a60a 604 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
605 struct socket *sock = SOCKET_I(d_inode(dentry));
606
6d8c50dc
CW
607 if (sock->sk)
608 sock->sk->sk_uid = iattr->ia_uid;
609 else
610 err = -ENOENT;
86741ec2
LC
611 }
612
613 return err;
614}
615
600e1779 616static const struct inode_operations sockfs_inode_ops = {
600e1779 617 .listxattr = sockfs_listxattr,
86741ec2 618 .setattr = sockfs_setattr,
600e1779
MY
619};
620
1da177e4 621/**
8a3c245c 622 * sock_alloc - allocate a socket
89bddce5 623 *
1da177e4
LT
624 * Allocate a new inode and socket object. The two are bound together
625 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 626 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
627 */
628
f4a00aac 629struct socket *sock_alloc(void)
1da177e4 630{
89bddce5
SH
631 struct inode *inode;
632 struct socket *sock;
1da177e4 633
a209dfc7 634 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
635 if (!inode)
636 return NULL;
637
638 sock = SOCKET_I(inode);
639
85fe4025 640 inode->i_ino = get_next_ino();
89bddce5 641 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
642 inode->i_uid = current_fsuid();
643 inode->i_gid = current_fsgid();
600e1779 644 inode->i_op = &sockfs_inode_ops;
1da177e4 645
1da177e4
LT
646 return sock;
647}
f4a00aac 648EXPORT_SYMBOL(sock_alloc);
1da177e4 649
6d8c50dc 650static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4 651{
1ded5e5a
ED
652 const struct proto_ops *ops = READ_ONCE(sock->ops);
653
654 if (ops) {
655 struct module *owner = ops->owner;
1da177e4 656
6d8c50dc
CW
657 if (inode)
658 inode_lock(inode);
1ded5e5a 659 ops->release(sock);
ff7b11aa 660 sock->sk = NULL;
6d8c50dc
CW
661 if (inode)
662 inode_unlock(inode);
1da177e4
LT
663 sock->ops = NULL;
664 module_put(owner);
665 }
666
333f7909 667 if (sock->wq.fasync_list)
3410f22e 668 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 669
1da177e4
LT
670 if (!sock->file) {
671 iput(SOCK_INODE(sock));
672 return;
673 }
89bddce5 674 sock->file = NULL;
1da177e4 675}
6d8c50dc 676
9a8ad9ac
AL
677/**
678 * sock_release - close a socket
679 * @sock: socket to close
680 *
681 * The socket is released from the protocol stack if it has a release
682 * callback, and the inode is then released if the socket is bound to
683 * an inode not a file.
684 */
6d8c50dc
CW
685void sock_release(struct socket *sock)
686{
687 __sock_release(sock, NULL);
688}
c6d409cf 689EXPORT_SYMBOL(sock_release);
1da177e4 690
c14ac945 691void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 692{
140c55d4
ED
693 u8 flags = *tx_flags;
694
51eb7492 695 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
140c55d4
ED
696 flags |= SKBTX_HW_TSTAMP;
697
51eb7492
GE
698 /* PTP hardware clocks can provide a free running cycle counter
699 * as a time base for virtual clocks. Tell driver to use the
700 * free running cycle counter for timestamp if socket is bound
701 * to virtual clock.
702 */
703 if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
704 flags |= SKBTX_HW_TSTAMP_USE_CYCLES;
705 }
706
c14ac945 707 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
708 flags |= SKBTX_SW_TSTAMP;
709
c14ac945 710 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
711 flags |= SKBTX_SCHED_TSTAMP;
712
140c55d4 713 *tx_flags = flags;
20d49473 714}
67cc0d40 715EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 716
8c3c447b
PA
717INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
718 size_t));
a648a592
PA
719INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
720 size_t));
6e6eda44
YC
721
722static noinline void call_trace_sock_send_length(struct sock *sk, int ret,
723 int flags)
724{
725 trace_sock_send_length(sk, ret, 0);
726}
727
d8725c86 728static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 729{
1ded5e5a 730 int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->sendmsg, inet6_sendmsg,
a648a592
PA
731 inet_sendmsg, sock, msg,
732 msg_data_left(msg));
d8725c86 733 BUG_ON(ret == -EIOCBQUEUED);
6e6eda44
YC
734
735 if (trace_sock_send_length_enabled())
736 call_trace_sock_send_length(sock->sk, ret, 0);
d8725c86 737 return ret;
1da177e4
LT
738}
739
86a7e0b6
JR
/*
 * Run the security_socket_sendmsg() hook and, only if it allows the
 * operation (returns 0), send the message. A non-zero hook result is
 * returned unchanged.
 */
static int __sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
747
85806af0
RD
748/**
749 * sock_sendmsg - send a message through @sock
750 * @sock: socket
751 * @msg: message to send
752 *
753 * Sends @msg through @sock, passing through LSM.
754 * Returns the number of bytes sent, or an error code.
755 */
d8725c86 756int sock_sendmsg(struct socket *sock, struct msghdr *msg)
228e548e 757{
86a7e0b6
JR
758 struct sockaddr_storage *save_addr = (struct sockaddr_storage *)msg->msg_name;
759 struct sockaddr_storage address;
01b2885d 760 int save_len = msg->msg_namelen;
86a7e0b6 761 int ret;
228e548e 762
86a7e0b6
JR
763 if (msg->msg_name) {
764 memcpy(&address, msg->msg_name, msg->msg_namelen);
765 msg->msg_name = &address;
766 }
767
768 ret = __sock_sendmsg(sock, msg);
769 msg->msg_name = save_addr;
01b2885d 770 msg->msg_namelen = save_len;
86a7e0b6
JR
771
772 return ret;
0cf00c6f 773}
c6d409cf 774EXPORT_SYMBOL(sock_sendmsg);
1da177e4 775
8a3c245c
PT
776/**
777 * kernel_sendmsg - send a message through @sock (kernel-space)
778 * @sock: socket
779 * @msg: message header
780 * @vec: kernel vec
781 * @num: vec array length
782 * @size: total message data size
783 *
784 * Builds the message data with @vec and sends it through @sock.
785 * Returns the number of bytes sent, or an error code.
786 */
787
1da177e4
LT
788int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
789 struct kvec *vec, size_t num, size_t size)
790{
de4eda9d 791 iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
d8725c86 792 return sock_sendmsg(sock, msg);
1da177e4 793}
c6d409cf 794EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 795
8a3c245c
PT
796/**
797 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
798 * @sk: sock
799 * @msg: message header
800 * @vec: output s/g array
801 * @num: output s/g array length
802 * @size: total message data size
803 *
804 * Builds the message data with @vec and sends it through @sock.
805 * Returns the number of bytes sent, or an error code.
806 * Caller must hold @sk.
807 */
808
306b13eb
TH
809int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
810 struct kvec *vec, size_t num, size_t size)
811{
812 struct socket *sock = sk->sk_socket;
1ded5e5a 813 const struct proto_ops *ops = READ_ONCE(sock->ops);
306b13eb 814
1ded5e5a 815 if (!ops->sendmsg_locked)
db5980d8 816 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 817
de4eda9d 818 iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
306b13eb 819
1ded5e5a 820 return ops->sendmsg_locked(sk, msg, msg_data_left(msg));
306b13eb
TH
821}
822EXPORT_SYMBOL(kernel_sendmsg_locked);
823
8605330a
SHY
824static bool skb_is_err_queue(const struct sk_buff *skb)
825{
826 /* pkt_type of skbs enqueued on the error queue are set to
827 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
828 * in recvmsg, since skbs received on a local socket will never
829 * have a pkt_type of PACKET_OUTGOING.
830 */
831 return skb->pkt_type == PACKET_OUTGOING;
832}
833
b50a5c70
ML
834/* On transmit, software and hardware timestamps are returned independently.
835 * As the two skb clones share the hardware timestamp, which may be updated
836 * before the software timestamp is received, a hardware TX timestamp may be
837 * returned only if there is no software TX timestamp. Ignore false software
838 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 839 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
840 * hardware timestamp.
841 */
842static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
843{
844 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
845}
846
97dc7cd9
GE
847static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
848{
e3390b30 849 bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
97dc7cd9
GE
850 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
851 struct net_device *orig_dev;
852 ktime_t hwtstamp;
853
854 rcu_read_lock();
855 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
856 if (orig_dev) {
857 *if_index = orig_dev->ifindex;
858 hwtstamp = netdev_get_tstamp(orig_dev, shhwtstamps, cycles);
859 } else {
860 hwtstamp = shhwtstamps->hwtstamp;
861 }
862 rcu_read_unlock();
863
864 return hwtstamp;
865}
866
867static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb,
868 int if_index)
aad9c8c4
ML
869{
870 struct scm_ts_pktinfo ts_pktinfo;
871 struct net_device *orig_dev;
872
873 if (!skb_mac_header_was_set(skb))
874 return;
875
876 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
877
97dc7cd9
GE
878 if (!if_index) {
879 rcu_read_lock();
880 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
881 if (orig_dev)
882 if_index = orig_dev->ifindex;
883 rcu_read_unlock();
884 }
885 ts_pktinfo.if_index = if_index;
aad9c8c4
ML
886
887 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
888 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
889 sizeof(ts_pktinfo), &ts_pktinfo);
890}
891
92f37fd2
ED
892/*
893 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
894 */
895void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
896 struct sk_buff *skb)
897{
20d49473 898 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 899 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e 900 struct scm_timestamping_internal tss;
b50a5c70 901 int empty = 1, false_tstamp = 0;
20d49473
PO
902 struct skb_shared_hwtstamps *shhwtstamps =
903 skb_hwtstamps(skb);
97dc7cd9 904 int if_index;
007747a9 905 ktime_t hwtstamp;
e3390b30 906 u32 tsflags;
20d49473
PO
907
908 /* Race occurred between timestamp enabling and packet
909 receiving. Fill in the current time for now. */
b50a5c70 910 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 911 __net_timestamp(skb);
b50a5c70
ML
912 false_tstamp = 1;
913 }
20d49473
PO
914
915 if (need_software_tstamp) {
916 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
917 if (new_tstamp) {
918 struct __kernel_sock_timeval tv;
919
920 skb_get_new_timestamp(skb, &tv);
921 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
922 sizeof(tv), &tv);
923 } else {
924 struct __kernel_old_timeval tv;
925
926 skb_get_timestamp(skb, &tv);
927 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
928 sizeof(tv), &tv);
929 }
20d49473 930 } else {
887feae3
DD
931 if (new_tstamp) {
932 struct __kernel_timespec ts;
933
934 skb_get_new_timestampns(skb, &ts);
935 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
936 sizeof(ts), &ts);
937 } else {
df1b4ba9 938 struct __kernel_old_timespec ts;
887feae3
DD
939
940 skb_get_timestampns(skb, &ts);
941 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
942 sizeof(ts), &ts);
943 }
20d49473
PO
944 }
945 }
946
f24b9be5 947 memset(&tss, 0, sizeof(tss));
e3390b30
ED
948 tsflags = READ_ONCE(sk->sk_tsflags);
949 if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 950 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 951 empty = 0;
4d276eb6 952 if (shhwtstamps &&
e3390b30 953 (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
d7c08826 954 !skb_is_swtx_tstamp(skb, false_tstamp)) {
97dc7cd9
GE
955 if_index = 0;
956 if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
957 hwtstamp = get_timestamp(sk, skb, &if_index);
007747a9
ML
958 else
959 hwtstamp = shhwtstamps->hwtstamp;
d7c08826 960
e3390b30 961 if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
97dc7cd9 962 hwtstamp = ptp_convert_timestamp(&hwtstamp,
251cd405 963 READ_ONCE(sk->sk_bind_phc));
97dc7cd9 964
007747a9 965 if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
d7c08826
YL
966 empty = 0;
967
e3390b30 968 if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
d7c08826 969 !skb_is_err_queue(skb))
97dc7cd9 970 put_ts_pktinfo(msg, skb, if_index);
d7c08826 971 }
aad9c8c4 972 }
1c885808 973 if (!empty) {
9718475e
DD
974 if (sock_flag(sk, SOCK_TSTAMP_NEW))
975 put_cmsg_scm_timestamping64(msg, &tss);
976 else
977 put_cmsg_scm_timestamping(msg, &tss);
1c885808 978
8605330a 979 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 980 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
981 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
982 skb->len, skb->data);
983 }
92f37fd2 984}
7c81fd8b
ACM
985EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
986
eb6fba75 987#ifdef CONFIG_WIRELESS
6e3e939f
JB
988void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
989 struct sk_buff *skb)
990{
991 int ack;
992
993 if (!sock_flag(sk, SOCK_WIFI_STATUS))
994 return;
995 if (!skb->wifi_acked_valid)
996 return;
997
998 ack = skb->wifi_acked;
999
1000 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
1001}
1002EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
eb6fba75 1003#endif
6e3e939f 1004
11165f14 1005static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
1006 struct sk_buff *skb)
3b885787 1007{
744d5a3e 1008 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 1009 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 1010 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
1011}
1012
6fd1d51c
EM
1013static void sock_recv_mark(struct msghdr *msg, struct sock *sk,
1014 struct sk_buff *skb)
1015{
2558b803
ED
1016 if (sock_flag(sk, SOCK_RCVMARK) && skb) {
1017 /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */
1018 __u32 mark = skb->mark;
1019
1020 put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), &mark);
1021 }
6fd1d51c
EM
1022}
1023
/*
 * Emit the generic per-packet control messages for @skb into @msg by
 * calling, in order, the timestamp, drop-count and mark helpers.  The
 * drop-count and mark helpers check their own socket flag and tolerate
 * a NULL @skb.
 */
void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
		       struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
	sock_recv_mark(msg, sk, skb);
}
6fd1d51c 1031EXPORT_SYMBOL_GPL(__sock_recv_cmsgs);
3b885787 1032
8c3c447b 1033INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
1034 size_t, int));
1035INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
1036 size_t, int));
6e6eda44
YC
1037
/*
 * Out-of-line (noinline) wrapper around the sock_recv_length tracepoint.
 * NOTE(review): presumably kept out of line so the tracepoint body does
 * not bloat the inlined receive fast path -- confirm against the commit.
 */
static noinline void call_trace_sock_recv_length(struct sock *sk, int ret, int flags)
{
	trace_sock_recv_length(sk, ret, flags);
}
1042
1b784140 1043static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 1044 int flags)
1da177e4 1045{
1ded5e5a
ED
1046 int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->recvmsg,
1047 inet6_recvmsg,
6e6eda44
YC
1048 inet_recvmsg, sock, msg,
1049 msg_data_left(msg), flags);
1050 if (trace_sock_recv_length_enabled())
1051 call_trace_sock_recv_length(sock->sk, ret, flags);
1052 return ret;
1da177e4
LT
1053}
1054
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Asks the LSM for permission first; if the hook returns non-zero that
 *	value is returned and the protocol is never called.  Otherwise
 *	returns the total number of bytes received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
c6d409cf 1070EXPORT_SYMBOL(sock_recvmsg);
1da177e4 1071
/**
 *	kernel_recvmsg - Receive a message from a socket (kernel space)
 *	@sock: The socket to receive the message from
 *	@msg: Received message
 *	@vec: Input s/g array for message data
 *	@num: Size of input s/g array
 *	@size: Number of bytes to read
 *	@flags: Message flags (MSG_DONTWAIT, etc...)
 *
 *	On return the msg structure contains the scatter/gather array passed in the
 *	vec argument. The array is modified so that it consists of the unfilled
 *	portion of the original array.
 *
 *	The returned value is the total number of bytes received, or an error.
 */

int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
		   struct kvec *vec, size_t num, size_t size, int flags)
{
	/* Any control buffer supplied by the caller is a kernel pointer. */
	msg->msg_control_is_user = false;
	/* Point the message iterator at the caller's kvec as the copy destination. */
	iov_iter_kvec(&msg->msg_iter, ITER_DEST, vec, num, size);
	return sock_recvmsg(sock, msg, flags);
}
c6d409cf 1095EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 1096
9c55e01c 1097static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 1098 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
1099 unsigned int flags)
1100{
1101 struct socket *sock = file->private_data;
1ded5e5a 1102 const struct proto_ops *ops;
9c55e01c 1103
1ded5e5a
ED
1104 ops = READ_ONCE(sock->ops);
1105 if (unlikely(!ops->splice_read))
67178fd0 1106 return copy_splice_read(file, ppos, pipe, len, flags);
997b37da 1107
1ded5e5a 1108 return ops->splice_read(sock, ppos, pipe, len, flags);
9c55e01c
JA
1109}
1110
2bfc6685
DH
1111static void sock_splice_eof(struct file *file)
1112{
1113 struct socket *sock = file->private_data;
1ded5e5a 1114 const struct proto_ops *ops;
2bfc6685 1115
1ded5e5a
ED
1116 ops = READ_ONCE(sock->ops);
1117 if (ops->splice_eof)
1118 ops->splice_eof(sock);
2bfc6685
DH
1119}
1120
8ae5e030 1121static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 1122{
6d652330
AV
1123 struct file *file = iocb->ki_filp;
1124 struct socket *sock = file->private_data;
0345f931 1125 struct msghdr msg = {.msg_iter = *to,
1126 .msg_iocb = iocb};
8ae5e030 1127 ssize_t res;
ce1d4d3e 1128
ebfcd895 1129 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1130 msg.msg_flags = MSG_DONTWAIT;
1131
1132 if (iocb->ki_pos != 0)
1da177e4 1133 return -ESPIPE;
027445c3 1134
66ee59af 1135 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
1136 return 0;
1137
2da62906 1138 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
1139 *to = msg.msg_iter;
1140 return res;
1da177e4
LT
1141}
1142
8ae5e030 1143static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 1144{
6d652330
AV
1145 struct file *file = iocb->ki_filp;
1146 struct socket *sock = file->private_data;
0345f931 1147 struct msghdr msg = {.msg_iter = *from,
1148 .msg_iocb = iocb};
8ae5e030 1149 ssize_t res;
1da177e4 1150
8ae5e030 1151 if (iocb->ki_pos != 0)
ce1d4d3e 1152 return -ESPIPE;
027445c3 1153
ebfcd895 1154 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1155 msg.msg_flags = MSG_DONTWAIT;
1156
6d652330
AV
1157 if (sock->type == SOCK_SEQPACKET)
1158 msg.msg_flags |= MSG_EOR;
1159
86a7e0b6 1160 res = __sock_sendmsg(sock, &msg);
8ae5e030
AV
1161 *from = msg.msg_iter;
1162 return res;
1da177e4
LT
1163}
1164
1da177e4
LT
1165/*
1166 * Atomic setting of ioctl hooks to avoid race
1167 * with module unload.
1168 */
1169
4a3e2f71 1170static DEFINE_MUTEX(br_ioctl_mutex);
ad2f99ae
AB
1171static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br,
1172 unsigned int cmd, struct ifreq *ifr,
1173 void __user *uarg);
1da177e4 1174
ad2f99ae
AB
/*
 * Install @hook as the bridge ioctl handler.  The pointer is replaced
 * under br_ioctl_mutex, the same mutex br_ioctl_call() holds while
 * invoking the hook, so the swap cannot overlap a call in progress.
 */
void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
			     unsigned int cmd, struct ifreq *ifr,
			     void __user *uarg))
{
	mutex_lock(&br_ioctl_mutex);
	br_ioctl_hook = hook;
	mutex_unlock(&br_ioctl_mutex);
}
1183EXPORT_SYMBOL(brioctl_set);
1184
ad2f99ae
AB
1185int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
1186 struct ifreq *ifr, void __user *uarg)
1187{
1188 int err = -ENOPKG;
1189
1190 if (!br_ioctl_hook)
1191 request_module("bridge");
1192
1193 mutex_lock(&br_ioctl_mutex);
1194 if (br_ioctl_hook)
1195 err = br_ioctl_hook(net, br, cmd, ifr, uarg);
1196 mutex_unlock(&br_ioctl_mutex);
1197
1198 return err;
1199}
1200
4a3e2f71 1201static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1202static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1203
/*
 * Install @hook as the VLAN ioctl handler.  The pointer is replaced
 * under vlan_ioctl_mutex, which sock_ioctl() also holds while calling
 * the hook.
 */
void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
{
	mutex_lock(&vlan_ioctl_mutex);
	vlan_ioctl_hook = hook;
	mutex_unlock(&vlan_ioctl_mutex);
}
1210EXPORT_SYMBOL(vlan_ioctl_set);
1211
6b96018b 1212static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1213 unsigned int cmd, unsigned long arg)
6b96018b 1214{
1ded5e5a 1215 const struct proto_ops *ops = READ_ONCE(sock->ops);
876f0bf9
AB
1216 struct ifreq ifr;
1217 bool need_copyout;
6b96018b
AB
1218 int err;
1219 void __user *argp = (void __user *)arg;
a554bf96 1220 void __user *data;
6b96018b 1221
1ded5e5a 1222 err = ops->ioctl(sock, cmd, arg);
6b96018b
AB
1223
1224 /*
1225 * If this ioctl is unknown try to hand it down
1226 * to the NIC driver.
1227 */
36fd633e
AV
1228 if (err != -ENOIOCTLCMD)
1229 return err;
6b96018b 1230
29ce8f97
JK
1231 if (!is_socket_ioctl_cmd(cmd))
1232 return -ENOTTY;
1233
a554bf96 1234 if (get_user_ifreq(&ifr, &data, argp))
876f0bf9 1235 return -EFAULT;
a554bf96 1236 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
876f0bf9 1237 if (!err && need_copyout)
a554bf96 1238 if (put_user_ifreq(&ifr, argp))
44c02a2c 1239 return -EFAULT;
876f0bf9 1240
6b96018b
AB
1241 return err;
1242}
1243
1da177e4
LT
/*
 *	With an ioctl, arg may well be a user mode pointer, but we don't know
 *	what to do with it - that's up to the protocol still.
 *
 *	Dispatch order: device-private commands, then (with
 *	CONFIG_WEXT_CORE) wireless-extension commands, then the commands
 *	handled in the switch below; everything else goes to
 *	sock_do_ioctl().
 */

static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	const struct proto_ops  *ops;
	struct socket *sock;
	struct sock *sk;
	void __user *argp = (void __user *)arg;
	int pid, err;
	struct net *net;

	sock = file->private_data;
	ops = READ_ONCE(sock->ops);
	sk = sock->sk;
	net = sock_net(sk);
	/* SIOCDEVPRIVATE .. SIOCDEVPRIVATE + 15 bypass the protocol and go to dev_ioctl(). */
	if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
		struct ifreq ifr;
		void __user *data;
		bool need_copyout;
		if (get_user_ifreq(&ifr, &data, argp))
			return -EFAULT;
		err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
		if (!err && need_copyout)
			if (put_user_ifreq(&ifr, argp))
				return -EFAULT;
	} else
#ifdef CONFIG_WEXT_CORE
	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
		err = wext_handle_ioctl(net, cmd, argp);
	} else
#endif
		switch (cmd) {
		case FIOSETOWN:
		case SIOCSPGRP:
			/* Owner is read from user space as an int. */
			err = -EFAULT;
			if (get_user(pid, (int __user *)argp))
				break;
			err = f_setown(sock->file, pid, 1);
			break;
		case FIOGETOWN:
		case SIOCGPGRP:
			err = put_user(f_getown(sock->file),
				       (int __user *)argp);
			break;
		case SIOCGIFBR:
		case SIOCSIFBR:
		case SIOCBRADDBR:
		case SIOCBRDELBR:
			/* No bridge and no ifreq: these commands act on the namespace. */
			err = br_ioctl_call(net, NULL, cmd, NULL, argp);
			break;
		case SIOCGIFVLAN:
		case SIOCSIFVLAN:
			/* -ENOPKG is returned if no hook is registered even after the module load attempt. */
			err = -ENOPKG;
			if (!vlan_ioctl_hook)
				request_module("8021q");

			mutex_lock(&vlan_ioctl_mutex);
			if (vlan_ioctl_hook)
				err = vlan_ioctl_hook(net, argp);
			mutex_unlock(&vlan_ioctl_mutex);
			break;
		case SIOCGSKNS:
			/* Requires CAP_NET_ADMIN in the user namespace owning this netns. */
			err = -EPERM;
			if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
				break;

			err = open_related_ns(&net->ns, get_net_ns);
			break;
		case SIOCGSTAMP_OLD:
		case SIOCGSTAMPNS_OLD:
			if (!ops->gettstamp) {
				err = -ENOIOCTLCMD;
				break;
			}
			/*
			 * NOTE(review): third argument distinguishes the
			 * SIOCGSTAMP from the SIOCGSTAMPNS flavour; the fourth
			 * is true only on non-64-bit builds -- presumably a
			 * "32-bit time layout" switch, confirm in ->gettstamp().
			 */
			err = ops->gettstamp(sock, argp,
					     cmd == SIOCGSTAMP_OLD,
					     !IS_ENABLED(CONFIG_64BIT));
			break;
		case SIOCGSTAMP_NEW:
		case SIOCGSTAMPNS_NEW:
			if (!ops->gettstamp) {
				err = -ENOIOCTLCMD;
				break;
			}
			err = ops->gettstamp(sock, argp,
					     cmd == SIOCGSTAMP_NEW,
					     false);
			break;

		case SIOCGIFCONF:
			err = dev_ifconf(net, argp);
			break;

		default:
			/* Protocol first, then the device layer. */
			err = sock_do_ioctl(net, sock, cmd, arg);
			break;
		}
	return err;
}
1346
8a3c245c
PT
1347/**
1348 * sock_create_lite - creates a socket
1349 * @family: protocol family (AF_INET, ...)
1350 * @type: communication type (SOCK_STREAM, ...)
1351 * @protocol: protocol (0, ...)
1352 * @res: new socket
1353 *
1354 * Creates a new socket and assigns it to @res, passing through LSM.
1355 * The new socket initialization is not complete, see kernel_accept().
1356 * Returns 0 or an error. On failure @res is set to %NULL.
1357 * This function internally uses GFP_KERNEL.
1358 */
1359
1da177e4
LT
1360int sock_create_lite(int family, int type, int protocol, struct socket **res)
1361{
1362 int err;
1363 struct socket *sock = NULL;
89bddce5 1364
1da177e4
LT
1365 err = security_socket_create(family, type, protocol, 1);
1366 if (err)
1367 goto out;
1368
1369 sock = sock_alloc();
1370 if (!sock) {
1371 err = -ENOMEM;
1372 goto out;
1373 }
1374
1da177e4 1375 sock->type = type;
7420ed23
VY
1376 err = security_socket_post_create(sock, family, type, protocol, 1);
1377 if (err)
1378 goto out_release;
1379
1da177e4
LT
1380out:
1381 *res = sock;
1382 return err;
7420ed23
VY
1383out_release:
1384 sock_release(sock);
1385 sock = NULL;
1386 goto out;
1da177e4 1387}
c6d409cf 1388EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1389
1390/* No kernel lock held - perfect */
ade994f4 1391static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1392{
3cafb376 1393 struct socket *sock = file->private_data;
1ded5e5a 1394 const struct proto_ops *ops = READ_ONCE(sock->ops);
a331de3b 1395 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1396
1ded5e5a 1397 if (!ops->poll)
e88958e6 1398 return 0;
f641f13b 1399
a331de3b
CH
1400 if (sk_can_busy_loop(sock->sk)) {
1401 /* poll once if requested by the syscall */
1402 if (events & POLL_BUSY_LOOP)
1403 sk_busy_loop(sock->sk, 1);
1404
1405 /* if this socket can poll_ll, tell the system call */
1406 flag = POLL_BUSY_LOOP;
1407 }
1408
1ded5e5a 1409 return ops->poll(file, sock, wait) | flag;
1da177e4
LT
1410}
1411
/*
 * mmap file operation for sockets: forwards straight to the protocol's
 * ->mmap().  The hook is called without a NULL check.
 * NOTE(review): this relies on every proto_ops providing ->mmap --
 * confirm against the proto_ops definitions.
 */
static int sock_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct socket *sock = file->private_data;

	return READ_ONCE(sock->ops)->mmap(file, sock, vma);
}
1418
/*
 * release file operation for sockets: releases the socket embedded in
 * @inode.  @filp is unused and the function always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	__sock_release(SOCKET_I(inode), inode);
	return 0;
}
1424
1425/*
1426 * Update the socket async list
1427 *
1428 * Fasync_list locking strategy.
1429 *
1430 * 1. fasync_list is modified only under process context socket lock
1431 * i.e. under semaphore.
1432 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1433 * or under socket lock
1da177e4
LT
1434 */
1435
1436static int sock_fasync(int fd, struct file *filp, int on)
1437{
989a2979
ED
1438 struct socket *sock = filp->private_data;
1439 struct sock *sk = sock->sk;
333f7909 1440 struct socket_wq *wq = &sock->wq;
1da177e4 1441
989a2979 1442 if (sk == NULL)
1da177e4 1443 return -EINVAL;
1da177e4
LT
1444
1445 lock_sock(sk);
eaefd110 1446 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1447
eaefd110 1448 if (!wq->fasync_list)
989a2979
ED
1449 sock_reset_flag(sk, SOCK_FASYNC);
1450 else
bcdce719 1451 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1452
989a2979 1453 release_sock(sk);
1da177e4
LT
1454 return 0;
1455}
1456
ceb5d58b 1457/* This function may be called only under rcu_lock */
1da177e4 1458
ceb5d58b 1459int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1460{
ceb5d58b 1461 if (!wq || !wq->fasync_list)
1da177e4 1462 return -1;
ceb5d58b 1463
89bddce5 1464 switch (how) {
8d8ad9d7 1465 case SOCK_WAKE_WAITD:
ceb5d58b 1466 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1467 break;
1468 goto call_kill;
8d8ad9d7 1469 case SOCK_WAKE_SPACE:
ceb5d58b 1470 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1471 break;
7c7ab580 1472 fallthrough;
8d8ad9d7 1473 case SOCK_WAKE_IO:
89bddce5 1474call_kill:
43815482 1475 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1476 break;
8d8ad9d7 1477 case SOCK_WAKE_URG:
43815482 1478 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1479 }
ceb5d58b 1480
1da177e4
LT
1481 return 0;
1482}
c6d409cf 1483EXPORT_SYMBOL(sock_wake_async);
1da177e4 1484
8a3c245c
PT
1485/**
1486 * __sock_create - creates a socket
1487 * @net: net namespace
1488 * @family: protocol family (AF_INET, ...)
1489 * @type: communication type (SOCK_STREAM, ...)
1490 * @protocol: protocol (0, ...)
1491 * @res: new socket
1492 * @kern: boolean for kernel space sockets
1493 *
1494 * Creates a new socket and assigns it to @res, passing through LSM.
1495 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1496 * be set to true if the socket resides in kernel space.
1497 * This function internally uses GFP_KERNEL.
1498 */
1499
721db93a 1500int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1501 struct socket **res, int kern)
1da177e4
LT
1502{
1503 int err;
1504 struct socket *sock;
55737fda 1505 const struct net_proto_family *pf;
1da177e4
LT
1506
1507 /*
89bddce5 1508 * Check protocol is in range
1da177e4
LT
1509 */
1510 if (family < 0 || family >= NPROTO)
1511 return -EAFNOSUPPORT;
1512 if (type < 0 || type >= SOCK_MAX)
1513 return -EINVAL;
1514
1515 /* Compatibility.
1516
1517 This uglymoron is moved from INET layer to here to avoid
1518 deadlock in module load.
1519 */
1520 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1521 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1522 current->comm);
1da177e4
LT
1523 family = PF_PACKET;
1524 }
1525
1526 err = security_socket_create(family, type, protocol, kern);
1527 if (err)
1528 return err;
89bddce5 1529
55737fda
SH
1530 /*
1531 * Allocate the socket and allow the family to set things up. if
1532 * the protocol is 0, the family is instructed to select an appropriate
1533 * default.
1534 */
1535 sock = sock_alloc();
1536 if (!sock) {
e87cc472 1537 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1538 return -ENFILE; /* Not exactly a match, but its the
1539 closest posix thing */
1540 }
1541
1542 sock->type = type;
1543
95a5afca 1544#ifdef CONFIG_MODULES
89bddce5
SH
1545 /* Attempt to load a protocol module if the find failed.
1546 *
1547 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1548 * requested real, full-featured networking support upon configuration.
1549 * Otherwise module support will break!
1550 */
190683a9 1551 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1552 request_module("net-pf-%d", family);
1da177e4
LT
1553#endif
1554
55737fda
SH
1555 rcu_read_lock();
1556 pf = rcu_dereference(net_families[family]);
1557 err = -EAFNOSUPPORT;
1558 if (!pf)
1559 goto out_release;
1da177e4
LT
1560
1561 /*
1562 * We will call the ->create function, that possibly is in a loadable
1563 * module, so we have to bump that loadable module refcnt first.
1564 */
55737fda 1565 if (!try_module_get(pf->owner))
1da177e4
LT
1566 goto out_release;
1567
55737fda
SH
1568 /* Now protected by module ref count */
1569 rcu_read_unlock();
1570
3f378b68 1571 err = pf->create(net, sock, protocol, kern);
55737fda 1572 if (err < 0)
1da177e4 1573 goto out_module_put;
a79af59e 1574
1da177e4
LT
1575 /*
1576 * Now to bump the refcnt of the [loadable] module that owns this
1577 * socket at sock_release time we decrement its refcnt.
1578 */
55737fda
SH
1579 if (!try_module_get(sock->ops->owner))
1580 goto out_module_busy;
1581
1da177e4
LT
1582 /*
1583 * Now that we're done with the ->create function, the [loadable]
1584 * module can have its refcnt decremented
1585 */
55737fda 1586 module_put(pf->owner);
7420ed23
VY
1587 err = security_socket_post_create(sock, family, type, protocol, kern);
1588 if (err)
3b185525 1589 goto out_sock_release;
55737fda 1590 *res = sock;
1da177e4 1591
55737fda
SH
1592 return 0;
1593
1594out_module_busy:
1595 err = -EAFNOSUPPORT;
1da177e4 1596out_module_put:
55737fda
SH
1597 sock->ops = NULL;
1598 module_put(pf->owner);
1599out_sock_release:
1da177e4 1600 sock_release(sock);
55737fda
SH
1601 return err;
1602
1603out_release:
1604 rcu_read_unlock();
1605 goto out_sock_release;
1da177e4 1606}
721db93a 1607EXPORT_SYMBOL(__sock_create);
1da177e4 1608
8a3c245c
PT
/**
 *	sock_create - creates a socket
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that uses the calling task's
 *	network namespace and passes @kern = 0.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create(int family, int type, int protocol, struct socket **res)
{
	return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
}
c6d409cf 1624EXPORT_SYMBOL(sock_create);
1da177e4 1625
8a3c245c
PT
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes @kern = 1 and takes the
 *	network namespace from the caller instead of the current task.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
c6d409cf 1642EXPORT_SYMBOL(sock_create_kern);
1da177e4 1643
da214a47 1644static struct socket *__sys_socket_create(int family, int type, int protocol)
1da177e4 1645{
1da177e4 1646 struct socket *sock;
da214a47 1647 int retval;
a677a039 1648
e38b36f3
UD
1649 /* Check the SOCK_* constants for consistency. */
1650 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1651 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1652 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1653 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1654
da214a47
JA
1655 if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1656 return ERR_PTR(-EINVAL);
a677a039 1657 type &= SOCK_TYPE_MASK;
1da177e4 1658
da214a47
JA
1659 retval = sock_create(family, type, protocol, &sock);
1660 if (retval < 0)
1661 return ERR_PTR(retval);
1662
1663 return sock;
1664}
1665
1666struct file *__sys_socket_file(int family, int type, int protocol)
1667{
1668 struct socket *sock;
da214a47
JA
1669 int flags;
1670
1671 sock = __sys_socket_create(family, type, protocol);
1672 if (IS_ERR(sock))
1673 return ERR_CAST(sock);
1674
1675 flags = type & ~SOCK_TYPE_MASK;
aaca0bdc
UD
1676 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1677 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1678
649c15c7 1679 return sock_alloc_file(sock, flags, NULL);
da214a47
JA
1680}
1681
0dd061a6
GT
1682/* A hook for bpf progs to attach to and update socket protocol.
1683 *
1684 * A static noinline declaration here could cause the compiler to
1685 * optimize away the function. A global noinline declaration will
1686 * keep the definition, but may optimize away the callsite.
1687 * Therefore, __weak is needed to ensure that the call is still
1688 * emitted, by telling the compiler that we don't know what the
1689 * function might eventually be.
0dd061a6
GT
1690 */
1691
15fb6f2b 1692__bpf_hook_start();
0dd061a6
GT
1693
/*
 * Default implementation: returns @protocol unchanged; @family and
 * @type are not used here.  __sys_socket() passes the return value on
 * as the protocol of the socket it creates.
 */
__weak noinline int update_socket_protocol(int family, int type, int protocol)
{
	return protocol;
}
1698
15fb6f2b 1699__bpf_hook_end();
0dd061a6 1700
da214a47
JA
1701int __sys_socket(int family, int type, int protocol)
1702{
1703 struct socket *sock;
1704 int flags;
1705
0dd061a6
GT
1706 sock = __sys_socket_create(family, type,
1707 update_socket_protocol(family, type, protocol));
da214a47
JA
1708 if (IS_ERR(sock))
1709 return PTR_ERR(sock);
1710
1711 flags = type & ~SOCK_TYPE_MASK;
1712 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1713 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1da177e4 1714
8e1611e2 1715 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1716}
1717
9d6a15c3
DB
/* socket(2) system call entry point; all work is done in __sys_socket(). */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
	return __sys_socket(family, type, protocol);
}
1722
1da177e4
LT
/*
 *	Create a pair of connected sockets.
 *
 *	Both descriptors are reserved and written to @usockvec before any
 *	socket is created, and are only installed once both files exist.
 *	Returns 0 or a negative errno; on error both reserved descriptors
 *	are returned to the table.
 */

int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
{
	struct socket *sock1, *sock2;
	int fd1, fd2, err;
	struct file *newfile1, *newfile2;
	int flags;

	/* Split @type into flag bits and the real socket type. */
	flags = type & ~SOCK_TYPE_MASK;
	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	type &= SOCK_TYPE_MASK;

	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

	/*
	 * reserve descriptors and make sure we won't fail
	 * to return them to userland.
	 */
	fd1 = get_unused_fd_flags(flags);
	if (unlikely(fd1 < 0))
		return fd1;

	fd2 = get_unused_fd_flags(flags);
	if (unlikely(fd2 < 0)) {
		put_unused_fd(fd1);
		return fd2;
	}

	err = put_user(fd1, &usockvec[0]);
	if (err)
		goto out;

	err = put_user(fd2, &usockvec[1]);
	if (err)
		goto out;

	/*
	 * Obtain the first socket and check if the underlying protocol
	 * supports the socketpair call.
	 */

	err = sock_create(family, type, protocol, &sock1);
	if (unlikely(err < 0))
		goto out;

	err = sock_create(family, type, protocol, &sock2);
	if (unlikely(err < 0)) {
		sock_release(sock1);
		goto out;
	}

	err = security_socket_socketpair(sock1, sock2);
	if (unlikely(err)) {
		sock_release(sock2);
		sock_release(sock1);
		goto out;
	}

	err = READ_ONCE(sock1->ops)->socketpair(sock1, sock2);
	if (unlikely(err < 0)) {
		sock_release(sock2);
		sock_release(sock1);
		goto out;
	}

	/*
	 * NOTE(review): sock1 is not released explicitly when
	 * sock_alloc_file() fails -- presumably sock_alloc_file() releases
	 * the socket it was given on failure; confirm there.
	 */
	newfile1 = sock_alloc_file(sock1, flags, NULL);
	if (IS_ERR(newfile1)) {
		err = PTR_ERR(newfile1);
		sock_release(sock2);
		goto out;
	}

	/* From here on sock1 is torn down by dropping its file with fput(). */
	newfile2 = sock_alloc_file(sock2, flags, NULL);
	if (IS_ERR(newfile2)) {
		err = PTR_ERR(newfile2);
		fput(newfile1);
		goto out;
	}

	audit_fd_pair(fd1, fd2);

	fd_install(fd1, newfile1);
	fd_install(fd2, newfile2);
	return 0;

out:
	put_unused_fd(fd2);
	put_unused_fd(fd1);
	return err;
}
1818
6debc8d8
DB
/* socketpair(2) system call entry point; all work is done in __sys_socketpair(). */
SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
		int __user *, usockvec)
{
	return __sys_socketpair(family, type, protocol, usockvec);
}
1824
1da177e4
LT
1825/*
1826 * Bind a name to a socket. Nothing much to do here since it's
1827 * the protocol's responsibility to handle the local address.
1828 *
1829 * We move the socket address to kernel space before we call
1830 * the protocol layer (having also checked the address is ok).
1831 */
1832
a87d35d8 1833int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1834{
1835 struct socket *sock;
230b1839 1836 struct sockaddr_storage address;
6cb153ca 1837 int err, fput_needed;
1da177e4 1838
89bddce5 1839 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1840 if (sock) {
43db362d 1841 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1842 if (!err) {
89bddce5 1843 err = security_socket_bind(sock,
230b1839 1844 (struct sockaddr *)&address,
89bddce5 1845 addrlen);
6cb153ca 1846 if (!err)
1ded5e5a 1847 err = READ_ONCE(sock->ops)->bind(sock,
89bddce5 1848 (struct sockaddr *)
230b1839 1849 &address, addrlen);
1da177e4 1850 }
6cb153ca 1851 fput_light(sock->file, fput_needed);
89bddce5 1852 }
1da177e4
LT
1853 return err;
1854}
1855
a87d35d8
DB
/* bind(2) system call entry point; all work is done in __sys_bind(). */
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
{
	return __sys_bind(fd, umyaddr, addrlen);
}
1860
1da177e4
LT
1861/*
1862 * Perform a listen. Basically, we allow the protocol to do anything
1863 * necessary for a listen, and if that works, we mark the socket as
1864 * ready for listening.
1865 */
1866
25e290ee 1867int __sys_listen(int fd, int backlog)
1da177e4
LT
1868{
1869 struct socket *sock;
6cb153ca 1870 int err, fput_needed;
b8e1f9b5 1871 int somaxconn;
89bddce5
SH
1872
1873 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1874 if (sock) {
3c9ba81d 1875 somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
95c96174 1876 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1877 backlog = somaxconn;
1da177e4
LT
1878
1879 err = security_socket_listen(sock, backlog);
6cb153ca 1880 if (!err)
1ded5e5a 1881 err = READ_ONCE(sock->ops)->listen(sock, backlog);
1da177e4 1882
6cb153ca 1883 fput_light(sock->file, fput_needed);
1da177e4
LT
1884 }
1885 return err;
1886}
1887
25e290ee
DB
/* listen(2) system call entry point; all work is done in __sys_listen(). */
SYSCALL_DEFINE2(listen, int, fd, int, backlog)
{
	return __sys_listen(fd, backlog);
}
1892
/*
 * Accept a connection on the socket behind @file and return the new
 * socket wrapped in a struct file (no descriptor is installed here).
 *
 * @arg is passed to the protocol's ->accept() after the listening
 * file's f_flags have been OR-ed into arg->flags.  When @upeer_sockaddr
 * is non-NULL the new socket's address is copied to user space.
 *
 * Returns the new file or an ERR_PTR(): -ENOTSOCK if @file is not a
 * socket, -ENFILE if no socket could be allocated, -ECONNABORTED if
 * ->getname() fails, or the error of whichever step failed.
 */
struct file *do_accept(struct file *file, struct proto_accept_arg *arg,
		       struct sockaddr __user *upeer_sockaddr,
		       int __user *upeer_addrlen, int flags)
{
	struct socket *sock, *newsock;
	struct file *newfile;
	int err, len;
	struct sockaddr_storage address;
	const struct proto_ops *ops;

	sock = sock_from_file(file);
	if (!sock)
		return ERR_PTR(-ENOTSOCK);

	newsock = sock_alloc();
	if (!newsock)
		return ERR_PTR(-ENFILE);
	ops = READ_ONCE(sock->ops);

	/* The new socket starts with the listener's type and ops. */
	newsock->type = sock->type;
	newsock->ops = ops;

	/*
	 * We don't need try_module_get here, as the listening socket (sock)
	 * has the protocol module (sock->ops->owner) held.
	 */
	__module_get(ops->owner);

	/*
	 * NOTE(review): newsock is not released here on failure --
	 * presumably sock_alloc_file() does that itself; confirm there.
	 */
	newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
	if (IS_ERR(newfile))
		return newfile;

	/* From here on every failure is undone by dropping newfile. */
	err = security_socket_accept(sock, newsock);
	if (err)
		goto out_fd;

	arg->flags |= sock->file->f_flags;
	err = ops->accept(sock, newsock, arg);
	if (err < 0)
		goto out_fd;

	if (upeer_sockaddr) {
		/* NOTE(review): the literal 2 presumably selects the peer address -- confirm in ->getname(). */
		len = ops->getname(newsock, (struct sockaddr *)&address, 2);
		if (len < 0) {
			err = -ECONNABORTED;
			goto out_fd;
		}
		err = move_addr_to_user(&address,
					len, upeer_sockaddr, upeer_addrlen);
		if (err < 0)
			goto out_fd;
	}

	/* File flags are not inherited via accept(), unlike on other OSes. */
	return newfile;
out_fd:
	fput(newfile);
	return ERR_PTR(err);
}
1952
c0424532
YD
1953static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr,
1954 int __user *upeer_addrlen, int flags)
d32f89da 1955{
0645fbe7 1956 struct proto_accept_arg arg = { };
d32f89da
PB
1957 struct file *newfile;
1958 int newfd;
1959
1960 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1961 return -EINVAL;
1962
1963 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1964 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
de2ea4b6 1965
c0424532 1966 newfd = get_unused_fd_flags(flags);
d32f89da
PB
1967 if (unlikely(newfd < 0))
1968 return newfd;
1969
0645fbe7 1970 newfile = do_accept(file, &arg, upeer_sockaddr, upeer_addrlen,
d32f89da
PB
1971 flags);
1972 if (IS_ERR(newfile)) {
1973 put_unused_fd(newfd);
1974 return PTR_ERR(newfile);
1975 }
1976 fd_install(newfd, newfile);
1977 return newfd;
de2ea4b6
JA
1978}
1979
1980/*
1981 * For accept, we attempt to create a new socket, set up the link
1982 * with the client, wake up the client, then return the new
1983 * connected fd. We collect the address of the connector in kernel
1984 * space and move it to user at the very end. This is unclean because
1985 * we open the socket then return an error.
1986 *
1987 * 1003.1g adds the ability to recvmsg() to query connection pending
1988 * status to recvmsg. We need to add that support in a way thats
1989 * clean when we restructure accept also.
1990 */
1991
1992int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1993 int __user *upeer_addrlen, int flags)
1994{
1995 int ret = -EBADF;
1996 struct fd f;
1997
1998 f = fdget(fd);
1999 if (f.file) {
c0424532
YD
2000 ret = __sys_accept4_file(f.file, upeer_sockaddr,
2001 upeer_addrlen, flags);
6b07edeb 2002 fdput(f);
de2ea4b6
JA
2003 }
2004
2005 return ret;
1da177e4
LT
2006}
2007
4541e805
DB
2008SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
2009 int __user *, upeer_addrlen, int, flags)
2010{
2011 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
2012}
2013
20f37034
HC
2014SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
2015 int __user *, upeer_addrlen)
aaca0bdc 2016{
4541e805 2017 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
2018}
2019
1da177e4
LT
2020/*
2021 * Attempt to connect to a socket with the server address. The address
2022 * is in user space so we verify it is OK and move it to kernel space.
2023 *
2024 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
2025 * break bindings
2026 *
2027 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
2028 * other SEQPACKET protocols that take time to connect() as it doesn't
2029 * include the -EINPROGRESS status for such sockets.
2030 */
2031
f499a021 2032int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 2033 int addrlen, int file_flags)
1da177e4
LT
2034{
2035 struct socket *sock;
bd3ded31 2036 int err;
1da177e4 2037
dba4a925
FR
2038 sock = sock_from_file(file);
2039 if (!sock) {
2040 err = -ENOTSOCK;
1da177e4 2041 goto out;
dba4a925 2042 }
1da177e4 2043
89bddce5 2044 err =
f499a021 2045 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 2046 if (err)
bd3ded31 2047 goto out;
1da177e4 2048
1ded5e5a
ED
2049 err = READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)address,
2050 addrlen, sock->file->f_flags | file_flags);
1da177e4
LT
2051out:
2052 return err;
2053}
2054
bd3ded31
JA
2055int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
2056{
2057 int ret = -EBADF;
2058 struct fd f;
2059
2060 f = fdget(fd);
2061 if (f.file) {
f499a021
JA
2062 struct sockaddr_storage address;
2063
2064 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
2065 if (!ret)
2066 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 2067 fdput(f);
bd3ded31
JA
2068 }
2069
2070 return ret;
2071}
2072
1387c2c2
DB
2073SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
2074 int, addrlen)
2075{
2076 return __sys_connect(fd, uservaddr, addrlen);
2077}
2078
1da177e4
LT
2079/*
2080 * Get the local address ('name') of a socket object. Move the obtained
2081 * name to user space.
2082 */
2083
8882a107
DB
2084int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
2085 int __user *usockaddr_len)
1da177e4
LT
2086{
2087 struct socket *sock;
230b1839 2088 struct sockaddr_storage address;
9b2c45d4 2089 int err, fput_needed;
89bddce5 2090
6cb153ca 2091 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
2092 if (!sock)
2093 goto out;
2094
2095 err = security_socket_getsockname(sock);
2096 if (err)
2097 goto out_put;
2098
1ded5e5a 2099 err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 0);
9b2c45d4 2100 if (err < 0)
1da177e4 2101 goto out_put;
e44ef1d4 2102 /* "err" is actually length in this case */
9b2c45d4 2103 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
2104
2105out_put:
6cb153ca 2106 fput_light(sock->file, fput_needed);
1da177e4
LT
2107out:
2108 return err;
2109}
2110
8882a107
DB
2111SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
2112 int __user *, usockaddr_len)
2113{
2114 return __sys_getsockname(fd, usockaddr, usockaddr_len);
2115}
2116
1da177e4
LT
2117/*
2118 * Get the remote address ('name') of a socket object. Move the obtained
2119 * name to user space.
2120 */
2121
b21c8f83
DB
2122int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
2123 int __user *usockaddr_len)
1da177e4
LT
2124{
2125 struct socket *sock;
230b1839 2126 struct sockaddr_storage address;
9b2c45d4 2127 int err, fput_needed;
1da177e4 2128
89bddce5
SH
2129 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2130 if (sock != NULL) {
1ded5e5a
ED
2131 const struct proto_ops *ops = READ_ONCE(sock->ops);
2132
1da177e4
LT
2133 err = security_socket_getpeername(sock);
2134 if (err) {
6cb153ca 2135 fput_light(sock->file, fput_needed);
1da177e4
LT
2136 return err;
2137 }
2138
1ded5e5a 2139 err = ops->getname(sock, (struct sockaddr *)&address, 1);
9b2c45d4
DV
2140 if (err >= 0)
2141 /* "err" is actually length in this case */
2142 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 2143 usockaddr_len);
6cb153ca 2144 fput_light(sock->file, fput_needed);
1da177e4
LT
2145 }
2146 return err;
2147}
2148
b21c8f83
DB
2149SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
2150 int __user *, usockaddr_len)
2151{
2152 return __sys_getpeername(fd, usockaddr, usockaddr_len);
2153}
2154
1da177e4
LT
2155/*
2156 * Send a datagram to a given address. We move the address into kernel
2157 * space and check the user space data area is readable before invoking
2158 * the protocol.
2159 */
211b634b
DB
2160int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
2161 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
2162{
2163 struct socket *sock;
230b1839 2164 struct sockaddr_storage address;
1da177e4
LT
2165 int err;
2166 struct msghdr msg;
6cb153ca 2167 int fput_needed;
6cb153ca 2168
9fd7874c 2169 err = import_ubuf(ITER_SOURCE, buff, len, &msg.msg_iter);
602bd0e9
AV
2170 if (unlikely(err))
2171 return err;
de0fa95c
PE
2172 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2173 if (!sock)
4387ff75 2174 goto out;
6cb153ca 2175
89bddce5 2176 msg.msg_name = NULL;
89bddce5
SH
2177 msg.msg_control = NULL;
2178 msg.msg_controllen = 0;
2179 msg.msg_namelen = 0;
7c701d92 2180 msg.msg_ubuf = NULL;
6cb153ca 2181 if (addr) {
43db362d 2182 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
2183 if (err < 0)
2184 goto out_put;
230b1839 2185 msg.msg_name = (struct sockaddr *)&address;
89bddce5 2186 msg.msg_namelen = addr_len;
1da177e4 2187 }
b841b901 2188 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
1da177e4
LT
2189 if (sock->file->f_flags & O_NONBLOCK)
2190 flags |= MSG_DONTWAIT;
2191 msg.msg_flags = flags;
86a7e0b6 2192 err = __sock_sendmsg(sock, &msg);
1da177e4 2193
89bddce5 2194out_put:
de0fa95c 2195 fput_light(sock->file, fput_needed);
4387ff75 2196out:
1da177e4
LT
2197 return err;
2198}
2199
211b634b
DB
2200SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2201 unsigned int, flags, struct sockaddr __user *, addr,
2202 int, addr_len)
2203{
2204 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2205}
2206
1da177e4 2207/*
89bddce5 2208 * Send a datagram down a socket.
1da177e4
LT
2209 */
2210
3e0fa65f 2211SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2212 unsigned int, flags)
1da177e4 2213{
211b634b 2214 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2215}
2216
2217/*
89bddce5 2218 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2219 * sender. We verify the buffers are writable and if needed move the
2220 * sender address from kernel to user space.
2221 */
7a09e1eb
DB
2222int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2223 struct sockaddr __user *addr, int __user *addr_len)
1da177e4 2224{
1228b34c
ED
2225 struct sockaddr_storage address;
2226 struct msghdr msg = {
2227 /* Save some cycles and don't copy the address if not needed */
2228 .msg_name = addr ? (struct sockaddr *)&address : NULL,
2229 };
1da177e4 2230 struct socket *sock;
89bddce5 2231 int err, err2;
6cb153ca
BL
2232 int fput_needed;
2233
9fd7874c 2234 err = import_ubuf(ITER_DEST, ubuf, size, &msg.msg_iter);
602bd0e9
AV
2235 if (unlikely(err))
2236 return err;
de0fa95c 2237 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2238 if (!sock)
de0fa95c 2239 goto out;
1da177e4 2240
1da177e4
LT
2241 if (sock->file->f_flags & O_NONBLOCK)
2242 flags |= MSG_DONTWAIT;
2da62906 2243 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2244
89bddce5 2245 if (err >= 0 && addr != NULL) {
43db362d 2246 err2 = move_addr_to_user(&address,
230b1839 2247 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2248 if (err2 < 0)
2249 err = err2;
1da177e4 2250 }
de0fa95c
PE
2251
2252 fput_light(sock->file, fput_needed);
4387ff75 2253out:
1da177e4
LT
2254 return err;
2255}
2256
7a09e1eb
DB
2257SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2258 unsigned int, flags, struct sockaddr __user *, addr,
2259 int __user *, addr_len)
2260{
2261 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2262}
2263
1da177e4 2264/*
89bddce5 2265 * Receive a datagram from a socket.
1da177e4
LT
2266 */
2267
b7c0ddf5
JG
2268SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2269 unsigned int, flags)
1da177e4 2270{
7a09e1eb 2271 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2272}
2273
83f0c10b
FW
2274static bool sock_use_custom_sol_socket(const struct socket *sock)
2275{
a5ef058d 2276 return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
83f0c10b
FW
2277}
2278
1406245c
BL
2279int do_sock_setsockopt(struct socket *sock, bool compat, int level,
2280 int optname, sockptr_t optval, int optlen)
1da177e4 2281{
1ded5e5a 2282 const struct proto_ops *ops;
0d01da6a 2283 char *kernel_optval = NULL;
1406245c 2284 int err;
1da177e4
LT
2285
2286 if (optlen < 0)
2287 return -EINVAL;
89bddce5 2288
4a367299
CH
2289 err = security_socket_setsockopt(sock, level, optname);
2290 if (err)
2291 goto out_put;
0d01da6a 2292
1406245c 2293 if (!compat)
55db9c0e 2294 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
3f31e0d1 2295 optval, &optlen,
55db9c0e 2296 &kernel_optval);
4a367299
CH
2297 if (err < 0)
2298 goto out_put;
2299 if (err > 0) {
2300 err = 0;
2301 goto out_put;
2302 }
0d01da6a 2303
a7b75c5a
CH
2304 if (kernel_optval)
2305 optval = KERNEL_SOCKPTR(kernel_optval);
1ded5e5a 2306 ops = READ_ONCE(sock->ops);
4a367299 2307 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2308 err = sock_setsockopt(sock, level, optname, optval, optlen);
1ded5e5a 2309 else if (unlikely(!ops->setsockopt))
a44d9e72 2310 err = -EOPNOTSUPP;
4a367299 2311 else
1ded5e5a 2312 err = ops->setsockopt(sock, level, optname, optval,
89bddce5 2313 optlen);
a7b75c5a 2314 kfree(kernel_optval);
4a367299 2315out_put:
1406245c
BL
2316 return err;
2317}
2318EXPORT_SYMBOL(do_sock_setsockopt);
2319
2320/* Set a socket option. Because we don't know the option lengths we have
2321 * to pass the user mode parameter for the protocols to sort out.
2322 */
2323int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
2324 int optlen)
2325{
2326 sockptr_t optval = USER_SOCKPTR(user_optval);
2327 bool compat = in_compat_syscall();
2328 int err, fput_needed;
2329 struct socket *sock;
2330
2331 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2332 if (!sock)
2333 return err;
2334
2335 err = do_sock_setsockopt(sock, compat, level, optname, optval, optlen);
2336
4a367299 2337 fput_light(sock->file, fput_needed);
1da177e4
LT
2338 return err;
2339}
2340
cc36dca0
DB
2341SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2342 char __user *, optval, int, optlen)
2343{
2344 return __sys_setsockopt(fd, level, optname, optval, optlen);
2345}
2346
9cacf81f
SF
2347INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2348 int optname));
2349
0b05b0cd
BL
2350int do_sock_getsockopt(struct socket *sock, bool compat, int level,
2351 int optname, sockptr_t optval, sockptr_t optlen)
1da177e4 2352{
ad4bf5f2 2353 int max_optlen __maybe_unused;
1ded5e5a 2354 const struct proto_ops *ops;
0b05b0cd 2355 int err;
d8a9b38f
CH
2356
2357 err = security_socket_getsockopt(sock, level, optname);
2358 if (err)
0b05b0cd 2359 return err;
1da177e4 2360
0b05b0cd 2361 if (!compat)
55db9c0e 2362 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2363
1ded5e5a 2364 ops = READ_ONCE(sock->ops);
0b05b0cd
BL
2365 if (level == SOL_SOCKET) {
2366 err = sk_getsockopt(sock->sk, level, optname, optval, optlen);
2367 } else if (unlikely(!ops->getsockopt)) {
a44d9e72 2368 err = -EOPNOTSUPP;
0b05b0cd
BL
2369 } else {
2370 if (WARN_ONCE(optval.is_kernel || optlen.is_kernel,
2371 "Invalid argument type"))
2372 return -EOPNOTSUPP;
2373
2374 err = ops->getsockopt(sock, level, optname, optval.user,
2375 optlen.user);
2376 }
0d01da6a 2377
0b05b0cd 2378 if (!compat)
55db9c0e
CH
2379 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2380 optval, optlen, max_optlen,
2381 err);
0b05b0cd
BL
2382
2383 return err;
2384}
2385EXPORT_SYMBOL(do_sock_getsockopt);
2386
1da177e4
LT
2387/*
2388 * Get a socket option. Because we don't know the option lengths we have
2389 * to pass a user mode parameter for the protocols to sort out.
2390 */
55db9c0e
CH
2391int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2392 int __user *optlen)
1da177e4 2393{
6cb153ca 2394 int err, fput_needed;
1da177e4 2395 struct socket *sock;
0b05b0cd 2396 bool compat;
1da177e4 2397
89bddce5 2398 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2399 if (!sock)
2400 return err;
2401
0b05b0cd
BL
2402 compat = in_compat_syscall();
2403 err = do_sock_getsockopt(sock, compat, level, optname,
2404 USER_SOCKPTR(optval), USER_SOCKPTR(optlen));
1da177e4 2405
d8a9b38f 2406 fput_light(sock->file, fput_needed);
1da177e4
LT
2407 return err;
2408}
2409
13a2d70e
DB
2410SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2411 char __user *, optval, int __user *, optlen)
2412{
2413 return __sys_getsockopt(fd, level, optname, optval, optlen);
2414}
2415
1da177e4
LT
2416/*
2417 * Shutdown a socket.
2418 */
2419
b713c195
JA
2420int __sys_shutdown_sock(struct socket *sock, int how)
2421{
2422 int err;
2423
2424 err = security_socket_shutdown(sock, how);
2425 if (!err)
1ded5e5a 2426 err = READ_ONCE(sock->ops)->shutdown(sock, how);
b713c195
JA
2427
2428 return err;
2429}
2430
005a1aea 2431int __sys_shutdown(int fd, int how)
1da177e4 2432{
6cb153ca 2433 int err, fput_needed;
1da177e4
LT
2434 struct socket *sock;
2435
89bddce5
SH
2436 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2437 if (sock != NULL) {
b713c195 2438 err = __sys_shutdown_sock(sock, how);
6cb153ca 2439 fput_light(sock->file, fput_needed);
1da177e4
LT
2440 }
2441 return err;
2442}
2443
005a1aea
DB
2444SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2445{
2446 return __sys_shutdown(fd, how);
2447}
2448
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * These expand in the caller's scope: they expect a variable named "flags"
 * and, next to the pointer "msg", a compat pointer named "msg_compat"
 * (formed by token pasting).
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2455
c71d8ebe
TH
2456struct used_address {
2457 struct sockaddr_storage name;
2458 unsigned int name_len;
2459};
2460
7fa875b8
DY
2461int __copy_msghdr(struct msghdr *kmsg,
2462 struct user_msghdr *msg,
2463 struct sockaddr __user **save_addr)
1661bf36 2464{
08adb7da
AV
2465 ssize_t err;
2466
1f466e1f 2467 kmsg->msg_control_is_user = true;
1228b34c 2468 kmsg->msg_get_inq = 0;
7fa875b8
DY
2469 kmsg->msg_control_user = msg->msg_control;
2470 kmsg->msg_controllen = msg->msg_controllen;
2471 kmsg->msg_flags = msg->msg_flags;
ffb07550 2472
7fa875b8
DY
2473 kmsg->msg_namelen = msg->msg_namelen;
2474 if (!msg->msg_name)
6a2a2b3a
AS
2475 kmsg->msg_namelen = 0;
2476
dbb490b9
ML
2477 if (kmsg->msg_namelen < 0)
2478 return -EINVAL;
2479
1661bf36 2480 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2481 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2482
2483 if (save_addr)
7fa875b8 2484 *save_addr = msg->msg_name;
08adb7da 2485
7fa875b8 2486 if (msg->msg_name && kmsg->msg_namelen) {
08adb7da 2487 if (!save_addr) {
7fa875b8 2488 err = move_addr_to_kernel(msg->msg_name,
864d9664 2489 kmsg->msg_namelen,
08adb7da
AV
2490 kmsg->msg_name);
2491 if (err < 0)
2492 return err;
2493 }
2494 } else {
2495 kmsg->msg_name = NULL;
2496 kmsg->msg_namelen = 0;
2497 }
2498
7fa875b8 2499 if (msg->msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2500 return -EMSGSIZE;
2501
0345f931 2502 kmsg->msg_iocb = NULL;
7c701d92 2503 kmsg->msg_ubuf = NULL;
0a384abf
JA
2504 return 0;
2505}
2506
2507static int copy_msghdr_from_user(struct msghdr *kmsg,
2508 struct user_msghdr __user *umsg,
2509 struct sockaddr __user **save_addr,
2510 struct iovec **iov)
2511{
2512 struct user_msghdr msg;
2513 ssize_t err;
2514
7fa875b8
DY
2515 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
2516 return -EFAULT;
2517
2518 err = __copy_msghdr(kmsg, &msg, save_addr);
0a384abf
JA
2519 if (err)
2520 return err;
0345f931 2521
de4eda9d 2522 err = import_iovec(save_addr ? ITER_DEST : ITER_SOURCE,
ffb07550 2523 msg.msg_iov, msg.msg_iovlen,
da184284 2524 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2525 return err < 0 ? err : 0;
1661bf36
DC
2526}
2527
4257c8ca
JA
2528static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2529 unsigned int flags, struct used_address *used_address,
2530 unsigned int allowed_msghdr_flags)
1da177e4 2531{
b9d717a7 2532 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2533 __aligned(sizeof(__kernel_size_t));
89bddce5 2534 /* 20 is size of ipv6_pktinfo */
1da177e4 2535 unsigned char *ctl_buf = ctl;
d8725c86 2536 int ctl_len;
08adb7da 2537 ssize_t err;
89bddce5 2538
1da177e4
LT
2539 err = -ENOBUFS;
2540
228e548e 2541 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2542 goto out;
28a94d8f 2543 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2544 ctl_len = msg_sys->msg_controllen;
1da177e4 2545 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2546 err =
228e548e 2547 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2548 sizeof(ctl));
1da177e4 2549 if (err)
4257c8ca 2550 goto out;
228e548e
AB
2551 ctl_buf = msg_sys->msg_control;
2552 ctl_len = msg_sys->msg_controllen;
1da177e4 2553 } else if (ctl_len) {
ac4340fc
DM
2554 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2555 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2556 if (ctl_len > sizeof(ctl)) {
1da177e4 2557 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2558 if (ctl_buf == NULL)
4257c8ca 2559 goto out;
1da177e4
LT
2560 }
2561 err = -EFAULT;
1f466e1f 2562 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2563 goto out_freectl;
228e548e 2564 msg_sys->msg_control = ctl_buf;
1f466e1f 2565 msg_sys->msg_control_is_user = false;
1da177e4 2566 }
b841b901 2567 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
228e548e 2568 msg_sys->msg_flags = flags;
1da177e4
LT
2569
2570 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2571 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2572 /*
2573 * If this is sendmmsg() and current destination address is same as
2574 * previously succeeded address, omit asking LSM's decision.
2575 * used_address->name_len is initialized to UINT_MAX so that the first
2576 * destination address never matches.
2577 */
bc909d9d
MD
2578 if (used_address && msg_sys->msg_name &&
2579 used_address->name_len == msg_sys->msg_namelen &&
2580 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2581 used_address->name_len)) {
d8725c86 2582 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2583 goto out_freectl;
2584 }
86a7e0b6 2585 err = __sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2586 /*
2587 * If this is sendmmsg() and sending to current destination address was
2588 * successful, remember it.
2589 */
2590 if (used_address && err >= 0) {
2591 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2592 if (msg_sys->msg_name)
2593 memcpy(&used_address->name, msg_sys->msg_name,
2594 used_address->name_len);
c71d8ebe 2595 }
1da177e4
LT
2596
2597out_freectl:
89bddce5 2598 if (ctl_buf != ctl)
1da177e4 2599 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2600out:
2601 return err;
2602}
2603
e54e09c0
JA
2604static int sendmsg_copy_msghdr(struct msghdr *msg,
2605 struct user_msghdr __user *umsg, unsigned flags,
2606 struct iovec **iov)
4257c8ca
JA
2607{
2608 int err;
2609
2610 if (flags & MSG_CMSG_COMPAT) {
2611 struct compat_msghdr __user *msg_compat;
2612
2613 msg_compat = (struct compat_msghdr __user *) umsg;
2614 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2615 } else {
2616 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2617 }
2618 if (err < 0)
2619 return err;
2620
2621 return 0;
2622}
2623
2624static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2625 struct msghdr *msg_sys, unsigned int flags,
2626 struct used_address *used_address,
2627 unsigned int allowed_msghdr_flags)
2628{
2629 struct sockaddr_storage address;
2630 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2631 ssize_t err;
2632
2633 msg_sys->msg_name = &address;
2634
2635 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2636 if (err < 0)
2637 return err;
2638
2639 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2640 allowed_msghdr_flags);
da184284 2641 kfree(iov);
228e548e
AB
2642 return err;
2643}
2644
2645/*
2646 * BSD sendmsg interface
2647 */
03b1230c 2648long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2649 unsigned int flags)
2650{
03b1230c 2651 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2652}
228e548e 2653
e1834a32
DB
2654long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2655 bool forbid_cmsg_compat)
228e548e
AB
2656{
2657 int fput_needed, err;
2658 struct msghdr msg_sys;
1be374a0
AL
2659 struct socket *sock;
2660
e1834a32
DB
2661 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2662 return -EINVAL;
2663
1be374a0 2664 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2665 if (!sock)
2666 goto out;
2667
28a94d8f 2668 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2669
6cb153ca 2670 fput_light(sock->file, fput_needed);
89bddce5 2671out:
1da177e4
LT
2672 return err;
2673}
2674
666547ff 2675SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2676{
e1834a32 2677 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2678}
2679
228e548e
AB
2680/*
2681 * Linux sendmmsg interface
2682 */
2683
2684int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2685 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2686{
2687 int fput_needed, err, datagrams;
2688 struct socket *sock;
2689 struct mmsghdr __user *entry;
2690 struct compat_mmsghdr __user *compat_entry;
2691 struct msghdr msg_sys;
c71d8ebe 2692 struct used_address used_address;
f092276d 2693 unsigned int oflags = flags;
228e548e 2694
e1834a32
DB
2695 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2696 return -EINVAL;
2697
98382f41
AB
2698 if (vlen > UIO_MAXIOV)
2699 vlen = UIO_MAXIOV;
228e548e
AB
2700
2701 datagrams = 0;
2702
2703 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2704 if (!sock)
2705 return err;
2706
c71d8ebe 2707 used_address.name_len = UINT_MAX;
228e548e
AB
2708 entry = mmsg;
2709 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2710 err = 0;
f092276d 2711 flags |= MSG_BATCH;
228e548e
AB
2712
2713 while (datagrams < vlen) {
f092276d
TH
2714 if (datagrams == vlen - 1)
2715 flags = oflags;
2716
228e548e 2717 if (MSG_CMSG_COMPAT & flags) {
666547ff 2718 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2719 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2720 if (err < 0)
2721 break;
2722 err = __put_user(err, &compat_entry->msg_len);
2723 ++compat_entry;
2724 } else {
a7526eb5 2725 err = ___sys_sendmsg(sock,
666547ff 2726 (struct user_msghdr __user *)entry,
28a94d8f 2727 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2728 if (err < 0)
2729 break;
2730 err = put_user(err, &entry->msg_len);
2731 ++entry;
2732 }
2733
2734 if (err)
2735 break;
2736 ++datagrams;
3023898b
SHY
2737 if (msg_data_left(&msg_sys))
2738 break;
a78cb84c 2739 cond_resched();
228e548e
AB
2740 }
2741
228e548e
AB
2742 fput_light(sock->file, fput_needed);
2743
728ffb86
AB
2744 /* We only return an error if no datagrams were able to be sent */
2745 if (datagrams != 0)
228e548e
AB
2746 return datagrams;
2747
228e548e
AB
2748 return err;
2749}
2750
2751SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2752 unsigned int, vlen, unsigned int, flags)
2753{
e1834a32 2754 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2755}
2756
e54e09c0
JA
2757static int recvmsg_copy_msghdr(struct msghdr *msg,
2758 struct user_msghdr __user *umsg, unsigned flags,
2759 struct sockaddr __user **uaddr,
2760 struct iovec **iov)
1da177e4 2761{
08adb7da 2762 ssize_t err;
1da177e4 2763
4257c8ca
JA
2764 if (MSG_CMSG_COMPAT & flags) {
2765 struct compat_msghdr __user *msg_compat;
1da177e4 2766
4257c8ca
JA
2767 msg_compat = (struct compat_msghdr __user *) umsg;
2768 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2769 } else {
2770 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2771 }
1da177e4 2772 if (err < 0)
da184284 2773 return err;
1da177e4 2774
4257c8ca
JA
2775 return 0;
2776}
2777
2778static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2779 struct user_msghdr __user *msg,
2780 struct sockaddr __user *uaddr,
2781 unsigned int flags, int nosec)
2782{
2783 struct compat_msghdr __user *msg_compat =
2784 (struct compat_msghdr __user *) msg;
2785 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2786 struct sockaddr_storage addr;
2787 unsigned long cmsg_ptr;
2788 int len;
2789 ssize_t err;
2790
2791 msg_sys->msg_name = &addr;
a2e27255
ACM
2792 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2793 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2794
f3d33426
HFS
2795 /* We assume all kernel code knows the size of sockaddr_storage */
2796 msg_sys->msg_namelen = 0;
2797
1da177e4
LT
2798 if (sock->file->f_flags & O_NONBLOCK)
2799 flags |= MSG_DONTWAIT;
1af66221
ED
2800
2801 if (unlikely(nosec))
2802 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2803 else
2804 err = sock_recvmsg(sock, msg_sys, flags);
2805
1da177e4 2806 if (err < 0)
4257c8ca 2807 goto out;
1da177e4
LT
2808 len = err;
2809
2810 if (uaddr != NULL) {
43db362d 2811 err = move_addr_to_user(&addr,
a2e27255 2812 msg_sys->msg_namelen, uaddr,
89bddce5 2813 uaddr_len);
1da177e4 2814 if (err < 0)
4257c8ca 2815 goto out;
1da177e4 2816 }
a2e27255 2817 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2818 COMPAT_FLAGS(msg));
1da177e4 2819 if (err)
4257c8ca 2820 goto out;
1da177e4 2821 if (MSG_CMSG_COMPAT & flags)
a2e27255 2822 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2823 &msg_compat->msg_controllen);
2824 else
a2e27255 2825 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2826 &msg->msg_controllen);
2827 if (err)
4257c8ca 2828 goto out;
1da177e4 2829 err = len;
4257c8ca
JA
2830out:
2831 return err;
2832}
2833
2834static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2835 struct msghdr *msg_sys, unsigned int flags, int nosec)
2836{
2837 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2838 /* user mode address pointers */
2839 struct sockaddr __user *uaddr;
2840 ssize_t err;
2841
2842 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2843 if (err < 0)
2844 return err;
1da177e4 2845
4257c8ca 2846 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2847 kfree(iov);
a2e27255
ACM
2848 return err;
2849}
2850
2851/*
2852 * BSD recvmsg interface
2853 */
2854
03b1230c
JA
2855long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2856 struct user_msghdr __user *umsg,
2857 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2858{
03b1230c 2859 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2860}
2861
e1834a32
DB
2862long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2863 bool forbid_cmsg_compat)
a2e27255
ACM
2864{
2865 int fput_needed, err;
2866 struct msghdr msg_sys;
1be374a0
AL
2867 struct socket *sock;
2868
e1834a32
DB
2869 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2870 return -EINVAL;
2871
1be374a0 2872 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2873 if (!sock)
2874 goto out;
2875
a7526eb5 2876 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2877
6cb153ca 2878 fput_light(sock->file, fput_needed);
1da177e4
LT
2879out:
2880 return err;
2881}
2882
666547ff 2883SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2884 unsigned int, flags)
2885{
e1834a32 2886 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2887}
2888
a2e27255
ACM
2889/*
2890 * Linux recvmmsg interface
2891 */
2892
e11d4284
AB
2893static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2894 unsigned int vlen, unsigned int flags,
2895 struct timespec64 *timeout)
a2e27255
ACM
2896{
2897 int fput_needed, err, datagrams;
2898 struct socket *sock;
2899 struct mmsghdr __user *entry;
d7256d0e 2900 struct compat_mmsghdr __user *compat_entry;
a2e27255 2901 struct msghdr msg_sys;
766b9f92
DD
2902 struct timespec64 end_time;
2903 struct timespec64 timeout64;
a2e27255
ACM
2904
2905 if (timeout &&
2906 poll_select_set_timeout(&end_time, timeout->tv_sec,
2907 timeout->tv_nsec))
2908 return -EINVAL;
2909
2910 datagrams = 0;
2911
2912 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2913 if (!sock)
2914 return err;
2915
7797dc41
SHY
2916 if (likely(!(flags & MSG_ERRQUEUE))) {
2917 err = sock_error(sock->sk);
2918 if (err) {
2919 datagrams = err;
2920 goto out_put;
2921 }
e623a9e9 2922 }
a2e27255
ACM
2923
2924 entry = mmsg;
d7256d0e 2925 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2926
2927 while (datagrams < vlen) {
2928 /*
2929 * No need to ask LSM for more than the first datagram.
2930 */
d7256d0e 2931 if (MSG_CMSG_COMPAT & flags) {
666547ff 2932 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2933 &msg_sys, flags & ~MSG_WAITFORONE,
2934 datagrams);
d7256d0e
JMG
2935 if (err < 0)
2936 break;
2937 err = __put_user(err, &compat_entry->msg_len);
2938 ++compat_entry;
2939 } else {
a7526eb5 2940 err = ___sys_recvmsg(sock,
666547ff 2941 (struct user_msghdr __user *)entry,
a7526eb5
AL
2942 &msg_sys, flags & ~MSG_WAITFORONE,
2943 datagrams);
d7256d0e
JMG
2944 if (err < 0)
2945 break;
2946 err = put_user(err, &entry->msg_len);
2947 ++entry;
2948 }
2949
a2e27255
ACM
2950 if (err)
2951 break;
a2e27255
ACM
2952 ++datagrams;
2953
71c5c159
BB
2954 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2955 if (flags & MSG_WAITFORONE)
2956 flags |= MSG_DONTWAIT;
2957
a2e27255 2958 if (timeout) {
766b9f92 2959 ktime_get_ts64(&timeout64);
c2e6c856 2960 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2961 if (timeout->tv_sec < 0) {
2962 timeout->tv_sec = timeout->tv_nsec = 0;
2963 break;
2964 }
2965
2966 /* Timeout, return less than vlen datagrams */
2967 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2968 break;
2969 }
2970
2971 /* Out of band data, return right away */
2972 if (msg_sys.msg_flags & MSG_OOB)
2973 break;
a78cb84c 2974 cond_resched();
a2e27255
ACM
2975 }
2976
a2e27255 2977 if (err == 0)
34b88a68
ACM
2978 goto out_put;
2979
2980 if (datagrams == 0) {
2981 datagrams = err;
2982 goto out_put;
2983 }
a2e27255 2984
34b88a68
ACM
2985 /*
2986 * We may return less entries than requested (vlen) if the
2987 * sock is non block and there aren't enough datagrams...
2988 */
2989 if (err != -EAGAIN) {
a2e27255 2990 /*
34b88a68
ACM
2991 * ... or if recvmsg returns an error after we
2992 * received some datagrams, where we record the
2993 * error to return on the next call or if the
2994 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2995 */
e05a5f51 2996 WRITE_ONCE(sock->sk->sk_err, -err);
a2e27255 2997 }
34b88a68
ACM
2998out_put:
2999 fput_light(sock->file, fput_needed);
a2e27255 3000
34b88a68 3001 return datagrams;
a2e27255
ACM
3002}
3003
e11d4284
AB
3004int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
3005 unsigned int vlen, unsigned int flags,
3006 struct __kernel_timespec __user *timeout,
3007 struct old_timespec32 __user *timeout32)
a2e27255
ACM
3008{
3009 int datagrams;
c2e6c856 3010 struct timespec64 timeout_sys;
a2e27255 3011
e11d4284
AB
3012 if (timeout && get_timespec64(&timeout_sys, timeout))
3013 return -EFAULT;
a2e27255 3014
e11d4284 3015 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
3016 return -EFAULT;
3017
e11d4284
AB
3018 if (!timeout && !timeout32)
3019 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
3020
3021 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 3022
e11d4284
AB
3023 if (datagrams <= 0)
3024 return datagrams;
3025
3026 if (timeout && put_timespec64(&timeout_sys, timeout))
3027 datagrams = -EFAULT;
3028
3029 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
3030 datagrams = -EFAULT;
3031
3032 return datagrams;
3033}
3034
1255e269
DB
3035SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
3036 unsigned int, vlen, unsigned int, flags,
c2e6c856 3037 struct __kernel_timespec __user *, timeout)
1255e269 3038{
e11d4284
AB
3039 if (flags & MSG_CMSG_COMPAT)
3040 return -EINVAL;
3041
3042 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
3043}
3044
3045#ifdef CONFIG_COMPAT_32BIT_TIME
3046SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
3047 unsigned int, vlen, unsigned int, flags,
3048 struct old_timespec32 __user *, timeout)
3049{
3050 if (flags & MSG_CMSG_COMPAT)
3051 return -EINVAL;
3052
3053 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
1255e269 3054}
e11d4284 3055#endif
1255e269 3056
a2e27255 3057#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall: the number of bytes of
 * arguments each sub-call copies from user space, indexed by call
 * number.  Slot 0 is unused.  The per-entry names follow the SYS_*
 * numbering from <linux/net.h>.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0: unused */
	AL(3),	/*  1: socket */
	AL(3),	/*  2: bind */
	AL(3),	/*  3: connect */
	AL(2),	/*  4: listen */
	AL(3),	/*  5: accept */
	AL(3),	/*  6: getsockname */
	AL(3),	/*  7: getpeername */
	AL(4),	/*  8: socketpair */
	AL(4),	/*  9: send */
	AL(4),	/* 10: recv */
	AL(6),	/* 11: sendto */
	AL(6),	/* 12: recvfrom */
	AL(2),	/* 13: shutdown */
	AL(5),	/* 14: setsockopt */
	AL(5),	/* 15: getsockopt */
	AL(3),	/* 16: sendmsg */
	AL(3),	/* 17: recvmsg */
	AL(4),	/* 18: accept4 */
	AL(5),	/* 19: recvmmsg */
	AL(4),	/* 20: sendmmsg */
};

#undef AL
3068
3069/*
89bddce5 3070 * System call vectors.
1da177e4
LT
3071 *
3072 * Argument checking cleaned up. Saved 20% in size.
3073 * This function doesn't need to set the kernel lock because
89bddce5 3074 * it is set by the callees.
1da177e4
LT
3075 */
3076
3e0fa65f 3077SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 3078{
2950fa9d 3079 unsigned long a[AUDITSC_ARGS];
89bddce5 3080 unsigned long a0, a1;
1da177e4 3081 int err;
47379052 3082 unsigned int len;
1da177e4 3083
228e548e 3084 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 3085 return -EINVAL;
c8e8cd57 3086 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 3087
47379052
AV
3088 len = nargs[call];
3089 if (len > sizeof(a))
3090 return -EINVAL;
3091
1da177e4 3092 /* copy_from_user should be SMP safe. */
47379052 3093 if (copy_from_user(a, args, len))
1da177e4 3094 return -EFAULT;
3ec3b2fb 3095
2950fa9d
CG
3096 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3097 if (err)
3098 return err;
3ec3b2fb 3099
89bddce5
SH
3100 a0 = a[0];
3101 a1 = a[1];
3102
3103 switch (call) {
3104 case SYS_SOCKET:
9d6a15c3 3105 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
3106 break;
3107 case SYS_BIND:
a87d35d8 3108 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
3109 break;
3110 case SYS_CONNECT:
1387c2c2 3111 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
3112 break;
3113 case SYS_LISTEN:
25e290ee 3114 err = __sys_listen(a0, a1);
89bddce5
SH
3115 break;
3116 case SYS_ACCEPT:
4541e805
DB
3117 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3118 (int __user *)a[2], 0);
89bddce5
SH
3119 break;
3120 case SYS_GETSOCKNAME:
3121 err =
8882a107
DB
3122 __sys_getsockname(a0, (struct sockaddr __user *)a1,
3123 (int __user *)a[2]);
89bddce5
SH
3124 break;
3125 case SYS_GETPEERNAME:
3126 err =
b21c8f83
DB
3127 __sys_getpeername(a0, (struct sockaddr __user *)a1,
3128 (int __user *)a[2]);
89bddce5
SH
3129 break;
3130 case SYS_SOCKETPAIR:
6debc8d8 3131 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
3132 break;
3133 case SYS_SEND:
f3bf896b
DB
3134 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
3135 NULL, 0);
89bddce5
SH
3136 break;
3137 case SYS_SENDTO:
211b634b
DB
3138 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
3139 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
3140 break;
3141 case SYS_RECV:
d27e9afc
DB
3142 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
3143 NULL, NULL);
89bddce5
SH
3144 break;
3145 case SYS_RECVFROM:
7a09e1eb
DB
3146 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
3147 (struct sockaddr __user *)a[4],
3148 (int __user *)a[5]);
89bddce5
SH
3149 break;
3150 case SYS_SHUTDOWN:
005a1aea 3151 err = __sys_shutdown(a0, a1);
89bddce5
SH
3152 break;
3153 case SYS_SETSOCKOPT:
cc36dca0
DB
3154 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
3155 a[4]);
89bddce5
SH
3156 break;
3157 case SYS_GETSOCKOPT:
3158 err =
13a2d70e
DB
3159 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
3160 (int __user *)a[4]);
89bddce5
SH
3161 break;
3162 case SYS_SENDMSG:
e1834a32
DB
3163 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
3164 a[2], true);
89bddce5 3165 break;
228e548e 3166 case SYS_SENDMMSG:
e1834a32
DB
3167 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
3168 a[3], true);
228e548e 3169 break;
89bddce5 3170 case SYS_RECVMSG:
e1834a32
DB
3171 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
3172 a[2], true);
89bddce5 3173 break;
a2e27255 3174 case SYS_RECVMMSG:
3ca47e95 3175 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
3176 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3177 a[2], a[3],
3178 (struct __kernel_timespec __user *)a[4],
3179 NULL);
3180 else
3181 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3182 a[2], a[3], NULL,
3183 (struct old_timespec32 __user *)a[4]);
a2e27255 3184 break;
de11defe 3185 case SYS_ACCEPT4:
4541e805
DB
3186 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3187 (int __user *)a[2], a[3]);
aaca0bdc 3188 break;
89bddce5
SH
3189 default:
3190 err = -EINVAL;
3191 break;
1da177e4
LT
3192 }
3193 return err;
3194}
3195
89bddce5 3196#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 3197
55737fda
SH
3198/**
3199 * sock_register - add a socket protocol handler
3200 * @ops: description of protocol
3201 *
1da177e4
LT
3202 * This function is called by a protocol handler that wants to
3203 * advertise its address family, and have it linked into the
e793c0f7 3204 * socket interface. The value ops->family corresponds to the
55737fda 3205 * socket system call protocol family.
1da177e4 3206 */
f0fd27d4 3207int sock_register(const struct net_proto_family *ops)
1da177e4
LT
3208{
3209 int err;
3210
3211 if (ops->family >= NPROTO) {
3410f22e 3212 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
3213 return -ENOBUFS;
3214 }
55737fda
SH
3215
3216 spin_lock(&net_family_lock);
190683a9
ED
3217 if (rcu_dereference_protected(net_families[ops->family],
3218 lockdep_is_held(&net_family_lock)))
55737fda
SH
3219 err = -EEXIST;
3220 else {
cf778b00 3221 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
3222 err = 0;
3223 }
55737fda
SH
3224 spin_unlock(&net_family_lock);
3225
fe0bdbde 3226 pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
1da177e4
LT
3227 return err;
3228}
c6d409cf 3229EXPORT_SYMBOL(sock_register);
1da177e4 3230
55737fda
SH
3231/**
3232 * sock_unregister - remove a protocol handler
3233 * @family: protocol family to remove
3234 *
1da177e4
LT
3235 * This function is called by a protocol handler that wants to
3236 * remove its address family, and have it unlinked from the
55737fda
SH
3237 * new socket creation.
3238 *
3239 * If protocol handler is a module, then it can use module reference
3240 * counts to protect against new references. If protocol handler is not
3241 * a module then it needs to provide its own protection in
3242 * the ops->create routine.
1da177e4 3243 */
f0fd27d4 3244void sock_unregister(int family)
1da177e4 3245{
f0fd27d4 3246 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3247
55737fda 3248 spin_lock(&net_family_lock);
a9b3cd7f 3249 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3250 spin_unlock(&net_family_lock);
3251
3252 synchronize_rcu();
3253
fe0bdbde 3254 pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
1da177e4 3255}
c6d409cf 3256EXPORT_SYMBOL(sock_unregister);
1da177e4 3257
bf2ae2e4
XL
3258bool sock_is_registered(int family)
3259{
66b51b0a 3260 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3261}
3262
77d76ea3 3263static int __init sock_init(void)
1da177e4 3264{
b3e19d92 3265 int err;
2ca794e5
EB
3266 /*
3267 * Initialize the network sysctl infrastructure.
3268 */
3269 err = net_sysctl_init();
3270 if (err)
3271 goto out;
b3e19d92 3272
1da177e4 3273 /*
89bddce5 3274 * Initialize skbuff SLAB cache
1da177e4
LT
3275 */
3276 skb_init();
1da177e4
LT
3277
3278 /*
89bddce5 3279 * Initialize the protocols module.
1da177e4
LT
3280 */
3281
3282 init_inodecache();
b3e19d92
NP
3283
3284 err = register_filesystem(&sock_fs_type);
3285 if (err)
47260ba9 3286 goto out;
1da177e4 3287 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3288 if (IS_ERR(sock_mnt)) {
3289 err = PTR_ERR(sock_mnt);
3290 goto out_mount;
3291 }
77d76ea3
AK
3292
3293 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3294 */
3295
3296#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3297 err = netfilter_init();
3298 if (err)
3299 goto out;
1da177e4 3300#endif
cbeb321a 3301
408eccce 3302 ptp_classifier_init();
c1f19b51 3303
b3e19d92
NP
3304out:
3305 return err;
3306
3307out_mount:
3308 unregister_filesystem(&sock_fs_type);
b3e19d92 3309 goto out;
1da177e4
LT
3310}
3311
77d76ea3
AK
3312core_initcall(sock_init); /* early initcall */
3313
1da177e4
LT
3314#ifdef CONFIG_PROC_FS
3315void socket_seq_show(struct seq_file *seq)
3316{
648845ab
TZ
3317 seq_printf(seq, "sockets: used %d\n",
3318 sock_inuse_get(seq->private));
1da177e4 3319}
89bddce5 3320#endif /* CONFIG_PROC_FS */
1da177e4 3321
29c49648
AB
3322/* Handle the fact that while struct ifreq has the same *layout* on
3323 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3324 * which are handled elsewhere, it still has different *size* due to
3325 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3326 * resulting in struct ifreq being 32 and 40 bytes respectively).
3327 * As a result, if the struct happens to be at the end of a page and
3328 * the next page isn't readable/writable, we get a fault. To prevent
3329 * that, copy back and forth to the full size.
3330 */
3331int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
7a229387 3332{
29c49648
AB
3333 if (in_compat_syscall()) {
3334 struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
7a229387 3335
29c49648
AB
3336 memset(ifr, 0, sizeof(*ifr));
3337 if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
3338 return -EFAULT;
7a229387 3339
29c49648
AB
3340 if (ifrdata)
3341 *ifrdata = compat_ptr(ifr32->ifr_data);
7a229387 3342
29c49648
AB
3343 return 0;
3344 }
7a229387 3345
29c49648 3346 if (copy_from_user(ifr, arg, sizeof(*ifr)))
7a229387
AB
3347 return -EFAULT;
3348
29c49648
AB
3349 if (ifrdata)
3350 *ifrdata = ifr->ifr_data;
3351
7a229387
AB
3352 return 0;
3353}
29c49648 3354EXPORT_SYMBOL(get_user_ifreq);
7a229387 3355
29c49648 3356int put_user_ifreq(struct ifreq *ifr, void __user *arg)
7a229387 3357{
29c49648 3358 size_t size = sizeof(*ifr);
7a229387 3359
29c49648
AB
3360 if (in_compat_syscall())
3361 size = sizeof(struct compat_ifreq);
7a229387 3362
29c49648 3363 if (copy_to_user(arg, ifr, size))
7a229387
AB
3364 return -EFAULT;
3365
3a7da39d 3366 return 0;
7a229387 3367}
29c49648 3368EXPORT_SYMBOL(put_user_ifreq);
7a229387 3369
89bbfc95 3370#ifdef CONFIG_COMPAT
7a50a240
AB
3371static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3372{
7a50a240 3373 compat_uptr_t uptr32;
44c02a2c
AV
3374 struct ifreq ifr;
3375 void __user *saved;
3376 int err;
7a50a240 3377
29c49648 3378 if (get_user_ifreq(&ifr, NULL, uifr32))
7a50a240
AB
3379 return -EFAULT;
3380
3381 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3382 return -EFAULT;
3383
44c02a2c
AV
3384 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3385 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3386
a554bf96 3387 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
44c02a2c
AV
3388 if (!err) {
3389 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
29c49648 3390 if (put_user_ifreq(&ifr, uifr32))
44c02a2c 3391 err = -EFAULT;
ccbd6a5a 3392 }
44c02a2c 3393 return err;
7a229387
AB
3394}
3395
590d4693
BH
3396/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3397static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3398 struct compat_ifreq __user *u_ifreq32)
7a229387 3399{
44c02a2c 3400 struct ifreq ifreq;
a554bf96 3401 void __user *data;
7a229387 3402
d0efb162
PC
3403 if (!is_socket_ioctl_cmd(cmd))
3404 return -ENOTTY;
a554bf96 3405 if (get_user_ifreq(&ifreq, &data, u_ifreq32))
7a229387 3406 return -EFAULT;
a554bf96 3407 ifreq.ifr_data = data;
7a229387 3408
a554bf96 3409 return dev_ioctl(net, cmd, &ifreq, data, NULL);
a2116ed2
AB
3410}
3411
6b96018b
AB
3412static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3413 unsigned int cmd, unsigned long arg)
3414{
3415 void __user *argp = compat_ptr(arg);
3416 struct sock *sk = sock->sk;
3417 struct net *net = sock_net(sk);
1ded5e5a 3418 const struct proto_ops *ops;
7a229387 3419
6b96018b 3420 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
88fc023f 3421 return sock_ioctl(file, cmd, (unsigned long)argp);
6b96018b
AB
3422
3423 switch (cmd) {
7a50a240
AB
3424 case SIOCWANDEV:
3425 return compat_siocwandev(net, argp);
0768e170
AB
3426 case SIOCGSTAMP_OLD:
3427 case SIOCGSTAMPNS_OLD:
1ded5e5a
ED
3428 ops = READ_ONCE(sock->ops);
3429 if (!ops->gettstamp)
c7cbdbf2 3430 return -ENOIOCTLCMD;
1ded5e5a
ED
3431 return ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
3432 !COMPAT_USE_64BIT_TIME);
c7cbdbf2 3433
dd98d289 3434 case SIOCETHTOOL:
590d4693
BH
3435 case SIOCBONDSLAVEINFOQUERY:
3436 case SIOCBONDINFOQUERY:
a2116ed2 3437 case SIOCSHWTSTAMP:
fd468c74 3438 case SIOCGHWTSTAMP:
590d4693 3439 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3440
3441 case FIOSETOWN:
3442 case SIOCSPGRP:
3443 case FIOGETOWN:
3444 case SIOCGPGRP:
3445 case SIOCBRADDBR:
3446 case SIOCBRDELBR:
3447 case SIOCGIFVLAN:
3448 case SIOCSIFVLAN:
c62cce2c 3449 case SIOCGSKNS:
0768e170
AB
3450 case SIOCGSTAMP_NEW:
3451 case SIOCGSTAMPNS_NEW:
876f0bf9 3452 case SIOCGIFCONF:
fd3a4590
RP
3453 case SIOCSIFBR:
3454 case SIOCGIFBR:
6b96018b
AB
3455 return sock_ioctl(file, cmd, arg);
3456
3457 case SIOCGIFFLAGS:
3458 case SIOCSIFFLAGS:
709566d7
AB
3459 case SIOCGIFMAP:
3460 case SIOCSIFMAP:
6b96018b
AB
3461 case SIOCGIFMETRIC:
3462 case SIOCSIFMETRIC:
3463 case SIOCGIFMTU:
3464 case SIOCSIFMTU:
3465 case SIOCGIFMEM:
3466 case SIOCSIFMEM:
3467 case SIOCGIFHWADDR:
3468 case SIOCSIFHWADDR:
3469 case SIOCADDMULTI:
3470 case SIOCDELMULTI:
3471 case SIOCGIFINDEX:
6b96018b
AB
3472 case SIOCGIFADDR:
3473 case SIOCSIFADDR:
3474 case SIOCSIFHWBROADCAST:
6b96018b 3475 case SIOCDIFADDR:
6b96018b
AB
3476 case SIOCGIFBRDADDR:
3477 case SIOCSIFBRDADDR:
3478 case SIOCGIFDSTADDR:
3479 case SIOCSIFDSTADDR:
3480 case SIOCGIFNETMASK:
3481 case SIOCSIFNETMASK:
3482 case SIOCSIFPFLAGS:
3483 case SIOCGIFPFLAGS:
3484 case SIOCGIFTXQLEN:
3485 case SIOCSIFTXQLEN:
3486 case SIOCBRADDIF:
3487 case SIOCBRDELIF:
c6c9fee3 3488 case SIOCGIFNAME:
9177efd3
AB
3489 case SIOCSIFNAME:
3490 case SIOCGMIIPHY:
3491 case SIOCGMIIREG:
3492 case SIOCSMIIREG:
f92d4fc9
AV
3493 case SIOCBONDENSLAVE:
3494 case SIOCBONDRELEASE:
3495 case SIOCBONDSETHWADDR:
3496 case SIOCBONDCHANGEACTIVE:
6b96018b
AB
3497 case SIOCSARP:
3498 case SIOCGARP:
3499 case SIOCDARP:
c7dc504e 3500 case SIOCOUTQ:
9d7bf41f 3501 case SIOCOUTQNSD:
6b96018b 3502 case SIOCATMARK:
63ff03ab 3503 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3504 }
3505
6b96018b
AB
3506 return -ENOIOCTLCMD;
3507}
7a229387 3508
95c96174 3509static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3510 unsigned long arg)
89bbfc95
SP
3511{
3512 struct socket *sock = file->private_data;
1ded5e5a 3513 const struct proto_ops *ops = READ_ONCE(sock->ops);
89bbfc95 3514 int ret = -ENOIOCTLCMD;
87de87d5
DM
3515 struct sock *sk;
3516 struct net *net;
3517
3518 sk = sock->sk;
3519 net = sock_net(sk);
89bbfc95 3520
1ded5e5a
ED
3521 if (ops->compat_ioctl)
3522 ret = ops->compat_ioctl(sock, cmd, arg);
89bbfc95 3523
87de87d5
DM
3524 if (ret == -ENOIOCTLCMD &&
3525 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3526 ret = compat_wext_handle_ioctl(net, cmd, arg);
3527
6b96018b
AB
3528 if (ret == -ENOIOCTLCMD)
3529 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3530
89bbfc95
SP
3531 return ret;
3532}
3533#endif
3534
8a3c245c
PT
3535/**
3536 * kernel_bind - bind an address to a socket (kernel space)
3537 * @sock: socket
3538 * @addr: address
3539 * @addrlen: length of address
3540 *
3541 * Returns 0 or an error.
3542 */
3543
ac5a488e
SS
3544int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3545{
c889a99a
JR
3546 struct sockaddr_storage address;
3547
3548 memcpy(&address, addr, addrlen);
3549
3550 return READ_ONCE(sock->ops)->bind(sock, (struct sockaddr *)&address,
3551 addrlen);
ac5a488e 3552}
c6d409cf 3553EXPORT_SYMBOL(kernel_bind);
ac5a488e 3554
8a3c245c
PT
3555/**
3556 * kernel_listen - move socket to listening state (kernel space)
3557 * @sock: socket
3558 * @backlog: pending connections queue size
3559 *
3560 * Returns 0 or an error.
3561 */
3562
ac5a488e
SS
3563int kernel_listen(struct socket *sock, int backlog)
3564{
1ded5e5a 3565 return READ_ONCE(sock->ops)->listen(sock, backlog);
ac5a488e 3566}
c6d409cf 3567EXPORT_SYMBOL(kernel_listen);
ac5a488e 3568
8a3c245c
PT
3569/**
3570 * kernel_accept - accept a connection (kernel space)
3571 * @sock: listening socket
3572 * @newsock: new connected socket
3573 * @flags: flags
3574 *
3575 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3576 * If it fails, @newsock is guaranteed to be %NULL.
3577 * Returns 0 or an error.
3578 */
3579
ac5a488e
SS
3580int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3581{
3582 struct sock *sk = sock->sk;
1ded5e5a 3583 const struct proto_ops *ops = READ_ONCE(sock->ops);
92ef0fd5
JA
3584 struct proto_accept_arg arg = {
3585 .flags = flags,
3586 .kern = true,
3587 };
ac5a488e
SS
3588 int err;
3589
3590 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3591 newsock);
3592 if (err < 0)
3593 goto done;
3594
92ef0fd5 3595 err = ops->accept(sock, *newsock, &arg);
ac5a488e
SS
3596 if (err < 0) {
3597 sock_release(*newsock);
fa8705b0 3598 *newsock = NULL;
ac5a488e
SS
3599 goto done;
3600 }
3601
1ded5e5a
ED
3602 (*newsock)->ops = ops;
3603 __module_get(ops->owner);
ac5a488e
SS
3604
3605done:
3606 return err;
3607}
c6d409cf 3608EXPORT_SYMBOL(kernel_accept);
ac5a488e 3609
8a3c245c
PT
3610/**
3611 * kernel_connect - connect a socket (kernel space)
3612 * @sock: socket
3613 * @addr: address
3614 * @addrlen: address length
3615 * @flags: flags (O_NONBLOCK, ...)
3616 *
f1dcffcc 3617 * For datagram sockets, @addr is the address to which datagrams are sent
8a3c245c
PT
3618 * by default, and the only address from which datagrams are received.
3619 * For stream sockets, attempts to connect to @addr.
3620 * Returns 0 or an error code.
3621 */
3622
ac5a488e 3623int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3624 int flags)
ac5a488e 3625{
0bdf3993
JR
3626 struct sockaddr_storage address;
3627
3628 memcpy(&address, addr, addrlen);
3629
3630 return READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)&address,
3631 addrlen, flags);
ac5a488e 3632}
c6d409cf 3633EXPORT_SYMBOL(kernel_connect);
ac5a488e 3634
8a3c245c
PT
3635/**
3636 * kernel_getsockname - get the address which the socket is bound (kernel space)
3637 * @sock: socket
3638 * @addr: address holder
3639 *
3640 * Fills the @addr pointer with the address which the socket is bound.
0fc95dec 3641 * Returns the length of the address in bytes or an error code.
8a3c245c
PT
3642 */
3643
9b2c45d4 3644int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3645{
1ded5e5a 3646 return READ_ONCE(sock->ops)->getname(sock, addr, 0);
ac5a488e 3647}
c6d409cf 3648EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3649
8a3c245c 3650/**
645f0897 3651 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3652 * @sock: socket
3653 * @addr: address holder
3654 *
3655 * Fills the @addr pointer with the address which the socket is connected.
0fc95dec 3656 * Returns the length of the address in bytes or an error code.
8a3c245c
PT
3657 */
3658
9b2c45d4 3659int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3660{
1ded5e5a 3661 return READ_ONCE(sock->ops)->getname(sock, addr, 1);
ac5a488e 3662}
c6d409cf 3663EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3664
8a3c245c 3665/**
645f0897 3666 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3667 * @sock: socket
3668 * @how: connection part
3669 *
3670 * Returns 0 or an error.
3671 */
3672
91cf45f0
TM
3673int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3674{
1ded5e5a 3675 return READ_ONCE(sock->ops)->shutdown(sock, how);
91cf45f0 3676}
91cf45f0 3677EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3678
8a3c245c
PT
3679/**
3680 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3681 * @sk: socket
3682 *
3683 * This routine returns the IP overhead imposed by a socket i.e.
3684 * the length of the underlying IP header, depending on whether
3685 * this is an IPv4 or IPv6 socket and the length from IP options turned
3686 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3687 */
8a3c245c 3688
113c3075
P
3689u32 kernel_sock_ip_overhead(struct sock *sk)
3690{
3691 struct inet_sock *inet;
3692 struct ip_options_rcu *opt;
3693 u32 overhead = 0;
113c3075
P
3694#if IS_ENABLED(CONFIG_IPV6)
3695 struct ipv6_pinfo *np;
3696 struct ipv6_txoptions *optv6 = NULL;
3697#endif /* IS_ENABLED(CONFIG_IPV6) */
3698
3699 if (!sk)
3700 return overhead;
3701
113c3075
P
3702 switch (sk->sk_family) {
3703 case AF_INET:
3704 inet = inet_sk(sk);
3705 overhead += sizeof(struct iphdr);
3706 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3707 sock_owned_by_user(sk));
113c3075
P
3708 if (opt)
3709 overhead += opt->opt.optlen;
3710 return overhead;
3711#if IS_ENABLED(CONFIG_IPV6)
3712 case AF_INET6:
3713 np = inet6_sk(sk);
3714 overhead += sizeof(struct ipv6hdr);
3715 if (np)
3716 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3717 sock_owned_by_user(sk));
113c3075
P
3718 if (optv6)
3719 overhead += (optv6->opt_flen + optv6->opt_nflen);
3720 return overhead;
3721#endif /* IS_ENABLED(CONFIG_IPV6) */
3722 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3723 return overhead;
3724 }
3725}
3726EXPORT_SYMBOL(kernel_sock_ip_overhead);