Merge patch series "bpf, riscv: use BPF prog pack allocator in BPF JIT"
[linux-2.6-block.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
aef2feda 55#include <linux/bpf-cgroup.h>
cc69837f 56#include <linux/ethtool.h>
1da177e4 57#include <linux/mm.h>
1da177e4
LT
58#include <linux/socket.h>
59#include <linux/file.h>
2dc334f1 60#include <linux/splice.h>
1da177e4
LT
61#include <linux/net.h>
62#include <linux/interrupt.h>
aaca0bdc 63#include <linux/thread_info.h>
55737fda 64#include <linux/rcupdate.h>
1da177e4
LT
65#include <linux/netdevice.h>
66#include <linux/proc_fs.h>
67#include <linux/seq_file.h>
4a3e2f71 68#include <linux/mutex.h>
1da177e4 69#include <linux/if_bridge.h>
20380731 70#include <linux/if_vlan.h>
408eccce 71#include <linux/ptp_classify.h>
1da177e4
LT
72#include <linux/init.h>
73#include <linux/poll.h>
74#include <linux/cache.h>
75#include <linux/module.h>
76#include <linux/highmem.h>
1da177e4 77#include <linux/mount.h>
fba9be49 78#include <linux/pseudo_fs.h>
1da177e4
LT
79#include <linux/security.h>
80#include <linux/syscalls.h>
81#include <linux/compat.h>
82#include <linux/kmod.h>
3ec3b2fb 83#include <linux/audit.h>
d86b5e0e 84#include <linux/wireless.h>
1b8d7ae4 85#include <linux/nsproxy.h>
1fd7317d 86#include <linux/magic.h>
5a0e3ad6 87#include <linux/slab.h>
600e1779 88#include <linux/xattr.h>
c8e8cd57 89#include <linux/nospec.h>
8c3c447b 90#include <linux/indirect_call_wrapper.h>
8e9fad0e 91#include <linux/io_uring.h>
1da177e4 92
7c0f6ba6 93#include <linux/uaccess.h>
1da177e4
LT
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
c7dc504e 106#include <linux/termios.h>
6b96018b 107#include <linux/sockios.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
d7c08826 110#include <linux/ptp_clock_kernel.h>
6e6eda44 111#include <trace/events/sock.h>
06021292 112
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll tunables; only defined when CONFIG_NET_RX_BUSY_POLL is set. */
unsigned int sysctl_net_busy_poll __read_mostly;
unsigned int sysctl_net_busy_read __read_mostly;
#endif
6b96018b 117
8ae5e030
AV
118static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
119static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 120static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
121
122static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
123static __poll_t sock_poll(struct file *file,
124 struct poll_table_struct *wait);
89bddce5 125static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
126#ifdef CONFIG_COMPAT
127static long compat_sock_ioctl(struct file *file,
89bddce5 128 unsigned int cmd, unsigned long arg);
89bbfc95 129#endif
1da177e4 130static int sock_fasync(int fd, struct file *filp, int on);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
2bfc6685 134static void sock_splice_eof(struct file *file);
542d3065
AB
135
#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo() for socket files: forward to the protocol's own
 * show_fdinfo callback when the socket's proto_ops provides one.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;
	const struct proto_ops *ops = READ_ONCE(sock->ops);

	if (!ops->show_fdinfo)
		return;

	ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
#endif
1da177e4 148
1da177e4
LT
149/*
150 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
151 * in the operation structures but are done directly via the socketcall() multiplexor.
152 */
153
da7071d7 154static const struct file_operations socket_file_ops = {
1da177e4
LT
155 .owner = THIS_MODULE,
156 .llseek = no_llseek,
8ae5e030
AV
157 .read_iter = sock_read_iter,
158 .write_iter = sock_write_iter,
1da177e4
LT
159 .poll = sock_poll,
160 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
161#ifdef CONFIG_COMPAT
162 .compat_ioctl = compat_sock_ioctl,
163#endif
8e9fad0e 164 .uring_cmd = io_uring_cmd_sock,
1da177e4 165 .mmap = sock_mmap,
1da177e4
LT
166 .release = sock_close,
167 .fasync = sock_fasync,
2dc334f1 168 .splice_write = splice_to_socket,
9c55e01c 169 .splice_read = sock_splice_read,
2bfc6685 170 .splice_eof = sock_splice_eof,
b4653342 171 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
172};
173
fe0bdbde
YD
174static const char * const pf_family_names[] = {
175 [PF_UNSPEC] = "PF_UNSPEC",
176 [PF_UNIX] = "PF_UNIX/PF_LOCAL",
177 [PF_INET] = "PF_INET",
178 [PF_AX25] = "PF_AX25",
179 [PF_IPX] = "PF_IPX",
180 [PF_APPLETALK] = "PF_APPLETALK",
181 [PF_NETROM] = "PF_NETROM",
182 [PF_BRIDGE] = "PF_BRIDGE",
183 [PF_ATMPVC] = "PF_ATMPVC",
184 [PF_X25] = "PF_X25",
185 [PF_INET6] = "PF_INET6",
186 [PF_ROSE] = "PF_ROSE",
187 [PF_DECnet] = "PF_DECnet",
188 [PF_NETBEUI] = "PF_NETBEUI",
189 [PF_SECURITY] = "PF_SECURITY",
190 [PF_KEY] = "PF_KEY",
191 [PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
192 [PF_PACKET] = "PF_PACKET",
193 [PF_ASH] = "PF_ASH",
194 [PF_ECONET] = "PF_ECONET",
195 [PF_ATMSVC] = "PF_ATMSVC",
196 [PF_RDS] = "PF_RDS",
197 [PF_SNA] = "PF_SNA",
198 [PF_IRDA] = "PF_IRDA",
199 [PF_PPPOX] = "PF_PPPOX",
200 [PF_WANPIPE] = "PF_WANPIPE",
201 [PF_LLC] = "PF_LLC",
202 [PF_IB] = "PF_IB",
203 [PF_MPLS] = "PF_MPLS",
204 [PF_CAN] = "PF_CAN",
205 [PF_TIPC] = "PF_TIPC",
206 [PF_BLUETOOTH] = "PF_BLUETOOTH",
207 [PF_IUCV] = "PF_IUCV",
208 [PF_RXRPC] = "PF_RXRPC",
209 [PF_ISDN] = "PF_ISDN",
210 [PF_PHONET] = "PF_PHONET",
211 [PF_IEEE802154] = "PF_IEEE802154",
212 [PF_CAIF] = "PF_CAIF",
213 [PF_ALG] = "PF_ALG",
214 [PF_NFC] = "PF_NFC",
215 [PF_VSOCK] = "PF_VSOCK",
216 [PF_KCM] = "PF_KCM",
217 [PF_QIPCRTR] = "PF_QIPCRTR",
218 [PF_SMC] = "PF_SMC",
219 [PF_XDP] = "PF_XDP",
bc49d816 220 [PF_MCTP] = "PF_MCTP",
fe0bdbde
YD
221};
222
1da177e4
LT
223/*
224 * The protocol list. Each protocol is registered in here.
225 */
226
1da177e4 227static DEFINE_SPINLOCK(net_family_lock);
190683a9 228static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 229
1da177e4 230/*
89bddce5
SH
231 * Support routines.
232 * Move socket addresses back and forth across the kernel/user
233 * divide and look after the messy bits.
1da177e4
LT
234 */
235
1da177e4
LT
236/**
237 * move_addr_to_kernel - copy a socket address into kernel space
238 * @uaddr: Address in user space
239 * @kaddr: Address in kernel space
240 * @ulen: Length in user space
241 *
242 * The address is copied into kernel space. If the provided address is
243 * too long an error code of -EINVAL is returned. If the copy gives
244 * invalid addresses -EFAULT is returned. On a success 0 is returned.
245 */
246
43db362d 247int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 248{
230b1839 249 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 250 return -EINVAL;
89bddce5 251 if (ulen == 0)
1da177e4 252 return 0;
89bddce5 253 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 254 return -EFAULT;
3ec3b2fb 255 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
256}
257
258/**
259 * move_addr_to_user - copy an address to user space
260 * @kaddr: kernel space address
261 * @klen: length of address in kernel
262 * @uaddr: user space address
263 * @ulen: pointer to user length field
264 *
265 * The value pointed to by ulen on entry is the buffer length available.
266 * This is overwritten with the buffer space used. -EINVAL is returned
267 * if an overlong buffer is specified or a negative buffer size. -EFAULT
268 * is returned if either the buffer or the length field are not
269 * accessible.
270 * After copying the data up to the limit the user specifies, the true
271 * length of the data is written over the length limit the user
272 * specified. Zero is returned for a success.
273 */
89bddce5 274
43db362d 275static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 276 void __user *uaddr, int __user *ulen)
1da177e4
LT
277{
278 int err;
279 int len;
280
68c6beb3 281 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
282 err = get_user(len, ulen);
283 if (err)
1da177e4 284 return err;
89bddce5
SH
285 if (len > klen)
286 len = klen;
68c6beb3 287 if (len < 0)
1da177e4 288 return -EINVAL;
89bddce5 289 if (len) {
d6fe3945
SG
290 if (audit_sockaddr(klen, kaddr))
291 return -ENOMEM;
89bddce5 292 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
293 return -EFAULT;
294 }
295 /*
89bddce5
SH
296 * "fromlen shall refer to the value before truncation.."
297 * 1003.1g
1da177e4
LT
298 */
299 return __put_user(klen, ulen);
300}
301
08009a76 302static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
303
304static struct inode *sock_alloc_inode(struct super_block *sb)
305{
306 struct socket_alloc *ei;
89bddce5 307
fd60b288 308 ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
309 if (!ei)
310 return NULL;
333f7909
AV
311 init_waitqueue_head(&ei->socket.wq.wait);
312 ei->socket.wq.fasync_list = NULL;
313 ei->socket.wq.flags = 0;
89bddce5 314
1da177e4
LT
315 ei->socket.state = SS_UNCONNECTED;
316 ei->socket.flags = 0;
317 ei->socket.ops = NULL;
318 ei->socket.sk = NULL;
319 ei->socket.file = NULL;
1da177e4
LT
320
321 return &ei->vfs_inode;
322}
323
6d7855c5 324static void sock_free_inode(struct inode *inode)
1da177e4 325{
43815482
ED
326 struct socket_alloc *ei;
327
328 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 329 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
330}
331
51cc5068 332static void init_once(void *foo)
1da177e4 333{
89bddce5 334 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 335
a35afb83 336 inode_init_once(&ei->vfs_inode);
1da177e4 337}
89bddce5 338
1e911632 339static void init_inodecache(void)
1da177e4
LT
340{
341 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
342 sizeof(struct socket_alloc),
343 0,
344 (SLAB_HWCACHE_ALIGN |
345 SLAB_RECLAIM_ACCOUNT |
5d097056 346 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 347 init_once);
1e911632 348 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
349}
350
b87221de 351static const struct super_operations sockfs_ops = {
c6d409cf 352 .alloc_inode = sock_alloc_inode,
6d7855c5 353 .free_inode = sock_free_inode,
c6d409cf 354 .statfs = simple_statfs,
1da177e4
LT
355};
356
c23fbb6b
ED
357/*
358 * sockfs_dname() is called from d_path().
359 */
360static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
361{
0f60d288 362 return dynamic_dname(buffer, buflen, "socket:[%lu]",
c5ef6035 363 d_inode(dentry)->i_ino);
c23fbb6b
ED
364}
365
3ba13d17 366static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 367 .d_dname = sockfs_dname,
1da177e4
LT
368};
369
bba0bd31
AG
370static int sockfs_xattr_get(const struct xattr_handler *handler,
371 struct dentry *dentry, struct inode *inode,
372 const char *suffix, void *value, size_t size)
373{
374 if (value) {
375 if (dentry->d_name.len + 1 > size)
376 return -ERANGE;
377 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
378 }
379 return dentry->d_name.len + 1;
380}
381
382#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
383#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
384#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
385
386static const struct xattr_handler sockfs_xattr_handler = {
387 .name = XATTR_NAME_SOCKPROTONAME,
388 .get = sockfs_xattr_get,
389};
390
4a590153 391static int sockfs_security_xattr_set(const struct xattr_handler *handler,
39f60c1c 392 struct mnt_idmap *idmap,
4a590153
AG
393 struct dentry *dentry, struct inode *inode,
394 const char *suffix, const void *value,
395 size_t size, int flags)
396{
397 /* Handled by LSM. */
398 return -EAGAIN;
399}
400
401static const struct xattr_handler sockfs_security_xattr_handler = {
402 .prefix = XATTR_SECURITY_PREFIX,
403 .set = sockfs_security_xattr_set,
404};
405
bba0bd31
AG
406static const struct xattr_handler *sockfs_xattr_handlers[] = {
407 &sockfs_xattr_handler,
4a590153 408 &sockfs_security_xattr_handler,
bba0bd31
AG
409 NULL
410};
411
fba9be49 412static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 413{
fba9be49
DH
414 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
415 if (!ctx)
416 return -ENOMEM;
417 ctx->ops = &sockfs_ops;
418 ctx->dops = &sockfs_dentry_operations;
419 ctx->xattr = sockfs_xattr_handlers;
420 return 0;
c74a1cbb
AV
421}
422
423static struct vfsmount *sock_mnt __read_mostly;
424
425static struct file_system_type sock_fs_type = {
426 .name = "sockfs",
fba9be49 427 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
428 .kill_sb = kill_anon_super,
429};
430
1da177e4
LT
431/*
432 * Obtains the first available file descriptor and sets it up for use.
433 *
39d8c1b6
DM
434 * These functions create file structures and map them into the fd space
435 * of the current process. On success they return a file descriptor
1da177e4
LT
436 * and file struct implicitly stored in sock->file.
437 * Note that another thread may close file descriptor before we return
438 * from this function. We use the fact that now we do not refer
439 * to socket after mapping. If one day we will need it, this
440 * function will increment ref. count on file by 1.
441 *
442 * In any case returned fd MAY BE not valid!
443 * This race condition is unavoidable
444 * with shared fd spaces, we cannot solve it inside kernel,
445 * but we take care of internal coherence yet.
446 */
447
8a3c245c
PT
448/**
449 * sock_alloc_file - Bind a &socket to a &file
450 * @sock: socket
451 * @flags: file status flags
452 * @dname: protocol name
453 *
454 * Returns the &file bound with @sock, implicitly storing it
455 * in sock->file. If dname is %NULL, sets to "".
649c15c7
TLSC
456 *
457 * On failure @sock is released, and an ERR pointer is returned.
458 *
8a3c245c
PT
459 * This function uses GFP_KERNEL internally.
460 */
461
aab174f0 462struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 463{
7cbe66b6 464 struct file *file;
1da177e4 465
d93aa9d8
AV
466 if (!dname)
467 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 468
d93aa9d8
AV
469 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
470 O_RDWR | (flags & O_NONBLOCK),
471 &socket_file_ops);
b5ffe634 472 if (IS_ERR(file)) {
8e1611e2 473 sock_release(sock);
39b65252 474 return file;
cc3808f8
AV
475 }
476
fe34db06 477 file->f_mode |= FMODE_NOWAIT;
cc3808f8 478 sock->file = file;
39d8c1b6 479 file->private_data = sock;
d8e464ec 480 stream_open(SOCK_INODE(sock), file);
28407630 481 return file;
39d8c1b6 482}
56b31d1c 483EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 484
56b31d1c 485static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
486{
487 struct file *newfile;
28407630 488 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
489 if (unlikely(fd < 0)) {
490 sock_release(sock);
28407630 491 return fd;
ce4bb04c 492 }
39d8c1b6 493
aab174f0 494 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 495 if (!IS_ERR(newfile)) {
39d8c1b6 496 fd_install(fd, newfile);
28407630
AV
497 return fd;
498 }
7cbe66b6 499
28407630
AV
500 put_unused_fd(fd);
501 return PTR_ERR(newfile);
1da177e4
LT
502}
503
8a3c245c
PT
504/**
505 * sock_from_file - Return the &socket bounded to @file.
506 * @file: file
8a3c245c 507 *
dba4a925 508 * On failure returns %NULL.
8a3c245c
PT
509 */
510
dba4a925 511struct socket *sock_from_file(struct file *file)
6cb153ca 512{
6cb153ca 513 if (file->f_op == &socket_file_ops)
da214a47 514 return file->private_data; /* set in sock_alloc_file */
6cb153ca 515
23bb80d2 516 return NULL;
6cb153ca 517}
406a3c63 518EXPORT_SYMBOL(sock_from_file);
6cb153ca 519
1da177e4 520/**
c6d409cf 521 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
522 * @fd: file handle
523 * @err: pointer to an error code return
524 *
525 * The file handle passed in is locked and the socket it is bound
241c4667 526 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
527 * with a negative errno code and NULL is returned. The function checks
528 * for both invalid handles and passing a handle which is not a socket.
529 *
530 * On a success the socket object pointer is returned.
531 */
532
533struct socket *sockfd_lookup(int fd, int *err)
534{
535 struct file *file;
1da177e4
LT
536 struct socket *sock;
537
89bddce5
SH
538 file = fget(fd);
539 if (!file) {
1da177e4
LT
540 *err = -EBADF;
541 return NULL;
542 }
89bddce5 543
dba4a925
FR
544 sock = sock_from_file(file);
545 if (!sock) {
546 *err = -ENOTSOCK;
1da177e4 547 fput(file);
dba4a925 548 }
6cb153ca
BL
549 return sock;
550}
c6d409cf 551EXPORT_SYMBOL(sockfd_lookup);
1da177e4 552
6cb153ca
BL
553static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
554{
00e188ef 555 struct fd f = fdget(fd);
6cb153ca
BL
556 struct socket *sock;
557
3672558c 558 *err = -EBADF;
00e188ef 559 if (f.file) {
dba4a925 560 sock = sock_from_file(f.file);
00e188ef 561 if (likely(sock)) {
ce787a5a 562 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 563 return sock;
00e188ef 564 }
dba4a925 565 *err = -ENOTSOCK;
00e188ef 566 fdput(f);
1da177e4 567 }
6cb153ca 568 return NULL;
1da177e4
LT
569}
570
600e1779
MY
571static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
572 size_t size)
573{
574 ssize_t len;
575 ssize_t used = 0;
576
c5ef6035 577 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
578 if (len < 0)
579 return len;
580 used += len;
581 if (buffer) {
582 if (size < used)
583 return -ERANGE;
584 buffer += len;
585 }
586
587 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
588 used += len;
589 if (buffer) {
590 if (size < used)
591 return -ERANGE;
592 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
593 buffer += len;
594 }
595
596 return used;
597}
598
c1632a0f 599static int sockfs_setattr(struct mnt_idmap *idmap,
549c7297 600 struct dentry *dentry, struct iattr *iattr)
86741ec2 601{
c1632a0f 602 int err = simple_setattr(&nop_mnt_idmap, dentry, iattr);
86741ec2 603
e1a3a60a 604 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
605 struct socket *sock = SOCKET_I(d_inode(dentry));
606
6d8c50dc
CW
607 if (sock->sk)
608 sock->sk->sk_uid = iattr->ia_uid;
609 else
610 err = -ENOENT;
86741ec2
LC
611 }
612
613 return err;
614}
615
600e1779 616static const struct inode_operations sockfs_inode_ops = {
600e1779 617 .listxattr = sockfs_listxattr,
86741ec2 618 .setattr = sockfs_setattr,
600e1779
MY
619};
620
1da177e4 621/**
8a3c245c 622 * sock_alloc - allocate a socket
89bddce5 623 *
1da177e4
LT
624 * Allocate a new inode and socket object. The two are bound together
625 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 626 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
627 */
628
f4a00aac 629struct socket *sock_alloc(void)
1da177e4 630{
89bddce5
SH
631 struct inode *inode;
632 struct socket *sock;
1da177e4 633
a209dfc7 634 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
635 if (!inode)
636 return NULL;
637
638 sock = SOCKET_I(inode);
639
85fe4025 640 inode->i_ino = get_next_ino();
89bddce5 641 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
642 inode->i_uid = current_fsuid();
643 inode->i_gid = current_fsgid();
600e1779 644 inode->i_op = &sockfs_inode_ops;
1da177e4 645
1da177e4
LT
646 return sock;
647}
f4a00aac 648EXPORT_SYMBOL(sock_alloc);
1da177e4 649
6d8c50dc 650static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4 651{
1ded5e5a
ED
652 const struct proto_ops *ops = READ_ONCE(sock->ops);
653
654 if (ops) {
655 struct module *owner = ops->owner;
1da177e4 656
6d8c50dc
CW
657 if (inode)
658 inode_lock(inode);
1ded5e5a 659 ops->release(sock);
ff7b11aa 660 sock->sk = NULL;
6d8c50dc
CW
661 if (inode)
662 inode_unlock(inode);
1da177e4
LT
663 sock->ops = NULL;
664 module_put(owner);
665 }
666
333f7909 667 if (sock->wq.fasync_list)
3410f22e 668 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 669
1da177e4
LT
670 if (!sock->file) {
671 iput(SOCK_INODE(sock));
672 return;
673 }
89bddce5 674 sock->file = NULL;
1da177e4 675}
6d8c50dc 676
9a8ad9ac
AL
677/**
678 * sock_release - close a socket
679 * @sock: socket to close
680 *
681 * The socket is released from the protocol stack if it has a release
682 * callback, and the inode is then released if the socket is bound to
683 * an inode not a file.
684 */
6d8c50dc
CW
685void sock_release(struct socket *sock)
686{
687 __sock_release(sock, NULL);
688}
c6d409cf 689EXPORT_SYMBOL(sock_release);
1da177e4 690
c14ac945 691void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 692{
140c55d4
ED
693 u8 flags = *tx_flags;
694
51eb7492 695 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
140c55d4
ED
696 flags |= SKBTX_HW_TSTAMP;
697
51eb7492
GE
698 /* PTP hardware clocks can provide a free running cycle counter
699 * as a time base for virtual clocks. Tell driver to use the
700 * free running cycle counter for timestamp if socket is bound
701 * to virtual clock.
702 */
703 if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
704 flags |= SKBTX_HW_TSTAMP_USE_CYCLES;
705 }
706
c14ac945 707 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
708 flags |= SKBTX_SW_TSTAMP;
709
c14ac945 710 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
711 flags |= SKBTX_SCHED_TSTAMP;
712
140c55d4 713 *tx_flags = flags;
20d49473 714}
67cc0d40 715EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 716
8c3c447b
PA
717INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
718 size_t));
a648a592
PA
719INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
720 size_t));
6e6eda44
YC
721
722static noinline void call_trace_sock_send_length(struct sock *sk, int ret,
723 int flags)
724{
725 trace_sock_send_length(sk, ret, 0);
726}
727
d8725c86 728static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 729{
1ded5e5a 730 int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->sendmsg, inet6_sendmsg,
a648a592
PA
731 inet_sendmsg, sock, msg,
732 msg_data_left(msg));
d8725c86 733 BUG_ON(ret == -EIOCBQUEUED);
6e6eda44
YC
734
735 if (trace_sock_send_length_enabled())
736 call_trace_sock_send_length(sock->sk, ret, 0);
d8725c86 737 return ret;
1da177e4
LT
738}
739
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM: if
 *	security_socket_sendmsg() returns non-zero, that value is returned
 *	and nothing is sent.
 *	Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err = security_socket_sendmsg(sock, msg, msg_data_left(msg));

	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 756
8a3c245c
PT
757/**
758 * kernel_sendmsg - send a message through @sock (kernel-space)
759 * @sock: socket
760 * @msg: message header
761 * @vec: kernel vec
762 * @num: vec array length
763 * @size: total message data size
764 *
765 * Builds the message data with @vec and sends it through @sock.
766 * Returns the number of bytes sent, or an error code.
767 */
768
1da177e4
LT
769int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
770 struct kvec *vec, size_t num, size_t size)
771{
de4eda9d 772 iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
d8725c86 773 return sock_sendmsg(sock, msg);
1da177e4 774}
c6d409cf 775EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 776
8a3c245c
PT
777/**
778 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
779 * @sk: sock
780 * @msg: message header
781 * @vec: output s/g array
782 * @num: output s/g array length
783 * @size: total message data size
784 *
785 * Builds the message data with @vec and sends it through @sock.
786 * Returns the number of bytes sent, or an error code.
787 * Caller must hold @sk.
788 */
789
306b13eb
TH
790int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
791 struct kvec *vec, size_t num, size_t size)
792{
793 struct socket *sock = sk->sk_socket;
1ded5e5a 794 const struct proto_ops *ops = READ_ONCE(sock->ops);
306b13eb 795
1ded5e5a 796 if (!ops->sendmsg_locked)
db5980d8 797 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 798
de4eda9d 799 iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
306b13eb 800
1ded5e5a 801 return ops->sendmsg_locked(sk, msg, msg_data_left(msg));
306b13eb
TH
802}
803EXPORT_SYMBOL(kernel_sendmsg_locked);
804
8605330a
SHY
805static bool skb_is_err_queue(const struct sk_buff *skb)
806{
807 /* pkt_type of skbs enqueued on the error queue are set to
808 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
809 * in recvmsg, since skbs received on a local socket will never
810 * have a pkt_type of PACKET_OUTGOING.
811 */
812 return skb->pkt_type == PACKET_OUTGOING;
813}
814
b50a5c70
ML
815/* On transmit, software and hardware timestamps are returned independently.
816 * As the two skb clones share the hardware timestamp, which may be updated
817 * before the software timestamp is received, a hardware TX timestamp may be
818 * returned only if there is no software TX timestamp. Ignore false software
819 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 820 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
821 * hardware timestamp.
822 */
823static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
824{
825 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
826}
827
97dc7cd9
GE
828static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
829{
830 bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC;
831 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
832 struct net_device *orig_dev;
833 ktime_t hwtstamp;
834
835 rcu_read_lock();
836 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
837 if (orig_dev) {
838 *if_index = orig_dev->ifindex;
839 hwtstamp = netdev_get_tstamp(orig_dev, shhwtstamps, cycles);
840 } else {
841 hwtstamp = shhwtstamps->hwtstamp;
842 }
843 rcu_read_unlock();
844
845 return hwtstamp;
846}
847
848static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb,
849 int if_index)
aad9c8c4
ML
850{
851 struct scm_ts_pktinfo ts_pktinfo;
852 struct net_device *orig_dev;
853
854 if (!skb_mac_header_was_set(skb))
855 return;
856
857 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
858
97dc7cd9
GE
859 if (!if_index) {
860 rcu_read_lock();
861 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
862 if (orig_dev)
863 if_index = orig_dev->ifindex;
864 rcu_read_unlock();
865 }
866 ts_pktinfo.if_index = if_index;
aad9c8c4
ML
867
868 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
869 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
870 sizeof(ts_pktinfo), &ts_pktinfo);
871}
872
92f37fd2
ED
873/*
874 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
875 */
876void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
877 struct sk_buff *skb)
878{
20d49473 879 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 880 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
881 struct scm_timestamping_internal tss;
882
b50a5c70 883 int empty = 1, false_tstamp = 0;
20d49473
PO
884 struct skb_shared_hwtstamps *shhwtstamps =
885 skb_hwtstamps(skb);
97dc7cd9 886 int if_index;
007747a9 887 ktime_t hwtstamp;
20d49473
PO
888
889 /* Race occurred between timestamp enabling and packet
890 receiving. Fill in the current time for now. */
b50a5c70 891 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 892 __net_timestamp(skb);
b50a5c70
ML
893 false_tstamp = 1;
894 }
20d49473
PO
895
896 if (need_software_tstamp) {
897 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
898 if (new_tstamp) {
899 struct __kernel_sock_timeval tv;
900
901 skb_get_new_timestamp(skb, &tv);
902 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
903 sizeof(tv), &tv);
904 } else {
905 struct __kernel_old_timeval tv;
906
907 skb_get_timestamp(skb, &tv);
908 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
909 sizeof(tv), &tv);
910 }
20d49473 911 } else {
887feae3
DD
912 if (new_tstamp) {
913 struct __kernel_timespec ts;
914
915 skb_get_new_timestampns(skb, &ts);
916 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
917 sizeof(ts), &ts);
918 } else {
df1b4ba9 919 struct __kernel_old_timespec ts;
887feae3
DD
920
921 skb_get_timestampns(skb, &ts);
922 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
923 sizeof(ts), &ts);
924 }
20d49473
PO
925 }
926 }
927
f24b9be5 928 memset(&tss, 0, sizeof(tss));
c199105d 929 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 930 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 931 empty = 0;
4d276eb6 932 if (shhwtstamps &&
b9f40e21 933 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
d7c08826 934 !skb_is_swtx_tstamp(skb, false_tstamp)) {
97dc7cd9
GE
935 if_index = 0;
936 if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
937 hwtstamp = get_timestamp(sk, skb, &if_index);
007747a9
ML
938 else
939 hwtstamp = shhwtstamps->hwtstamp;
d7c08826 940
97dc7cd9
GE
941 if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
942 hwtstamp = ptp_convert_timestamp(&hwtstamp,
943 sk->sk_bind_phc);
944
007747a9 945 if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
d7c08826
YL
946 empty = 0;
947
948 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
949 !skb_is_err_queue(skb))
97dc7cd9 950 put_ts_pktinfo(msg, skb, if_index);
d7c08826 951 }
aad9c8c4 952 }
1c885808 953 if (!empty) {
9718475e
DD
954 if (sock_flag(sk, SOCK_TSTAMP_NEW))
955 put_cmsg_scm_timestamping64(msg, &tss);
956 else
957 put_cmsg_scm_timestamping(msg, &tss);
1c885808 958
8605330a 959 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 960 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
961 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
962 skb->len, skb->data);
963 }
92f37fd2 964}
7c81fd8b
ACM
965EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
966
#ifdef CONFIG_WIRELESS
/*
 * Deliver the wifi ACK status of @skb to the receiver as an
 * SCM_WIFI_STATUS control message.
 *
 * Nothing is emitted unless the socket has SOCK_WIFI_STATUS set and the
 * skb marks its wifi_acked value as valid.
 */
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid) {
		/* Copy into an int whose address can be handed to put_cmsg() */
		int acked = skb->wifi_acked;

		put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(acked),
			 &acked);
	}
}
EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
#endif
6e3e939f 984
11165f14 985static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
986 struct sk_buff *skb)
3b885787 987{
744d5a3e 988 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 989 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 990 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
991}
992
6fd1d51c
EM
993static void sock_recv_mark(struct msghdr *msg, struct sock *sk,
994 struct sk_buff *skb)
995{
2558b803
ED
996 if (sock_flag(sk, SOCK_RCVMARK) && skb) {
997 /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */
998 __u32 mark = skb->mark;
999
1000 put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), &mark);
1001 }
6fd1d51c
EM
1002}
1003
/*
 * Append the generic per-packet control messages for @skb to @msg.
 *
 * The helpers run in a fixed order: timestamp first, then the drop
 * counter, then the packet mark.
 */
void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
		       struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
	sock_recv_mark(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_cmsgs);
3b885787 1012
8c3c447b 1013INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
1014 size_t, int));
1015INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
1016 size_t, int));
6e6eda44
YC
1017
1018static noinline void call_trace_sock_recv_length(struct sock *sk, int ret, int flags)
1019{
1020 trace_sock_recv_length(sk, ret, flags);
1021}
1022
1b784140 1023static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 1024 int flags)
1da177e4 1025{
1ded5e5a
ED
1026 int ret = INDIRECT_CALL_INET(READ_ONCE(sock->ops)->recvmsg,
1027 inet6_recvmsg,
6e6eda44
YC
1028 inet_recvmsg, sock, msg,
1029 msg_data_left(msg), flags);
1030 if (trace_sock_recv_length_enabled())
1031 call_trace_sock_recv_length(sock->sk, ret, flags);
1032 return ret;
1da177e4
LT
1033}
1034
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Asks the LSM for permission first and, if it agrees, receives @msg
 *	from @sock. Returns the total number of bytes received, or an error
 *	(including any error reported by the LSM hook).
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 1051
c1249c0a 1052/**
8a3c245c
PT
1053 * kernel_recvmsg - Receive a message from a socket (kernel space)
1054 * @sock: The socket to receive the message from
1055 * @msg: Received message
1056 * @vec: Input s/g array for message data
1057 * @num: Size of input s/g array
1058 * @size: Number of bytes to read
1059 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 1060 *
8a3c245c
PT
1061 * On return the msg structure contains the scatter/gather array passed in the
1062 * vec argument. The array is modified so that it consists of the unfilled
1063 * portion of the original array.
c1249c0a 1064 *
8a3c245c 1065 * The returned value is the total number of bytes received, or an error.
c1249c0a 1066 */
8a3c245c 1067
89bddce5
SH
1068int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
1069 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4 1070{
1f466e1f 1071 msg->msg_control_is_user = false;
de4eda9d 1072 iov_iter_kvec(&msg->msg_iter, ITER_DEST, vec, num, size);
1f466e1f 1073 return sock_recvmsg(sock, msg, flags);
1da177e4 1074}
c6d409cf 1075EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 1076
9c55e01c 1077static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 1078 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
1079 unsigned int flags)
1080{
1081 struct socket *sock = file->private_data;
1ded5e5a 1082 const struct proto_ops *ops;
9c55e01c 1083
1ded5e5a
ED
1084 ops = READ_ONCE(sock->ops);
1085 if (unlikely(!ops->splice_read))
67178fd0 1086 return copy_splice_read(file, ppos, pipe, len, flags);
997b37da 1087
1ded5e5a 1088 return ops->splice_read(sock, ppos, pipe, len, flags);
9c55e01c
JA
1089}
1090
2bfc6685
DH
1091static void sock_splice_eof(struct file *file)
1092{
1093 struct socket *sock = file->private_data;
1ded5e5a 1094 const struct proto_ops *ops;
2bfc6685 1095
1ded5e5a
ED
1096 ops = READ_ONCE(sock->ops);
1097 if (ops->splice_eof)
1098 ops->splice_eof(sock);
2bfc6685
DH
1099}
1100
8ae5e030 1101static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 1102{
6d652330
AV
1103 struct file *file = iocb->ki_filp;
1104 struct socket *sock = file->private_data;
0345f931 1105 struct msghdr msg = {.msg_iter = *to,
1106 .msg_iocb = iocb};
8ae5e030 1107 ssize_t res;
ce1d4d3e 1108
ebfcd895 1109 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1110 msg.msg_flags = MSG_DONTWAIT;
1111
1112 if (iocb->ki_pos != 0)
1da177e4 1113 return -ESPIPE;
027445c3 1114
66ee59af 1115 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
1116 return 0;
1117
2da62906 1118 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
1119 *to = msg.msg_iter;
1120 return res;
1da177e4
LT
1121}
1122
8ae5e030 1123static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 1124{
6d652330
AV
1125 struct file *file = iocb->ki_filp;
1126 struct socket *sock = file->private_data;
0345f931 1127 struct msghdr msg = {.msg_iter = *from,
1128 .msg_iocb = iocb};
8ae5e030 1129 ssize_t res;
1da177e4 1130
8ae5e030 1131 if (iocb->ki_pos != 0)
ce1d4d3e 1132 return -ESPIPE;
027445c3 1133
ebfcd895 1134 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1135 msg.msg_flags = MSG_DONTWAIT;
1136
6d652330
AV
1137 if (sock->type == SOCK_SEQPACKET)
1138 msg.msg_flags |= MSG_EOR;
1139
d8725c86 1140 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1141 *from = msg.msg_iter;
1142 return res;
1da177e4
LT
1143}
1144
1da177e4
LT
1145/*
1146 * Atomic setting of ioctl hooks to avoid race
1147 * with module unload.
1148 */
1149
4a3e2f71 1150static DEFINE_MUTEX(br_ioctl_mutex);
ad2f99ae
AB
1151static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br,
1152 unsigned int cmd, struct ifreq *ifr,
1153 void __user *uarg);
1da177e4 1154
ad2f99ae
AB
1155void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
1156 unsigned int cmd, struct ifreq *ifr,
1157 void __user *uarg))
1da177e4 1158{
4a3e2f71 1159 mutex_lock(&br_ioctl_mutex);
1da177e4 1160 br_ioctl_hook = hook;
4a3e2f71 1161 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1162}
1163EXPORT_SYMBOL(brioctl_set);
1164
ad2f99ae
AB
1165int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
1166 struct ifreq *ifr, void __user *uarg)
1167{
1168 int err = -ENOPKG;
1169
1170 if (!br_ioctl_hook)
1171 request_module("bridge");
1172
1173 mutex_lock(&br_ioctl_mutex);
1174 if (br_ioctl_hook)
1175 err = br_ioctl_hook(net, br, cmd, ifr, uarg);
1176 mutex_unlock(&br_ioctl_mutex);
1177
1178 return err;
1179}
1180
4a3e2f71 1181static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1182static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1183
881d966b 1184void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1185{
4a3e2f71 1186 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1187 vlan_ioctl_hook = hook;
4a3e2f71 1188 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1189}
1190EXPORT_SYMBOL(vlan_ioctl_set);
1191
6b96018b 1192static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1193 unsigned int cmd, unsigned long arg)
6b96018b 1194{
1ded5e5a 1195 const struct proto_ops *ops = READ_ONCE(sock->ops);
876f0bf9
AB
1196 struct ifreq ifr;
1197 bool need_copyout;
6b96018b
AB
1198 int err;
1199 void __user *argp = (void __user *)arg;
a554bf96 1200 void __user *data;
6b96018b 1201
1ded5e5a 1202 err = ops->ioctl(sock, cmd, arg);
6b96018b
AB
1203
1204 /*
1205 * If this ioctl is unknown try to hand it down
1206 * to the NIC driver.
1207 */
36fd633e
AV
1208 if (err != -ENOIOCTLCMD)
1209 return err;
6b96018b 1210
29ce8f97
JK
1211 if (!is_socket_ioctl_cmd(cmd))
1212 return -ENOTTY;
1213
a554bf96 1214 if (get_user_ifreq(&ifr, &data, argp))
876f0bf9 1215 return -EFAULT;
a554bf96 1216 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
876f0bf9 1217 if (!err && need_copyout)
a554bf96 1218 if (put_user_ifreq(&ifr, argp))
44c02a2c 1219 return -EFAULT;
876f0bf9 1220
6b96018b
AB
1221 return err;
1222}
1223
1da177e4
LT
1224/*
1225 * With an ioctl, arg may well be a user mode pointer, but we don't know
1226 * what to do with it - that's up to the protocol still.
1227 */
1228
1229static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1230{
1ded5e5a 1231 const struct proto_ops *ops;
1da177e4 1232 struct socket *sock;
881d966b 1233 struct sock *sk;
1da177e4
LT
1234 void __user *argp = (void __user *)arg;
1235 int pid, err;
881d966b 1236 struct net *net;
1da177e4 1237
b69aee04 1238 sock = file->private_data;
1ded5e5a 1239 ops = READ_ONCE(sock->ops);
881d966b 1240 sk = sock->sk;
3b1e0a65 1241 net = sock_net(sk);
44c02a2c
AV
1242 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1243 struct ifreq ifr;
a554bf96 1244 void __user *data;
44c02a2c 1245 bool need_copyout;
a554bf96 1246 if (get_user_ifreq(&ifr, &data, argp))
44c02a2c 1247 return -EFAULT;
a554bf96 1248 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
44c02a2c 1249 if (!err && need_copyout)
a554bf96 1250 if (put_user_ifreq(&ifr, argp))
44c02a2c 1251 return -EFAULT;
1da177e4 1252 } else
3d23e349 1253#ifdef CONFIG_WEXT_CORE
1da177e4 1254 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1255 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1256 } else
3d23e349 1257#endif
89bddce5 1258 switch (cmd) {
1da177e4
LT
1259 case FIOSETOWN:
1260 case SIOCSPGRP:
1261 err = -EFAULT;
1262 if (get_user(pid, (int __user *)argp))
1263 break;
393cc3f5 1264 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1265 break;
1266 case FIOGETOWN:
1267 case SIOCGPGRP:
609d7fa9 1268 err = put_user(f_getown(sock->file),
89bddce5 1269 (int __user *)argp);
1da177e4
LT
1270 break;
1271 case SIOCGIFBR:
1272 case SIOCSIFBR:
1273 case SIOCBRADDBR:
1274 case SIOCBRDELBR:
ad2f99ae 1275 err = br_ioctl_call(net, NULL, cmd, NULL, argp);
1da177e4
LT
1276 break;
1277 case SIOCGIFVLAN:
1278 case SIOCSIFVLAN:
1279 err = -ENOPKG;
1280 if (!vlan_ioctl_hook)
1281 request_module("8021q");
1282
4a3e2f71 1283 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1284 if (vlan_ioctl_hook)
881d966b 1285 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1286 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1287 break;
c62cce2c
AV
1288 case SIOCGSKNS:
1289 err = -EPERM;
1290 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1291 break;
1292
1293 err = open_related_ns(&net->ns, get_net_ns);
1294 break;
0768e170
AB
1295 case SIOCGSTAMP_OLD:
1296 case SIOCGSTAMPNS_OLD:
1ded5e5a 1297 if (!ops->gettstamp) {
c7cbdbf2
AB
1298 err = -ENOIOCTLCMD;
1299 break;
1300 }
1ded5e5a
ED
1301 err = ops->gettstamp(sock, argp,
1302 cmd == SIOCGSTAMP_OLD,
1303 !IS_ENABLED(CONFIG_64BIT));
60747828 1304 break;
0768e170
AB
1305 case SIOCGSTAMP_NEW:
1306 case SIOCGSTAMPNS_NEW:
1ded5e5a 1307 if (!ops->gettstamp) {
0768e170
AB
1308 err = -ENOIOCTLCMD;
1309 break;
1310 }
1ded5e5a
ED
1311 err = ops->gettstamp(sock, argp,
1312 cmd == SIOCGSTAMP_NEW,
1313 false);
c7cbdbf2 1314 break;
876f0bf9
AB
1315
1316 case SIOCGIFCONF:
1317 err = dev_ifconf(net, argp);
1318 break;
1319
1da177e4 1320 default:
63ff03ab 1321 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1322 break;
89bddce5 1323 }
1da177e4
LT
1324 return err;
1325}
1326
8a3c245c
PT
1327/**
1328 * sock_create_lite - creates a socket
1329 * @family: protocol family (AF_INET, ...)
1330 * @type: communication type (SOCK_STREAM, ...)
1331 * @protocol: protocol (0, ...)
1332 * @res: new socket
1333 *
1334 * Creates a new socket and assigns it to @res, passing through LSM.
1335 * The new socket initialization is not complete, see kernel_accept().
1336 * Returns 0 or an error. On failure @res is set to %NULL.
1337 * This function internally uses GFP_KERNEL.
1338 */
1339
1da177e4
LT
1340int sock_create_lite(int family, int type, int protocol, struct socket **res)
1341{
1342 int err;
1343 struct socket *sock = NULL;
89bddce5 1344
1da177e4
LT
1345 err = security_socket_create(family, type, protocol, 1);
1346 if (err)
1347 goto out;
1348
1349 sock = sock_alloc();
1350 if (!sock) {
1351 err = -ENOMEM;
1352 goto out;
1353 }
1354
1da177e4 1355 sock->type = type;
7420ed23
VY
1356 err = security_socket_post_create(sock, family, type, protocol, 1);
1357 if (err)
1358 goto out_release;
1359
1da177e4
LT
1360out:
1361 *res = sock;
1362 return err;
7420ed23
VY
1363out_release:
1364 sock_release(sock);
1365 sock = NULL;
1366 goto out;
1da177e4 1367}
c6d409cf 1368EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1369
1370/* No kernel lock held - perfect */
ade994f4 1371static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1372{
3cafb376 1373 struct socket *sock = file->private_data;
1ded5e5a 1374 const struct proto_ops *ops = READ_ONCE(sock->ops);
a331de3b 1375 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1376
1ded5e5a 1377 if (!ops->poll)
e88958e6 1378 return 0;
f641f13b 1379
a331de3b
CH
1380 if (sk_can_busy_loop(sock->sk)) {
1381 /* poll once if requested by the syscall */
1382 if (events & POLL_BUSY_LOOP)
1383 sk_busy_loop(sock->sk, 1);
1384
1385 /* if this socket can poll_ll, tell the system call */
1386 flag = POLL_BUSY_LOOP;
1387 }
1388
1ded5e5a 1389 return ops->poll(file, sock, wait) | flag;
1da177e4
LT
1390}
1391
89bddce5 1392static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1393{
b69aee04 1394 struct socket *sock = file->private_data;
1da177e4 1395
1ded5e5a 1396 return READ_ONCE(sock->ops)->mmap(file, sock, vma);
1da177e4
LT
1397}
1398
/*
 * release file operation for sockets: release the socket that belongs to
 * @inode. Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1404
1405/*
1406 * Update the socket async list
1407 *
1408 * Fasync_list locking strategy.
1409 *
1410 * 1. fasync_list is modified only under process context socket lock
1411 * i.e. under semaphore.
1412 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1413 * or under socket lock
1da177e4
LT
1414 */
1415
1416static int sock_fasync(int fd, struct file *filp, int on)
1417{
989a2979
ED
1418 struct socket *sock = filp->private_data;
1419 struct sock *sk = sock->sk;
333f7909 1420 struct socket_wq *wq = &sock->wq;
1da177e4 1421
989a2979 1422 if (sk == NULL)
1da177e4 1423 return -EINVAL;
1da177e4
LT
1424
1425 lock_sock(sk);
eaefd110 1426 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1427
eaefd110 1428 if (!wq->fasync_list)
989a2979
ED
1429 sock_reset_flag(sk, SOCK_FASYNC);
1430 else
bcdce719 1431 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1432
989a2979 1433 release_sock(sk);
1da177e4
LT
1434 return 0;
1435}
1436
ceb5d58b 1437/* This function may be called only under rcu_lock */
1da177e4 1438
ceb5d58b 1439int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1440{
ceb5d58b 1441 if (!wq || !wq->fasync_list)
1da177e4 1442 return -1;
ceb5d58b 1443
89bddce5 1444 switch (how) {
8d8ad9d7 1445 case SOCK_WAKE_WAITD:
ceb5d58b 1446 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1447 break;
1448 goto call_kill;
8d8ad9d7 1449 case SOCK_WAKE_SPACE:
ceb5d58b 1450 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1451 break;
7c7ab580 1452 fallthrough;
8d8ad9d7 1453 case SOCK_WAKE_IO:
89bddce5 1454call_kill:
43815482 1455 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1456 break;
8d8ad9d7 1457 case SOCK_WAKE_URG:
43815482 1458 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1459 }
ceb5d58b 1460
1da177e4
LT
1461 return 0;
1462}
c6d409cf 1463EXPORT_SYMBOL(sock_wake_async);
1da177e4 1464
8a3c245c
PT
1465/**
1466 * __sock_create - creates a socket
1467 * @net: net namespace
1468 * @family: protocol family (AF_INET, ...)
1469 * @type: communication type (SOCK_STREAM, ...)
1470 * @protocol: protocol (0, ...)
1471 * @res: new socket
1472 * @kern: boolean for kernel space sockets
1473 *
1474 * Creates a new socket and assigns it to @res, passing through LSM.
1475 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1476 * be set to true if the socket resides in kernel space.
1477 * This function internally uses GFP_KERNEL.
1478 */
1479
721db93a 1480int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1481 struct socket **res, int kern)
1da177e4
LT
1482{
1483 int err;
1484 struct socket *sock;
55737fda 1485 const struct net_proto_family *pf;
1da177e4
LT
1486
1487 /*
89bddce5 1488 * Check protocol is in range
1da177e4
LT
1489 */
1490 if (family < 0 || family >= NPROTO)
1491 return -EAFNOSUPPORT;
1492 if (type < 0 || type >= SOCK_MAX)
1493 return -EINVAL;
1494
1495 /* Compatibility.
1496
1497 This uglymoron is moved from INET layer to here to avoid
1498 deadlock in module load.
1499 */
1500 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1501 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1502 current->comm);
1da177e4
LT
1503 family = PF_PACKET;
1504 }
1505
1506 err = security_socket_create(family, type, protocol, kern);
1507 if (err)
1508 return err;
89bddce5 1509
55737fda
SH
1510 /*
1511 * Allocate the socket and allow the family to set things up. if
1512 * the protocol is 0, the family is instructed to select an appropriate
1513 * default.
1514 */
1515 sock = sock_alloc();
1516 if (!sock) {
e87cc472 1517 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1518 return -ENFILE; /* Not exactly a match, but its the
1519 closest posix thing */
1520 }
1521
1522 sock->type = type;
1523
95a5afca 1524#ifdef CONFIG_MODULES
89bddce5
SH
1525 /* Attempt to load a protocol module if the find failed.
1526 *
1527 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1528 * requested real, full-featured networking support upon configuration.
1529 * Otherwise module support will break!
1530 */
190683a9 1531 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1532 request_module("net-pf-%d", family);
1da177e4
LT
1533#endif
1534
55737fda
SH
1535 rcu_read_lock();
1536 pf = rcu_dereference(net_families[family]);
1537 err = -EAFNOSUPPORT;
1538 if (!pf)
1539 goto out_release;
1da177e4
LT
1540
1541 /*
1542 * We will call the ->create function, that possibly is in a loadable
1543 * module, so we have to bump that loadable module refcnt first.
1544 */
55737fda 1545 if (!try_module_get(pf->owner))
1da177e4
LT
1546 goto out_release;
1547
55737fda
SH
1548 /* Now protected by module ref count */
1549 rcu_read_unlock();
1550
3f378b68 1551 err = pf->create(net, sock, protocol, kern);
55737fda 1552 if (err < 0)
1da177e4 1553 goto out_module_put;
a79af59e 1554
1da177e4
LT
1555 /*
1556 * Now to bump the refcnt of the [loadable] module that owns this
1557 * socket at sock_release time we decrement its refcnt.
1558 */
55737fda
SH
1559 if (!try_module_get(sock->ops->owner))
1560 goto out_module_busy;
1561
1da177e4
LT
1562 /*
1563 * Now that we're done with the ->create function, the [loadable]
1564 * module can have its refcnt decremented
1565 */
55737fda 1566 module_put(pf->owner);
7420ed23
VY
1567 err = security_socket_post_create(sock, family, type, protocol, kern);
1568 if (err)
3b185525 1569 goto out_sock_release;
55737fda 1570 *res = sock;
1da177e4 1571
55737fda
SH
1572 return 0;
1573
1574out_module_busy:
1575 err = -EAFNOSUPPORT;
1da177e4 1576out_module_put:
55737fda
SH
1577 sock->ops = NULL;
1578 module_put(pf->owner);
1579out_sock_release:
1da177e4 1580 sock_release(sock);
55737fda
SH
1581 return err;
1582
1583out_release:
1584 rcu_read_unlock();
1585 goto out_sock_release;
1da177e4 1586}
721db93a 1587EXPORT_SYMBOL(__sock_create);
1da177e4 1588
8a3c245c
PT
1589/**
1590 * sock_create - creates a socket
1591 * @family: protocol family (AF_INET, ...)
1592 * @type: communication type (SOCK_STREAM, ...)
1593 * @protocol: protocol (0, ...)
1594 * @res: new socket
1595 *
1596 * A wrapper around __sock_create().
1597 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1598 */
1599
1da177e4
LT
1600int sock_create(int family, int type, int protocol, struct socket **res)
1601{
1b8d7ae4 1602 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1603}
c6d409cf 1604EXPORT_SYMBOL(sock_create);
1da177e4 1605
8a3c245c
PT
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes 1 for @kern.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1623
da214a47 1624static struct socket *__sys_socket_create(int family, int type, int protocol)
1da177e4 1625{
1da177e4 1626 struct socket *sock;
da214a47 1627 int retval;
a677a039 1628
e38b36f3
UD
1629 /* Check the SOCK_* constants for consistency. */
1630 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1631 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1632 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1633 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1634
da214a47
JA
1635 if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1636 return ERR_PTR(-EINVAL);
a677a039 1637 type &= SOCK_TYPE_MASK;
1da177e4 1638
da214a47
JA
1639 retval = sock_create(family, type, protocol, &sock);
1640 if (retval < 0)
1641 return ERR_PTR(retval);
1642
1643 return sock;
1644}
1645
1646struct file *__sys_socket_file(int family, int type, int protocol)
1647{
1648 struct socket *sock;
da214a47
JA
1649 int flags;
1650
1651 sock = __sys_socket_create(family, type, protocol);
1652 if (IS_ERR(sock))
1653 return ERR_CAST(sock);
1654
1655 flags = type & ~SOCK_TYPE_MASK;
aaca0bdc
UD
1656 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1657 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1658
649c15c7 1659 return sock_alloc_file(sock, flags, NULL);
da214a47
JA
1660}
1661
0dd061a6
GT
1662/* A hook for bpf progs to attach to and update socket protocol.
1663 *
1664 * A static noinline declaration here could cause the compiler to
1665 * optimize away the function. A global noinline declaration will
1666 * keep the definition, but may optimize away the callsite.
1667 * Therefore, __weak is needed to ensure that the call is still
1668 * emitted, by telling the compiler that we don't know what the
1669 * function might eventually be.
1670 *
1671 * __diag_* below are needed to dismiss the missing prototype warning.
1672 */
1673
1674__diag_push();
1675__diag_ignore_all("-Wmissing-prototypes",
1676 "A fmod_ret entry point for BPF programs");
1677
1678__weak noinline int update_socket_protocol(int family, int type, int protocol)
1679{
1680 return protocol;
1681}
1682
1683__diag_pop();
1684
da214a47
JA
1685int __sys_socket(int family, int type, int protocol)
1686{
1687 struct socket *sock;
1688 int flags;
1689
0dd061a6
GT
1690 sock = __sys_socket_create(family, type,
1691 update_socket_protocol(family, type, protocol));
da214a47
JA
1692 if (IS_ERR(sock))
1693 return PTR_ERR(sock);
1694
1695 flags = type & ~SOCK_TYPE_MASK;
1696 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1697 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1da177e4 1698
8e1611e2 1699 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1700}
1701
9d6a15c3
DB
1702SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1703{
1704 return __sys_socket(family, type, protocol);
1705}
1706
1da177e4
LT
1707/*
1708 * Create a pair of connected sockets.
1709 */
1710
6debc8d8 1711int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1712{
1713 struct socket *sock1, *sock2;
1714 int fd1, fd2, err;
db349509 1715 struct file *newfile1, *newfile2;
a677a039
UD
1716 int flags;
1717
1718 flags = type & ~SOCK_TYPE_MASK;
77d27200 1719 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1720 return -EINVAL;
1721 type &= SOCK_TYPE_MASK;
1da177e4 1722
aaca0bdc
UD
1723 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1724 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1725
016a266b
AV
1726 /*
1727 * reserve descriptors and make sure we won't fail
1728 * to return them to userland.
1729 */
1730 fd1 = get_unused_fd_flags(flags);
1731 if (unlikely(fd1 < 0))
1732 return fd1;
1733
1734 fd2 = get_unused_fd_flags(flags);
1735 if (unlikely(fd2 < 0)) {
1736 put_unused_fd(fd1);
1737 return fd2;
1738 }
1739
1740 err = put_user(fd1, &usockvec[0]);
1741 if (err)
1742 goto out;
1743
1744 err = put_user(fd2, &usockvec[1]);
1745 if (err)
1746 goto out;
1747
1da177e4
LT
1748 /*
1749 * Obtain the first socket and check if the underlying protocol
1750 * supports the socketpair call.
1751 */
1752
1753 err = sock_create(family, type, protocol, &sock1);
016a266b 1754 if (unlikely(err < 0))
1da177e4
LT
1755 goto out;
1756
1757 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1758 if (unlikely(err < 0)) {
1759 sock_release(sock1);
1760 goto out;
bf3c23d1 1761 }
d73aa286 1762
d47cd945
DH
1763 err = security_socket_socketpair(sock1, sock2);
1764 if (unlikely(err)) {
1765 sock_release(sock2);
1766 sock_release(sock1);
1767 goto out;
1768 }
1769
1ded5e5a 1770 err = READ_ONCE(sock1->ops)->socketpair(sock1, sock2);
016a266b
AV
1771 if (unlikely(err < 0)) {
1772 sock_release(sock2);
1773 sock_release(sock1);
1774 goto out;
28407630
AV
1775 }
1776
aab174f0 1777 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1778 if (IS_ERR(newfile1)) {
28407630 1779 err = PTR_ERR(newfile1);
016a266b
AV
1780 sock_release(sock2);
1781 goto out;
28407630
AV
1782 }
1783
aab174f0 1784 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1785 if (IS_ERR(newfile2)) {
1786 err = PTR_ERR(newfile2);
016a266b
AV
1787 fput(newfile1);
1788 goto out;
db349509
AV
1789 }
1790
157cf649 1791 audit_fd_pair(fd1, fd2);
d73aa286 1792
db349509
AV
1793 fd_install(fd1, newfile1);
1794 fd_install(fd2, newfile2);
d73aa286 1795 return 0;
1da177e4 1796
016a266b 1797out:
d73aa286 1798 put_unused_fd(fd2);
d73aa286 1799 put_unused_fd(fd1);
1da177e4
LT
1800 return err;
1801}
1802
6debc8d8
DB
1803SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1804 int __user *, usockvec)
1805{
1806 return __sys_socketpair(family, type, protocol, usockvec);
1807}
1808
1da177e4
LT
1809/*
1810 * Bind a name to a socket. Nothing much to do here since it's
1811 * the protocol's responsibility to handle the local address.
1812 *
1813 * We move the socket address to kernel space before we call
1814 * the protocol layer (having also checked the address is ok).
1815 */
1816
a87d35d8 1817int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1818{
1819 struct socket *sock;
230b1839 1820 struct sockaddr_storage address;
6cb153ca 1821 int err, fput_needed;
1da177e4 1822
89bddce5 1823 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1824 if (sock) {
43db362d 1825 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1826 if (!err) {
89bddce5 1827 err = security_socket_bind(sock,
230b1839 1828 (struct sockaddr *)&address,
89bddce5 1829 addrlen);
6cb153ca 1830 if (!err)
1ded5e5a 1831 err = READ_ONCE(sock->ops)->bind(sock,
89bddce5 1832 (struct sockaddr *)
230b1839 1833 &address, addrlen);
1da177e4 1834 }
6cb153ca 1835 fput_light(sock->file, fput_needed);
89bddce5 1836 }
1da177e4
LT
1837 return err;
1838}
1839
a87d35d8
DB
1840SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1841{
1842 return __sys_bind(fd, umyaddr, addrlen);
1843}
1844
1da177e4
LT
1845/*
1846 * Perform a listen. Basically, we allow the protocol to do anything
1847 * necessary for a listen, and if that works, we mark the socket as
1848 * ready for listening.
1849 */
1850
25e290ee 1851int __sys_listen(int fd, int backlog)
1da177e4
LT
1852{
1853 struct socket *sock;
6cb153ca 1854 int err, fput_needed;
b8e1f9b5 1855 int somaxconn;
89bddce5
SH
1856
1857 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1858 if (sock) {
3c9ba81d 1859 somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
95c96174 1860 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1861 backlog = somaxconn;
1da177e4
LT
1862
1863 err = security_socket_listen(sock, backlog);
6cb153ca 1864 if (!err)
1ded5e5a 1865 err = READ_ONCE(sock->ops)->listen(sock, backlog);
1da177e4 1866
6cb153ca 1867 fput_light(sock->file, fput_needed);
1da177e4
LT
1868 }
1869 return err;
1870}
1871
25e290ee
DB
1872SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1873{
1874 return __sys_listen(fd, backlog);
1875}
1876
d32f89da 1877struct file *do_accept(struct file *file, unsigned file_flags,
de2ea4b6 1878 struct sockaddr __user *upeer_sockaddr,
d32f89da 1879 int __user *upeer_addrlen, int flags)
1da177e4
LT
1880{
1881 struct socket *sock, *newsock;
39d8c1b6 1882 struct file *newfile;
d32f89da 1883 int err, len;
230b1839 1884 struct sockaddr_storage address;
1ded5e5a 1885 const struct proto_ops *ops;
1da177e4 1886
dba4a925 1887 sock = sock_from_file(file);
d32f89da
PB
1888 if (!sock)
1889 return ERR_PTR(-ENOTSOCK);
1da177e4 1890
c6d409cf
ED
1891 newsock = sock_alloc();
1892 if (!newsock)
d32f89da 1893 return ERR_PTR(-ENFILE);
1ded5e5a 1894 ops = READ_ONCE(sock->ops);
1da177e4
LT
1895
1896 newsock->type = sock->type;
1ded5e5a 1897 newsock->ops = ops;
1da177e4 1898
1da177e4
LT
1899 /*
1900 * We don't need try_module_get here, as the listening socket (sock)
1901 * has the protocol module (sock->ops->owner) held.
1902 */
1ded5e5a 1903 __module_get(ops->owner);
1da177e4 1904
aab174f0 1905 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
d32f89da
PB
1906 if (IS_ERR(newfile))
1907 return newfile;
39d8c1b6 1908
a79af59e
FF
1909 err = security_socket_accept(sock, newsock);
1910 if (err)
39d8c1b6 1911 goto out_fd;
a79af59e 1912
1ded5e5a 1913 err = ops->accept(sock, newsock, sock->file->f_flags | file_flags,
de2ea4b6 1914 false);
1da177e4 1915 if (err < 0)
39d8c1b6 1916 goto out_fd;
1da177e4
LT
1917
1918 if (upeer_sockaddr) {
1ded5e5a 1919 len = ops->getname(newsock, (struct sockaddr *)&address, 2);
9b2c45d4 1920 if (len < 0) {
1da177e4 1921 err = -ECONNABORTED;
39d8c1b6 1922 goto out_fd;
1da177e4 1923 }
43db362d 1924 err = move_addr_to_user(&address,
230b1839 1925 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1926 if (err < 0)
39d8c1b6 1927 goto out_fd;
1da177e4
LT
1928 }
1929
1930 /* File flags are not inherited via accept() unlike another OSes. */
d32f89da 1931 return newfile;
39d8c1b6 1932out_fd:
9606a216 1933 fput(newfile);
d32f89da
PB
1934 return ERR_PTR(err);
1935}
1936
c0424532
YD
1937static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr,
1938 int __user *upeer_addrlen, int flags)
d32f89da
PB
1939{
1940 struct file *newfile;
1941 int newfd;
1942
1943 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1944 return -EINVAL;
1945
1946 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1947 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
de2ea4b6 1948
c0424532 1949 newfd = get_unused_fd_flags(flags);
d32f89da
PB
1950 if (unlikely(newfd < 0))
1951 return newfd;
1952
c0424532 1953 newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen,
d32f89da
PB
1954 flags);
1955 if (IS_ERR(newfile)) {
1956 put_unused_fd(newfd);
1957 return PTR_ERR(newfile);
1958 }
1959 fd_install(newfd, newfile);
1960 return newfd;
de2ea4b6
JA
1961}
1962
1963/*
1964 * For accept, we attempt to create a new socket, set up the link
1965 * with the client, wake up the client, then return the new
1966 * connected fd. We collect the address of the connector in kernel
1967 * space and move it to user at the very end. This is unclean because
1968 * we open the socket then return an error.
1969 *
1970 * 1003.1g adds the ability to recvmsg() to query connection pending
1971 * status to recvmsg. We need to add that support in a way thats
1972 * clean when we restructure accept also.
1973 */
1974
1975int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1976 int __user *upeer_addrlen, int flags)
1977{
1978 int ret = -EBADF;
1979 struct fd f;
1980
1981 f = fdget(fd);
1982 if (f.file) {
c0424532
YD
1983 ret = __sys_accept4_file(f.file, upeer_sockaddr,
1984 upeer_addrlen, flags);
6b07edeb 1985 fdput(f);
de2ea4b6
JA
1986 }
1987
1988 return ret;
1da177e4
LT
1989}
1990
4541e805
DB
1991SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1992 int __user *, upeer_addrlen, int, flags)
1993{
1994 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1995}
1996
20f37034
HC
1997SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1998 int __user *, upeer_addrlen)
aaca0bdc 1999{
4541e805 2000 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
2001}
2002
1da177e4
LT
2003/*
2004 * Attempt to connect to a socket with the server address. The address
2005 * is in user space so we verify it is OK and move it to kernel space.
2006 *
2007 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
2008 * break bindings
2009 *
2010 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
2011 * other SEQPACKET protocols that take time to connect() as it doesn't
2012 * include the -EINPROGRESS status for such sockets.
2013 */
2014
f499a021 2015int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 2016 int addrlen, int file_flags)
1da177e4
LT
2017{
2018 struct socket *sock;
bd3ded31 2019 int err;
1da177e4 2020
dba4a925
FR
2021 sock = sock_from_file(file);
2022 if (!sock) {
2023 err = -ENOTSOCK;
1da177e4 2024 goto out;
dba4a925 2025 }
1da177e4 2026
89bddce5 2027 err =
f499a021 2028 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 2029 if (err)
bd3ded31 2030 goto out;
1da177e4 2031
1ded5e5a
ED
2032 err = READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)address,
2033 addrlen, sock->file->f_flags | file_flags);
1da177e4
LT
2034out:
2035 return err;
2036}
2037
bd3ded31
JA
2038int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
2039{
2040 int ret = -EBADF;
2041 struct fd f;
2042
2043 f = fdget(fd);
2044 if (f.file) {
f499a021
JA
2045 struct sockaddr_storage address;
2046
2047 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
2048 if (!ret)
2049 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 2050 fdput(f);
bd3ded31
JA
2051 }
2052
2053 return ret;
2054}
2055
1387c2c2
DB
2056SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
2057 int, addrlen)
2058{
2059 return __sys_connect(fd, uservaddr, addrlen);
2060}
2061
1da177e4
LT
2062/*
2063 * Get the local address ('name') of a socket object. Move the obtained
2064 * name to user space.
2065 */
2066
8882a107
DB
2067int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
2068 int __user *usockaddr_len)
1da177e4
LT
2069{
2070 struct socket *sock;
230b1839 2071 struct sockaddr_storage address;
9b2c45d4 2072 int err, fput_needed;
89bddce5 2073
6cb153ca 2074 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
2075 if (!sock)
2076 goto out;
2077
2078 err = security_socket_getsockname(sock);
2079 if (err)
2080 goto out_put;
2081
1ded5e5a 2082 err = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 0);
9b2c45d4 2083 if (err < 0)
1da177e4 2084 goto out_put;
e44ef1d4 2085 /* "err" is actually length in this case */
9b2c45d4 2086 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
2087
2088out_put:
6cb153ca 2089 fput_light(sock->file, fput_needed);
1da177e4
LT
2090out:
2091 return err;
2092}
2093
8882a107
DB
2094SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
2095 int __user *, usockaddr_len)
2096{
2097 return __sys_getsockname(fd, usockaddr, usockaddr_len);
2098}
2099
1da177e4
LT
2100/*
2101 * Get the remote address ('name') of a socket object. Move the obtained
2102 * name to user space.
2103 */
2104
b21c8f83
DB
2105int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
2106 int __user *usockaddr_len)
1da177e4
LT
2107{
2108 struct socket *sock;
230b1839 2109 struct sockaddr_storage address;
9b2c45d4 2110 int err, fput_needed;
1da177e4 2111
89bddce5
SH
2112 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2113 if (sock != NULL) {
1ded5e5a
ED
2114 const struct proto_ops *ops = READ_ONCE(sock->ops);
2115
1da177e4
LT
2116 err = security_socket_getpeername(sock);
2117 if (err) {
6cb153ca 2118 fput_light(sock->file, fput_needed);
1da177e4
LT
2119 return err;
2120 }
2121
1ded5e5a 2122 err = ops->getname(sock, (struct sockaddr *)&address, 1);
9b2c45d4
DV
2123 if (err >= 0)
2124 /* "err" is actually length in this case */
2125 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 2126 usockaddr_len);
6cb153ca 2127 fput_light(sock->file, fput_needed);
1da177e4
LT
2128 }
2129 return err;
2130}
2131
b21c8f83
DB
2132SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
2133 int __user *, usockaddr_len)
2134{
2135 return __sys_getpeername(fd, usockaddr, usockaddr_len);
2136}
2137
1da177e4
LT
2138/*
2139 * Send a datagram to a given address. We move the address into kernel
2140 * space and check the user space data area is readable before invoking
2141 * the protocol.
2142 */
211b634b
DB
2143int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
2144 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
2145{
2146 struct socket *sock;
230b1839 2147 struct sockaddr_storage address;
1da177e4
LT
2148 int err;
2149 struct msghdr msg;
2150 struct iovec iov;
6cb153ca 2151 int fput_needed;
6cb153ca 2152
de4eda9d 2153 err = import_single_range(ITER_SOURCE, buff, len, &iov, &msg.msg_iter);
602bd0e9
AV
2154 if (unlikely(err))
2155 return err;
de0fa95c
PE
2156 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2157 if (!sock)
4387ff75 2158 goto out;
6cb153ca 2159
89bddce5 2160 msg.msg_name = NULL;
89bddce5
SH
2161 msg.msg_control = NULL;
2162 msg.msg_controllen = 0;
2163 msg.msg_namelen = 0;
7c701d92 2164 msg.msg_ubuf = NULL;
6cb153ca 2165 if (addr) {
43db362d 2166 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
2167 if (err < 0)
2168 goto out_put;
230b1839 2169 msg.msg_name = (struct sockaddr *)&address;
89bddce5 2170 msg.msg_namelen = addr_len;
1da177e4 2171 }
b841b901 2172 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
1da177e4
LT
2173 if (sock->file->f_flags & O_NONBLOCK)
2174 flags |= MSG_DONTWAIT;
2175 msg.msg_flags = flags;
d8725c86 2176 err = sock_sendmsg(sock, &msg);
1da177e4 2177
89bddce5 2178out_put:
de0fa95c 2179 fput_light(sock->file, fput_needed);
4387ff75 2180out:
1da177e4
LT
2181 return err;
2182}
2183
211b634b
DB
2184SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2185 unsigned int, flags, struct sockaddr __user *, addr,
2186 int, addr_len)
2187{
2188 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2189}
2190
1da177e4 2191/*
89bddce5 2192 * Send a datagram down a socket.
1da177e4
LT
2193 */
2194
3e0fa65f 2195SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2196 unsigned int, flags)
1da177e4 2197{
211b634b 2198 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2199}
2200
2201/*
89bddce5 2202 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2203 * sender. We verify the buffers are writable and if needed move the
2204 * sender address from kernel to user space.
2205 */
7a09e1eb
DB
2206int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2207 struct sockaddr __user *addr, int __user *addr_len)
1da177e4 2208{
1228b34c
ED
2209 struct sockaddr_storage address;
2210 struct msghdr msg = {
2211 /* Save some cycles and don't copy the address if not needed */
2212 .msg_name = addr ? (struct sockaddr *)&address : NULL,
2213 };
1da177e4
LT
2214 struct socket *sock;
2215 struct iovec iov;
89bddce5 2216 int err, err2;
6cb153ca
BL
2217 int fput_needed;
2218
de4eda9d 2219 err = import_single_range(ITER_DEST, ubuf, size, &iov, &msg.msg_iter);
602bd0e9
AV
2220 if (unlikely(err))
2221 return err;
de0fa95c 2222 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2223 if (!sock)
de0fa95c 2224 goto out;
1da177e4 2225
1da177e4
LT
2226 if (sock->file->f_flags & O_NONBLOCK)
2227 flags |= MSG_DONTWAIT;
2da62906 2228 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2229
89bddce5 2230 if (err >= 0 && addr != NULL) {
43db362d 2231 err2 = move_addr_to_user(&address,
230b1839 2232 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2233 if (err2 < 0)
2234 err = err2;
1da177e4 2235 }
de0fa95c
PE
2236
2237 fput_light(sock->file, fput_needed);
4387ff75 2238out:
1da177e4
LT
2239 return err;
2240}
2241
7a09e1eb
DB
2242SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2243 unsigned int, flags, struct sockaddr __user *, addr,
2244 int __user *, addr_len)
2245{
2246 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2247}
2248
1da177e4 2249/*
89bddce5 2250 * Receive a datagram from a socket.
1da177e4
LT
2251 */
2252
b7c0ddf5
JG
2253SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2254 unsigned int, flags)
1da177e4 2255{
7a09e1eb 2256 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2257}
2258
83f0c10b
FW
2259static bool sock_use_custom_sol_socket(const struct socket *sock)
2260{
a5ef058d 2261 return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
83f0c10b
FW
2262}
2263
1da177e4
LT
2264/*
2265 * Set a socket option. Because we don't know the option lengths we have
2266 * to pass the user mode parameter for the protocols to sort out.
2267 */
a7b75c5a 2268int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
55db9c0e 2269 int optlen)
1da177e4 2270{
519a8a6c 2271 sockptr_t optval = USER_SOCKPTR(user_optval);
1ded5e5a 2272 const struct proto_ops *ops;
0d01da6a 2273 char *kernel_optval = NULL;
6cb153ca 2274 int err, fput_needed;
1da177e4
LT
2275 struct socket *sock;
2276
2277 if (optlen < 0)
2278 return -EINVAL;
89bddce5
SH
2279
2280 sock = sockfd_lookup_light(fd, &err, &fput_needed);
4a367299
CH
2281 if (!sock)
2282 return err;
1da177e4 2283
4a367299
CH
2284 err = security_socket_setsockopt(sock, level, optname);
2285 if (err)
2286 goto out_put;
0d01da6a 2287
55db9c0e
CH
2288 if (!in_compat_syscall())
2289 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
a7b75c5a 2290 user_optval, &optlen,
55db9c0e 2291 &kernel_optval);
4a367299
CH
2292 if (err < 0)
2293 goto out_put;
2294 if (err > 0) {
2295 err = 0;
2296 goto out_put;
2297 }
0d01da6a 2298
a7b75c5a
CH
2299 if (kernel_optval)
2300 optval = KERNEL_SOCKPTR(kernel_optval);
1ded5e5a 2301 ops = READ_ONCE(sock->ops);
4a367299 2302 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2303 err = sock_setsockopt(sock, level, optname, optval, optlen);
1ded5e5a 2304 else if (unlikely(!ops->setsockopt))
a44d9e72 2305 err = -EOPNOTSUPP;
4a367299 2306 else
1ded5e5a 2307 err = ops->setsockopt(sock, level, optname, optval,
89bddce5 2308 optlen);
a7b75c5a 2309 kfree(kernel_optval);
4a367299
CH
2310out_put:
2311 fput_light(sock->file, fput_needed);
1da177e4
LT
2312 return err;
2313}
2314
cc36dca0
DB
2315SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2316 char __user *, optval, int, optlen)
2317{
2318 return __sys_setsockopt(fd, level, optname, optval, optlen);
2319}
2320
9cacf81f
SF
2321INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2322 int optname));
2323
1da177e4
LT
2324/*
2325 * Get a socket option. Because we don't know the option lengths we have
2326 * to pass a user mode parameter for the protocols to sort out.
2327 */
55db9c0e
CH
2328int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2329 int __user *optlen)
1da177e4 2330{
ad4bf5f2 2331 int max_optlen __maybe_unused;
1ded5e5a 2332 const struct proto_ops *ops;
6cb153ca 2333 int err, fput_needed;
1da177e4
LT
2334 struct socket *sock;
2335
89bddce5 2336 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2337 if (!sock)
2338 return err;
2339
2340 err = security_socket_getsockopt(sock, level, optname);
2341 if (err)
2342 goto out_put;
1da177e4 2343
55db9c0e
CH
2344 if (!in_compat_syscall())
2345 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2346
1ded5e5a 2347 ops = READ_ONCE(sock->ops);
d8a9b38f
CH
2348 if (level == SOL_SOCKET)
2349 err = sock_getsockopt(sock, level, optname, optval, optlen);
1ded5e5a 2350 else if (unlikely(!ops->getsockopt))
a44d9e72 2351 err = -EOPNOTSUPP;
d8a9b38f 2352 else
1ded5e5a 2353 err = ops->getsockopt(sock, level, optname, optval,
89bddce5 2354 optlen);
0d01da6a 2355
55db9c0e
CH
2356 if (!in_compat_syscall())
2357 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2358 optval, optlen, max_optlen,
2359 err);
6cb153ca 2360out_put:
d8a9b38f 2361 fput_light(sock->file, fput_needed);
1da177e4
LT
2362 return err;
2363}
2364
13a2d70e
DB
2365SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2366 char __user *, optval, int __user *, optlen)
2367{
2368 return __sys_getsockopt(fd, level, optname, optval, optlen);
2369}
2370
1da177e4
LT
2371/*
2372 * Shutdown a socket.
2373 */
2374
b713c195
JA
2375int __sys_shutdown_sock(struct socket *sock, int how)
2376{
2377 int err;
2378
2379 err = security_socket_shutdown(sock, how);
2380 if (!err)
1ded5e5a 2381 err = READ_ONCE(sock->ops)->shutdown(sock, how);
b713c195
JA
2382
2383 return err;
2384}
2385
005a1aea 2386int __sys_shutdown(int fd, int how)
1da177e4 2387{
6cb153ca 2388 int err, fput_needed;
1da177e4
LT
2389 struct socket *sock;
2390
89bddce5
SH
2391 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2392 if (sock != NULL) {
b713c195 2393 err = __sys_shutdown_sock(sock, how);
6cb153ca 2394 fput_light(sock->file, fput_needed);
1da177e4
LT
2395 }
2396 return err;
2397}
2398
005a1aea
DB
2399SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2400{
2401 return __sys_shutdown(fd, how);
2402}
2403
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * They expand in the caller's scope and rely on a variable named `flags`
 * plus, next to the `msg` argument, a pointer named `<msg>_compat`.
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2410
c71d8ebe
TH
2411struct used_address {
2412 struct sockaddr_storage name;
2413 unsigned int name_len;
2414};
2415
7fa875b8
DY
2416int __copy_msghdr(struct msghdr *kmsg,
2417 struct user_msghdr *msg,
2418 struct sockaddr __user **save_addr)
1661bf36 2419{
08adb7da
AV
2420 ssize_t err;
2421
1f466e1f 2422 kmsg->msg_control_is_user = true;
1228b34c 2423 kmsg->msg_get_inq = 0;
7fa875b8
DY
2424 kmsg->msg_control_user = msg->msg_control;
2425 kmsg->msg_controllen = msg->msg_controllen;
2426 kmsg->msg_flags = msg->msg_flags;
ffb07550 2427
7fa875b8
DY
2428 kmsg->msg_namelen = msg->msg_namelen;
2429 if (!msg->msg_name)
6a2a2b3a
AS
2430 kmsg->msg_namelen = 0;
2431
dbb490b9
ML
2432 if (kmsg->msg_namelen < 0)
2433 return -EINVAL;
2434
1661bf36 2435 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2436 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2437
2438 if (save_addr)
7fa875b8 2439 *save_addr = msg->msg_name;
08adb7da 2440
7fa875b8 2441 if (msg->msg_name && kmsg->msg_namelen) {
08adb7da 2442 if (!save_addr) {
7fa875b8 2443 err = move_addr_to_kernel(msg->msg_name,
864d9664 2444 kmsg->msg_namelen,
08adb7da
AV
2445 kmsg->msg_name);
2446 if (err < 0)
2447 return err;
2448 }
2449 } else {
2450 kmsg->msg_name = NULL;
2451 kmsg->msg_namelen = 0;
2452 }
2453
7fa875b8 2454 if (msg->msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2455 return -EMSGSIZE;
2456
0345f931 2457 kmsg->msg_iocb = NULL;
7c701d92 2458 kmsg->msg_ubuf = NULL;
0a384abf
JA
2459 return 0;
2460}
2461
2462static int copy_msghdr_from_user(struct msghdr *kmsg,
2463 struct user_msghdr __user *umsg,
2464 struct sockaddr __user **save_addr,
2465 struct iovec **iov)
2466{
2467 struct user_msghdr msg;
2468 ssize_t err;
2469
7fa875b8
DY
2470 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
2471 return -EFAULT;
2472
2473 err = __copy_msghdr(kmsg, &msg, save_addr);
0a384abf
JA
2474 if (err)
2475 return err;
0345f931 2476
de4eda9d 2477 err = import_iovec(save_addr ? ITER_DEST : ITER_SOURCE,
ffb07550 2478 msg.msg_iov, msg.msg_iovlen,
da184284 2479 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2480 return err < 0 ? err : 0;
1661bf36
DC
2481}
2482
4257c8ca
JA
2483static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2484 unsigned int flags, struct used_address *used_address,
2485 unsigned int allowed_msghdr_flags)
1da177e4 2486{
b9d717a7 2487 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2488 __aligned(sizeof(__kernel_size_t));
89bddce5 2489 /* 20 is size of ipv6_pktinfo */
1da177e4 2490 unsigned char *ctl_buf = ctl;
d8725c86 2491 int ctl_len;
08adb7da 2492 ssize_t err;
89bddce5 2493
1da177e4
LT
2494 err = -ENOBUFS;
2495
228e548e 2496 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2497 goto out;
28a94d8f 2498 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2499 ctl_len = msg_sys->msg_controllen;
1da177e4 2500 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2501 err =
228e548e 2502 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2503 sizeof(ctl));
1da177e4 2504 if (err)
4257c8ca 2505 goto out;
228e548e
AB
2506 ctl_buf = msg_sys->msg_control;
2507 ctl_len = msg_sys->msg_controllen;
1da177e4 2508 } else if (ctl_len) {
ac4340fc
DM
2509 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2510 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2511 if (ctl_len > sizeof(ctl)) {
1da177e4 2512 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2513 if (ctl_buf == NULL)
4257c8ca 2514 goto out;
1da177e4
LT
2515 }
2516 err = -EFAULT;
1f466e1f 2517 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2518 goto out_freectl;
228e548e 2519 msg_sys->msg_control = ctl_buf;
1f466e1f 2520 msg_sys->msg_control_is_user = false;
1da177e4 2521 }
b841b901 2522 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
228e548e 2523 msg_sys->msg_flags = flags;
1da177e4
LT
2524
2525 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2526 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2527 /*
2528 * If this is sendmmsg() and current destination address is same as
2529 * previously succeeded address, omit asking LSM's decision.
2530 * used_address->name_len is initialized to UINT_MAX so that the first
2531 * destination address never matches.
2532 */
bc909d9d
MD
2533 if (used_address && msg_sys->msg_name &&
2534 used_address->name_len == msg_sys->msg_namelen &&
2535 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2536 used_address->name_len)) {
d8725c86 2537 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2538 goto out_freectl;
2539 }
d8725c86 2540 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2541 /*
2542 * If this is sendmmsg() and sending to current destination address was
2543 * successful, remember it.
2544 */
2545 if (used_address && err >= 0) {
2546 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2547 if (msg_sys->msg_name)
2548 memcpy(&used_address->name, msg_sys->msg_name,
2549 used_address->name_len);
c71d8ebe 2550 }
1da177e4
LT
2551
2552out_freectl:
89bddce5 2553 if (ctl_buf != ctl)
1da177e4 2554 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2555out:
2556 return err;
2557}
2558
03b1230c
JA
2559int sendmsg_copy_msghdr(struct msghdr *msg,
2560 struct user_msghdr __user *umsg, unsigned flags,
2561 struct iovec **iov)
4257c8ca
JA
2562{
2563 int err;
2564
2565 if (flags & MSG_CMSG_COMPAT) {
2566 struct compat_msghdr __user *msg_compat;
2567
2568 msg_compat = (struct compat_msghdr __user *) umsg;
2569 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2570 } else {
2571 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2572 }
2573 if (err < 0)
2574 return err;
2575
2576 return 0;
2577}
2578
2579static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2580 struct msghdr *msg_sys, unsigned int flags,
2581 struct used_address *used_address,
2582 unsigned int allowed_msghdr_flags)
2583{
2584 struct sockaddr_storage address;
2585 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2586 ssize_t err;
2587
2588 msg_sys->msg_name = &address;
2589
2590 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2591 if (err < 0)
2592 return err;
2593
2594 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2595 allowed_msghdr_flags);
da184284 2596 kfree(iov);
228e548e
AB
2597 return err;
2598}
2599
2600/*
2601 * BSD sendmsg interface
2602 */
03b1230c 2603long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2604 unsigned int flags)
2605{
03b1230c 2606 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2607}
228e548e 2608
e1834a32
DB
2609long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2610 bool forbid_cmsg_compat)
228e548e
AB
2611{
2612 int fput_needed, err;
2613 struct msghdr msg_sys;
1be374a0
AL
2614 struct socket *sock;
2615
e1834a32
DB
2616 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2617 return -EINVAL;
2618
1be374a0 2619 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2620 if (!sock)
2621 goto out;
2622
28a94d8f 2623 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2624
6cb153ca 2625 fput_light(sock->file, fput_needed);
89bddce5 2626out:
1da177e4
LT
2627 return err;
2628}
2629
666547ff 2630SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2631{
e1834a32 2632 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2633}
2634
228e548e
AB
2635/*
2636 * Linux sendmmsg interface
2637 */
2638
2639int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2640 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2641{
2642 int fput_needed, err, datagrams;
2643 struct socket *sock;
2644 struct mmsghdr __user *entry;
2645 struct compat_mmsghdr __user *compat_entry;
2646 struct msghdr msg_sys;
c71d8ebe 2647 struct used_address used_address;
f092276d 2648 unsigned int oflags = flags;
228e548e 2649
e1834a32
DB
2650 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2651 return -EINVAL;
2652
98382f41
AB
2653 if (vlen > UIO_MAXIOV)
2654 vlen = UIO_MAXIOV;
228e548e
AB
2655
2656 datagrams = 0;
2657
2658 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2659 if (!sock)
2660 return err;
2661
c71d8ebe 2662 used_address.name_len = UINT_MAX;
228e548e
AB
2663 entry = mmsg;
2664 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2665 err = 0;
f092276d 2666 flags |= MSG_BATCH;
228e548e
AB
2667
2668 while (datagrams < vlen) {
f092276d
TH
2669 if (datagrams == vlen - 1)
2670 flags = oflags;
2671
228e548e 2672 if (MSG_CMSG_COMPAT & flags) {
666547ff 2673 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2674 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2675 if (err < 0)
2676 break;
2677 err = __put_user(err, &compat_entry->msg_len);
2678 ++compat_entry;
2679 } else {
a7526eb5 2680 err = ___sys_sendmsg(sock,
666547ff 2681 (struct user_msghdr __user *)entry,
28a94d8f 2682 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2683 if (err < 0)
2684 break;
2685 err = put_user(err, &entry->msg_len);
2686 ++entry;
2687 }
2688
2689 if (err)
2690 break;
2691 ++datagrams;
3023898b
SHY
2692 if (msg_data_left(&msg_sys))
2693 break;
a78cb84c 2694 cond_resched();
228e548e
AB
2695 }
2696
228e548e
AB
2697 fput_light(sock->file, fput_needed);
2698
728ffb86
AB
2699 /* We only return an error if no datagrams were able to be sent */
2700 if (datagrams != 0)
228e548e
AB
2701 return datagrams;
2702
228e548e
AB
2703 return err;
2704}
2705
2706SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2707 unsigned int, vlen, unsigned int, flags)
2708{
e1834a32 2709 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2710}
2711
03b1230c
JA
2712int recvmsg_copy_msghdr(struct msghdr *msg,
2713 struct user_msghdr __user *umsg, unsigned flags,
2714 struct sockaddr __user **uaddr,
2715 struct iovec **iov)
1da177e4 2716{
08adb7da 2717 ssize_t err;
1da177e4 2718
4257c8ca
JA
2719 if (MSG_CMSG_COMPAT & flags) {
2720 struct compat_msghdr __user *msg_compat;
1da177e4 2721
4257c8ca
JA
2722 msg_compat = (struct compat_msghdr __user *) umsg;
2723 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2724 } else {
2725 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2726 }
1da177e4 2727 if (err < 0)
da184284 2728 return err;
1da177e4 2729
4257c8ca
JA
2730 return 0;
2731}
2732
2733static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2734 struct user_msghdr __user *msg,
2735 struct sockaddr __user *uaddr,
2736 unsigned int flags, int nosec)
2737{
2738 struct compat_msghdr __user *msg_compat =
2739 (struct compat_msghdr __user *) msg;
2740 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2741 struct sockaddr_storage addr;
2742 unsigned long cmsg_ptr;
2743 int len;
2744 ssize_t err;
2745
2746 msg_sys->msg_name = &addr;
a2e27255
ACM
2747 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2748 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2749
f3d33426
HFS
2750 /* We assume all kernel code knows the size of sockaddr_storage */
2751 msg_sys->msg_namelen = 0;
2752
1da177e4
LT
2753 if (sock->file->f_flags & O_NONBLOCK)
2754 flags |= MSG_DONTWAIT;
1af66221
ED
2755
2756 if (unlikely(nosec))
2757 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2758 else
2759 err = sock_recvmsg(sock, msg_sys, flags);
2760
1da177e4 2761 if (err < 0)
4257c8ca 2762 goto out;
1da177e4
LT
2763 len = err;
2764
2765 if (uaddr != NULL) {
43db362d 2766 err = move_addr_to_user(&addr,
a2e27255 2767 msg_sys->msg_namelen, uaddr,
89bddce5 2768 uaddr_len);
1da177e4 2769 if (err < 0)
4257c8ca 2770 goto out;
1da177e4 2771 }
a2e27255 2772 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2773 COMPAT_FLAGS(msg));
1da177e4 2774 if (err)
4257c8ca 2775 goto out;
1da177e4 2776 if (MSG_CMSG_COMPAT & flags)
a2e27255 2777 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2778 &msg_compat->msg_controllen);
2779 else
a2e27255 2780 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2781 &msg->msg_controllen);
2782 if (err)
4257c8ca 2783 goto out;
1da177e4 2784 err = len;
4257c8ca
JA
2785out:
2786 return err;
2787}
2788
2789static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2790 struct msghdr *msg_sys, unsigned int flags, int nosec)
2791{
2792 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2793 /* user mode address pointers */
2794 struct sockaddr __user *uaddr;
2795 ssize_t err;
2796
2797 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2798 if (err < 0)
2799 return err;
1da177e4 2800
4257c8ca 2801 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2802 kfree(iov);
a2e27255
ACM
2803 return err;
2804}
2805
2806/*
2807 * BSD recvmsg interface
2808 */
2809
03b1230c
JA
2810long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2811 struct user_msghdr __user *umsg,
2812 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2813{
03b1230c 2814 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2815}
2816
e1834a32
DB
2817long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2818 bool forbid_cmsg_compat)
a2e27255
ACM
2819{
2820 int fput_needed, err;
2821 struct msghdr msg_sys;
1be374a0
AL
2822 struct socket *sock;
2823
e1834a32
DB
2824 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2825 return -EINVAL;
2826
1be374a0 2827 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2828 if (!sock)
2829 goto out;
2830
a7526eb5 2831 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2832
6cb153ca 2833 fput_light(sock->file, fput_needed);
1da177e4
LT
2834out:
2835 return err;
2836}
2837
666547ff 2838SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2839 unsigned int, flags)
2840{
e1834a32 2841 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2842}
2843
a2e27255
ACM
2844/*
2845 * Linux recvmmsg interface
2846 */
2847
e11d4284
AB
2848static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2849 unsigned int vlen, unsigned int flags,
2850 struct timespec64 *timeout)
a2e27255
ACM
2851{
2852 int fput_needed, err, datagrams;
2853 struct socket *sock;
2854 struct mmsghdr __user *entry;
d7256d0e 2855 struct compat_mmsghdr __user *compat_entry;
a2e27255 2856 struct msghdr msg_sys;
766b9f92
DD
2857 struct timespec64 end_time;
2858 struct timespec64 timeout64;
a2e27255
ACM
2859
2860 if (timeout &&
2861 poll_select_set_timeout(&end_time, timeout->tv_sec,
2862 timeout->tv_nsec))
2863 return -EINVAL;
2864
2865 datagrams = 0;
2866
2867 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2868 if (!sock)
2869 return err;
2870
7797dc41
SHY
2871 if (likely(!(flags & MSG_ERRQUEUE))) {
2872 err = sock_error(sock->sk);
2873 if (err) {
2874 datagrams = err;
2875 goto out_put;
2876 }
e623a9e9 2877 }
a2e27255
ACM
2878
2879 entry = mmsg;
d7256d0e 2880 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2881
2882 while (datagrams < vlen) {
2883 /*
2884 * No need to ask LSM for more than the first datagram.
2885 */
d7256d0e 2886 if (MSG_CMSG_COMPAT & flags) {
666547ff 2887 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2888 &msg_sys, flags & ~MSG_WAITFORONE,
2889 datagrams);
d7256d0e
JMG
2890 if (err < 0)
2891 break;
2892 err = __put_user(err, &compat_entry->msg_len);
2893 ++compat_entry;
2894 } else {
a7526eb5 2895 err = ___sys_recvmsg(sock,
666547ff 2896 (struct user_msghdr __user *)entry,
a7526eb5
AL
2897 &msg_sys, flags & ~MSG_WAITFORONE,
2898 datagrams);
d7256d0e
JMG
2899 if (err < 0)
2900 break;
2901 err = put_user(err, &entry->msg_len);
2902 ++entry;
2903 }
2904
a2e27255
ACM
2905 if (err)
2906 break;
a2e27255
ACM
2907 ++datagrams;
2908
71c5c159
BB
2909 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2910 if (flags & MSG_WAITFORONE)
2911 flags |= MSG_DONTWAIT;
2912
a2e27255 2913 if (timeout) {
766b9f92 2914 ktime_get_ts64(&timeout64);
c2e6c856 2915 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2916 if (timeout->tv_sec < 0) {
2917 timeout->tv_sec = timeout->tv_nsec = 0;
2918 break;
2919 }
2920
2921 /* Timeout, return less than vlen datagrams */
2922 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2923 break;
2924 }
2925
2926 /* Out of band data, return right away */
2927 if (msg_sys.msg_flags & MSG_OOB)
2928 break;
a78cb84c 2929 cond_resched();
a2e27255
ACM
2930 }
2931
a2e27255 2932 if (err == 0)
34b88a68
ACM
2933 goto out_put;
2934
2935 if (datagrams == 0) {
2936 datagrams = err;
2937 goto out_put;
2938 }
a2e27255 2939
34b88a68
ACM
2940 /*
2941 * We may return less entries than requested (vlen) if the
2942 * sock is non block and there aren't enough datagrams...
2943 */
2944 if (err != -EAGAIN) {
a2e27255 2945 /*
34b88a68
ACM
2946 * ... or if recvmsg returns an error after we
2947 * received some datagrams, where we record the
2948 * error to return on the next call or if the
2949 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2950 */
e05a5f51 2951 WRITE_ONCE(sock->sk->sk_err, -err);
a2e27255 2952 }
34b88a68
ACM
2953out_put:
2954 fput_light(sock->file, fput_needed);
a2e27255 2955
34b88a68 2956 return datagrams;
a2e27255
ACM
2957}
2958
e11d4284
AB
2959int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2960 unsigned int vlen, unsigned int flags,
2961 struct __kernel_timespec __user *timeout,
2962 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2963{
2964 int datagrams;
c2e6c856 2965 struct timespec64 timeout_sys;
a2e27255 2966
e11d4284
AB
2967 if (timeout && get_timespec64(&timeout_sys, timeout))
2968 return -EFAULT;
a2e27255 2969
e11d4284 2970 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2971 return -EFAULT;
2972
e11d4284
AB
2973 if (!timeout && !timeout32)
2974 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2975
2976 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2977
e11d4284
AB
2978 if (datagrams <= 0)
2979 return datagrams;
2980
2981 if (timeout && put_timespec64(&timeout_sys, timeout))
2982 datagrams = -EFAULT;
2983
2984 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2985 datagrams = -EFAULT;
2986
2987 return datagrams;
2988}
2989
1255e269
DB
2990SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2991 unsigned int, vlen, unsigned int, flags,
c2e6c856 2992 struct __kernel_timespec __user *, timeout)
1255e269 2993{
e11d4284
AB
2994 if (flags & MSG_CMSG_COMPAT)
2995 return -EINVAL;
2996
2997 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2998}
2999
3000#ifdef CONFIG_COMPAT_32BIT_TIME
3001SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
3002 unsigned int, vlen, unsigned int, flags,
3003 struct old_timespec32 __user *, timeout)
3004{
3005 if (flags & MSG_CMSG_COMPAT)
3006 return -EINVAL;
3007
3008 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
1255e269 3009}
e11d4284 3010#endif
1255e269 3011
a2e27255 3012#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Size in bytes of the user-space argument block for each sys_socketcall
 * sub-call, indexed by call number (entry 0 is unused).
 */
#define AL(n) ((n) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0: unused */
	AL(3),	/*  1: socket */
	AL(3),	/*  2: bind */
	AL(3),	/*  3: connect */
	AL(2),	/*  4: listen */
	AL(3),	/*  5: accept */
	AL(3),	/*  6: getsockname */
	AL(3),	/*  7: getpeername */
	AL(4),	/*  8: socketpair */
	AL(4),	/*  9: send */
	AL(4),	/* 10: recv */
	AL(6),	/* 11: sendto */
	AL(6),	/* 12: recvfrom */
	AL(2),	/* 13: shutdown */
	AL(5),	/* 14: setsockopt */
	AL(5),	/* 15: getsockopt */
	AL(3),	/* 16: sendmsg */
	AL(3),	/* 17: recvmsg */
	AL(4),	/* 18: accept4 */
	AL(5),	/* 19: recvmmsg */
	AL(4),	/* 20: sendmmsg */
};

#undef AL
3023
3024/*
89bddce5 3025 * System call vectors.
1da177e4
LT
3026 *
3027 * Argument checking cleaned up. Saved 20% in size.
3028 * This function doesn't need to set the kernel lock because
89bddce5 3029 * it is set by the callees.
1da177e4
LT
3030 */
3031
3e0fa65f 3032SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 3033{
2950fa9d 3034 unsigned long a[AUDITSC_ARGS];
89bddce5 3035 unsigned long a0, a1;
1da177e4 3036 int err;
47379052 3037 unsigned int len;
1da177e4 3038
228e548e 3039 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 3040 return -EINVAL;
c8e8cd57 3041 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 3042
47379052
AV
3043 len = nargs[call];
3044 if (len > sizeof(a))
3045 return -EINVAL;
3046
1da177e4 3047 /* copy_from_user should be SMP safe. */
47379052 3048 if (copy_from_user(a, args, len))
1da177e4 3049 return -EFAULT;
3ec3b2fb 3050
2950fa9d
CG
3051 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3052 if (err)
3053 return err;
3ec3b2fb 3054
89bddce5
SH
3055 a0 = a[0];
3056 a1 = a[1];
3057
3058 switch (call) {
3059 case SYS_SOCKET:
9d6a15c3 3060 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
3061 break;
3062 case SYS_BIND:
a87d35d8 3063 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
3064 break;
3065 case SYS_CONNECT:
1387c2c2 3066 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
3067 break;
3068 case SYS_LISTEN:
25e290ee 3069 err = __sys_listen(a0, a1);
89bddce5
SH
3070 break;
3071 case SYS_ACCEPT:
4541e805
DB
3072 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3073 (int __user *)a[2], 0);
89bddce5
SH
3074 break;
3075 case SYS_GETSOCKNAME:
3076 err =
8882a107
DB
3077 __sys_getsockname(a0, (struct sockaddr __user *)a1,
3078 (int __user *)a[2]);
89bddce5
SH
3079 break;
3080 case SYS_GETPEERNAME:
3081 err =
b21c8f83
DB
3082 __sys_getpeername(a0, (struct sockaddr __user *)a1,
3083 (int __user *)a[2]);
89bddce5
SH
3084 break;
3085 case SYS_SOCKETPAIR:
6debc8d8 3086 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
3087 break;
3088 case SYS_SEND:
f3bf896b
DB
3089 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
3090 NULL, 0);
89bddce5
SH
3091 break;
3092 case SYS_SENDTO:
211b634b
DB
3093 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
3094 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
3095 break;
3096 case SYS_RECV:
d27e9afc
DB
3097 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
3098 NULL, NULL);
89bddce5
SH
3099 break;
3100 case SYS_RECVFROM:
7a09e1eb
DB
3101 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
3102 (struct sockaddr __user *)a[4],
3103 (int __user *)a[5]);
89bddce5
SH
3104 break;
3105 case SYS_SHUTDOWN:
005a1aea 3106 err = __sys_shutdown(a0, a1);
89bddce5
SH
3107 break;
3108 case SYS_SETSOCKOPT:
cc36dca0
DB
3109 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
3110 a[4]);
89bddce5
SH
3111 break;
3112 case SYS_GETSOCKOPT:
3113 err =
13a2d70e
DB
3114 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
3115 (int __user *)a[4]);
89bddce5
SH
3116 break;
3117 case SYS_SENDMSG:
e1834a32
DB
3118 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
3119 a[2], true);
89bddce5 3120 break;
228e548e 3121 case SYS_SENDMMSG:
e1834a32
DB
3122 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
3123 a[3], true);
228e548e 3124 break;
89bddce5 3125 case SYS_RECVMSG:
e1834a32
DB
3126 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
3127 a[2], true);
89bddce5 3128 break;
a2e27255 3129 case SYS_RECVMMSG:
3ca47e95 3130 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
3131 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3132 a[2], a[3],
3133 (struct __kernel_timespec __user *)a[4],
3134 NULL);
3135 else
3136 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3137 a[2], a[3], NULL,
3138 (struct old_timespec32 __user *)a[4]);
a2e27255 3139 break;
de11defe 3140 case SYS_ACCEPT4:
4541e805
DB
3141 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3142 (int __user *)a[2], a[3]);
aaca0bdc 3143 break;
89bddce5
SH
3144 default:
3145 err = -EINVAL;
3146 break;
1da177e4
LT
3147 }
3148 return err;
3149}
3150
89bddce5 3151#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 3152
55737fda
SH
3153/**
3154 * sock_register - add a socket protocol handler
3155 * @ops: description of protocol
3156 *
1da177e4
LT
3157 * This function is called by a protocol handler that wants to
3158 * advertise its address family, and have it linked into the
e793c0f7 3159 * socket interface. The value ops->family corresponds to the
55737fda 3160 * socket system call protocol family.
1da177e4 3161 */
f0fd27d4 3162int sock_register(const struct net_proto_family *ops)
1da177e4
LT
3163{
3164 int err;
3165
3166 if (ops->family >= NPROTO) {
3410f22e 3167 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
3168 return -ENOBUFS;
3169 }
55737fda
SH
3170
3171 spin_lock(&net_family_lock);
190683a9
ED
3172 if (rcu_dereference_protected(net_families[ops->family],
3173 lockdep_is_held(&net_family_lock)))
55737fda
SH
3174 err = -EEXIST;
3175 else {
cf778b00 3176 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
3177 err = 0;
3178 }
55737fda
SH
3179 spin_unlock(&net_family_lock);
3180
fe0bdbde 3181 pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
1da177e4
LT
3182 return err;
3183}
c6d409cf 3184EXPORT_SYMBOL(sock_register);
1da177e4 3185
55737fda
SH
3186/**
3187 * sock_unregister - remove a protocol handler
3188 * @family: protocol family to remove
3189 *
1da177e4
LT
3190 * This function is called by a protocol handler that wants to
3191 * remove its address family, and have it unlinked from the
55737fda
SH
3192 * new socket creation.
3193 *
3194 * If protocol handler is a module, then it can use module reference
3195 * counts to protect against new references. If protocol handler is not
3196 * a module then it needs to provide its own protection in
3197 * the ops->create routine.
1da177e4 3198 */
f0fd27d4 3199void sock_unregister(int family)
1da177e4 3200{
f0fd27d4 3201 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3202
55737fda 3203 spin_lock(&net_family_lock);
a9b3cd7f 3204 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3205 spin_unlock(&net_family_lock);
3206
3207 synchronize_rcu();
3208
fe0bdbde 3209 pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
1da177e4 3210}
c6d409cf 3211EXPORT_SYMBOL(sock_unregister);
1da177e4 3212
bf2ae2e4
XL
3213bool sock_is_registered(int family)
3214{
66b51b0a 3215 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3216}
3217
77d76ea3 3218static int __init sock_init(void)
1da177e4 3219{
b3e19d92 3220 int err;
2ca794e5
EB
3221 /*
3222 * Initialize the network sysctl infrastructure.
3223 */
3224 err = net_sysctl_init();
3225 if (err)
3226 goto out;
b3e19d92 3227
1da177e4 3228 /*
89bddce5 3229 * Initialize skbuff SLAB cache
1da177e4
LT
3230 */
3231 skb_init();
1da177e4
LT
3232
3233 /*
89bddce5 3234 * Initialize the protocols module.
1da177e4
LT
3235 */
3236
3237 init_inodecache();
b3e19d92
NP
3238
3239 err = register_filesystem(&sock_fs_type);
3240 if (err)
47260ba9 3241 goto out;
1da177e4 3242 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3243 if (IS_ERR(sock_mnt)) {
3244 err = PTR_ERR(sock_mnt);
3245 goto out_mount;
3246 }
77d76ea3
AK
3247
3248 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3249 */
3250
3251#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3252 err = netfilter_init();
3253 if (err)
3254 goto out;
1da177e4 3255#endif
cbeb321a 3256
408eccce 3257 ptp_classifier_init();
c1f19b51 3258
b3e19d92
NP
3259out:
3260 return err;
3261
3262out_mount:
3263 unregister_filesystem(&sock_fs_type);
b3e19d92 3264 goto out;
1da177e4
LT
3265}
3266
77d76ea3
AK
3267core_initcall(sock_init); /* early initcall */
3268
1da177e4
LT
3269#ifdef CONFIG_PROC_FS
3270void socket_seq_show(struct seq_file *seq)
3271{
648845ab
TZ
3272 seq_printf(seq, "sockets: used %d\n",
3273 sock_inuse_get(seq->private));
1da177e4 3274}
89bddce5 3275#endif /* CONFIG_PROC_FS */
1da177e4 3276
29c49648
AB
3277/* Handle the fact that while struct ifreq has the same *layout* on
3278 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3279 * which are handled elsewhere, it still has different *size* due to
3280 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3281 * resulting in struct ifreq being 32 and 40 bytes respectively).
3282 * As a result, if the struct happens to be at the end of a page and
3283 * the next page isn't readable/writable, we get a fault. To prevent
3284 * that, copy back and forth to the full size.
3285 */
3286int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
7a229387 3287{
29c49648
AB
3288 if (in_compat_syscall()) {
3289 struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
7a229387 3290
29c49648
AB
3291 memset(ifr, 0, sizeof(*ifr));
3292 if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
3293 return -EFAULT;
7a229387 3294
29c49648
AB
3295 if (ifrdata)
3296 *ifrdata = compat_ptr(ifr32->ifr_data);
7a229387 3297
29c49648
AB
3298 return 0;
3299 }
7a229387 3300
29c49648 3301 if (copy_from_user(ifr, arg, sizeof(*ifr)))
7a229387
AB
3302 return -EFAULT;
3303
29c49648
AB
3304 if (ifrdata)
3305 *ifrdata = ifr->ifr_data;
3306
7a229387
AB
3307 return 0;
3308}
29c49648 3309EXPORT_SYMBOL(get_user_ifreq);
7a229387 3310
29c49648 3311int put_user_ifreq(struct ifreq *ifr, void __user *arg)
7a229387 3312{
29c49648 3313 size_t size = sizeof(*ifr);
7a229387 3314
29c49648
AB
3315 if (in_compat_syscall())
3316 size = sizeof(struct compat_ifreq);
7a229387 3317
29c49648 3318 if (copy_to_user(arg, ifr, size))
7a229387
AB
3319 return -EFAULT;
3320
3a7da39d 3321 return 0;
7a229387 3322}
29c49648 3323EXPORT_SYMBOL(put_user_ifreq);
7a229387 3324
89bbfc95 3325#ifdef CONFIG_COMPAT
7a50a240
AB
3326static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3327{
7a50a240 3328 compat_uptr_t uptr32;
44c02a2c
AV
3329 struct ifreq ifr;
3330 void __user *saved;
3331 int err;
7a50a240 3332
29c49648 3333 if (get_user_ifreq(&ifr, NULL, uifr32))
7a50a240
AB
3334 return -EFAULT;
3335
3336 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3337 return -EFAULT;
3338
44c02a2c
AV
3339 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3340 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3341
a554bf96 3342 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
44c02a2c
AV
3343 if (!err) {
3344 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
29c49648 3345 if (put_user_ifreq(&ifr, uifr32))
44c02a2c 3346 err = -EFAULT;
ccbd6a5a 3347 }
44c02a2c 3348 return err;
7a229387
AB
3349}
3350
590d4693
BH
3351/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3352static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3353 struct compat_ifreq __user *u_ifreq32)
7a229387 3354{
44c02a2c 3355 struct ifreq ifreq;
a554bf96 3356 void __user *data;
7a229387 3357
d0efb162
PC
3358 if (!is_socket_ioctl_cmd(cmd))
3359 return -ENOTTY;
a554bf96 3360 if (get_user_ifreq(&ifreq, &data, u_ifreq32))
7a229387 3361 return -EFAULT;
a554bf96 3362 ifreq.ifr_data = data;
7a229387 3363
a554bf96 3364 return dev_ioctl(net, cmd, &ifreq, data, NULL);
a2116ed2
AB
3365}
3366
6b96018b
AB
3367static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3368 unsigned int cmd, unsigned long arg)
3369{
3370 void __user *argp = compat_ptr(arg);
3371 struct sock *sk = sock->sk;
3372 struct net *net = sock_net(sk);
1ded5e5a 3373 const struct proto_ops *ops;
7a229387 3374
6b96018b 3375 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
88fc023f 3376 return sock_ioctl(file, cmd, (unsigned long)argp);
6b96018b
AB
3377
3378 switch (cmd) {
7a50a240
AB
3379 case SIOCWANDEV:
3380 return compat_siocwandev(net, argp);
0768e170
AB
3381 case SIOCGSTAMP_OLD:
3382 case SIOCGSTAMPNS_OLD:
1ded5e5a
ED
3383 ops = READ_ONCE(sock->ops);
3384 if (!ops->gettstamp)
c7cbdbf2 3385 return -ENOIOCTLCMD;
1ded5e5a
ED
3386 return ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
3387 !COMPAT_USE_64BIT_TIME);
c7cbdbf2 3388
dd98d289 3389 case SIOCETHTOOL:
590d4693
BH
3390 case SIOCBONDSLAVEINFOQUERY:
3391 case SIOCBONDINFOQUERY:
a2116ed2 3392 case SIOCSHWTSTAMP:
fd468c74 3393 case SIOCGHWTSTAMP:
590d4693 3394 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3395
3396 case FIOSETOWN:
3397 case SIOCSPGRP:
3398 case FIOGETOWN:
3399 case SIOCGPGRP:
3400 case SIOCBRADDBR:
3401 case SIOCBRDELBR:
3402 case SIOCGIFVLAN:
3403 case SIOCSIFVLAN:
c62cce2c 3404 case SIOCGSKNS:
0768e170
AB
3405 case SIOCGSTAMP_NEW:
3406 case SIOCGSTAMPNS_NEW:
876f0bf9 3407 case SIOCGIFCONF:
fd3a4590
RP
3408 case SIOCSIFBR:
3409 case SIOCGIFBR:
6b96018b
AB
3410 return sock_ioctl(file, cmd, arg);
3411
3412 case SIOCGIFFLAGS:
3413 case SIOCSIFFLAGS:
709566d7
AB
3414 case SIOCGIFMAP:
3415 case SIOCSIFMAP:
6b96018b
AB
3416 case SIOCGIFMETRIC:
3417 case SIOCSIFMETRIC:
3418 case SIOCGIFMTU:
3419 case SIOCSIFMTU:
3420 case SIOCGIFMEM:
3421 case SIOCSIFMEM:
3422 case SIOCGIFHWADDR:
3423 case SIOCSIFHWADDR:
3424 case SIOCADDMULTI:
3425 case SIOCDELMULTI:
3426 case SIOCGIFINDEX:
6b96018b
AB
3427 case SIOCGIFADDR:
3428 case SIOCSIFADDR:
3429 case SIOCSIFHWBROADCAST:
6b96018b 3430 case SIOCDIFADDR:
6b96018b
AB
3431 case SIOCGIFBRDADDR:
3432 case SIOCSIFBRDADDR:
3433 case SIOCGIFDSTADDR:
3434 case SIOCSIFDSTADDR:
3435 case SIOCGIFNETMASK:
3436 case SIOCSIFNETMASK:
3437 case SIOCSIFPFLAGS:
3438 case SIOCGIFPFLAGS:
3439 case SIOCGIFTXQLEN:
3440 case SIOCSIFTXQLEN:
3441 case SIOCBRADDIF:
3442 case SIOCBRDELIF:
c6c9fee3 3443 case SIOCGIFNAME:
9177efd3
AB
3444 case SIOCSIFNAME:
3445 case SIOCGMIIPHY:
3446 case SIOCGMIIREG:
3447 case SIOCSMIIREG:
f92d4fc9
AV
3448 case SIOCBONDENSLAVE:
3449 case SIOCBONDRELEASE:
3450 case SIOCBONDSETHWADDR:
3451 case SIOCBONDCHANGEACTIVE:
6b96018b
AB
3452 case SIOCSARP:
3453 case SIOCGARP:
3454 case SIOCDARP:
c7dc504e 3455 case SIOCOUTQ:
9d7bf41f 3456 case SIOCOUTQNSD:
6b96018b 3457 case SIOCATMARK:
63ff03ab 3458 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3459 }
3460
6b96018b
AB
3461 return -ENOIOCTLCMD;
3462}
7a229387 3463
95c96174 3464static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3465 unsigned long arg)
89bbfc95
SP
3466{
3467 struct socket *sock = file->private_data;
1ded5e5a 3468 const struct proto_ops *ops = READ_ONCE(sock->ops);
89bbfc95 3469 int ret = -ENOIOCTLCMD;
87de87d5
DM
3470 struct sock *sk;
3471 struct net *net;
3472
3473 sk = sock->sk;
3474 net = sock_net(sk);
89bbfc95 3475
1ded5e5a
ED
3476 if (ops->compat_ioctl)
3477 ret = ops->compat_ioctl(sock, cmd, arg);
89bbfc95 3478
87de87d5
DM
3479 if (ret == -ENOIOCTLCMD &&
3480 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3481 ret = compat_wext_handle_ioctl(net, cmd, arg);
3482
6b96018b
AB
3483 if (ret == -ENOIOCTLCMD)
3484 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3485
89bbfc95
SP
3486 return ret;
3487}
3488#endif
3489
8a3c245c
PT
3490/**
3491 * kernel_bind - bind an address to a socket (kernel space)
3492 * @sock: socket
3493 * @addr: address
3494 * @addrlen: length of address
3495 *
3496 * Returns 0 or an error.
3497 */
3498
ac5a488e
SS
3499int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3500{
1ded5e5a 3501 return READ_ONCE(sock->ops)->bind(sock, addr, addrlen);
ac5a488e 3502}
c6d409cf 3503EXPORT_SYMBOL(kernel_bind);
ac5a488e 3504
8a3c245c
PT
3505/**
3506 * kernel_listen - move socket to listening state (kernel space)
3507 * @sock: socket
3508 * @backlog: pending connections queue size
3509 *
3510 * Returns 0 or an error.
3511 */
3512
ac5a488e
SS
3513int kernel_listen(struct socket *sock, int backlog)
3514{
1ded5e5a 3515 return READ_ONCE(sock->ops)->listen(sock, backlog);
ac5a488e 3516}
c6d409cf 3517EXPORT_SYMBOL(kernel_listen);
ac5a488e 3518
8a3c245c
PT
3519/**
3520 * kernel_accept - accept a connection (kernel space)
3521 * @sock: listening socket
3522 * @newsock: new connected socket
3523 * @flags: flags
3524 *
3525 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3526 * If it fails, @newsock is guaranteed to be %NULL.
3527 * Returns 0 or an error.
3528 */
3529
ac5a488e
SS
3530int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3531{
3532 struct sock *sk = sock->sk;
1ded5e5a 3533 const struct proto_ops *ops = READ_ONCE(sock->ops);
ac5a488e
SS
3534 int err;
3535
3536 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3537 newsock);
3538 if (err < 0)
3539 goto done;
3540
1ded5e5a 3541 err = ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3542 if (err < 0) {
3543 sock_release(*newsock);
fa8705b0 3544 *newsock = NULL;
ac5a488e
SS
3545 goto done;
3546 }
3547
1ded5e5a
ED
3548 (*newsock)->ops = ops;
3549 __module_get(ops->owner);
ac5a488e
SS
3550
3551done:
3552 return err;
3553}
c6d409cf 3554EXPORT_SYMBOL(kernel_accept);
ac5a488e 3555
8a3c245c
PT
3556/**
3557 * kernel_connect - connect a socket (kernel space)
3558 * @sock: socket
3559 * @addr: address
3560 * @addrlen: address length
3561 * @flags: flags (O_NONBLOCK, ...)
3562 *
f1dcffcc 3563 * For datagram sockets, @addr is the address to which datagrams are sent
8a3c245c
PT
3564 * by default, and the only address from which datagrams are received.
3565 * For stream sockets, attempts to connect to @addr.
3566 * Returns 0 or an error code.
3567 */
3568
ac5a488e 3569int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3570 int flags)
ac5a488e 3571{
0bdf3993
JR
3572 struct sockaddr_storage address;
3573
3574 memcpy(&address, addr, addrlen);
3575
3576 return READ_ONCE(sock->ops)->connect(sock, (struct sockaddr *)&address,
3577 addrlen, flags);
ac5a488e 3578}
c6d409cf 3579EXPORT_SYMBOL(kernel_connect);
ac5a488e 3580
8a3c245c
PT
3581/**
3582 * kernel_getsockname - get the address which the socket is bound (kernel space)
3583 * @sock: socket
3584 * @addr: address holder
3585 *
3586 * Fills the @addr pointer with the address which the socket is bound.
0fc95dec 3587 * Returns the length of the address in bytes or an error code.
8a3c245c
PT
3588 */
3589
9b2c45d4 3590int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3591{
1ded5e5a 3592 return READ_ONCE(sock->ops)->getname(sock, addr, 0);
ac5a488e 3593}
c6d409cf 3594EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3595
8a3c245c 3596/**
645f0897 3597 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3598 * @sock: socket
3599 * @addr: address holder
3600 *
3601 * Fills the @addr pointer with the address which the socket is connected.
0fc95dec 3602 * Returns the length of the address in bytes or an error code.
8a3c245c
PT
3603 */
3604
9b2c45d4 3605int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3606{
1ded5e5a 3607 return READ_ONCE(sock->ops)->getname(sock, addr, 1);
ac5a488e 3608}
c6d409cf 3609EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3610
8a3c245c 3611/**
645f0897 3612 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3613 * @sock: socket
3614 * @how: connection part
3615 *
3616 * Returns 0 or an error.
3617 */
3618
91cf45f0
TM
3619int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3620{
1ded5e5a 3621 return READ_ONCE(sock->ops)->shutdown(sock, how);
91cf45f0 3622}
91cf45f0 3623EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3624
8a3c245c
PT
3625/**
3626 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3627 * @sk: socket
3628 *
3629 * This routine returns the IP overhead imposed by a socket i.e.
3630 * the length of the underlying IP header, depending on whether
3631 * this is an IPv4 or IPv6 socket and the length from IP options turned
3632 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3633 */
8a3c245c 3634
113c3075
P
3635u32 kernel_sock_ip_overhead(struct sock *sk)
3636{
3637 struct inet_sock *inet;
3638 struct ip_options_rcu *opt;
3639 u32 overhead = 0;
113c3075
P
3640#if IS_ENABLED(CONFIG_IPV6)
3641 struct ipv6_pinfo *np;
3642 struct ipv6_txoptions *optv6 = NULL;
3643#endif /* IS_ENABLED(CONFIG_IPV6) */
3644
3645 if (!sk)
3646 return overhead;
3647
113c3075
P
3648 switch (sk->sk_family) {
3649 case AF_INET:
3650 inet = inet_sk(sk);
3651 overhead += sizeof(struct iphdr);
3652 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3653 sock_owned_by_user(sk));
113c3075
P
3654 if (opt)
3655 overhead += opt->opt.optlen;
3656 return overhead;
3657#if IS_ENABLED(CONFIG_IPV6)
3658 case AF_INET6:
3659 np = inet6_sk(sk);
3660 overhead += sizeof(struct ipv6hdr);
3661 if (np)
3662 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3663 sock_owned_by_user(sk));
113c3075
P
3664 if (optv6)
3665 overhead += (optv6->opt_flen + optv6->opt_nflen);
3666 return overhead;
3667#endif /* IS_ENABLED(CONFIG_IPV6) */
3668 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3669 return overhead;
3670 }
3671}
3672EXPORT_SYMBOL(kernel_sock_ip_overhead);