fs: port vfs_*() helpers to struct mnt_idmap
[linux-block.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
aef2feda 55#include <linux/bpf-cgroup.h>
cc69837f 56#include <linux/ethtool.h>
1da177e4 57#include <linux/mm.h>
1da177e4
LT
58#include <linux/socket.h>
59#include <linux/file.h>
60#include <linux/net.h>
61#include <linux/interrupt.h>
aaca0bdc 62#include <linux/thread_info.h>
55737fda 63#include <linux/rcupdate.h>
1da177e4
LT
64#include <linux/netdevice.h>
65#include <linux/proc_fs.h>
66#include <linux/seq_file.h>
4a3e2f71 67#include <linux/mutex.h>
1da177e4 68#include <linux/if_bridge.h>
20380731 69#include <linux/if_vlan.h>
408eccce 70#include <linux/ptp_classify.h>
1da177e4
LT
71#include <linux/init.h>
72#include <linux/poll.h>
73#include <linux/cache.h>
74#include <linux/module.h>
75#include <linux/highmem.h>
1da177e4 76#include <linux/mount.h>
fba9be49 77#include <linux/pseudo_fs.h>
1da177e4
LT
78#include <linux/security.h>
79#include <linux/syscalls.h>
80#include <linux/compat.h>
81#include <linux/kmod.h>
3ec3b2fb 82#include <linux/audit.h>
d86b5e0e 83#include <linux/wireless.h>
1b8d7ae4 84#include <linux/nsproxy.h>
1fd7317d 85#include <linux/magic.h>
5a0e3ad6 86#include <linux/slab.h>
600e1779 87#include <linux/xattr.h>
c8e8cd57 88#include <linux/nospec.h>
8c3c447b 89#include <linux/indirect_call_wrapper.h>
1da177e4 90
7c0f6ba6 91#include <linux/uaccess.h>
1da177e4
LT
92#include <asm/unistd.h>
93
94#include <net/compat.h>
87de87d5 95#include <net/wext.h>
f8451725 96#include <net/cls_cgroup.h>
1da177e4
LT
97
98#include <net/sock.h>
99#include <linux/netfilter.h>
100
6b96018b
AB
101#include <linux/if_tun.h>
102#include <linux/ipv6_route.h>
103#include <linux/route.h>
c7dc504e 104#include <linux/termios.h>
6b96018b 105#include <linux/sockios.h>
076bb0c8 106#include <net/busy_poll.h>
f24b9be5 107#include <linux/errqueue.h>
d7c08826 108#include <linux/ptp_clock_kernel.h>
06021292 109
e0d1095a 110#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
111unsigned int sysctl_net_busy_read __read_mostly;
112unsigned int sysctl_net_busy_poll __read_mostly;
06021292 113#endif
6b96018b 114
8ae5e030
AV
115static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
116static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 117static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
118
119static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
120static __poll_t sock_poll(struct file *file,
121 struct poll_table_struct *wait);
89bddce5 122static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
123#ifdef CONFIG_COMPAT
124static long compat_sock_ioctl(struct file *file,
89bddce5 125 unsigned int cmd, unsigned long arg);
89bbfc95 126#endif
1da177e4 127static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
128static ssize_t sock_sendpage(struct file *file, struct page *page,
129 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 130static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 131 struct pipe_inode_info *pipe, size_t len,
9c55e01c 132 unsigned int flags);
542d3065
AB
133
134#ifdef CONFIG_PROC_FS
135static void sock_show_fdinfo(struct seq_file *m, struct file *f)
136{
137 struct socket *sock = f->private_data;
138
139 if (sock->ops->show_fdinfo)
140 sock->ops->show_fdinfo(m, sock);
141}
142#else
143#define sock_show_fdinfo NULL
144#endif
1da177e4 145
1da177e4
LT
146/*
147 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
148 * in the operation structures but are done directly via the socketcall() multiplexor.
149 */
150
da7071d7 151static const struct file_operations socket_file_ops = {
1da177e4
LT
152 .owner = THIS_MODULE,
153 .llseek = no_llseek,
8ae5e030
AV
154 .read_iter = sock_read_iter,
155 .write_iter = sock_write_iter,
1da177e4
LT
156 .poll = sock_poll,
157 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
158#ifdef CONFIG_COMPAT
159 .compat_ioctl = compat_sock_ioctl,
160#endif
1da177e4 161 .mmap = sock_mmap,
1da177e4
LT
162 .release = sock_close,
163 .fasync = sock_fasync,
5274f052
JA
164 .sendpage = sock_sendpage,
165 .splice_write = generic_splice_sendpage,
9c55e01c 166 .splice_read = sock_splice_read,
b4653342 167 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
168};
169
fe0bdbde
YD
170static const char * const pf_family_names[] = {
171 [PF_UNSPEC] = "PF_UNSPEC",
172 [PF_UNIX] = "PF_UNIX/PF_LOCAL",
173 [PF_INET] = "PF_INET",
174 [PF_AX25] = "PF_AX25",
175 [PF_IPX] = "PF_IPX",
176 [PF_APPLETALK] = "PF_APPLETALK",
177 [PF_NETROM] = "PF_NETROM",
178 [PF_BRIDGE] = "PF_BRIDGE",
179 [PF_ATMPVC] = "PF_ATMPVC",
180 [PF_X25] = "PF_X25",
181 [PF_INET6] = "PF_INET6",
182 [PF_ROSE] = "PF_ROSE",
183 [PF_DECnet] = "PF_DECnet",
184 [PF_NETBEUI] = "PF_NETBEUI",
185 [PF_SECURITY] = "PF_SECURITY",
186 [PF_KEY] = "PF_KEY",
187 [PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
188 [PF_PACKET] = "PF_PACKET",
189 [PF_ASH] = "PF_ASH",
190 [PF_ECONET] = "PF_ECONET",
191 [PF_ATMSVC] = "PF_ATMSVC",
192 [PF_RDS] = "PF_RDS",
193 [PF_SNA] = "PF_SNA",
194 [PF_IRDA] = "PF_IRDA",
195 [PF_PPPOX] = "PF_PPPOX",
196 [PF_WANPIPE] = "PF_WANPIPE",
197 [PF_LLC] = "PF_LLC",
198 [PF_IB] = "PF_IB",
199 [PF_MPLS] = "PF_MPLS",
200 [PF_CAN] = "PF_CAN",
201 [PF_TIPC] = "PF_TIPC",
202 [PF_BLUETOOTH] = "PF_BLUETOOTH",
203 [PF_IUCV] = "PF_IUCV",
204 [PF_RXRPC] = "PF_RXRPC",
205 [PF_ISDN] = "PF_ISDN",
206 [PF_PHONET] = "PF_PHONET",
207 [PF_IEEE802154] = "PF_IEEE802154",
208 [PF_CAIF] = "PF_CAIF",
209 [PF_ALG] = "PF_ALG",
210 [PF_NFC] = "PF_NFC",
211 [PF_VSOCK] = "PF_VSOCK",
212 [PF_KCM] = "PF_KCM",
213 [PF_QIPCRTR] = "PF_QIPCRTR",
214 [PF_SMC] = "PF_SMC",
215 [PF_XDP] = "PF_XDP",
bc49d816 216 [PF_MCTP] = "PF_MCTP",
fe0bdbde
YD
217};
218
1da177e4
LT
219/*
220 * The protocol list. Each protocol is registered in here.
221 */
222
1da177e4 223static DEFINE_SPINLOCK(net_family_lock);
190683a9 224static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 225
1da177e4 226/*
89bddce5
SH
227 * Support routines.
228 * Move socket addresses back and forth across the kernel/user
229 * divide and look after the messy bits.
1da177e4
LT
230 */
231
1da177e4
LT
/**
 *	move_addr_to_kernel	-	copy a socket address into kernel space
 *	@uaddr: Address in user space
 *	@ulen: Length in user space
 *	@kaddr: Address in kernel space
 *
 *	The address is copied into kernel space. If the provided length is
 *	negative or the address is too long, an error code of -EINVAL is
 *	returned. If the copy gives invalid addresses, -EFAULT is returned.
 *	On a success 0 is returned.
 */
242
43db362d 243int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 244{
230b1839 245 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 246 return -EINVAL;
89bddce5 247 if (ulen == 0)
1da177e4 248 return 0;
89bddce5 249 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 250 return -EFAULT;
3ec3b2fb 251 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
252}
253
254/**
255 * move_addr_to_user - copy an address to user space
256 * @kaddr: kernel space address
257 * @klen: length of address in kernel
258 * @uaddr: user space address
259 * @ulen: pointer to user length field
260 *
261 * The value pointed to by ulen on entry is the buffer length available.
262 * This is overwritten with the buffer space used. -EINVAL is returned
263 * if an overlong buffer is specified or a negative buffer size. -EFAULT
264 * is returned if either the buffer or the length field are not
265 * accessible.
266 * After copying the data up to the limit the user specifies, the true
267 * length of the data is written over the length limit the user
268 * specified. Zero is returned for a success.
269 */
89bddce5 270
43db362d 271static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 272 void __user *uaddr, int __user *ulen)
1da177e4
LT
273{
274 int err;
275 int len;
276
68c6beb3 277 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
278 err = get_user(len, ulen);
279 if (err)
1da177e4 280 return err;
89bddce5
SH
281 if (len > klen)
282 len = klen;
68c6beb3 283 if (len < 0)
1da177e4 284 return -EINVAL;
89bddce5 285 if (len) {
d6fe3945
SG
286 if (audit_sockaddr(klen, kaddr))
287 return -ENOMEM;
89bddce5 288 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
289 return -EFAULT;
290 }
291 /*
89bddce5
SH
292 * "fromlen shall refer to the value before truncation.."
293 * 1003.1g
1da177e4
LT
294 */
295 return __put_user(klen, ulen);
296}
297
08009a76 298static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
299
300static struct inode *sock_alloc_inode(struct super_block *sb)
301{
302 struct socket_alloc *ei;
89bddce5 303
fd60b288 304 ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
305 if (!ei)
306 return NULL;
333f7909
AV
307 init_waitqueue_head(&ei->socket.wq.wait);
308 ei->socket.wq.fasync_list = NULL;
309 ei->socket.wq.flags = 0;
89bddce5 310
1da177e4
LT
311 ei->socket.state = SS_UNCONNECTED;
312 ei->socket.flags = 0;
313 ei->socket.ops = NULL;
314 ei->socket.sk = NULL;
315 ei->socket.file = NULL;
1da177e4
LT
316
317 return &ei->vfs_inode;
318}
319
6d7855c5 320static void sock_free_inode(struct inode *inode)
1da177e4 321{
43815482
ED
322 struct socket_alloc *ei;
323
324 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 325 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
326}
327
51cc5068 328static void init_once(void *foo)
1da177e4 329{
89bddce5 330 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 331
a35afb83 332 inode_init_once(&ei->vfs_inode);
1da177e4 333}
89bddce5 334
1e911632 335static void init_inodecache(void)
1da177e4
LT
336{
337 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
338 sizeof(struct socket_alloc),
339 0,
340 (SLAB_HWCACHE_ALIGN |
341 SLAB_RECLAIM_ACCOUNT |
5d097056 342 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 343 init_once);
1e911632 344 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
345}
346
b87221de 347static const struct super_operations sockfs_ops = {
c6d409cf 348 .alloc_inode = sock_alloc_inode,
6d7855c5 349 .free_inode = sock_free_inode,
c6d409cf 350 .statfs = simple_statfs,
1da177e4
LT
351};
352
c23fbb6b
ED
353/*
354 * sockfs_dname() is called from d_path().
355 */
356static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
357{
0f60d288 358 return dynamic_dname(buffer, buflen, "socket:[%lu]",
c5ef6035 359 d_inode(dentry)->i_ino);
c23fbb6b
ED
360}
361
3ba13d17 362static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 363 .d_dname = sockfs_dname,
1da177e4
LT
364};
365
bba0bd31
AG
366static int sockfs_xattr_get(const struct xattr_handler *handler,
367 struct dentry *dentry, struct inode *inode,
368 const char *suffix, void *value, size_t size)
369{
370 if (value) {
371 if (dentry->d_name.len + 1 > size)
372 return -ERANGE;
373 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
374 }
375 return dentry->d_name.len + 1;
376}
377
/* Name of the sockfs xattr: system prefix + "sockprotoname". */
#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
/* Length of the full name, excluding the terminating NUL. */
#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
381
382static const struct xattr_handler sockfs_xattr_handler = {
383 .name = XATTR_NAME_SOCKPROTONAME,
384 .get = sockfs_xattr_get,
385};
386
4a590153 387static int sockfs_security_xattr_set(const struct xattr_handler *handler,
e65ce2a5 388 struct user_namespace *mnt_userns,
4a590153
AG
389 struct dentry *dentry, struct inode *inode,
390 const char *suffix, const void *value,
391 size_t size, int flags)
392{
393 /* Handled by LSM. */
394 return -EAGAIN;
395}
396
397static const struct xattr_handler sockfs_security_xattr_handler = {
398 .prefix = XATTR_SECURITY_PREFIX,
399 .set = sockfs_security_xattr_set,
400};
401
bba0bd31
AG
402static const struct xattr_handler *sockfs_xattr_handlers[] = {
403 &sockfs_xattr_handler,
4a590153 404 &sockfs_security_xattr_handler,
bba0bd31
AG
405 NULL
406};
407
fba9be49 408static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 409{
fba9be49
DH
410 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
411 if (!ctx)
412 return -ENOMEM;
413 ctx->ops = &sockfs_ops;
414 ctx->dops = &sockfs_dentry_operations;
415 ctx->xattr = sockfs_xattr_handlers;
416 return 0;
c74a1cbb
AV
417}
418
419static struct vfsmount *sock_mnt __read_mostly;
420
421static struct file_system_type sock_fs_type = {
422 .name = "sockfs",
fba9be49 423 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
424 .kill_sb = kill_anon_super,
425};
426
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success a file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we no longer
 *	refer to the socket after mapping. If one day we need it, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable with shared fd spaces; we cannot
 *	solve it inside the kernel, but we still take care of internal
 *	coherence.
 */
443
8a3c245c
PT
444/**
445 * sock_alloc_file - Bind a &socket to a &file
446 * @sock: socket
447 * @flags: file status flags
448 * @dname: protocol name
449 *
450 * Returns the &file bound with @sock, implicitly storing it
451 * in sock->file. If dname is %NULL, sets to "".
452 * On failure the return is a ERR pointer (see linux/err.h).
453 * This function uses GFP_KERNEL internally.
454 */
455
aab174f0 456struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 457{
7cbe66b6 458 struct file *file;
1da177e4 459
d93aa9d8
AV
460 if (!dname)
461 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 462
d93aa9d8
AV
463 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
464 O_RDWR | (flags & O_NONBLOCK),
465 &socket_file_ops);
b5ffe634 466 if (IS_ERR(file)) {
8e1611e2 467 sock_release(sock);
39b65252 468 return file;
cc3808f8
AV
469 }
470
471 sock->file = file;
39d8c1b6 472 file->private_data = sock;
d8e464ec 473 stream_open(SOCK_INODE(sock), file);
28407630 474 return file;
39d8c1b6 475}
56b31d1c 476EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 477
56b31d1c 478static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
479{
480 struct file *newfile;
28407630 481 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
482 if (unlikely(fd < 0)) {
483 sock_release(sock);
28407630 484 return fd;
ce4bb04c 485 }
39d8c1b6 486
aab174f0 487 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 488 if (!IS_ERR(newfile)) {
39d8c1b6 489 fd_install(fd, newfile);
28407630
AV
490 return fd;
491 }
7cbe66b6 492
28407630
AV
493 put_unused_fd(fd);
494 return PTR_ERR(newfile);
1da177e4
LT
495}
496
8a3c245c
PT
/**
 *	sock_from_file - Return the &socket bound to @file.
 *	@file: file
 *
 *	On failure returns %NULL.
 */
503
dba4a925 504struct socket *sock_from_file(struct file *file)
6cb153ca 505{
6cb153ca 506 if (file->f_op == &socket_file_ops)
da214a47 507 return file->private_data; /* set in sock_alloc_file */
6cb153ca 508
23bb80d2 509 return NULL;
6cb153ca 510}
406a3c63 511EXPORT_SYMBOL(sock_from_file);
6cb153ca 512
1da177e4 513/**
c6d409cf 514 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
515 * @fd: file handle
516 * @err: pointer to an error code return
517 *
518 * The file handle passed in is locked and the socket it is bound
241c4667 519 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
520 * with a negative errno code and NULL is returned. The function checks
521 * for both invalid handles and passing a handle which is not a socket.
522 *
523 * On a success the socket object pointer is returned.
524 */
525
526struct socket *sockfd_lookup(int fd, int *err)
527{
528 struct file *file;
1da177e4
LT
529 struct socket *sock;
530
89bddce5
SH
531 file = fget(fd);
532 if (!file) {
1da177e4
LT
533 *err = -EBADF;
534 return NULL;
535 }
89bddce5 536
dba4a925
FR
537 sock = sock_from_file(file);
538 if (!sock) {
539 *err = -ENOTSOCK;
1da177e4 540 fput(file);
dba4a925 541 }
6cb153ca
BL
542 return sock;
543}
c6d409cf 544EXPORT_SYMBOL(sockfd_lookup);
1da177e4 545
6cb153ca
BL
546static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
547{
00e188ef 548 struct fd f = fdget(fd);
6cb153ca
BL
549 struct socket *sock;
550
3672558c 551 *err = -EBADF;
00e188ef 552 if (f.file) {
dba4a925 553 sock = sock_from_file(f.file);
00e188ef 554 if (likely(sock)) {
ce787a5a 555 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 556 return sock;
00e188ef 557 }
dba4a925 558 *err = -ENOTSOCK;
00e188ef 559 fdput(f);
1da177e4 560 }
6cb153ca 561 return NULL;
1da177e4
LT
562}
563
600e1779
MY
564static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
565 size_t size)
566{
567 ssize_t len;
568 ssize_t used = 0;
569
c5ef6035 570 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
571 if (len < 0)
572 return len;
573 used += len;
574 if (buffer) {
575 if (size < used)
576 return -ERANGE;
577 buffer += len;
578 }
579
580 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
581 used += len;
582 if (buffer) {
583 if (size < used)
584 return -ERANGE;
585 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
586 buffer += len;
587 }
588
589 return used;
590}
591
549c7297
CB
592static int sockfs_setattr(struct user_namespace *mnt_userns,
593 struct dentry *dentry, struct iattr *iattr)
86741ec2 594{
549c7297 595 int err = simple_setattr(&init_user_ns, dentry, iattr);
86741ec2 596
e1a3a60a 597 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
598 struct socket *sock = SOCKET_I(d_inode(dentry));
599
6d8c50dc
CW
600 if (sock->sk)
601 sock->sk->sk_uid = iattr->ia_uid;
602 else
603 err = -ENOENT;
86741ec2
LC
604 }
605
606 return err;
607}
608
600e1779 609static const struct inode_operations sockfs_inode_ops = {
600e1779 610 .listxattr = sockfs_listxattr,
86741ec2 611 .setattr = sockfs_setattr,
600e1779
MY
612};
613
/**
 *	sock_alloc - allocate a socket
 *
 *	Allocate a new inode and socket object. The two are bound together
 *	and initialised. The socket is then returned. If we are out of inodes
 *	NULL is returned. This function uses GFP_KERNEL internally.
 */
621
f4a00aac 622struct socket *sock_alloc(void)
1da177e4 623{
89bddce5
SH
624 struct inode *inode;
625 struct socket *sock;
1da177e4 626
a209dfc7 627 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
628 if (!inode)
629 return NULL;
630
631 sock = SOCKET_I(inode);
632
85fe4025 633 inode->i_ino = get_next_ino();
89bddce5 634 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
635 inode->i_uid = current_fsuid();
636 inode->i_gid = current_fsgid();
600e1779 637 inode->i_op = &sockfs_inode_ops;
1da177e4 638
1da177e4
LT
639 return sock;
640}
f4a00aac 641EXPORT_SYMBOL(sock_alloc);
1da177e4 642
6d8c50dc 643static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
644{
645 if (sock->ops) {
646 struct module *owner = sock->ops->owner;
647
6d8c50dc
CW
648 if (inode)
649 inode_lock(inode);
1da177e4 650 sock->ops->release(sock);
ff7b11aa 651 sock->sk = NULL;
6d8c50dc
CW
652 if (inode)
653 inode_unlock(inode);
1da177e4
LT
654 sock->ops = NULL;
655 module_put(owner);
656 }
657
333f7909 658 if (sock->wq.fasync_list)
3410f22e 659 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 660
1da177e4
LT
661 if (!sock->file) {
662 iput(SOCK_INODE(sock));
663 return;
664 }
89bddce5 665 sock->file = NULL;
1da177e4 666}
6d8c50dc 667
9a8ad9ac
AL
668/**
669 * sock_release - close a socket
670 * @sock: socket to close
671 *
672 * The socket is released from the protocol stack if it has a release
673 * callback, and the inode is then released if the socket is bound to
674 * an inode not a file.
675 */
6d8c50dc
CW
676void sock_release(struct socket *sock)
677{
678 __sock_release(sock, NULL);
679}
c6d409cf 680EXPORT_SYMBOL(sock_release);
1da177e4 681
c14ac945 682void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 683{
140c55d4
ED
684 u8 flags = *tx_flags;
685
51eb7492 686 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
140c55d4
ED
687 flags |= SKBTX_HW_TSTAMP;
688
51eb7492
GE
689 /* PTP hardware clocks can provide a free running cycle counter
690 * as a time base for virtual clocks. Tell driver to use the
691 * free running cycle counter for timestamp if socket is bound
692 * to virtual clock.
693 */
694 if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
695 flags |= SKBTX_HW_TSTAMP_USE_CYCLES;
696 }
697
c14ac945 698 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
699 flags |= SKBTX_SW_TSTAMP;
700
c14ac945 701 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
702 flags |= SKBTX_SCHED_TSTAMP;
703
140c55d4 704 *tx_flags = flags;
20d49473 705}
67cc0d40 706EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 707
8c3c447b
PA
708INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
709 size_t));
a648a592
PA
710INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
711 size_t));
d8725c86 712static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 713{
a648a592
PA
714 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
715 inet_sendmsg, sock, msg,
716 msg_data_left(msg));
d8725c86
AV
717 BUG_ON(ret == -EIOCBQUEUED);
718 return ret;
1da177e4
LT
719}
720
85806af0
RD
721/**
722 * sock_sendmsg - send a message through @sock
723 * @sock: socket
724 * @msg: message to send
725 *
726 * Sends @msg through @sock, passing through LSM.
727 * Returns the number of bytes sent, or an error code.
728 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	/* Any non-zero result from the security hook is returned as-is. */
	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
c6d409cf 736EXPORT_SYMBOL(sock_sendmsg);
1da177e4 737
8a3c245c
PT
738/**
739 * kernel_sendmsg - send a message through @sock (kernel-space)
740 * @sock: socket
741 * @msg: message header
742 * @vec: kernel vec
743 * @num: vec array length
744 * @size: total message data size
745 *
746 * Builds the message data with @vec and sends it through @sock.
747 * Returns the number of bytes sent, or an error code.
748 */
749
1da177e4
LT
750int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
751 struct kvec *vec, size_t num, size_t size)
752{
de4eda9d 753 iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
d8725c86 754 return sock_sendmsg(sock, msg);
1da177e4 755}
c6d409cf 756EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 757
8a3c245c
PT
/**
 *	kernel_sendmsg_locked - send a message through @sk (kernel-space)
 *	@sk: sock
 *	@msg: message header
 *	@vec: output s/g array
 *	@num: output s/g array length
 *	@size: total message data size
 *
 *	Builds the message data with @vec and sends it through the socket
 *	attached to @sk.
 *	Returns the number of bytes sent, or an error code.
 *	Caller must hold @sk.
 */
770
306b13eb
TH
771int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
772 struct kvec *vec, size_t num, size_t size)
773{
774 struct socket *sock = sk->sk_socket;
775
776 if (!sock->ops->sendmsg_locked)
db5980d8 777 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 778
de4eda9d 779 iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
306b13eb
TH
780
781 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
782}
783EXPORT_SYMBOL(kernel_sendmsg_locked);
784
8605330a
SHY
785static bool skb_is_err_queue(const struct sk_buff *skb)
786{
787 /* pkt_type of skbs enqueued on the error queue are set to
788 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
789 * in recvmsg, since skbs received on a local socket will never
790 * have a pkt_type of PACKET_OUTGOING.
791 */
792 return skb->pkt_type == PACKET_OUTGOING;
793}
794
b50a5c70
ML
795/* On transmit, software and hardware timestamps are returned independently.
796 * As the two skb clones share the hardware timestamp, which may be updated
797 * before the software timestamp is received, a hardware TX timestamp may be
798 * returned only if there is no software TX timestamp. Ignore false software
799 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 800 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
801 * hardware timestamp.
802 */
803static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
804{
805 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
806}
807
97dc7cd9
GE
808static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
809{
810 bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC;
811 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
812 struct net_device *orig_dev;
813 ktime_t hwtstamp;
814
815 rcu_read_lock();
816 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
817 if (orig_dev) {
818 *if_index = orig_dev->ifindex;
819 hwtstamp = netdev_get_tstamp(orig_dev, shhwtstamps, cycles);
820 } else {
821 hwtstamp = shhwtstamps->hwtstamp;
822 }
823 rcu_read_unlock();
824
825 return hwtstamp;
826}
827
828static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb,
829 int if_index)
aad9c8c4
ML
830{
831 struct scm_ts_pktinfo ts_pktinfo;
832 struct net_device *orig_dev;
833
834 if (!skb_mac_header_was_set(skb))
835 return;
836
837 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
838
97dc7cd9
GE
839 if (!if_index) {
840 rcu_read_lock();
841 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
842 if (orig_dev)
843 if_index = orig_dev->ifindex;
844 rcu_read_unlock();
845 }
846 ts_pktinfo.if_index = if_index;
aad9c8c4
ML
847
848 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
849 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
850 sizeof(ts_pktinfo), &ts_pktinfo);
851}
852
92f37fd2
ED
853/*
854 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
855 */
856void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
857 struct sk_buff *skb)
858{
20d49473 859 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 860 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
861 struct scm_timestamping_internal tss;
862
b50a5c70 863 int empty = 1, false_tstamp = 0;
20d49473
PO
864 struct skb_shared_hwtstamps *shhwtstamps =
865 skb_hwtstamps(skb);
97dc7cd9 866 int if_index;
007747a9 867 ktime_t hwtstamp;
20d49473
PO
868
869 /* Race occurred between timestamp enabling and packet
870 receiving. Fill in the current time for now. */
b50a5c70 871 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 872 __net_timestamp(skb);
b50a5c70
ML
873 false_tstamp = 1;
874 }
20d49473
PO
875
876 if (need_software_tstamp) {
877 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
878 if (new_tstamp) {
879 struct __kernel_sock_timeval tv;
880
881 skb_get_new_timestamp(skb, &tv);
882 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
883 sizeof(tv), &tv);
884 } else {
885 struct __kernel_old_timeval tv;
886
887 skb_get_timestamp(skb, &tv);
888 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
889 sizeof(tv), &tv);
890 }
20d49473 891 } else {
887feae3
DD
892 if (new_tstamp) {
893 struct __kernel_timespec ts;
894
895 skb_get_new_timestampns(skb, &ts);
896 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
897 sizeof(ts), &ts);
898 } else {
df1b4ba9 899 struct __kernel_old_timespec ts;
887feae3
DD
900
901 skb_get_timestampns(skb, &ts);
902 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
903 sizeof(ts), &ts);
904 }
20d49473
PO
905 }
906 }
907
f24b9be5 908 memset(&tss, 0, sizeof(tss));
c199105d 909 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 910 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 911 empty = 0;
4d276eb6 912 if (shhwtstamps &&
b9f40e21 913 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
d7c08826 914 !skb_is_swtx_tstamp(skb, false_tstamp)) {
97dc7cd9
GE
915 if_index = 0;
916 if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
917 hwtstamp = get_timestamp(sk, skb, &if_index);
007747a9
ML
918 else
919 hwtstamp = shhwtstamps->hwtstamp;
d7c08826 920
97dc7cd9
GE
921 if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
922 hwtstamp = ptp_convert_timestamp(&hwtstamp,
923 sk->sk_bind_phc);
924
007747a9 925 if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
d7c08826
YL
926 empty = 0;
927
928 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
929 !skb_is_err_queue(skb))
97dc7cd9 930 put_ts_pktinfo(msg, skb, if_index);
d7c08826 931 }
aad9c8c4 932 }
1c885808 933 if (!empty) {
9718475e
DD
934 if (sock_flag(sk, SOCK_TSTAMP_NEW))
935 put_cmsg_scm_timestamping64(msg, &tss);
936 else
937 put_cmsg_scm_timestamping(msg, &tss);
1c885808 938
8605330a 939 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 940 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
941 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
942 skb->len, skb->data);
943 }
92f37fd2 944}
7c81fd8b
ACM
945EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
946
6e3e939f
JB
947void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
948 struct sk_buff *skb)
949{
950 int ack;
951
952 if (!sock_flag(sk, SOCK_WIFI_STATUS))
953 return;
954 if (!skb->wifi_acked_valid)
955 return;
956
957 ack = skb->wifi_acked;
958
959 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
960}
961EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
962
11165f14 963static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
964 struct sk_buff *skb)
3b885787 965{
744d5a3e 966 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 967 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 968 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
969}
970
6fd1d51c
EM
971static void sock_recv_mark(struct msghdr *msg, struct sock *sk,
972 struct sk_buff *skb)
973{
974 if (sock_flag(sk, SOCK_RCVMARK) && skb)
975 put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32),
976 &skb->mark);
977}
978
/*
 * Append the generic receive-side control messages for @skb to @msg.
 * The helpers run in a fixed order: timestamp, drop count, mark.
 */
void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
		       struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
	sock_recv_mark(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_cmsgs);
3b885787 987
8c3c447b 988INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
989 size_t, int));
990INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
991 size_t, int));
1b784140 992static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 993 int flags)
1da177e4 994{
a648a592
PA
995 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
996 inet_recvmsg, sock, msg, msg_data_left(msg),
997 flags);
1da177e4
LT
998}
999
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Runs the LSM recvmsg hook first and, if it allows the operation,
 *	receives @msg from @sock.  Returns the hook's error if it is non-zero,
 *	otherwise whatever the protocol's receive routine returns.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 1016
c1249c0a 1017/**
8a3c245c
PT
1018 * kernel_recvmsg - Receive a message from a socket (kernel space)
1019 * @sock: The socket to receive the message from
1020 * @msg: Received message
1021 * @vec: Input s/g array for message data
1022 * @num: Size of input s/g array
1023 * @size: Number of bytes to read
1024 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 1025 *
8a3c245c
PT
1026 * On return the msg structure contains the scatter/gather array passed in the
1027 * vec argument. The array is modified so that it consists of the unfilled
1028 * portion of the original array.
c1249c0a 1029 *
8a3c245c 1030 * The returned value is the total number of bytes received, or an error.
c1249c0a 1031 */
8a3c245c 1032
89bddce5
SH
1033int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
1034 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4 1035{
1f466e1f 1036 msg->msg_control_is_user = false;
de4eda9d 1037 iov_iter_kvec(&msg->msg_iter, ITER_DEST, vec, num, size);
1f466e1f 1038 return sock_recvmsg(sock, msg, flags);
1da177e4 1039}
c6d409cf 1040EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 1041
ce1d4d3e
CH
1042static ssize_t sock_sendpage(struct file *file, struct page *page,
1043 int offset, size_t size, loff_t *ppos, int more)
1da177e4 1044{
1da177e4
LT
1045 struct socket *sock;
1046 int flags;
1047
ce1d4d3e
CH
1048 sock = file->private_data;
1049
35f9c09f
ED
1050 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
1051 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
1052 flags |= more;
ce1d4d3e 1053
e6949583 1054 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 1055}
1da177e4 1056
9c55e01c 1057static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 1058 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
1059 unsigned int flags)
1060{
1061 struct socket *sock = file->private_data;
1062
997b37da 1063 if (unlikely(!sock->ops->splice_read))
95506588 1064 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 1065
9c55e01c
JA
1066 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
1067}
1068
8ae5e030 1069static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 1070{
6d652330
AV
1071 struct file *file = iocb->ki_filp;
1072 struct socket *sock = file->private_data;
0345f931 1073 struct msghdr msg = {.msg_iter = *to,
1074 .msg_iocb = iocb};
8ae5e030 1075 ssize_t res;
ce1d4d3e 1076
ebfcd895 1077 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1078 msg.msg_flags = MSG_DONTWAIT;
1079
1080 if (iocb->ki_pos != 0)
1da177e4 1081 return -ESPIPE;
027445c3 1082
66ee59af 1083 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
1084 return 0;
1085
2da62906 1086 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
1087 *to = msg.msg_iter;
1088 return res;
1da177e4
LT
1089}
1090
8ae5e030 1091static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 1092{
6d652330
AV
1093 struct file *file = iocb->ki_filp;
1094 struct socket *sock = file->private_data;
0345f931 1095 struct msghdr msg = {.msg_iter = *from,
1096 .msg_iocb = iocb};
8ae5e030 1097 ssize_t res;
1da177e4 1098
8ae5e030 1099 if (iocb->ki_pos != 0)
ce1d4d3e 1100 return -ESPIPE;
027445c3 1101
ebfcd895 1102 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1103 msg.msg_flags = MSG_DONTWAIT;
1104
6d652330
AV
1105 if (sock->type == SOCK_SEQPACKET)
1106 msg.msg_flags |= MSG_EOR;
1107
d8725c86 1108 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1109 *from = msg.msg_iter;
1110 return res;
1da177e4
LT
1111}
1112
1da177e4
LT
1113/*
1114 * Atomic setting of ioctl hooks to avoid race
1115 * with module unload.
1116 */
1117
4a3e2f71 1118static DEFINE_MUTEX(br_ioctl_mutex);
ad2f99ae
AB
1119static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br,
1120 unsigned int cmd, struct ifreq *ifr,
1121 void __user *uarg);
1da177e4 1122
ad2f99ae
AB
1123void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
1124 unsigned int cmd, struct ifreq *ifr,
1125 void __user *uarg))
1da177e4 1126{
4a3e2f71 1127 mutex_lock(&br_ioctl_mutex);
1da177e4 1128 br_ioctl_hook = hook;
4a3e2f71 1129 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1130}
1131EXPORT_SYMBOL(brioctl_set);
1132
ad2f99ae
AB
1133int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
1134 struct ifreq *ifr, void __user *uarg)
1135{
1136 int err = -ENOPKG;
1137
1138 if (!br_ioctl_hook)
1139 request_module("bridge");
1140
1141 mutex_lock(&br_ioctl_mutex);
1142 if (br_ioctl_hook)
1143 err = br_ioctl_hook(net, br, cmd, ifr, uarg);
1144 mutex_unlock(&br_ioctl_mutex);
1145
1146 return err;
1147}
1148
4a3e2f71 1149static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1150static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1151
881d966b 1152void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1153{
4a3e2f71 1154 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1155 vlan_ioctl_hook = hook;
4a3e2f71 1156 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1157}
1158EXPORT_SYMBOL(vlan_ioctl_set);
1159
6b96018b 1160static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1161 unsigned int cmd, unsigned long arg)
6b96018b 1162{
876f0bf9
AB
1163 struct ifreq ifr;
1164 bool need_copyout;
6b96018b
AB
1165 int err;
1166 void __user *argp = (void __user *)arg;
a554bf96 1167 void __user *data;
6b96018b
AB
1168
1169 err = sock->ops->ioctl(sock, cmd, arg);
1170
1171 /*
1172 * If this ioctl is unknown try to hand it down
1173 * to the NIC driver.
1174 */
36fd633e
AV
1175 if (err != -ENOIOCTLCMD)
1176 return err;
6b96018b 1177
29ce8f97
JK
1178 if (!is_socket_ioctl_cmd(cmd))
1179 return -ENOTTY;
1180
a554bf96 1181 if (get_user_ifreq(&ifr, &data, argp))
876f0bf9 1182 return -EFAULT;
a554bf96 1183 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
876f0bf9 1184 if (!err && need_copyout)
a554bf96 1185 if (put_user_ifreq(&ifr, argp))
44c02a2c 1186 return -EFAULT;
876f0bf9 1187
6b96018b
AB
1188 return err;
1189}
1190
1da177e4
LT
1191/*
1192 * With an ioctl, arg may well be a user mode pointer, but we don't know
1193 * what to do with it - that's up to the protocol still.
1194 */
1195
1196static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1197{
1198 struct socket *sock;
881d966b 1199 struct sock *sk;
1da177e4
LT
1200 void __user *argp = (void __user *)arg;
1201 int pid, err;
881d966b 1202 struct net *net;
1da177e4 1203
b69aee04 1204 sock = file->private_data;
881d966b 1205 sk = sock->sk;
3b1e0a65 1206 net = sock_net(sk);
44c02a2c
AV
1207 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1208 struct ifreq ifr;
a554bf96 1209 void __user *data;
44c02a2c 1210 bool need_copyout;
a554bf96 1211 if (get_user_ifreq(&ifr, &data, argp))
44c02a2c 1212 return -EFAULT;
a554bf96 1213 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
44c02a2c 1214 if (!err && need_copyout)
a554bf96 1215 if (put_user_ifreq(&ifr, argp))
44c02a2c 1216 return -EFAULT;
1da177e4 1217 } else
3d23e349 1218#ifdef CONFIG_WEXT_CORE
1da177e4 1219 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1220 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1221 } else
3d23e349 1222#endif
89bddce5 1223 switch (cmd) {
1da177e4
LT
1224 case FIOSETOWN:
1225 case SIOCSPGRP:
1226 err = -EFAULT;
1227 if (get_user(pid, (int __user *)argp))
1228 break;
393cc3f5 1229 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1230 break;
1231 case FIOGETOWN:
1232 case SIOCGPGRP:
609d7fa9 1233 err = put_user(f_getown(sock->file),
89bddce5 1234 (int __user *)argp);
1da177e4
LT
1235 break;
1236 case SIOCGIFBR:
1237 case SIOCSIFBR:
1238 case SIOCBRADDBR:
1239 case SIOCBRDELBR:
ad2f99ae 1240 err = br_ioctl_call(net, NULL, cmd, NULL, argp);
1da177e4
LT
1241 break;
1242 case SIOCGIFVLAN:
1243 case SIOCSIFVLAN:
1244 err = -ENOPKG;
1245 if (!vlan_ioctl_hook)
1246 request_module("8021q");
1247
4a3e2f71 1248 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1249 if (vlan_ioctl_hook)
881d966b 1250 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1251 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1252 break;
c62cce2c
AV
1253 case SIOCGSKNS:
1254 err = -EPERM;
1255 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1256 break;
1257
1258 err = open_related_ns(&net->ns, get_net_ns);
1259 break;
0768e170
AB
1260 case SIOCGSTAMP_OLD:
1261 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1262 if (!sock->ops->gettstamp) {
1263 err = -ENOIOCTLCMD;
1264 break;
1265 }
1266 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1267 cmd == SIOCGSTAMP_OLD,
1268 !IS_ENABLED(CONFIG_64BIT));
60747828 1269 break;
0768e170
AB
1270 case SIOCGSTAMP_NEW:
1271 case SIOCGSTAMPNS_NEW:
1272 if (!sock->ops->gettstamp) {
1273 err = -ENOIOCTLCMD;
1274 break;
1275 }
1276 err = sock->ops->gettstamp(sock, argp,
1277 cmd == SIOCGSTAMP_NEW,
1278 false);
c7cbdbf2 1279 break;
876f0bf9
AB
1280
1281 case SIOCGIFCONF:
1282 err = dev_ifconf(net, argp);
1283 break;
1284
1da177e4 1285 default:
63ff03ab 1286 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1287 break;
89bddce5 1288 }
1da177e4
LT
1289 return err;
1290}
1291
8a3c245c
PT
1292/**
1293 * sock_create_lite - creates a socket
1294 * @family: protocol family (AF_INET, ...)
1295 * @type: communication type (SOCK_STREAM, ...)
1296 * @protocol: protocol (0, ...)
1297 * @res: new socket
1298 *
1299 * Creates a new socket and assigns it to @res, passing through LSM.
1300 * The new socket initialization is not complete, see kernel_accept().
1301 * Returns 0 or an error. On failure @res is set to %NULL.
1302 * This function internally uses GFP_KERNEL.
1303 */
1304
1da177e4
LT
1305int sock_create_lite(int family, int type, int protocol, struct socket **res)
1306{
1307 int err;
1308 struct socket *sock = NULL;
89bddce5 1309
1da177e4
LT
1310 err = security_socket_create(family, type, protocol, 1);
1311 if (err)
1312 goto out;
1313
1314 sock = sock_alloc();
1315 if (!sock) {
1316 err = -ENOMEM;
1317 goto out;
1318 }
1319
1da177e4 1320 sock->type = type;
7420ed23
VY
1321 err = security_socket_post_create(sock, family, type, protocol, 1);
1322 if (err)
1323 goto out_release;
1324
1da177e4
LT
1325out:
1326 *res = sock;
1327 return err;
7420ed23
VY
1328out_release:
1329 sock_release(sock);
1330 sock = NULL;
1331 goto out;
1da177e4 1332}
c6d409cf 1333EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1334
1335/* No kernel lock held - perfect */
ade994f4 1336static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1337{
3cafb376 1338 struct socket *sock = file->private_data;
a331de3b 1339 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1340
e88958e6
CH
1341 if (!sock->ops->poll)
1342 return 0;
f641f13b 1343
a331de3b
CH
1344 if (sk_can_busy_loop(sock->sk)) {
1345 /* poll once if requested by the syscall */
1346 if (events & POLL_BUSY_LOOP)
1347 sk_busy_loop(sock->sk, 1);
1348
1349 /* if this socket can poll_ll, tell the system call */
1350 flag = POLL_BUSY_LOOP;
1351 }
1352
1353 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1354}
1355
89bddce5 1356static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1357{
b69aee04 1358 struct socket *sock = file->private_data;
1da177e4
LT
1359
1360 return sock->ops->mmap(file, sock, vma);
1361}
1362
/*
 * ->release() for socket files: release the socket embedded in @inode.
 * @filp is unused and the result is always 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1368
1369/*
1370 * Update the socket async list
1371 *
1372 * Fasync_list locking strategy.
1373 *
1374 * 1. fasync_list is modified only under process context socket lock
1375 * i.e. under semaphore.
1376 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1377 * or under socket lock
1da177e4
LT
1378 */
1379
1380static int sock_fasync(int fd, struct file *filp, int on)
1381{
989a2979
ED
1382 struct socket *sock = filp->private_data;
1383 struct sock *sk = sock->sk;
333f7909 1384 struct socket_wq *wq = &sock->wq;
1da177e4 1385
989a2979 1386 if (sk == NULL)
1da177e4 1387 return -EINVAL;
1da177e4
LT
1388
1389 lock_sock(sk);
eaefd110 1390 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1391
eaefd110 1392 if (!wq->fasync_list)
989a2979
ED
1393 sock_reset_flag(sk, SOCK_FASYNC);
1394 else
bcdce719 1395 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1396
989a2979 1397 release_sock(sk);
1da177e4
LT
1398 return 0;
1399}
1400
ceb5d58b 1401/* This function may be called only under rcu_lock */
1da177e4 1402
ceb5d58b 1403int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1404{
ceb5d58b 1405 if (!wq || !wq->fasync_list)
1da177e4 1406 return -1;
ceb5d58b 1407
89bddce5 1408 switch (how) {
8d8ad9d7 1409 case SOCK_WAKE_WAITD:
ceb5d58b 1410 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1411 break;
1412 goto call_kill;
8d8ad9d7 1413 case SOCK_WAKE_SPACE:
ceb5d58b 1414 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1415 break;
7c7ab580 1416 fallthrough;
8d8ad9d7 1417 case SOCK_WAKE_IO:
89bddce5 1418call_kill:
43815482 1419 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1420 break;
8d8ad9d7 1421 case SOCK_WAKE_URG:
43815482 1422 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1423 }
ceb5d58b 1424
1da177e4
LT
1425 return 0;
1426}
c6d409cf 1427EXPORT_SYMBOL(sock_wake_async);
1da177e4 1428
8a3c245c
PT
1429/**
1430 * __sock_create - creates a socket
1431 * @net: net namespace
1432 * @family: protocol family (AF_INET, ...)
1433 * @type: communication type (SOCK_STREAM, ...)
1434 * @protocol: protocol (0, ...)
1435 * @res: new socket
1436 * @kern: boolean for kernel space sockets
1437 *
1438 * Creates a new socket and assigns it to @res, passing through LSM.
1439 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1440 * be set to true if the socket resides in kernel space.
1441 * This function internally uses GFP_KERNEL.
1442 */
1443
721db93a 1444int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1445 struct socket **res, int kern)
1da177e4
LT
1446{
1447 int err;
1448 struct socket *sock;
55737fda 1449 const struct net_proto_family *pf;
1da177e4
LT
1450
1451 /*
89bddce5 1452 * Check protocol is in range
1da177e4
LT
1453 */
1454 if (family < 0 || family >= NPROTO)
1455 return -EAFNOSUPPORT;
1456 if (type < 0 || type >= SOCK_MAX)
1457 return -EINVAL;
1458
1459 /* Compatibility.
1460
1461 This uglymoron is moved from INET layer to here to avoid
1462 deadlock in module load.
1463 */
1464 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1465 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1466 current->comm);
1da177e4
LT
1467 family = PF_PACKET;
1468 }
1469
1470 err = security_socket_create(family, type, protocol, kern);
1471 if (err)
1472 return err;
89bddce5 1473
55737fda
SH
1474 /*
1475 * Allocate the socket and allow the family to set things up. if
1476 * the protocol is 0, the family is instructed to select an appropriate
1477 * default.
1478 */
1479 sock = sock_alloc();
1480 if (!sock) {
e87cc472 1481 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1482 return -ENFILE; /* Not exactly a match, but its the
1483 closest posix thing */
1484 }
1485
1486 sock->type = type;
1487
95a5afca 1488#ifdef CONFIG_MODULES
89bddce5
SH
1489 /* Attempt to load a protocol module if the find failed.
1490 *
1491 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1492 * requested real, full-featured networking support upon configuration.
1493 * Otherwise module support will break!
1494 */
190683a9 1495 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1496 request_module("net-pf-%d", family);
1da177e4
LT
1497#endif
1498
55737fda
SH
1499 rcu_read_lock();
1500 pf = rcu_dereference(net_families[family]);
1501 err = -EAFNOSUPPORT;
1502 if (!pf)
1503 goto out_release;
1da177e4
LT
1504
1505 /*
1506 * We will call the ->create function, that possibly is in a loadable
1507 * module, so we have to bump that loadable module refcnt first.
1508 */
55737fda 1509 if (!try_module_get(pf->owner))
1da177e4
LT
1510 goto out_release;
1511
55737fda
SH
1512 /* Now protected by module ref count */
1513 rcu_read_unlock();
1514
3f378b68 1515 err = pf->create(net, sock, protocol, kern);
55737fda 1516 if (err < 0)
1da177e4 1517 goto out_module_put;
a79af59e 1518
1da177e4
LT
1519 /*
1520 * Now to bump the refcnt of the [loadable] module that owns this
1521 * socket at sock_release time we decrement its refcnt.
1522 */
55737fda
SH
1523 if (!try_module_get(sock->ops->owner))
1524 goto out_module_busy;
1525
1da177e4
LT
1526 /*
1527 * Now that we're done with the ->create function, the [loadable]
1528 * module can have its refcnt decremented
1529 */
55737fda 1530 module_put(pf->owner);
7420ed23
VY
1531 err = security_socket_post_create(sock, family, type, protocol, kern);
1532 if (err)
3b185525 1533 goto out_sock_release;
55737fda 1534 *res = sock;
1da177e4 1535
55737fda
SH
1536 return 0;
1537
1538out_module_busy:
1539 err = -EAFNOSUPPORT;
1da177e4 1540out_module_put:
55737fda
SH
1541 sock->ops = NULL;
1542 module_put(pf->owner);
1543out_sock_release:
1da177e4 1544 sock_release(sock);
55737fda
SH
1545 return err;
1546
1547out_release:
1548 rcu_read_unlock();
1549 goto out_sock_release;
1da177e4 1550}
721db93a 1551EXPORT_SYMBOL(__sock_create);
1da177e4 1552
8a3c245c
PT
1553/**
1554 * sock_create - creates a socket
1555 * @family: protocol family (AF_INET, ...)
1556 * @type: communication type (SOCK_STREAM, ...)
1557 * @protocol: protocol (0, ...)
1558 * @res: new socket
1559 *
1560 * A wrapper around __sock_create().
1561 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1562 */
1563
1da177e4
LT
1564int sock_create(int family, int type, int protocol, struct socket **res)
1565{
1b8d7ae4 1566 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1567}
c6d409cf 1568EXPORT_SYMBOL(sock_create);
1da177e4 1569
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes kern = 1.
 *	Returns 0 or an error.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1587
da214a47 1588static struct socket *__sys_socket_create(int family, int type, int protocol)
1da177e4 1589{
1da177e4 1590 struct socket *sock;
da214a47 1591 int retval;
a677a039 1592
e38b36f3
UD
1593 /* Check the SOCK_* constants for consistency. */
1594 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1595 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1596 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1597 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1598
da214a47
JA
1599 if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1600 return ERR_PTR(-EINVAL);
a677a039 1601 type &= SOCK_TYPE_MASK;
1da177e4 1602
da214a47
JA
1603 retval = sock_create(family, type, protocol, &sock);
1604 if (retval < 0)
1605 return ERR_PTR(retval);
1606
1607 return sock;
1608}
1609
1610struct file *__sys_socket_file(int family, int type, int protocol)
1611{
1612 struct socket *sock;
1613 struct file *file;
1614 int flags;
1615
1616 sock = __sys_socket_create(family, type, protocol);
1617 if (IS_ERR(sock))
1618 return ERR_CAST(sock);
1619
1620 flags = type & ~SOCK_TYPE_MASK;
aaca0bdc
UD
1621 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1622 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1623
da214a47
JA
1624 file = sock_alloc_file(sock, flags, NULL);
1625 if (IS_ERR(file))
1626 sock_release(sock);
1627
1628 return file;
1629}
1630
1631int __sys_socket(int family, int type, int protocol)
1632{
1633 struct socket *sock;
1634 int flags;
1635
1636 sock = __sys_socket_create(family, type, protocol);
1637 if (IS_ERR(sock))
1638 return PTR_ERR(sock);
1639
1640 flags = type & ~SOCK_TYPE_MASK;
1641 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1642 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1da177e4 1643
8e1611e2 1644 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1645}
1646
9d6a15c3
DB
1647SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1648{
1649 return __sys_socket(family, type, protocol);
1650}
1651
1da177e4
LT
1652/*
1653 * Create a pair of connected sockets.
1654 */
1655
6debc8d8 1656int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1657{
1658 struct socket *sock1, *sock2;
1659 int fd1, fd2, err;
db349509 1660 struct file *newfile1, *newfile2;
a677a039
UD
1661 int flags;
1662
1663 flags = type & ~SOCK_TYPE_MASK;
77d27200 1664 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1665 return -EINVAL;
1666 type &= SOCK_TYPE_MASK;
1da177e4 1667
aaca0bdc
UD
1668 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1669 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1670
016a266b
AV
1671 /*
1672 * reserve descriptors and make sure we won't fail
1673 * to return them to userland.
1674 */
1675 fd1 = get_unused_fd_flags(flags);
1676 if (unlikely(fd1 < 0))
1677 return fd1;
1678
1679 fd2 = get_unused_fd_flags(flags);
1680 if (unlikely(fd2 < 0)) {
1681 put_unused_fd(fd1);
1682 return fd2;
1683 }
1684
1685 err = put_user(fd1, &usockvec[0]);
1686 if (err)
1687 goto out;
1688
1689 err = put_user(fd2, &usockvec[1]);
1690 if (err)
1691 goto out;
1692
1da177e4
LT
1693 /*
1694 * Obtain the first socket and check if the underlying protocol
1695 * supports the socketpair call.
1696 */
1697
1698 err = sock_create(family, type, protocol, &sock1);
016a266b 1699 if (unlikely(err < 0))
1da177e4
LT
1700 goto out;
1701
1702 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1703 if (unlikely(err < 0)) {
1704 sock_release(sock1);
1705 goto out;
bf3c23d1 1706 }
d73aa286 1707
d47cd945
DH
1708 err = security_socket_socketpair(sock1, sock2);
1709 if (unlikely(err)) {
1710 sock_release(sock2);
1711 sock_release(sock1);
1712 goto out;
1713 }
1714
016a266b
AV
1715 err = sock1->ops->socketpair(sock1, sock2);
1716 if (unlikely(err < 0)) {
1717 sock_release(sock2);
1718 sock_release(sock1);
1719 goto out;
28407630
AV
1720 }
1721
aab174f0 1722 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1723 if (IS_ERR(newfile1)) {
28407630 1724 err = PTR_ERR(newfile1);
016a266b
AV
1725 sock_release(sock2);
1726 goto out;
28407630
AV
1727 }
1728
aab174f0 1729 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1730 if (IS_ERR(newfile2)) {
1731 err = PTR_ERR(newfile2);
016a266b
AV
1732 fput(newfile1);
1733 goto out;
db349509
AV
1734 }
1735
157cf649 1736 audit_fd_pair(fd1, fd2);
d73aa286 1737
db349509
AV
1738 fd_install(fd1, newfile1);
1739 fd_install(fd2, newfile2);
d73aa286 1740 return 0;
1da177e4 1741
016a266b 1742out:
d73aa286 1743 put_unused_fd(fd2);
d73aa286 1744 put_unused_fd(fd1);
1da177e4
LT
1745 return err;
1746}
1747
6debc8d8
DB
1748SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1749 int __user *, usockvec)
1750{
1751 return __sys_socketpair(family, type, protocol, usockvec);
1752}
1753
1da177e4
LT
1754/*
1755 * Bind a name to a socket. Nothing much to do here since it's
1756 * the protocol's responsibility to handle the local address.
1757 *
1758 * We move the socket address to kernel space before we call
1759 * the protocol layer (having also checked the address is ok).
1760 */
1761
a87d35d8 1762int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1763{
1764 struct socket *sock;
230b1839 1765 struct sockaddr_storage address;
6cb153ca 1766 int err, fput_needed;
1da177e4 1767
89bddce5 1768 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1769 if (sock) {
43db362d 1770 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1771 if (!err) {
89bddce5 1772 err = security_socket_bind(sock,
230b1839 1773 (struct sockaddr *)&address,
89bddce5 1774 addrlen);
6cb153ca
BL
1775 if (!err)
1776 err = sock->ops->bind(sock,
89bddce5 1777 (struct sockaddr *)
230b1839 1778 &address, addrlen);
1da177e4 1779 }
6cb153ca 1780 fput_light(sock->file, fput_needed);
89bddce5 1781 }
1da177e4
LT
1782 return err;
1783}
1784
a87d35d8
DB
1785SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1786{
1787 return __sys_bind(fd, umyaddr, addrlen);
1788}
1789
1da177e4
LT
1790/*
1791 * Perform a listen. Basically, we allow the protocol to do anything
1792 * necessary for a listen, and if that works, we mark the socket as
1793 * ready for listening.
1794 */
1795
25e290ee 1796int __sys_listen(int fd, int backlog)
1da177e4
LT
1797{
1798 struct socket *sock;
6cb153ca 1799 int err, fput_needed;
b8e1f9b5 1800 int somaxconn;
89bddce5
SH
1801
1802 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1803 if (sock) {
3c9ba81d 1804 somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
95c96174 1805 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1806 backlog = somaxconn;
1da177e4
LT
1807
1808 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1809 if (!err)
1810 err = sock->ops->listen(sock, backlog);
1da177e4 1811
6cb153ca 1812 fput_light(sock->file, fput_needed);
1da177e4
LT
1813 }
1814 return err;
1815}
1816
25e290ee
DB
1817SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1818{
1819 return __sys_listen(fd, backlog);
1820}
1821
d32f89da 1822struct file *do_accept(struct file *file, unsigned file_flags,
de2ea4b6 1823 struct sockaddr __user *upeer_sockaddr,
d32f89da 1824 int __user *upeer_addrlen, int flags)
1da177e4
LT
1825{
1826 struct socket *sock, *newsock;
39d8c1b6 1827 struct file *newfile;
d32f89da 1828 int err, len;
230b1839 1829 struct sockaddr_storage address;
1da177e4 1830
dba4a925 1831 sock = sock_from_file(file);
d32f89da
PB
1832 if (!sock)
1833 return ERR_PTR(-ENOTSOCK);
1da177e4 1834
c6d409cf
ED
1835 newsock = sock_alloc();
1836 if (!newsock)
d32f89da 1837 return ERR_PTR(-ENFILE);
1da177e4
LT
1838
1839 newsock->type = sock->type;
1840 newsock->ops = sock->ops;
1841
1da177e4
LT
1842 /*
1843 * We don't need try_module_get here, as the listening socket (sock)
1844 * has the protocol module (sock->ops->owner) held.
1845 */
1846 __module_get(newsock->ops->owner);
1847
aab174f0 1848 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
d32f89da
PB
1849 if (IS_ERR(newfile))
1850 return newfile;
39d8c1b6 1851
a79af59e
FF
1852 err = security_socket_accept(sock, newsock);
1853 if (err)
39d8c1b6 1854 goto out_fd;
a79af59e 1855
de2ea4b6
JA
1856 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1857 false);
1da177e4 1858 if (err < 0)
39d8c1b6 1859 goto out_fd;
1da177e4
LT
1860
1861 if (upeer_sockaddr) {
9b2c45d4
DV
1862 len = newsock->ops->getname(newsock,
1863 (struct sockaddr *)&address, 2);
1864 if (len < 0) {
1da177e4 1865 err = -ECONNABORTED;
39d8c1b6 1866 goto out_fd;
1da177e4 1867 }
43db362d 1868 err = move_addr_to_user(&address,
230b1839 1869 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1870 if (err < 0)
39d8c1b6 1871 goto out_fd;
1da177e4
LT
1872 }
1873
1874 /* File flags are not inherited via accept() unlike another OSes. */
d32f89da 1875 return newfile;
39d8c1b6 1876out_fd:
9606a216 1877 fput(newfile);
d32f89da
PB
1878 return ERR_PTR(err);
1879}
1880
c0424532
YD
1881static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr,
1882 int __user *upeer_addrlen, int flags)
d32f89da
PB
1883{
1884 struct file *newfile;
1885 int newfd;
1886
1887 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1888 return -EINVAL;
1889
1890 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1891 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
de2ea4b6 1892
c0424532 1893 newfd = get_unused_fd_flags(flags);
d32f89da
PB
1894 if (unlikely(newfd < 0))
1895 return newfd;
1896
c0424532 1897 newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen,
d32f89da
PB
1898 flags);
1899 if (IS_ERR(newfile)) {
1900 put_unused_fd(newfd);
1901 return PTR_ERR(newfile);
1902 }
1903 fd_install(newfd, newfile);
1904 return newfd;
de2ea4b6
JA
1905}
1906
1907/*
1908 * For accept, we attempt to create a new socket, set up the link
1909 * with the client, wake up the client, then return the new
1910 * connected fd. We collect the address of the connector in kernel
1911 * space and move it to user at the very end. This is unclean because
1912 * we open the socket then return an error.
1913 *
1914 * 1003.1g adds the ability to recvmsg() to query connection pending
1915 * status to recvmsg. We need to add that support in a way thats
1916 * clean when we restructure accept also.
1917 */
1918
1919int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1920 int __user *upeer_addrlen, int flags)
1921{
1922 int ret = -EBADF;
1923 struct fd f;
1924
1925 f = fdget(fd);
1926 if (f.file) {
c0424532
YD
1927 ret = __sys_accept4_file(f.file, upeer_sockaddr,
1928 upeer_addrlen, flags);
6b07edeb 1929 fdput(f);
de2ea4b6
JA
1930 }
1931
1932 return ret;
1da177e4
LT
1933}
1934
4541e805
DB
1935SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1936 int __user *, upeer_addrlen, int, flags)
1937{
1938 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1939}
1940
20f37034
HC
1941SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1942 int __user *, upeer_addrlen)
aaca0bdc 1943{
4541e805 1944 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1945}
1946
1da177e4
LT
1947/*
1948 * Attempt to connect to a socket with the server address. The address
1949 * is in user space so we verify it is OK and move it to kernel space.
1950 *
1951 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1952 * break bindings
1953 *
1954 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1955 * other SEQPACKET protocols that take time to connect() as it doesn't
1956 * include the -EINPROGRESS status for such sockets.
1957 */
1958
f499a021 1959int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 1960 int addrlen, int file_flags)
1da177e4
LT
1961{
1962 struct socket *sock;
bd3ded31 1963 int err;
1da177e4 1964
dba4a925
FR
1965 sock = sock_from_file(file);
1966 if (!sock) {
1967 err = -ENOTSOCK;
1da177e4 1968 goto out;
dba4a925 1969 }
1da177e4 1970
89bddce5 1971 err =
f499a021 1972 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 1973 if (err)
bd3ded31 1974 goto out;
1da177e4 1975
f499a021 1976 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
bd3ded31 1977 sock->file->f_flags | file_flags);
1da177e4
LT
1978out:
1979 return err;
1980}
1981
bd3ded31
JA
1982int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1983{
1984 int ret = -EBADF;
1985 struct fd f;
1986
1987 f = fdget(fd);
1988 if (f.file) {
f499a021
JA
1989 struct sockaddr_storage address;
1990
1991 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1992 if (!ret)
1993 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 1994 fdput(f);
bd3ded31
JA
1995 }
1996
1997 return ret;
1998}
1999
1387c2c2
DB
2000SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
2001 int, addrlen)
2002{
2003 return __sys_connect(fd, uservaddr, addrlen);
2004}
2005
1da177e4
LT
2006/*
2007 * Get the local address ('name') of a socket object. Move the obtained
2008 * name to user space.
2009 */
2010
8882a107
DB
2011int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
2012 int __user *usockaddr_len)
1da177e4
LT
2013{
2014 struct socket *sock;
230b1839 2015 struct sockaddr_storage address;
9b2c45d4 2016 int err, fput_needed;
89bddce5 2017
6cb153ca 2018 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
2019 if (!sock)
2020 goto out;
2021
2022 err = security_socket_getsockname(sock);
2023 if (err)
2024 goto out_put;
2025
9b2c45d4
DV
2026 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
2027 if (err < 0)
1da177e4 2028 goto out_put;
e44ef1d4 2029 /* "err" is actually length in this case */
9b2c45d4 2030 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
2031
2032out_put:
6cb153ca 2033 fput_light(sock->file, fput_needed);
1da177e4
LT
2034out:
2035 return err;
2036}
2037
8882a107
DB
2038SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
2039 int __user *, usockaddr_len)
2040{
2041 return __sys_getsockname(fd, usockaddr, usockaddr_len);
2042}
2043
1da177e4
LT
2044/*
2045 * Get the remote address ('name') of a socket object. Move the obtained
2046 * name to user space.
2047 */
2048
b21c8f83
DB
2049int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
2050 int __user *usockaddr_len)
1da177e4
LT
2051{
2052 struct socket *sock;
230b1839 2053 struct sockaddr_storage address;
9b2c45d4 2054 int err, fput_needed;
1da177e4 2055
89bddce5
SH
2056 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2057 if (sock != NULL) {
1da177e4
LT
2058 err = security_socket_getpeername(sock);
2059 if (err) {
6cb153ca 2060 fput_light(sock->file, fput_needed);
1da177e4
LT
2061 return err;
2062 }
2063
9b2c45d4
DV
2064 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
2065 if (err >= 0)
2066 /* "err" is actually length in this case */
2067 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 2068 usockaddr_len);
6cb153ca 2069 fput_light(sock->file, fput_needed);
1da177e4
LT
2070 }
2071 return err;
2072}
2073
b21c8f83
DB
2074SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
2075 int __user *, usockaddr_len)
2076{
2077 return __sys_getpeername(fd, usockaddr, usockaddr_len);
2078}
2079
1da177e4
LT
2080/*
2081 * Send a datagram to a given address. We move the address into kernel
2082 * space and check the user space data area is readable before invoking
2083 * the protocol.
2084 */
211b634b
DB
2085int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
2086 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
2087{
2088 struct socket *sock;
230b1839 2089 struct sockaddr_storage address;
1da177e4
LT
2090 int err;
2091 struct msghdr msg;
2092 struct iovec iov;
6cb153ca 2093 int fput_needed;
6cb153ca 2094
de4eda9d 2095 err = import_single_range(ITER_SOURCE, buff, len, &iov, &msg.msg_iter);
602bd0e9
AV
2096 if (unlikely(err))
2097 return err;
de0fa95c
PE
2098 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2099 if (!sock)
4387ff75 2100 goto out;
6cb153ca 2101
89bddce5 2102 msg.msg_name = NULL;
89bddce5
SH
2103 msg.msg_control = NULL;
2104 msg.msg_controllen = 0;
2105 msg.msg_namelen = 0;
7c701d92 2106 msg.msg_ubuf = NULL;
6cb153ca 2107 if (addr) {
43db362d 2108 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
2109 if (err < 0)
2110 goto out_put;
230b1839 2111 msg.msg_name = (struct sockaddr *)&address;
89bddce5 2112 msg.msg_namelen = addr_len;
1da177e4
LT
2113 }
2114 if (sock->file->f_flags & O_NONBLOCK)
2115 flags |= MSG_DONTWAIT;
2116 msg.msg_flags = flags;
d8725c86 2117 err = sock_sendmsg(sock, &msg);
1da177e4 2118
89bddce5 2119out_put:
de0fa95c 2120 fput_light(sock->file, fput_needed);
4387ff75 2121out:
1da177e4
LT
2122 return err;
2123}
2124
211b634b
DB
2125SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2126 unsigned int, flags, struct sockaddr __user *, addr,
2127 int, addr_len)
2128{
2129 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2130}
2131
1da177e4 2132/*
89bddce5 2133 * Send a datagram down a socket.
1da177e4
LT
2134 */
2135
3e0fa65f 2136SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2137 unsigned int, flags)
1da177e4 2138{
211b634b 2139 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2140}
2141
2142/*
89bddce5 2143 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2144 * sender. We verify the buffers are writable and if needed move the
2145 * sender address from kernel to user space.
2146 */
7a09e1eb
DB
2147int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2148 struct sockaddr __user *addr, int __user *addr_len)
1da177e4 2149{
1228b34c
ED
2150 struct sockaddr_storage address;
2151 struct msghdr msg = {
2152 /* Save some cycles and don't copy the address if not needed */
2153 .msg_name = addr ? (struct sockaddr *)&address : NULL,
2154 };
1da177e4
LT
2155 struct socket *sock;
2156 struct iovec iov;
89bddce5 2157 int err, err2;
6cb153ca
BL
2158 int fput_needed;
2159
de4eda9d 2160 err = import_single_range(ITER_DEST, ubuf, size, &iov, &msg.msg_iter);
602bd0e9
AV
2161 if (unlikely(err))
2162 return err;
de0fa95c 2163 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2164 if (!sock)
de0fa95c 2165 goto out;
1da177e4 2166
1da177e4
LT
2167 if (sock->file->f_flags & O_NONBLOCK)
2168 flags |= MSG_DONTWAIT;
2da62906 2169 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2170
89bddce5 2171 if (err >= 0 && addr != NULL) {
43db362d 2172 err2 = move_addr_to_user(&address,
230b1839 2173 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2174 if (err2 < 0)
2175 err = err2;
1da177e4 2176 }
de0fa95c
PE
2177
2178 fput_light(sock->file, fput_needed);
4387ff75 2179out:
1da177e4
LT
2180 return err;
2181}
2182
7a09e1eb
DB
2183SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2184 unsigned int, flags, struct sockaddr __user *, addr,
2185 int __user *, addr_len)
2186{
2187 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2188}
2189
1da177e4 2190/*
89bddce5 2191 * Receive a datagram from a socket.
1da177e4
LT
2192 */
2193
b7c0ddf5
JG
2194SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2195 unsigned int, flags)
1da177e4 2196{
7a09e1eb 2197 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2198}
2199
83f0c10b
FW
2200static bool sock_use_custom_sol_socket(const struct socket *sock)
2201{
a5ef058d 2202 return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
83f0c10b
FW
2203}
2204
1da177e4
LT
2205/*
2206 * Set a socket option. Because we don't know the option lengths we have
2207 * to pass the user mode parameter for the protocols to sort out.
2208 */
a7b75c5a 2209int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
55db9c0e 2210 int optlen)
1da177e4 2211{
519a8a6c 2212 sockptr_t optval = USER_SOCKPTR(user_optval);
0d01da6a 2213 char *kernel_optval = NULL;
6cb153ca 2214 int err, fput_needed;
1da177e4
LT
2215 struct socket *sock;
2216
2217 if (optlen < 0)
2218 return -EINVAL;
89bddce5
SH
2219
2220 sock = sockfd_lookup_light(fd, &err, &fput_needed);
4a367299
CH
2221 if (!sock)
2222 return err;
1da177e4 2223
4a367299
CH
2224 err = security_socket_setsockopt(sock, level, optname);
2225 if (err)
2226 goto out_put;
0d01da6a 2227
55db9c0e
CH
2228 if (!in_compat_syscall())
2229 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
a7b75c5a 2230 user_optval, &optlen,
55db9c0e 2231 &kernel_optval);
4a367299
CH
2232 if (err < 0)
2233 goto out_put;
2234 if (err > 0) {
2235 err = 0;
2236 goto out_put;
2237 }
0d01da6a 2238
a7b75c5a
CH
2239 if (kernel_optval)
2240 optval = KERNEL_SOCKPTR(kernel_optval);
4a367299 2241 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2242 err = sock_setsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2243 else if (unlikely(!sock->ops->setsockopt))
2244 err = -EOPNOTSUPP;
4a367299
CH
2245 else
2246 err = sock->ops->setsockopt(sock, level, optname, optval,
89bddce5 2247 optlen);
a7b75c5a 2248 kfree(kernel_optval);
4a367299
CH
2249out_put:
2250 fput_light(sock->file, fput_needed);
1da177e4
LT
2251 return err;
2252}
2253
cc36dca0
DB
2254SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2255 char __user *, optval, int, optlen)
2256{
2257 return __sys_setsockopt(fd, level, optname, optval, optlen);
2258}
2259
9cacf81f
SF
2260INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2261 int optname));
2262
1da177e4
LT
2263/*
2264 * Get a socket option. Because we don't know the option lengths we have
2265 * to pass a user mode parameter for the protocols to sort out.
2266 */
55db9c0e
CH
2267int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2268 int __user *optlen)
1da177e4 2269{
6cb153ca 2270 int err, fput_needed;
1da177e4 2271 struct socket *sock;
0d01da6a 2272 int max_optlen;
1da177e4 2273
89bddce5 2274 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2275 if (!sock)
2276 return err;
2277
2278 err = security_socket_getsockopt(sock, level, optname);
2279 if (err)
2280 goto out_put;
1da177e4 2281
55db9c0e
CH
2282 if (!in_compat_syscall())
2283 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2284
d8a9b38f
CH
2285 if (level == SOL_SOCKET)
2286 err = sock_getsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2287 else if (unlikely(!sock->ops->getsockopt))
2288 err = -EOPNOTSUPP;
d8a9b38f
CH
2289 else
2290 err = sock->ops->getsockopt(sock, level, optname, optval,
89bddce5 2291 optlen);
0d01da6a 2292
55db9c0e
CH
2293 if (!in_compat_syscall())
2294 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2295 optval, optlen, max_optlen,
2296 err);
6cb153ca 2297out_put:
d8a9b38f 2298 fput_light(sock->file, fput_needed);
1da177e4
LT
2299 return err;
2300}
2301
13a2d70e
DB
2302SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2303 char __user *, optval, int __user *, optlen)
2304{
2305 return __sys_getsockopt(fd, level, optname, optval, optlen);
2306}
2307
1da177e4
LT
2308/*
2309 * Shutdown a socket.
2310 */
2311
b713c195
JA
2312int __sys_shutdown_sock(struct socket *sock, int how)
2313{
2314 int err;
2315
2316 err = security_socket_shutdown(sock, how);
2317 if (!err)
2318 err = sock->ops->shutdown(sock, how);
2319
2320 return err;
2321}
2322
005a1aea 2323int __sys_shutdown(int fd, int how)
1da177e4 2324{
6cb153ca 2325 int err, fput_needed;
1da177e4
LT
2326 struct socket *sock;
2327
89bddce5
SH
2328 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2329 if (sock != NULL) {
b713c195 2330 err = __sys_shutdown_sock(sock, how);
6cb153ca 2331 fput_light(sock->file, fput_needed);
1da177e4
LT
2332 }
2333 return err;
2334}
2335
005a1aea
DB
2336SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2337{
2338 return __sys_shutdown(fd, how);
2339}
2340
89bddce5 2341/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
2342 * fields which are the same type (int / unsigned) on our platforms.
2343 */
/*
 * These macros are not hygienic: the expansion reads a variable named
 * `flags` and a pointer named <msg>_compat from the calling scope, so
 * both must exist where the macro is used.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2347
c71d8ebe
TH
2348struct used_address {
2349 struct sockaddr_storage name;
2350 unsigned int name_len;
2351};
2352
7fa875b8
DY
2353int __copy_msghdr(struct msghdr *kmsg,
2354 struct user_msghdr *msg,
2355 struct sockaddr __user **save_addr)
1661bf36 2356{
08adb7da
AV
2357 ssize_t err;
2358
1f466e1f 2359 kmsg->msg_control_is_user = true;
1228b34c 2360 kmsg->msg_get_inq = 0;
7fa875b8
DY
2361 kmsg->msg_control_user = msg->msg_control;
2362 kmsg->msg_controllen = msg->msg_controllen;
2363 kmsg->msg_flags = msg->msg_flags;
ffb07550 2364
7fa875b8
DY
2365 kmsg->msg_namelen = msg->msg_namelen;
2366 if (!msg->msg_name)
6a2a2b3a
AS
2367 kmsg->msg_namelen = 0;
2368
dbb490b9
ML
2369 if (kmsg->msg_namelen < 0)
2370 return -EINVAL;
2371
1661bf36 2372 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2373 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2374
2375 if (save_addr)
7fa875b8 2376 *save_addr = msg->msg_name;
08adb7da 2377
7fa875b8 2378 if (msg->msg_name && kmsg->msg_namelen) {
08adb7da 2379 if (!save_addr) {
7fa875b8 2380 err = move_addr_to_kernel(msg->msg_name,
864d9664 2381 kmsg->msg_namelen,
08adb7da
AV
2382 kmsg->msg_name);
2383 if (err < 0)
2384 return err;
2385 }
2386 } else {
2387 kmsg->msg_name = NULL;
2388 kmsg->msg_namelen = 0;
2389 }
2390
7fa875b8 2391 if (msg->msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2392 return -EMSGSIZE;
2393
0345f931 2394 kmsg->msg_iocb = NULL;
7c701d92 2395 kmsg->msg_ubuf = NULL;
0a384abf
JA
2396 return 0;
2397}
2398
2399static int copy_msghdr_from_user(struct msghdr *kmsg,
2400 struct user_msghdr __user *umsg,
2401 struct sockaddr __user **save_addr,
2402 struct iovec **iov)
2403{
2404 struct user_msghdr msg;
2405 ssize_t err;
2406
7fa875b8
DY
2407 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
2408 return -EFAULT;
2409
2410 err = __copy_msghdr(kmsg, &msg, save_addr);
0a384abf
JA
2411 if (err)
2412 return err;
0345f931 2413
de4eda9d 2414 err = import_iovec(save_addr ? ITER_DEST : ITER_SOURCE,
ffb07550 2415 msg.msg_iov, msg.msg_iovlen,
da184284 2416 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2417 return err < 0 ? err : 0;
1661bf36
DC
2418}
2419
4257c8ca
JA
2420static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2421 unsigned int flags, struct used_address *used_address,
2422 unsigned int allowed_msghdr_flags)
1da177e4 2423{
b9d717a7 2424 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2425 __aligned(sizeof(__kernel_size_t));
89bddce5 2426 /* 20 is size of ipv6_pktinfo */
1da177e4 2427 unsigned char *ctl_buf = ctl;
d8725c86 2428 int ctl_len;
08adb7da 2429 ssize_t err;
89bddce5 2430
1da177e4
LT
2431 err = -ENOBUFS;
2432
228e548e 2433 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2434 goto out;
28a94d8f 2435 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2436 ctl_len = msg_sys->msg_controllen;
1da177e4 2437 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2438 err =
228e548e 2439 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2440 sizeof(ctl));
1da177e4 2441 if (err)
4257c8ca 2442 goto out;
228e548e
AB
2443 ctl_buf = msg_sys->msg_control;
2444 ctl_len = msg_sys->msg_controllen;
1da177e4 2445 } else if (ctl_len) {
ac4340fc
DM
2446 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2447 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2448 if (ctl_len > sizeof(ctl)) {
1da177e4 2449 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2450 if (ctl_buf == NULL)
4257c8ca 2451 goto out;
1da177e4
LT
2452 }
2453 err = -EFAULT;
1f466e1f 2454 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2455 goto out_freectl;
228e548e 2456 msg_sys->msg_control = ctl_buf;
1f466e1f 2457 msg_sys->msg_control_is_user = false;
1da177e4 2458 }
228e548e 2459 msg_sys->msg_flags = flags;
1da177e4
LT
2460
2461 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2462 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2463 /*
2464 * If this is sendmmsg() and current destination address is same as
2465 * previously succeeded address, omit asking LSM's decision.
2466 * used_address->name_len is initialized to UINT_MAX so that the first
2467 * destination address never matches.
2468 */
bc909d9d
MD
2469 if (used_address && msg_sys->msg_name &&
2470 used_address->name_len == msg_sys->msg_namelen &&
2471 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2472 used_address->name_len)) {
d8725c86 2473 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2474 goto out_freectl;
2475 }
d8725c86 2476 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2477 /*
2478 * If this is sendmmsg() and sending to current destination address was
2479 * successful, remember it.
2480 */
2481 if (used_address && err >= 0) {
2482 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2483 if (msg_sys->msg_name)
2484 memcpy(&used_address->name, msg_sys->msg_name,
2485 used_address->name_len);
c71d8ebe 2486 }
1da177e4
LT
2487
2488out_freectl:
89bddce5 2489 if (ctl_buf != ctl)
1da177e4 2490 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2491out:
2492 return err;
2493}
2494
03b1230c
JA
2495int sendmsg_copy_msghdr(struct msghdr *msg,
2496 struct user_msghdr __user *umsg, unsigned flags,
2497 struct iovec **iov)
4257c8ca
JA
2498{
2499 int err;
2500
2501 if (flags & MSG_CMSG_COMPAT) {
2502 struct compat_msghdr __user *msg_compat;
2503
2504 msg_compat = (struct compat_msghdr __user *) umsg;
2505 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2506 } else {
2507 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2508 }
2509 if (err < 0)
2510 return err;
2511
2512 return 0;
2513}
2514
2515static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2516 struct msghdr *msg_sys, unsigned int flags,
2517 struct used_address *used_address,
2518 unsigned int allowed_msghdr_flags)
2519{
2520 struct sockaddr_storage address;
2521 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2522 ssize_t err;
2523
2524 msg_sys->msg_name = &address;
2525
2526 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2527 if (err < 0)
2528 return err;
2529
2530 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2531 allowed_msghdr_flags);
da184284 2532 kfree(iov);
228e548e
AB
2533 return err;
2534}
2535
2536/*
2537 * BSD sendmsg interface
2538 */
03b1230c 2539long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2540 unsigned int flags)
2541{
03b1230c 2542 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2543}
228e548e 2544
e1834a32
DB
2545long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2546 bool forbid_cmsg_compat)
228e548e
AB
2547{
2548 int fput_needed, err;
2549 struct msghdr msg_sys;
1be374a0
AL
2550 struct socket *sock;
2551
e1834a32
DB
2552 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2553 return -EINVAL;
2554
1be374a0 2555 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2556 if (!sock)
2557 goto out;
2558
28a94d8f 2559 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2560
6cb153ca 2561 fput_light(sock->file, fput_needed);
89bddce5 2562out:
1da177e4
LT
2563 return err;
2564}
2565
666547ff 2566SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2567{
e1834a32 2568 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2569}
2570
228e548e
AB
2571/*
2572 * Linux sendmmsg interface
2573 */
2574
2575int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2576 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2577{
2578 int fput_needed, err, datagrams;
2579 struct socket *sock;
2580 struct mmsghdr __user *entry;
2581 struct compat_mmsghdr __user *compat_entry;
2582 struct msghdr msg_sys;
c71d8ebe 2583 struct used_address used_address;
f092276d 2584 unsigned int oflags = flags;
228e548e 2585
e1834a32
DB
2586 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2587 return -EINVAL;
2588
98382f41
AB
2589 if (vlen > UIO_MAXIOV)
2590 vlen = UIO_MAXIOV;
228e548e
AB
2591
2592 datagrams = 0;
2593
2594 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2595 if (!sock)
2596 return err;
2597
c71d8ebe 2598 used_address.name_len = UINT_MAX;
228e548e
AB
2599 entry = mmsg;
2600 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2601 err = 0;
f092276d 2602 flags |= MSG_BATCH;
228e548e
AB
2603
2604 while (datagrams < vlen) {
f092276d
TH
2605 if (datagrams == vlen - 1)
2606 flags = oflags;
2607
228e548e 2608 if (MSG_CMSG_COMPAT & flags) {
666547ff 2609 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2610 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2611 if (err < 0)
2612 break;
2613 err = __put_user(err, &compat_entry->msg_len);
2614 ++compat_entry;
2615 } else {
a7526eb5 2616 err = ___sys_sendmsg(sock,
666547ff 2617 (struct user_msghdr __user *)entry,
28a94d8f 2618 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2619 if (err < 0)
2620 break;
2621 err = put_user(err, &entry->msg_len);
2622 ++entry;
2623 }
2624
2625 if (err)
2626 break;
2627 ++datagrams;
3023898b
SHY
2628 if (msg_data_left(&msg_sys))
2629 break;
a78cb84c 2630 cond_resched();
228e548e
AB
2631 }
2632
228e548e
AB
2633 fput_light(sock->file, fput_needed);
2634
728ffb86
AB
2635 /* We only return an error if no datagrams were able to be sent */
2636 if (datagrams != 0)
228e548e
AB
2637 return datagrams;
2638
228e548e
AB
2639 return err;
2640}
2641
2642SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2643 unsigned int, vlen, unsigned int, flags)
2644{
e1834a32 2645 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2646}
2647
03b1230c
JA
2648int recvmsg_copy_msghdr(struct msghdr *msg,
2649 struct user_msghdr __user *umsg, unsigned flags,
2650 struct sockaddr __user **uaddr,
2651 struct iovec **iov)
1da177e4 2652{
08adb7da 2653 ssize_t err;
1da177e4 2654
4257c8ca
JA
2655 if (MSG_CMSG_COMPAT & flags) {
2656 struct compat_msghdr __user *msg_compat;
1da177e4 2657
4257c8ca
JA
2658 msg_compat = (struct compat_msghdr __user *) umsg;
2659 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2660 } else {
2661 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2662 }
1da177e4 2663 if (err < 0)
da184284 2664 return err;
1da177e4 2665
4257c8ca
JA
2666 return 0;
2667}
2668
2669static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2670 struct user_msghdr __user *msg,
2671 struct sockaddr __user *uaddr,
2672 unsigned int flags, int nosec)
2673{
2674 struct compat_msghdr __user *msg_compat =
2675 (struct compat_msghdr __user *) msg;
2676 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2677 struct sockaddr_storage addr;
2678 unsigned long cmsg_ptr;
2679 int len;
2680 ssize_t err;
2681
2682 msg_sys->msg_name = &addr;
a2e27255
ACM
2683 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2684 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2685
f3d33426
HFS
2686 /* We assume all kernel code knows the size of sockaddr_storage */
2687 msg_sys->msg_namelen = 0;
2688
1da177e4
LT
2689 if (sock->file->f_flags & O_NONBLOCK)
2690 flags |= MSG_DONTWAIT;
1af66221
ED
2691
2692 if (unlikely(nosec))
2693 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2694 else
2695 err = sock_recvmsg(sock, msg_sys, flags);
2696
1da177e4 2697 if (err < 0)
4257c8ca 2698 goto out;
1da177e4
LT
2699 len = err;
2700
2701 if (uaddr != NULL) {
43db362d 2702 err = move_addr_to_user(&addr,
a2e27255 2703 msg_sys->msg_namelen, uaddr,
89bddce5 2704 uaddr_len);
1da177e4 2705 if (err < 0)
4257c8ca 2706 goto out;
1da177e4 2707 }
a2e27255 2708 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2709 COMPAT_FLAGS(msg));
1da177e4 2710 if (err)
4257c8ca 2711 goto out;
1da177e4 2712 if (MSG_CMSG_COMPAT & flags)
a2e27255 2713 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2714 &msg_compat->msg_controllen);
2715 else
a2e27255 2716 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2717 &msg->msg_controllen);
2718 if (err)
4257c8ca 2719 goto out;
1da177e4 2720 err = len;
4257c8ca
JA
2721out:
2722 return err;
2723}
2724
2725static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2726 struct msghdr *msg_sys, unsigned int flags, int nosec)
2727{
2728 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2729 /* user mode address pointers */
2730 struct sockaddr __user *uaddr;
2731 ssize_t err;
2732
2733 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2734 if (err < 0)
2735 return err;
1da177e4 2736
4257c8ca 2737 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2738 kfree(iov);
a2e27255
ACM
2739 return err;
2740}
2741
2742/*
2743 * BSD recvmsg interface
2744 */
2745
03b1230c
JA
2746long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2747 struct user_msghdr __user *umsg,
2748 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2749{
03b1230c 2750 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2751}
2752
e1834a32
DB
2753long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2754 bool forbid_cmsg_compat)
a2e27255
ACM
2755{
2756 int fput_needed, err;
2757 struct msghdr msg_sys;
1be374a0
AL
2758 struct socket *sock;
2759
e1834a32
DB
2760 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2761 return -EINVAL;
2762
1be374a0 2763 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2764 if (!sock)
2765 goto out;
2766
a7526eb5 2767 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2768
6cb153ca 2769 fput_light(sock->file, fput_needed);
1da177e4
LT
2770out:
2771 return err;
2772}
2773
666547ff 2774SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2775 unsigned int, flags)
2776{
e1834a32 2777 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2778}
2779
a2e27255
ACM
2780/*
2781 * Linux recvmmsg interface
2782 */
2783
e11d4284
AB
2784static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2785 unsigned int vlen, unsigned int flags,
2786 struct timespec64 *timeout)
a2e27255
ACM
2787{
2788 int fput_needed, err, datagrams;
2789 struct socket *sock;
2790 struct mmsghdr __user *entry;
d7256d0e 2791 struct compat_mmsghdr __user *compat_entry;
a2e27255 2792 struct msghdr msg_sys;
766b9f92
DD
2793 struct timespec64 end_time;
2794 struct timespec64 timeout64;
a2e27255
ACM
2795
2796 if (timeout &&
2797 poll_select_set_timeout(&end_time, timeout->tv_sec,
2798 timeout->tv_nsec))
2799 return -EINVAL;
2800
2801 datagrams = 0;
2802
2803 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2804 if (!sock)
2805 return err;
2806
7797dc41
SHY
2807 if (likely(!(flags & MSG_ERRQUEUE))) {
2808 err = sock_error(sock->sk);
2809 if (err) {
2810 datagrams = err;
2811 goto out_put;
2812 }
e623a9e9 2813 }
a2e27255
ACM
2814
2815 entry = mmsg;
d7256d0e 2816 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2817
2818 while (datagrams < vlen) {
2819 /*
2820 * No need to ask LSM for more than the first datagram.
2821 */
d7256d0e 2822 if (MSG_CMSG_COMPAT & flags) {
666547ff 2823 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2824 &msg_sys, flags & ~MSG_WAITFORONE,
2825 datagrams);
d7256d0e
JMG
2826 if (err < 0)
2827 break;
2828 err = __put_user(err, &compat_entry->msg_len);
2829 ++compat_entry;
2830 } else {
a7526eb5 2831 err = ___sys_recvmsg(sock,
666547ff 2832 (struct user_msghdr __user *)entry,
a7526eb5
AL
2833 &msg_sys, flags & ~MSG_WAITFORONE,
2834 datagrams);
d7256d0e
JMG
2835 if (err < 0)
2836 break;
2837 err = put_user(err, &entry->msg_len);
2838 ++entry;
2839 }
2840
a2e27255
ACM
2841 if (err)
2842 break;
a2e27255
ACM
2843 ++datagrams;
2844
71c5c159
BB
2845 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2846 if (flags & MSG_WAITFORONE)
2847 flags |= MSG_DONTWAIT;
2848
a2e27255 2849 if (timeout) {
766b9f92 2850 ktime_get_ts64(&timeout64);
c2e6c856 2851 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2852 if (timeout->tv_sec < 0) {
2853 timeout->tv_sec = timeout->tv_nsec = 0;
2854 break;
2855 }
2856
2857 /* Timeout, return less than vlen datagrams */
2858 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2859 break;
2860 }
2861
2862 /* Out of band data, return right away */
2863 if (msg_sys.msg_flags & MSG_OOB)
2864 break;
a78cb84c 2865 cond_resched();
a2e27255
ACM
2866 }
2867
a2e27255 2868 if (err == 0)
34b88a68
ACM
2869 goto out_put;
2870
2871 if (datagrams == 0) {
2872 datagrams = err;
2873 goto out_put;
2874 }
a2e27255 2875
34b88a68
ACM
2876 /*
2877 * We may return less entries than requested (vlen) if the
2878 * sock is non block and there aren't enough datagrams...
2879 */
2880 if (err != -EAGAIN) {
a2e27255 2881 /*
34b88a68
ACM
2882 * ... or if recvmsg returns an error after we
2883 * received some datagrams, where we record the
2884 * error to return on the next call or if the
2885 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2886 */
34b88a68 2887 sock->sk->sk_err = -err;
a2e27255 2888 }
34b88a68
ACM
2889out_put:
2890 fput_light(sock->file, fput_needed);
a2e27255 2891
34b88a68 2892 return datagrams;
a2e27255
ACM
2893}
2894
e11d4284
AB
2895int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2896 unsigned int vlen, unsigned int flags,
2897 struct __kernel_timespec __user *timeout,
2898 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2899{
2900 int datagrams;
c2e6c856 2901 struct timespec64 timeout_sys;
a2e27255 2902
e11d4284
AB
2903 if (timeout && get_timespec64(&timeout_sys, timeout))
2904 return -EFAULT;
a2e27255 2905
e11d4284 2906 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2907 return -EFAULT;
2908
e11d4284
AB
2909 if (!timeout && !timeout32)
2910 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2911
2912 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2913
e11d4284
AB
2914 if (datagrams <= 0)
2915 return datagrams;
2916
2917 if (timeout && put_timespec64(&timeout_sys, timeout))
2918 datagrams = -EFAULT;
2919
2920 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2921 datagrams = -EFAULT;
2922
2923 return datagrams;
2924}
2925
1255e269
DB
2926SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2927 unsigned int, vlen, unsigned int, flags,
c2e6c856 2928 struct __kernel_timespec __user *, timeout)
1255e269 2929{
e11d4284
AB
2930 if (flags & MSG_CMSG_COMPAT)
2931 return -EINVAL;
2932
2933 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2934}
2935
2936#ifdef CONFIG_COMPAT_32BIT_TIME
2937SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
2938 unsigned int, vlen, unsigned int, flags,
2939 struct old_timespec32 __user *, timeout)
2940{
2941 if (flags & MSG_CMSG_COMPAT)
2942 return -EINVAL;
2943
2944 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
1255e269 2945}
e11d4284 2946#endif
1255e269 2947
a2e27255 2948#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
2949/* Argument list sizes for sys_socketcall */
/* AL(n): size in bytes of an argument block holding n unsigned longs. */
#define AL(x) ((x) * sizeof(unsigned long))
/* Indexed by socketcall number; the leading comment on each row is the index of its first entry. */
static const unsigned char nargs[21] = {
	/*  0 */ AL(0), AL(3), AL(3),
	/*  3 */ AL(3), AL(2), AL(3),
	/*  6 */ AL(3), AL(3), AL(4),
	/*  9 */ AL(4), AL(4), AL(6),
	/* 12 */ AL(6), AL(2), AL(5),
	/* 15 */ AL(5), AL(3), AL(3),
	/* 18 */ AL(4), AL(5), AL(4)
};

#undef AL
2959
2960/*
89bddce5 2961 * System call vectors.
1da177e4
LT
2962 *
2963 * Argument checking cleaned up. Saved 20% in size.
2964 * This function doesn't need to set the kernel lock because
89bddce5 2965 * it is set by the callees.
1da177e4
LT
2966 */
2967
3e0fa65f 2968SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2969{
2950fa9d 2970 unsigned long a[AUDITSC_ARGS];
89bddce5 2971 unsigned long a0, a1;
1da177e4 2972 int err;
47379052 2973 unsigned int len;
1da177e4 2974
228e548e 2975 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2976 return -EINVAL;
c8e8cd57 2977 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2978
47379052
AV
2979 len = nargs[call];
2980 if (len > sizeof(a))
2981 return -EINVAL;
2982
1da177e4 2983 /* copy_from_user should be SMP safe. */
47379052 2984 if (copy_from_user(a, args, len))
1da177e4 2985 return -EFAULT;
3ec3b2fb 2986
2950fa9d
CG
2987 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2988 if (err)
2989 return err;
3ec3b2fb 2990
89bddce5
SH
2991 a0 = a[0];
2992 a1 = a[1];
2993
2994 switch (call) {
2995 case SYS_SOCKET:
9d6a15c3 2996 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2997 break;
2998 case SYS_BIND:
a87d35d8 2999 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
3000 break;
3001 case SYS_CONNECT:
1387c2c2 3002 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
3003 break;
3004 case SYS_LISTEN:
25e290ee 3005 err = __sys_listen(a0, a1);
89bddce5
SH
3006 break;
3007 case SYS_ACCEPT:
4541e805
DB
3008 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3009 (int __user *)a[2], 0);
89bddce5
SH
3010 break;
3011 case SYS_GETSOCKNAME:
3012 err =
8882a107
DB
3013 __sys_getsockname(a0, (struct sockaddr __user *)a1,
3014 (int __user *)a[2]);
89bddce5
SH
3015 break;
3016 case SYS_GETPEERNAME:
3017 err =
b21c8f83
DB
3018 __sys_getpeername(a0, (struct sockaddr __user *)a1,
3019 (int __user *)a[2]);
89bddce5
SH
3020 break;
3021 case SYS_SOCKETPAIR:
6debc8d8 3022 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
3023 break;
3024 case SYS_SEND:
f3bf896b
DB
3025 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
3026 NULL, 0);
89bddce5
SH
3027 break;
3028 case SYS_SENDTO:
211b634b
DB
3029 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
3030 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
3031 break;
3032 case SYS_RECV:
d27e9afc
DB
3033 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
3034 NULL, NULL);
89bddce5
SH
3035 break;
3036 case SYS_RECVFROM:
7a09e1eb
DB
3037 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
3038 (struct sockaddr __user *)a[4],
3039 (int __user *)a[5]);
89bddce5
SH
3040 break;
3041 case SYS_SHUTDOWN:
005a1aea 3042 err = __sys_shutdown(a0, a1);
89bddce5
SH
3043 break;
3044 case SYS_SETSOCKOPT:
cc36dca0
DB
3045 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
3046 a[4]);
89bddce5
SH
3047 break;
3048 case SYS_GETSOCKOPT:
3049 err =
13a2d70e
DB
3050 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
3051 (int __user *)a[4]);
89bddce5
SH
3052 break;
3053 case SYS_SENDMSG:
e1834a32
DB
3054 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
3055 a[2], true);
89bddce5 3056 break;
228e548e 3057 case SYS_SENDMMSG:
e1834a32
DB
3058 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
3059 a[3], true);
228e548e 3060 break;
89bddce5 3061 case SYS_RECVMSG:
e1834a32
DB
3062 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
3063 a[2], true);
89bddce5 3064 break;
a2e27255 3065 case SYS_RECVMMSG:
3ca47e95 3066 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
3067 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3068 a[2], a[3],
3069 (struct __kernel_timespec __user *)a[4],
3070 NULL);
3071 else
3072 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3073 a[2], a[3], NULL,
3074 (struct old_timespec32 __user *)a[4]);
a2e27255 3075 break;
de11defe 3076 case SYS_ACCEPT4:
4541e805
DB
3077 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3078 (int __user *)a[2], a[3]);
aaca0bdc 3079 break;
89bddce5
SH
3080 default:
3081 err = -EINVAL;
3082 break;
1da177e4
LT
3083 }
3084 return err;
3085}
3086
89bddce5 3087#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 3088
55737fda
SH
3089/**
3090 * sock_register - add a socket protocol handler
3091 * @ops: description of protocol
3092 *
1da177e4
LT
3093 * This function is called by a protocol handler that wants to
3094 * advertise its address family, and have it linked into the
e793c0f7 3095 * socket interface. The value ops->family corresponds to the
55737fda 3096 * socket system call protocol family.
1da177e4 3097 */
f0fd27d4 3098int sock_register(const struct net_proto_family *ops)
1da177e4
LT
3099{
3100 int err;
3101
3102 if (ops->family >= NPROTO) {
3410f22e 3103 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
3104 return -ENOBUFS;
3105 }
55737fda
SH
3106
3107 spin_lock(&net_family_lock);
190683a9
ED
3108 if (rcu_dereference_protected(net_families[ops->family],
3109 lockdep_is_held(&net_family_lock)))
55737fda
SH
3110 err = -EEXIST;
3111 else {
cf778b00 3112 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
3113 err = 0;
3114 }
55737fda
SH
3115 spin_unlock(&net_family_lock);
3116
fe0bdbde 3117 pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
1da177e4
LT
3118 return err;
3119}
c6d409cf 3120EXPORT_SYMBOL(sock_register);
1da177e4 3121
55737fda
SH
3122/**
3123 * sock_unregister - remove a protocol handler
3124 * @family: protocol family to remove
3125 *
1da177e4
LT
3126 * This function is called by a protocol handler that wants to
3127 * remove its address family, and have it unlinked from the
55737fda
SH
3128 * new socket creation.
3129 *
3130 * If protocol handler is a module, then it can use module reference
3131 * counts to protect against new references. If protocol handler is not
3132 * a module then it needs to provide its own protection in
3133 * the ops->create routine.
1da177e4 3134 */
f0fd27d4 3135void sock_unregister(int family)
1da177e4 3136{
f0fd27d4 3137 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3138
55737fda 3139 spin_lock(&net_family_lock);
a9b3cd7f 3140 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3141 spin_unlock(&net_family_lock);
3142
3143 synchronize_rcu();
3144
fe0bdbde 3145 pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
1da177e4 3146}
c6d409cf 3147EXPORT_SYMBOL(sock_unregister);
1da177e4 3148
bf2ae2e4
XL
3149bool sock_is_registered(int family)
3150{
66b51b0a 3151 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3152}
3153
77d76ea3 3154static int __init sock_init(void)
1da177e4 3155{
b3e19d92 3156 int err;
2ca794e5
EB
3157 /*
3158 * Initialize the network sysctl infrastructure.
3159 */
3160 err = net_sysctl_init();
3161 if (err)
3162 goto out;
b3e19d92 3163
1da177e4 3164 /*
89bddce5 3165 * Initialize skbuff SLAB cache
1da177e4
LT
3166 */
3167 skb_init();
1da177e4
LT
3168
3169 /*
89bddce5 3170 * Initialize the protocols module.
1da177e4
LT
3171 */
3172
3173 init_inodecache();
b3e19d92
NP
3174
3175 err = register_filesystem(&sock_fs_type);
3176 if (err)
47260ba9 3177 goto out;
1da177e4 3178 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3179 if (IS_ERR(sock_mnt)) {
3180 err = PTR_ERR(sock_mnt);
3181 goto out_mount;
3182 }
77d76ea3
AK
3183
3184 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3185 */
3186
3187#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3188 err = netfilter_init();
3189 if (err)
3190 goto out;
1da177e4 3191#endif
cbeb321a 3192
408eccce 3193 ptp_classifier_init();
c1f19b51 3194
b3e19d92
NP
3195out:
3196 return err;
3197
3198out_mount:
3199 unregister_filesystem(&sock_fs_type);
b3e19d92 3200 goto out;
1da177e4
LT
3201}
3202
77d76ea3
AK
3203core_initcall(sock_init); /* early initcall */
3204
1da177e4
LT
3205#ifdef CONFIG_PROC_FS
3206void socket_seq_show(struct seq_file *seq)
3207{
648845ab
TZ
3208 seq_printf(seq, "sockets: used %d\n",
3209 sock_inuse_get(seq->private));
1da177e4 3210}
89bddce5 3211#endif /* CONFIG_PROC_FS */
1da177e4 3212
29c49648
AB
3213/* Handle the fact that while struct ifreq has the same *layout* on
3214 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3215 * which are handled elsewhere, it still has different *size* due to
3216 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3217 * resulting in struct ifreq being 32 and 40 bytes respectively).
3218 * As a result, if the struct happens to be at the end of a page and
3219 * the next page isn't readable/writable, we get a fault. To prevent
3220 * that, copy back and forth to the full size.
3221 */
3222int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
7a229387 3223{
29c49648
AB
3224 if (in_compat_syscall()) {
3225 struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
7a229387 3226
29c49648
AB
3227 memset(ifr, 0, sizeof(*ifr));
3228 if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
3229 return -EFAULT;
7a229387 3230
29c49648
AB
3231 if (ifrdata)
3232 *ifrdata = compat_ptr(ifr32->ifr_data);
7a229387 3233
29c49648
AB
3234 return 0;
3235 }
7a229387 3236
29c49648 3237 if (copy_from_user(ifr, arg, sizeof(*ifr)))
7a229387
AB
3238 return -EFAULT;
3239
29c49648
AB
3240 if (ifrdata)
3241 *ifrdata = ifr->ifr_data;
3242
7a229387
AB
3243 return 0;
3244}
29c49648 3245EXPORT_SYMBOL(get_user_ifreq);
7a229387 3246
29c49648 3247int put_user_ifreq(struct ifreq *ifr, void __user *arg)
7a229387 3248{
29c49648 3249 size_t size = sizeof(*ifr);
7a229387 3250
29c49648
AB
3251 if (in_compat_syscall())
3252 size = sizeof(struct compat_ifreq);
7a229387 3253
29c49648 3254 if (copy_to_user(arg, ifr, size))
7a229387
AB
3255 return -EFAULT;
3256
3a7da39d 3257 return 0;
7a229387 3258}
29c49648 3259EXPORT_SYMBOL(put_user_ifreq);
7a229387 3260
89bbfc95 3261#ifdef CONFIG_COMPAT
7a50a240
AB
3262static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3263{
7a50a240 3264 compat_uptr_t uptr32;
44c02a2c
AV
3265 struct ifreq ifr;
3266 void __user *saved;
3267 int err;
7a50a240 3268
29c49648 3269 if (get_user_ifreq(&ifr, NULL, uifr32))
7a50a240
AB
3270 return -EFAULT;
3271
3272 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3273 return -EFAULT;
3274
44c02a2c
AV
3275 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3276 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3277
a554bf96 3278 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
44c02a2c
AV
3279 if (!err) {
3280 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
29c49648 3281 if (put_user_ifreq(&ifr, uifr32))
44c02a2c 3282 err = -EFAULT;
ccbd6a5a 3283 }
44c02a2c 3284 return err;
7a229387
AB
3285}
3286
590d4693
BH
3287/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3288static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3289 struct compat_ifreq __user *u_ifreq32)
7a229387 3290{
44c02a2c 3291 struct ifreq ifreq;
a554bf96 3292 void __user *data;
7a229387 3293
d0efb162
PC
3294 if (!is_socket_ioctl_cmd(cmd))
3295 return -ENOTTY;
a554bf96 3296 if (get_user_ifreq(&ifreq, &data, u_ifreq32))
7a229387 3297 return -EFAULT;
a554bf96 3298 ifreq.ifr_data = data;
7a229387 3299
a554bf96 3300 return dev_ioctl(net, cmd, &ifreq, data, NULL);
a2116ed2
AB
3301}
3302
6b96018b
AB
3303static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3304 unsigned int cmd, unsigned long arg)
3305{
3306 void __user *argp = compat_ptr(arg);
3307 struct sock *sk = sock->sk;
3308 struct net *net = sock_net(sk);
7a229387 3309
6b96018b 3310 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
88fc023f 3311 return sock_ioctl(file, cmd, (unsigned long)argp);
6b96018b
AB
3312
3313 switch (cmd) {
7a50a240
AB
3314 case SIOCWANDEV:
3315 return compat_siocwandev(net, argp);
0768e170
AB
3316 case SIOCGSTAMP_OLD:
3317 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3318 if (!sock->ops->gettstamp)
3319 return -ENOIOCTLCMD;
0768e170 3320 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3321 !COMPAT_USE_64BIT_TIME);
3322
dd98d289 3323 case SIOCETHTOOL:
590d4693
BH
3324 case SIOCBONDSLAVEINFOQUERY:
3325 case SIOCBONDINFOQUERY:
a2116ed2 3326 case SIOCSHWTSTAMP:
fd468c74 3327 case SIOCGHWTSTAMP:
590d4693 3328 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3329
3330 case FIOSETOWN:
3331 case SIOCSPGRP:
3332 case FIOGETOWN:
3333 case SIOCGPGRP:
3334 case SIOCBRADDBR:
3335 case SIOCBRDELBR:
3336 case SIOCGIFVLAN:
3337 case SIOCSIFVLAN:
c62cce2c 3338 case SIOCGSKNS:
0768e170
AB
3339 case SIOCGSTAMP_NEW:
3340 case SIOCGSTAMPNS_NEW:
876f0bf9 3341 case SIOCGIFCONF:
fd3a4590
RP
3342 case SIOCSIFBR:
3343 case SIOCGIFBR:
6b96018b
AB
3344 return sock_ioctl(file, cmd, arg);
3345
3346 case SIOCGIFFLAGS:
3347 case SIOCSIFFLAGS:
709566d7
AB
3348 case SIOCGIFMAP:
3349 case SIOCSIFMAP:
6b96018b
AB
3350 case SIOCGIFMETRIC:
3351 case SIOCSIFMETRIC:
3352 case SIOCGIFMTU:
3353 case SIOCSIFMTU:
3354 case SIOCGIFMEM:
3355 case SIOCSIFMEM:
3356 case SIOCGIFHWADDR:
3357 case SIOCSIFHWADDR:
3358 case SIOCADDMULTI:
3359 case SIOCDELMULTI:
3360 case SIOCGIFINDEX:
6b96018b
AB
3361 case SIOCGIFADDR:
3362 case SIOCSIFADDR:
3363 case SIOCSIFHWBROADCAST:
6b96018b 3364 case SIOCDIFADDR:
6b96018b
AB
3365 case SIOCGIFBRDADDR:
3366 case SIOCSIFBRDADDR:
3367 case SIOCGIFDSTADDR:
3368 case SIOCSIFDSTADDR:
3369 case SIOCGIFNETMASK:
3370 case SIOCSIFNETMASK:
3371 case SIOCSIFPFLAGS:
3372 case SIOCGIFPFLAGS:
3373 case SIOCGIFTXQLEN:
3374 case SIOCSIFTXQLEN:
3375 case SIOCBRADDIF:
3376 case SIOCBRDELIF:
c6c9fee3 3377 case SIOCGIFNAME:
9177efd3
AB
3378 case SIOCSIFNAME:
3379 case SIOCGMIIPHY:
3380 case SIOCGMIIREG:
3381 case SIOCSMIIREG:
f92d4fc9
AV
3382 case SIOCBONDENSLAVE:
3383 case SIOCBONDRELEASE:
3384 case SIOCBONDSETHWADDR:
3385 case SIOCBONDCHANGEACTIVE:
6b96018b
AB
3386 case SIOCSARP:
3387 case SIOCGARP:
3388 case SIOCDARP:
c7dc504e 3389 case SIOCOUTQ:
9d7bf41f 3390 case SIOCOUTQNSD:
6b96018b 3391 case SIOCATMARK:
63ff03ab 3392 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3393 }
3394
6b96018b
AB
3395 return -ENOIOCTLCMD;
3396}
7a229387 3397
95c96174 3398static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3399 unsigned long arg)
89bbfc95
SP
3400{
3401 struct socket *sock = file->private_data;
3402 int ret = -ENOIOCTLCMD;
87de87d5
DM
3403 struct sock *sk;
3404 struct net *net;
3405
3406 sk = sock->sk;
3407 net = sock_net(sk);
89bbfc95
SP
3408
3409 if (sock->ops->compat_ioctl)
3410 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3411
87de87d5
DM
3412 if (ret == -ENOIOCTLCMD &&
3413 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3414 ret = compat_wext_handle_ioctl(net, cmd, arg);
3415
6b96018b
AB
3416 if (ret == -ENOIOCTLCMD)
3417 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3418
89bbfc95
SP
3419 return ret;
3420}
3421#endif
3422
8a3c245c
PT
3423/**
3424 * kernel_bind - bind an address to a socket (kernel space)
3425 * @sock: socket
3426 * @addr: address
3427 * @addrlen: length of address
3428 *
3429 * Returns 0 or an error.
3430 */
3431
ac5a488e
SS
3432int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3433{
3434 return sock->ops->bind(sock, addr, addrlen);
3435}
c6d409cf 3436EXPORT_SYMBOL(kernel_bind);
ac5a488e 3437
8a3c245c
PT
3438/**
3439 * kernel_listen - move socket to listening state (kernel space)
3440 * @sock: socket
3441 * @backlog: pending connections queue size
3442 *
3443 * Returns 0 or an error.
3444 */
3445
ac5a488e
SS
3446int kernel_listen(struct socket *sock, int backlog)
3447{
3448 return sock->ops->listen(sock, backlog);
3449}
c6d409cf 3450EXPORT_SYMBOL(kernel_listen);
ac5a488e 3451
8a3c245c
PT
3452/**
3453 * kernel_accept - accept a connection (kernel space)
3454 * @sock: listening socket
3455 * @newsock: new connected socket
3456 * @flags: flags
3457 *
3458 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3459 * If it fails, @newsock is guaranteed to be %NULL.
3460 * Returns 0 or an error.
3461 */
3462
ac5a488e
SS
3463int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3464{
3465 struct sock *sk = sock->sk;
3466 int err;
3467
3468 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3469 newsock);
3470 if (err < 0)
3471 goto done;
3472
cdfbabfb 3473 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3474 if (err < 0) {
3475 sock_release(*newsock);
fa8705b0 3476 *newsock = NULL;
ac5a488e
SS
3477 goto done;
3478 }
3479
3480 (*newsock)->ops = sock->ops;
1b08534e 3481 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3482
3483done:
3484 return err;
3485}
c6d409cf 3486EXPORT_SYMBOL(kernel_accept);
ac5a488e 3487
8a3c245c
PT
3488/**
3489 * kernel_connect - connect a socket (kernel space)
3490 * @sock: socket
3491 * @addr: address
3492 * @addrlen: address length
3493 * @flags: flags (O_NONBLOCK, ...)
3494 *
f1dcffcc 3495 * For datagram sockets, @addr is the address to which datagrams are sent
8a3c245c
PT
3496 * by default, and the only address from which datagrams are received.
3497 * For stream sockets, attempts to connect to @addr.
3498 * Returns 0 or an error code.
3499 */
3500
ac5a488e 3501int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3502 int flags)
ac5a488e
SS
3503{
3504 return sock->ops->connect(sock, addr, addrlen, flags);
3505}
c6d409cf 3506EXPORT_SYMBOL(kernel_connect);
ac5a488e 3507
8a3c245c
PT
3508/**
3509 * kernel_getsockname - get the address which the socket is bound (kernel space)
3510 * @sock: socket
3511 * @addr: address holder
3512 *
3513 * Fills the @addr pointer with the address which the socket is bound.
0fc95dec 3514 * Returns the length of the address in bytes or an error code.
8a3c245c
PT
3515 */
3516
9b2c45d4 3517int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3518{
9b2c45d4 3519 return sock->ops->getname(sock, addr, 0);
ac5a488e 3520}
c6d409cf 3521EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3522
8a3c245c 3523/**
645f0897 3524 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3525 * @sock: socket
3526 * @addr: address holder
3527 *
3528 * Fills the @addr pointer with the address which the socket is connected.
0fc95dec 3529 * Returns the length of the address in bytes or an error code.
8a3c245c
PT
3530 */
3531
9b2c45d4 3532int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3533{
9b2c45d4 3534 return sock->ops->getname(sock, addr, 1);
ac5a488e 3535}
c6d409cf 3536EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3537
8a3c245c
PT
3538/**
3539 * kernel_sendpage - send a &page through a socket (kernel space)
3540 * @sock: socket
3541 * @page: page
3542 * @offset: page offset
3543 * @size: total size in bytes
3544 * @flags: flags (MSG_DONTWAIT, ...)
3545 *
3546 * Returns the total amount sent in bytes or an error.
3547 */
3548
ac5a488e
SS
3549int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3550 size_t size, int flags)
3551{
7b62d31d
CL
3552 if (sock->ops->sendpage) {
3553 /* Warn in case the improper page to zero-copy send */
3554 WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
ac5a488e 3555 return sock->ops->sendpage(sock, page, offset, size, flags);
7b62d31d 3556 }
ac5a488e
SS
3557 return sock_no_sendpage(sock, page, offset, size, flags);
3558}
c6d409cf 3559EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3560
8a3c245c
PT
3561/**
3562 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3563 * @sk: sock
3564 * @page: page
3565 * @offset: page offset
3566 * @size: total size in bytes
3567 * @flags: flags (MSG_DONTWAIT, ...)
3568 *
3569 * Returns the total amount sent in bytes or an error.
3570 * Caller must hold @sk.
3571 */
3572
306b13eb
TH
3573int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3574 size_t size, int flags)
3575{
3576 struct socket *sock = sk->sk_socket;
3577
3578 if (sock->ops->sendpage_locked)
3579 return sock->ops->sendpage_locked(sk, page, offset, size,
3580 flags);
3581
3582 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3583}
3584EXPORT_SYMBOL(kernel_sendpage_locked);
3585
8a3c245c 3586/**
645f0897 3587 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3588 * @sock: socket
3589 * @how: connection part
3590 *
3591 * Returns 0 or an error.
3592 */
3593
91cf45f0
TM
3594int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3595{
3596 return sock->ops->shutdown(sock, how);
3597}
91cf45f0 3598EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3599
8a3c245c
PT
3600/**
3601 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3602 * @sk: socket
3603 *
3604 * This routine returns the IP overhead imposed by a socket i.e.
3605 * the length of the underlying IP header, depending on whether
3606 * this is an IPv4 or IPv6 socket and the length from IP options turned
3607 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3608 */
8a3c245c 3609
113c3075
P
3610u32 kernel_sock_ip_overhead(struct sock *sk)
3611{
3612 struct inet_sock *inet;
3613 struct ip_options_rcu *opt;
3614 u32 overhead = 0;
113c3075
P
3615#if IS_ENABLED(CONFIG_IPV6)
3616 struct ipv6_pinfo *np;
3617 struct ipv6_txoptions *optv6 = NULL;
3618#endif /* IS_ENABLED(CONFIG_IPV6) */
3619
3620 if (!sk)
3621 return overhead;
3622
113c3075
P
3623 switch (sk->sk_family) {
3624 case AF_INET:
3625 inet = inet_sk(sk);
3626 overhead += sizeof(struct iphdr);
3627 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3628 sock_owned_by_user(sk));
113c3075
P
3629 if (opt)
3630 overhead += opt->opt.optlen;
3631 return overhead;
3632#if IS_ENABLED(CONFIG_IPV6)
3633 case AF_INET6:
3634 np = inet6_sk(sk);
3635 overhead += sizeof(struct ipv6hdr);
3636 if (np)
3637 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3638 sock_owned_by_user(sk));
113c3075
P
3639 if (optv6)
3640 overhead += (optv6->opt_flen + optv6->opt_nflen);
3641 return overhead;
3642#endif /* IS_ENABLED(CONFIG_IPV6) */
3643 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3644 return overhead;
3645 }
3646}
3647EXPORT_SYMBOL(kernel_sock_ip_overhead);