af_unix: Call scm_recv() only after scm_set_cred().
[linux-2.6-block.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
aef2feda 55#include <linux/bpf-cgroup.h>
cc69837f 56#include <linux/ethtool.h>
1da177e4 57#include <linux/mm.h>
1da177e4
LT
58#include <linux/socket.h>
59#include <linux/file.h>
2dc334f1 60#include <linux/splice.h>
1da177e4
LT
61#include <linux/net.h>
62#include <linux/interrupt.h>
aaca0bdc 63#include <linux/thread_info.h>
55737fda 64#include <linux/rcupdate.h>
1da177e4
LT
65#include <linux/netdevice.h>
66#include <linux/proc_fs.h>
67#include <linux/seq_file.h>
4a3e2f71 68#include <linux/mutex.h>
1da177e4 69#include <linux/if_bridge.h>
20380731 70#include <linux/if_vlan.h>
408eccce 71#include <linux/ptp_classify.h>
1da177e4
LT
72#include <linux/init.h>
73#include <linux/poll.h>
74#include <linux/cache.h>
75#include <linux/module.h>
76#include <linux/highmem.h>
1da177e4 77#include <linux/mount.h>
fba9be49 78#include <linux/pseudo_fs.h>
1da177e4
LT
79#include <linux/security.h>
80#include <linux/syscalls.h>
81#include <linux/compat.h>
82#include <linux/kmod.h>
3ec3b2fb 83#include <linux/audit.h>
d86b5e0e 84#include <linux/wireless.h>
1b8d7ae4 85#include <linux/nsproxy.h>
1fd7317d 86#include <linux/magic.h>
5a0e3ad6 87#include <linux/slab.h>
600e1779 88#include <linux/xattr.h>
c8e8cd57 89#include <linux/nospec.h>
8c3c447b 90#include <linux/indirect_call_wrapper.h>
1da177e4 91
7c0f6ba6 92#include <linux/uaccess.h>
1da177e4
LT
93#include <asm/unistd.h>
94
95#include <net/compat.h>
87de87d5 96#include <net/wext.h>
f8451725 97#include <net/cls_cgroup.h>
1da177e4
LT
98
99#include <net/sock.h>
100#include <linux/netfilter.h>
101
6b96018b
AB
102#include <linux/if_tun.h>
103#include <linux/ipv6_route.h>
104#include <linux/route.h>
c7dc504e 105#include <linux/termios.h>
6b96018b 106#include <linux/sockios.h>
076bb0c8 107#include <net/busy_poll.h>
f24b9be5 108#include <linux/errqueue.h>
d7c08826 109#include <linux/ptp_clock_kernel.h>
6e6eda44 110#include <trace/events/sock.h>
06021292 111
#ifdef CONFIG_NET_RX_BUSY_POLL
/*
 * File-scope globals, only built when CONFIG_NET_RX_BUSY_POLL is set.
 * Both have static storage and no initializer, so they start at zero.
 */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif /* CONFIG_NET_RX_BUSY_POLL */
6b96018b 116
8ae5e030
AV
117static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
118static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 119static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
120
121static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
122static __poll_t sock_poll(struct file *file,
123 struct poll_table_struct *wait);
89bddce5 124static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
125#ifdef CONFIG_COMPAT
126static long compat_sock_ioctl(struct file *file,
89bddce5 127 unsigned int cmd, unsigned long arg);
89bbfc95 128#endif
1da177e4 129static int sock_fasync(int fd, struct file *filp, int on);
9c55e01c 130static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 131 struct pipe_inode_info *pipe, size_t len,
9c55e01c 132 unsigned int flags);
2bfc6685 133static void sock_splice_eof(struct file *file);
542d3065
AB
134
#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo hook for socket files.
 *
 * Forwards to the protocol's own show_fdinfo operation when the protocol
 * provides one; otherwise nothing is emitted.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;

	if (!sock->ops->show_fdinfo)
		return;

	sock->ops->show_fdinfo(m, sock);
}
#else
/* Without CONFIG_PROC_FS the file_operations slot is simply left NULL. */
#define sock_show_fdinfo NULL
#endif
1da177e4 146
1da177e4
LT
147/*
148 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
149 * in the operation structures but are done directly via the socketcall() multiplexor.
150 */
151
da7071d7 152static const struct file_operations socket_file_ops = {
1da177e4
LT
153 .owner = THIS_MODULE,
154 .llseek = no_llseek,
8ae5e030
AV
155 .read_iter = sock_read_iter,
156 .write_iter = sock_write_iter,
1da177e4
LT
157 .poll = sock_poll,
158 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
159#ifdef CONFIG_COMPAT
160 .compat_ioctl = compat_sock_ioctl,
161#endif
1da177e4 162 .mmap = sock_mmap,
1da177e4
LT
163 .release = sock_close,
164 .fasync = sock_fasync,
2dc334f1 165 .splice_write = splice_to_socket,
9c55e01c 166 .splice_read = sock_splice_read,
2bfc6685 167 .splice_eof = sock_splice_eof,
b4653342 168 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
169};
170
fe0bdbde
YD
171static const char * const pf_family_names[] = {
172 [PF_UNSPEC] = "PF_UNSPEC",
173 [PF_UNIX] = "PF_UNIX/PF_LOCAL",
174 [PF_INET] = "PF_INET",
175 [PF_AX25] = "PF_AX25",
176 [PF_IPX] = "PF_IPX",
177 [PF_APPLETALK] = "PF_APPLETALK",
178 [PF_NETROM] = "PF_NETROM",
179 [PF_BRIDGE] = "PF_BRIDGE",
180 [PF_ATMPVC] = "PF_ATMPVC",
181 [PF_X25] = "PF_X25",
182 [PF_INET6] = "PF_INET6",
183 [PF_ROSE] = "PF_ROSE",
184 [PF_DECnet] = "PF_DECnet",
185 [PF_NETBEUI] = "PF_NETBEUI",
186 [PF_SECURITY] = "PF_SECURITY",
187 [PF_KEY] = "PF_KEY",
188 [PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
189 [PF_PACKET] = "PF_PACKET",
190 [PF_ASH] = "PF_ASH",
191 [PF_ECONET] = "PF_ECONET",
192 [PF_ATMSVC] = "PF_ATMSVC",
193 [PF_RDS] = "PF_RDS",
194 [PF_SNA] = "PF_SNA",
195 [PF_IRDA] = "PF_IRDA",
196 [PF_PPPOX] = "PF_PPPOX",
197 [PF_WANPIPE] = "PF_WANPIPE",
198 [PF_LLC] = "PF_LLC",
199 [PF_IB] = "PF_IB",
200 [PF_MPLS] = "PF_MPLS",
201 [PF_CAN] = "PF_CAN",
202 [PF_TIPC] = "PF_TIPC",
203 [PF_BLUETOOTH] = "PF_BLUETOOTH",
204 [PF_IUCV] = "PF_IUCV",
205 [PF_RXRPC] = "PF_RXRPC",
206 [PF_ISDN] = "PF_ISDN",
207 [PF_PHONET] = "PF_PHONET",
208 [PF_IEEE802154] = "PF_IEEE802154",
209 [PF_CAIF] = "PF_CAIF",
210 [PF_ALG] = "PF_ALG",
211 [PF_NFC] = "PF_NFC",
212 [PF_VSOCK] = "PF_VSOCK",
213 [PF_KCM] = "PF_KCM",
214 [PF_QIPCRTR] = "PF_QIPCRTR",
215 [PF_SMC] = "PF_SMC",
216 [PF_XDP] = "PF_XDP",
bc49d816 217 [PF_MCTP] = "PF_MCTP",
fe0bdbde
YD
218};
219
1da177e4
LT
220/*
221 * The protocol list. Each protocol is registered in here.
222 */
223
1da177e4 224static DEFINE_SPINLOCK(net_family_lock);
190683a9 225static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 226
1da177e4 227/*
89bddce5
SH
228 * Support routines.
229 * Move socket addresses back and forth across the kernel/user
230 * divide and look after the messy bits.
1da177e4
LT
231 */
232
1da177e4
LT
233/**
234 * move_addr_to_kernel - copy a socket address into kernel space
235 * @uaddr: Address in user space
236 * @kaddr: Address in kernel space
237 * @ulen: Length in user space
238 *
239 * The address is copied into kernel space. If the provided address is
240 * too long an error code of -EINVAL is returned. If the copy gives
241 * invalid addresses -EFAULT is returned. On a success 0 is returned.
242 */
243
43db362d 244int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 245{
230b1839 246 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 247 return -EINVAL;
89bddce5 248 if (ulen == 0)
1da177e4 249 return 0;
89bddce5 250 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 251 return -EFAULT;
3ec3b2fb 252 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
253}
254
255/**
256 * move_addr_to_user - copy an address to user space
257 * @kaddr: kernel space address
258 * @klen: length of address in kernel
259 * @uaddr: user space address
260 * @ulen: pointer to user length field
261 *
262 * The value pointed to by ulen on entry is the buffer length available.
263 * This is overwritten with the buffer space used. -EINVAL is returned
264 * if an overlong buffer is specified or a negative buffer size. -EFAULT
265 * is returned if either the buffer or the length field are not
266 * accessible.
267 * After copying the data up to the limit the user specifies, the true
268 * length of the data is written over the length limit the user
269 * specified. Zero is returned for a success.
270 */
89bddce5 271
43db362d 272static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 273 void __user *uaddr, int __user *ulen)
1da177e4
LT
274{
275 int err;
276 int len;
277
68c6beb3 278 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
279 err = get_user(len, ulen);
280 if (err)
1da177e4 281 return err;
89bddce5
SH
282 if (len > klen)
283 len = klen;
68c6beb3 284 if (len < 0)
1da177e4 285 return -EINVAL;
89bddce5 286 if (len) {
d6fe3945
SG
287 if (audit_sockaddr(klen, kaddr))
288 return -ENOMEM;
89bddce5 289 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
290 return -EFAULT;
291 }
292 /*
89bddce5
SH
293 * "fromlen shall refer to the value before truncation.."
294 * 1003.1g
1da177e4
LT
295 */
296 return __put_user(klen, ulen);
297}
298
08009a76 299static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
300
301static struct inode *sock_alloc_inode(struct super_block *sb)
302{
303 struct socket_alloc *ei;
89bddce5 304
fd60b288 305 ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
306 if (!ei)
307 return NULL;
333f7909
AV
308 init_waitqueue_head(&ei->socket.wq.wait);
309 ei->socket.wq.fasync_list = NULL;
310 ei->socket.wq.flags = 0;
89bddce5 311
1da177e4
LT
312 ei->socket.state = SS_UNCONNECTED;
313 ei->socket.flags = 0;
314 ei->socket.ops = NULL;
315 ei->socket.sk = NULL;
316 ei->socket.file = NULL;
1da177e4
LT
317
318 return &ei->vfs_inode;
319}
320
6d7855c5 321static void sock_free_inode(struct inode *inode)
1da177e4 322{
43815482
ED
323 struct socket_alloc *ei;
324
325 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 326 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
327}
328
51cc5068 329static void init_once(void *foo)
1da177e4 330{
89bddce5 331 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 332
a35afb83 333 inode_init_once(&ei->vfs_inode);
1da177e4 334}
89bddce5 335
1e911632 336static void init_inodecache(void)
1da177e4
LT
337{
338 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
339 sizeof(struct socket_alloc),
340 0,
341 (SLAB_HWCACHE_ALIGN |
342 SLAB_RECLAIM_ACCOUNT |
5d097056 343 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 344 init_once);
1e911632 345 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
346}
347
b87221de 348static const struct super_operations sockfs_ops = {
c6d409cf 349 .alloc_inode = sock_alloc_inode,
6d7855c5 350 .free_inode = sock_free_inode,
c6d409cf 351 .statfs = simple_statfs,
1da177e4
LT
352};
353
c23fbb6b
ED
354/*
355 * sockfs_dname() is called from d_path().
356 */
357static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
358{
0f60d288 359 return dynamic_dname(buffer, buflen, "socket:[%lu]",
c5ef6035 360 d_inode(dentry)->i_ino);
c23fbb6b
ED
361}
362
3ba13d17 363static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 364 .d_dname = sockfs_dname,
1da177e4
LT
365};
366
bba0bd31
AG
367static int sockfs_xattr_get(const struct xattr_handler *handler,
368 struct dentry *dentry, struct inode *inode,
369 const char *suffix, void *value, size_t size)
370{
371 if (value) {
372 if (dentry->d_name.len + 1 > size)
373 return -ERANGE;
374 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
375 }
376 return dentry->d_name.len + 1;
377}
378
379#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
380#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
381#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
382
383static const struct xattr_handler sockfs_xattr_handler = {
384 .name = XATTR_NAME_SOCKPROTONAME,
385 .get = sockfs_xattr_get,
386};
387
4a590153 388static int sockfs_security_xattr_set(const struct xattr_handler *handler,
39f60c1c 389 struct mnt_idmap *idmap,
4a590153
AG
390 struct dentry *dentry, struct inode *inode,
391 const char *suffix, const void *value,
392 size_t size, int flags)
393{
394 /* Handled by LSM. */
395 return -EAGAIN;
396}
397
398static const struct xattr_handler sockfs_security_xattr_handler = {
399 .prefix = XATTR_SECURITY_PREFIX,
400 .set = sockfs_security_xattr_set,
401};
402
bba0bd31
AG
403static const struct xattr_handler *sockfs_xattr_handlers[] = {
404 &sockfs_xattr_handler,
4a590153 405 &sockfs_security_xattr_handler,
bba0bd31
AG
406 NULL
407};
408
fba9be49 409static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 410{
fba9be49
DH
411 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
412 if (!ctx)
413 return -ENOMEM;
414 ctx->ops = &sockfs_ops;
415 ctx->dops = &sockfs_dentry_operations;
416 ctx->xattr = sockfs_xattr_handlers;
417 return 0;
c74a1cbb
AV
418}
419
420static struct vfsmount *sock_mnt __read_mostly;
421
422static struct file_system_type sock_fs_type = {
423 .name = "sockfs",
fba9be49 424 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
425 .kill_sb = kill_anon_super,
426};
427
1da177e4
LT
428/*
429 * Obtains the first available file descriptor and sets it up for use.
430 *
39d8c1b6
DM
431 * These functions create file structures and maps them to fd space
432 * of the current process. On success it returns file descriptor
1da177e4
LT
433 * and file struct implicitly stored in sock->file.
434 * Note that another thread may close file descriptor before we return
435 * from this function. We use the fact that now we do not refer
436 * to socket after mapping. If one day we will need it, this
437 * function will increment ref. count on file by 1.
438 *
439 * In any case returned fd MAY BE not valid!
440 * This race condition is unavoidable
441 * with shared fd spaces, we cannot solve it inside kernel,
442 * but we take care of internal coherence yet.
443 */
444
8a3c245c
PT
445/**
446 * sock_alloc_file - Bind a &socket to a &file
447 * @sock: socket
448 * @flags: file status flags
449 * @dname: protocol name
450 *
451 * Returns the &file bound with @sock, implicitly storing it
452 * in sock->file. If dname is %NULL, sets to "".
649c15c7
TLSC
453 *
454 * On failure @sock is released, and an ERR pointer is returned.
455 *
8a3c245c
PT
456 * This function uses GFP_KERNEL internally.
457 */
458
aab174f0 459struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 460{
7cbe66b6 461 struct file *file;
1da177e4 462
d93aa9d8
AV
463 if (!dname)
464 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 465
d93aa9d8
AV
466 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
467 O_RDWR | (flags & O_NONBLOCK),
468 &socket_file_ops);
b5ffe634 469 if (IS_ERR(file)) {
8e1611e2 470 sock_release(sock);
39b65252 471 return file;
cc3808f8
AV
472 }
473
474 sock->file = file;
39d8c1b6 475 file->private_data = sock;
d8e464ec 476 stream_open(SOCK_INODE(sock), file);
28407630 477 return file;
39d8c1b6 478}
56b31d1c 479EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 480
56b31d1c 481static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
482{
483 struct file *newfile;
28407630 484 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
485 if (unlikely(fd < 0)) {
486 sock_release(sock);
28407630 487 return fd;
ce4bb04c 488 }
39d8c1b6 489
aab174f0 490 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 491 if (!IS_ERR(newfile)) {
39d8c1b6 492 fd_install(fd, newfile);
28407630
AV
493 return fd;
494 }
7cbe66b6 495
28407630
AV
496 put_unused_fd(fd);
497 return PTR_ERR(newfile);
1da177e4
LT
498}
499
8a3c245c
PT
500/**
501 * sock_from_file - Return the &socket bound to @file.
502 * @file: file
8a3c245c 503 *
dba4a925 504 * On failure returns %NULL.
8a3c245c
PT
505 */
506
dba4a925 507struct socket *sock_from_file(struct file *file)
6cb153ca 508{
6cb153ca 509 if (file->f_op == &socket_file_ops)
da214a47 510 return file->private_data; /* set in sock_alloc_file */
6cb153ca 511
23bb80d2 512 return NULL;
6cb153ca 513}
406a3c63 514EXPORT_SYMBOL(sock_from_file);
6cb153ca 515
1da177e4 516/**
c6d409cf 517 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
518 * @fd: file handle
519 * @err: pointer to an error code return
520 *
521 * The file handle passed in is locked and the socket it is bound
241c4667 522 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
523 * with a negative errno code and NULL is returned. The function checks
524 * for both invalid handles and passing a handle which is not a socket.
525 *
526 * On a success the socket object pointer is returned.
527 */
528
529struct socket *sockfd_lookup(int fd, int *err)
530{
531 struct file *file;
1da177e4
LT
532 struct socket *sock;
533
89bddce5
SH
534 file = fget(fd);
535 if (!file) {
1da177e4
LT
536 *err = -EBADF;
537 return NULL;
538 }
89bddce5 539
dba4a925
FR
540 sock = sock_from_file(file);
541 if (!sock) {
542 *err = -ENOTSOCK;
1da177e4 543 fput(file);
dba4a925 544 }
6cb153ca
BL
545 return sock;
546}
c6d409cf 547EXPORT_SYMBOL(sockfd_lookup);
1da177e4 548
6cb153ca
BL
549static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
550{
00e188ef 551 struct fd f = fdget(fd);
6cb153ca
BL
552 struct socket *sock;
553
3672558c 554 *err = -EBADF;
00e188ef 555 if (f.file) {
dba4a925 556 sock = sock_from_file(f.file);
00e188ef 557 if (likely(sock)) {
ce787a5a 558 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 559 return sock;
00e188ef 560 }
dba4a925 561 *err = -ENOTSOCK;
00e188ef 562 fdput(f);
1da177e4 563 }
6cb153ca 564 return NULL;
1da177e4
LT
565}
566
600e1779
MY
567static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
568 size_t size)
569{
570 ssize_t len;
571 ssize_t used = 0;
572
c5ef6035 573 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
574 if (len < 0)
575 return len;
576 used += len;
577 if (buffer) {
578 if (size < used)
579 return -ERANGE;
580 buffer += len;
581 }
582
583 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
584 used += len;
585 if (buffer) {
586 if (size < used)
587 return -ERANGE;
588 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
589 buffer += len;
590 }
591
592 return used;
593}
594
c1632a0f 595static int sockfs_setattr(struct mnt_idmap *idmap,
549c7297 596 struct dentry *dentry, struct iattr *iattr)
86741ec2 597{
c1632a0f 598 int err = simple_setattr(&nop_mnt_idmap, dentry, iattr);
86741ec2 599
e1a3a60a 600 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
601 struct socket *sock = SOCKET_I(d_inode(dentry));
602
6d8c50dc
CW
603 if (sock->sk)
604 sock->sk->sk_uid = iattr->ia_uid;
605 else
606 err = -ENOENT;
86741ec2
LC
607 }
608
609 return err;
610}
611
600e1779 612static const struct inode_operations sockfs_inode_ops = {
600e1779 613 .listxattr = sockfs_listxattr,
86741ec2 614 .setattr = sockfs_setattr,
600e1779
MY
615};
616
1da177e4 617/**
8a3c245c 618 * sock_alloc - allocate a socket
89bddce5 619 *
1da177e4
LT
620 * Allocate a new inode and socket object. The two are bound together
621 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 622 * NULL is returned. This function uses GFP_KERNEL internally.
1da177e4
LT
623 */
624
f4a00aac 625struct socket *sock_alloc(void)
1da177e4 626{
89bddce5
SH
627 struct inode *inode;
628 struct socket *sock;
1da177e4 629
a209dfc7 630 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
631 if (!inode)
632 return NULL;
633
634 sock = SOCKET_I(inode);
635
85fe4025 636 inode->i_ino = get_next_ino();
89bddce5 637 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
638 inode->i_uid = current_fsuid();
639 inode->i_gid = current_fsgid();
600e1779 640 inode->i_op = &sockfs_inode_ops;
1da177e4 641
1da177e4
LT
642 return sock;
643}
f4a00aac 644EXPORT_SYMBOL(sock_alloc);
1da177e4 645
6d8c50dc 646static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
647{
648 if (sock->ops) {
649 struct module *owner = sock->ops->owner;
650
6d8c50dc
CW
651 if (inode)
652 inode_lock(inode);
1da177e4 653 sock->ops->release(sock);
ff7b11aa 654 sock->sk = NULL;
6d8c50dc
CW
655 if (inode)
656 inode_unlock(inode);
1da177e4
LT
657 sock->ops = NULL;
658 module_put(owner);
659 }
660
333f7909 661 if (sock->wq.fasync_list)
3410f22e 662 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 663
1da177e4
LT
664 if (!sock->file) {
665 iput(SOCK_INODE(sock));
666 return;
667 }
89bddce5 668 sock->file = NULL;
1da177e4 669}
6d8c50dc 670
9a8ad9ac
AL
671/**
672 * sock_release - close a socket
673 * @sock: socket to close
674 *
675 * The socket is released from the protocol stack if it has a release
676 * callback, and the inode is then released if the socket is bound to
677 * an inode not a file.
678 */
6d8c50dc
CW
679void sock_release(struct socket *sock)
680{
681 __sock_release(sock, NULL);
682}
c6d409cf 683EXPORT_SYMBOL(sock_release);
1da177e4 684
c14ac945 685void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 686{
140c55d4
ED
687 u8 flags = *tx_flags;
688
51eb7492 689 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) {
140c55d4
ED
690 flags |= SKBTX_HW_TSTAMP;
691
51eb7492
GE
692 /* PTP hardware clocks can provide a free running cycle counter
693 * as a time base for virtual clocks. Tell driver to use the
694 * free running cycle counter for timestamp if socket is bound
695 * to virtual clock.
696 */
697 if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
698 flags |= SKBTX_HW_TSTAMP_USE_CYCLES;
699 }
700
c14ac945 701 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
702 flags |= SKBTX_SW_TSTAMP;
703
c14ac945 704 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
705 flags |= SKBTX_SCHED_TSTAMP;
706
140c55d4 707 *tx_flags = flags;
20d49473 708}
67cc0d40 709EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 710
8c3c447b
PA
711INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
712 size_t));
a648a592
PA
713INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
714 size_t));
6e6eda44
YC
715
716static noinline void call_trace_sock_send_length(struct sock *sk, int ret,
717 int flags)
718{
719 trace_sock_send_length(sk, ret, 0);
720}
721
d8725c86 722static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 723{
a648a592
PA
724 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
725 inet_sendmsg, sock, msg,
726 msg_data_left(msg));
d8725c86 727 BUG_ON(ret == -EIOCBQUEUED);
6e6eda44
YC
728
729 if (trace_sock_send_length_enabled())
730 call_trace_sock_send_length(sock->sk, ret, 0);
d8725c86 731 return ret;
1da177e4
LT
732}
733
85806af0
RD
734/**
735 * sock_sendmsg - send a message through @sock
736 * @sock: socket
737 * @msg: message to send
738 *
739 * Sends @msg through @sock, passing through LSM.
740 * Returns the number of bytes sent, or an error code.
741 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	/* The LSM hook runs first; a non-zero result is returned as is. */
	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 750
8a3c245c
PT
751/**
752 * kernel_sendmsg - send a message through @sock (kernel-space)
753 * @sock: socket
754 * @msg: message header
755 * @vec: kernel vec
756 * @num: vec array length
757 * @size: total message data size
758 *
759 * Builds the message data with @vec and sends it through @sock.
760 * Returns the number of bytes sent, or an error code.
761 */
762
1da177e4
LT
763int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
764 struct kvec *vec, size_t num, size_t size)
765{
de4eda9d 766 iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
d8725c86 767 return sock_sendmsg(sock, msg);
1da177e4 768}
c6d409cf 769EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 770
8a3c245c
PT
771/**
772 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
773 * @sk: sock
774 * @msg: message header
775 * @vec: output s/g array
776 * @num: output s/g array length
777 * @size: total message data size
778 *
779 * Builds the message data with @vec and sends it through the socket of @sk.
780 * Returns the number of bytes sent, or an error code.
781 * Caller must hold @sk.
782 */
783
306b13eb
TH
784int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
785 struct kvec *vec, size_t num, size_t size)
786{
787 struct socket *sock = sk->sk_socket;
788
789 if (!sock->ops->sendmsg_locked)
db5980d8 790 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 791
de4eda9d 792 iov_iter_kvec(&msg->msg_iter, ITER_SOURCE, vec, num, size);
306b13eb
TH
793
794 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
795}
796EXPORT_SYMBOL(kernel_sendmsg_locked);
797
8605330a
SHY
798static bool skb_is_err_queue(const struct sk_buff *skb)
799{
800 /* pkt_type of skbs enqueued on the error queue are set to
801 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
802 * in recvmsg, since skbs received on a local socket will never
803 * have a pkt_type of PACKET_OUTGOING.
804 */
805 return skb->pkt_type == PACKET_OUTGOING;
806}
807
b50a5c70
ML
808/* On transmit, software and hardware timestamps are returned independently.
809 * As the two skb clones share the hardware timestamp, which may be updated
810 * before the software timestamp is received, a hardware TX timestamp may be
811 * returned only if there is no software TX timestamp. Ignore false software
812 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 813 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
814 * hardware timestamp.
815 */
816static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
817{
818 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
819}
820
97dc7cd9
GE
821static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
822{
823 bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC;
824 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
825 struct net_device *orig_dev;
826 ktime_t hwtstamp;
827
828 rcu_read_lock();
829 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
830 if (orig_dev) {
831 *if_index = orig_dev->ifindex;
832 hwtstamp = netdev_get_tstamp(orig_dev, shhwtstamps, cycles);
833 } else {
834 hwtstamp = shhwtstamps->hwtstamp;
835 }
836 rcu_read_unlock();
837
838 return hwtstamp;
839}
840
841static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb,
842 int if_index)
aad9c8c4
ML
843{
844 struct scm_ts_pktinfo ts_pktinfo;
845 struct net_device *orig_dev;
846
847 if (!skb_mac_header_was_set(skb))
848 return;
849
850 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
851
97dc7cd9
GE
852 if (!if_index) {
853 rcu_read_lock();
854 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
855 if (orig_dev)
856 if_index = orig_dev->ifindex;
857 rcu_read_unlock();
858 }
859 ts_pktinfo.if_index = if_index;
aad9c8c4
ML
860
861 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
862 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
863 sizeof(ts_pktinfo), &ts_pktinfo);
864}
865
92f37fd2
ED
866/*
867 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
868 */
869void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
870 struct sk_buff *skb)
871{
20d49473 872 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 873 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
874 struct scm_timestamping_internal tss;
875
b50a5c70 876 int empty = 1, false_tstamp = 0;
20d49473
PO
877 struct skb_shared_hwtstamps *shhwtstamps =
878 skb_hwtstamps(skb);
97dc7cd9 879 int if_index;
007747a9 880 ktime_t hwtstamp;
20d49473
PO
881
882 /* Race occurred between timestamp enabling and packet
883 receiving. Fill in the current time for now. */
b50a5c70 884 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 885 __net_timestamp(skb);
b50a5c70
ML
886 false_tstamp = 1;
887 }
20d49473
PO
888
889 if (need_software_tstamp) {
890 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
891 if (new_tstamp) {
892 struct __kernel_sock_timeval tv;
893
894 skb_get_new_timestamp(skb, &tv);
895 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
896 sizeof(tv), &tv);
897 } else {
898 struct __kernel_old_timeval tv;
899
900 skb_get_timestamp(skb, &tv);
901 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
902 sizeof(tv), &tv);
903 }
20d49473 904 } else {
887feae3
DD
905 if (new_tstamp) {
906 struct __kernel_timespec ts;
907
908 skb_get_new_timestampns(skb, &ts);
909 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
910 sizeof(ts), &ts);
911 } else {
df1b4ba9 912 struct __kernel_old_timespec ts;
887feae3
DD
913
914 skb_get_timestampns(skb, &ts);
915 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
916 sizeof(ts), &ts);
917 }
20d49473
PO
918 }
919 }
920
f24b9be5 921 memset(&tss, 0, sizeof(tss));
c199105d 922 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 923 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 924 empty = 0;
4d276eb6 925 if (shhwtstamps &&
b9f40e21 926 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
d7c08826 927 !skb_is_swtx_tstamp(skb, false_tstamp)) {
97dc7cd9
GE
928 if_index = 0;
929 if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
930 hwtstamp = get_timestamp(sk, skb, &if_index);
007747a9
ML
931 else
932 hwtstamp = shhwtstamps->hwtstamp;
d7c08826 933
97dc7cd9
GE
934 if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
935 hwtstamp = ptp_convert_timestamp(&hwtstamp,
936 sk->sk_bind_phc);
937
007747a9 938 if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
d7c08826
YL
939 empty = 0;
940
941 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
942 !skb_is_err_queue(skb))
97dc7cd9 943 put_ts_pktinfo(msg, skb, if_index);
d7c08826 944 }
aad9c8c4 945 }
1c885808 946 if (!empty) {
9718475e
DD
947 if (sock_flag(sk, SOCK_TSTAMP_NEW))
948 put_cmsg_scm_timestamping64(msg, &tss);
949 else
950 put_cmsg_scm_timestamping(msg, &tss);
1c885808 951
8605330a 952 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 953 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
954 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
955 skb->len, skb->data);
956 }
92f37fd2 957}
7c81fd8b
ACM
958EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
959
#ifdef CONFIG_WIRELESS
/*
 * Emit an SCM_WIFI_STATUS control message holding the skb's wifi_acked
 * bit.  Nothing is emitted unless the socket has SOCK_WIFI_STATUS set
 * and the skb marks that bit as valid.
 */
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
			     struct sk_buff *skb)
{
	int ack;

	if (!sock_flag(sk, SOCK_WIFI_STATUS) || !skb->wifi_acked_valid)
		return;

	/* Widen the bitfield into an int for the control message payload. */
	ack = skb->wifi_acked;
	put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
}
EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
#endif /* CONFIG_WIRELESS */
6e3e939f 977
11165f14 978static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
979 struct sk_buff *skb)
3b885787 980{
744d5a3e 981 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 982 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 983 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
984}
985
6fd1d51c
EM
986static void sock_recv_mark(struct msghdr *msg, struct sock *sk,
987 struct sk_buff *skb)
988{
2558b803
ED
989 if (sock_flag(sk, SOCK_RCVMARK) && skb) {
990 /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */
991 __u32 mark = skb->mark;
992
993 put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), &mark);
994 }
6fd1d51c
EM
995}
996
/*
 * Append the generic receive-side control messages for @skb to @msg:
 * timestamps first, then the receive-queue drop counter, then the mark.
 * Each helper decides on its own whether anything has to be emitted.
 */
void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
		       struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
	sock_recv_mark(msg, sk, skb);
}
6fd1d51c 1004EXPORT_SYMBOL_GPL(__sock_recv_cmsgs);
3b885787 1005
8c3c447b 1006INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
1007 size_t, int));
1008INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
1009 size_t, int));
6e6eda44
YC
1010
1011static noinline void call_trace_sock_recv_length(struct sock *sk, int ret, int flags)
1012{
1013 trace_sock_recv_length(sk, ret, flags);
1014}
1015
1b784140 1016static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 1017 int flags)
1da177e4 1018{
6e6eda44
YC
1019 int ret = INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
1020 inet_recvmsg, sock, msg,
1021 msg_data_left(msg), flags);
1022 if (trace_sock_recv_length_enabled())
1023 call_trace_sock_recv_length(sock->sk, ret, flags);
1024 return ret;
1da177e4
LT
1025}
1026
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Asks the LSM for permission first; if the security hook returns a
 *	non-zero value that value is returned and nothing is received.
 *	Otherwise returns the total number of bytes received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int denied;

	denied = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (denied)
		return denied;

	return sock_recvmsg_nosec(sock, msg, flags);
}
c6d409cf 1042EXPORT_SYMBOL(sock_recvmsg);
1da177e4 1043
c1249c0a 1044/**
8a3c245c
PT
1045 * kernel_recvmsg - Receive a message from a socket (kernel space)
1046 * @sock: The socket to receive the message from
1047 * @msg: Received message
1048 * @vec: Input s/g array for message data
1049 * @num: Size of input s/g array
1050 * @size: Number of bytes to read
1051 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 1052 *
8a3c245c
PT
1053 * On return the msg structure contains the scatter/gather array passed in the
1054 * vec argument. The array is modified so that it consists of the unfilled
1055 * portion of the original array.
c1249c0a 1056 *
8a3c245c 1057 * The returned value is the total number of bytes received, or an error.
c1249c0a 1058 */
8a3c245c 1059
89bddce5
SH
1060int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
1061 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4 1062{
1f466e1f 1063 msg->msg_control_is_user = false;
de4eda9d 1064 iov_iter_kvec(&msg->msg_iter, ITER_DEST, vec, num, size);
1f466e1f 1065 return sock_recvmsg(sock, msg, flags);
1da177e4 1066}
c6d409cf 1067EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 1068
9c55e01c 1069static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 1070 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
1071 unsigned int flags)
1072{
1073 struct socket *sock = file->private_data;
1074
997b37da 1075 if (unlikely(!sock->ops->splice_read))
95506588 1076 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 1077
9c55e01c
JA
1078 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
1079}
1080
2bfc6685
DH
1081static void sock_splice_eof(struct file *file)
1082{
1083 struct socket *sock = file->private_data;
1084
1085 if (sock->ops->splice_eof)
1086 sock->ops->splice_eof(sock);
1087}
1088
8ae5e030 1089static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 1090{
6d652330
AV
1091 struct file *file = iocb->ki_filp;
1092 struct socket *sock = file->private_data;
0345f931 1093 struct msghdr msg = {.msg_iter = *to,
1094 .msg_iocb = iocb};
8ae5e030 1095 ssize_t res;
ce1d4d3e 1096
ebfcd895 1097 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1098 msg.msg_flags = MSG_DONTWAIT;
1099
1100 if (iocb->ki_pos != 0)
1da177e4 1101 return -ESPIPE;
027445c3 1102
66ee59af 1103 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
1104 return 0;
1105
2da62906 1106 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
1107 *to = msg.msg_iter;
1108 return res;
1da177e4
LT
1109}
1110
8ae5e030 1111static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 1112{
6d652330
AV
1113 struct file *file = iocb->ki_filp;
1114 struct socket *sock = file->private_data;
0345f931 1115 struct msghdr msg = {.msg_iter = *from,
1116 .msg_iocb = iocb};
8ae5e030 1117 ssize_t res;
1da177e4 1118
8ae5e030 1119 if (iocb->ki_pos != 0)
ce1d4d3e 1120 return -ESPIPE;
027445c3 1121
ebfcd895 1122 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1123 msg.msg_flags = MSG_DONTWAIT;
1124
6d652330
AV
1125 if (sock->type == SOCK_SEQPACKET)
1126 msg.msg_flags |= MSG_EOR;
1127
d8725c86 1128 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1129 *from = msg.msg_iter;
1130 return res;
1da177e4
LT
1131}
1132
1da177e4
LT
1133/*
1134 * Atomic setting of ioctl hooks to avoid race
1135 * with module unload.
1136 */
1137
4a3e2f71 1138static DEFINE_MUTEX(br_ioctl_mutex);
ad2f99ae
AB
1139static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br,
1140 unsigned int cmd, struct ifreq *ifr,
1141 void __user *uarg);
1da177e4 1142
ad2f99ae
AB
1143void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
1144 unsigned int cmd, struct ifreq *ifr,
1145 void __user *uarg))
1da177e4 1146{
4a3e2f71 1147 mutex_lock(&br_ioctl_mutex);
1da177e4 1148 br_ioctl_hook = hook;
4a3e2f71 1149 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1150}
1151EXPORT_SYMBOL(brioctl_set);
1152
ad2f99ae
AB
1153int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
1154 struct ifreq *ifr, void __user *uarg)
1155{
1156 int err = -ENOPKG;
1157
1158 if (!br_ioctl_hook)
1159 request_module("bridge");
1160
1161 mutex_lock(&br_ioctl_mutex);
1162 if (br_ioctl_hook)
1163 err = br_ioctl_hook(net, br, cmd, ifr, uarg);
1164 mutex_unlock(&br_ioctl_mutex);
1165
1166 return err;
1167}
1168
4a3e2f71 1169static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1170static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1171
881d966b 1172void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1173{
4a3e2f71 1174 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1175 vlan_ioctl_hook = hook;
4a3e2f71 1176 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1177}
1178EXPORT_SYMBOL(vlan_ioctl_set);
1179
6b96018b 1180static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1181 unsigned int cmd, unsigned long arg)
6b96018b 1182{
876f0bf9
AB
1183 struct ifreq ifr;
1184 bool need_copyout;
6b96018b
AB
1185 int err;
1186 void __user *argp = (void __user *)arg;
a554bf96 1187 void __user *data;
6b96018b
AB
1188
1189 err = sock->ops->ioctl(sock, cmd, arg);
1190
1191 /*
1192 * If this ioctl is unknown try to hand it down
1193 * to the NIC driver.
1194 */
36fd633e
AV
1195 if (err != -ENOIOCTLCMD)
1196 return err;
6b96018b 1197
29ce8f97
JK
1198 if (!is_socket_ioctl_cmd(cmd))
1199 return -ENOTTY;
1200
a554bf96 1201 if (get_user_ifreq(&ifr, &data, argp))
876f0bf9 1202 return -EFAULT;
a554bf96 1203 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
876f0bf9 1204 if (!err && need_copyout)
a554bf96 1205 if (put_user_ifreq(&ifr, argp))
44c02a2c 1206 return -EFAULT;
876f0bf9 1207
6b96018b
AB
1208 return err;
1209}
1210
1da177e4
LT
1211/*
1212 * With an ioctl, arg may well be a user mode pointer, but we don't know
1213 * what to do with it - that's up to the protocol still.
1214 */
1215
1216static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1217{
1218 struct socket *sock;
881d966b 1219 struct sock *sk;
1da177e4
LT
1220 void __user *argp = (void __user *)arg;
1221 int pid, err;
881d966b 1222 struct net *net;
1da177e4 1223
b69aee04 1224 sock = file->private_data;
881d966b 1225 sk = sock->sk;
3b1e0a65 1226 net = sock_net(sk);
44c02a2c
AV
1227 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1228 struct ifreq ifr;
a554bf96 1229 void __user *data;
44c02a2c 1230 bool need_copyout;
a554bf96 1231 if (get_user_ifreq(&ifr, &data, argp))
44c02a2c 1232 return -EFAULT;
a554bf96 1233 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
44c02a2c 1234 if (!err && need_copyout)
a554bf96 1235 if (put_user_ifreq(&ifr, argp))
44c02a2c 1236 return -EFAULT;
1da177e4 1237 } else
3d23e349 1238#ifdef CONFIG_WEXT_CORE
1da177e4 1239 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1240 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1241 } else
3d23e349 1242#endif
89bddce5 1243 switch (cmd) {
1da177e4
LT
1244 case FIOSETOWN:
1245 case SIOCSPGRP:
1246 err = -EFAULT;
1247 if (get_user(pid, (int __user *)argp))
1248 break;
393cc3f5 1249 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1250 break;
1251 case FIOGETOWN:
1252 case SIOCGPGRP:
609d7fa9 1253 err = put_user(f_getown(sock->file),
89bddce5 1254 (int __user *)argp);
1da177e4
LT
1255 break;
1256 case SIOCGIFBR:
1257 case SIOCSIFBR:
1258 case SIOCBRADDBR:
1259 case SIOCBRDELBR:
ad2f99ae 1260 err = br_ioctl_call(net, NULL, cmd, NULL, argp);
1da177e4
LT
1261 break;
1262 case SIOCGIFVLAN:
1263 case SIOCSIFVLAN:
1264 err = -ENOPKG;
1265 if (!vlan_ioctl_hook)
1266 request_module("8021q");
1267
4a3e2f71 1268 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1269 if (vlan_ioctl_hook)
881d966b 1270 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1271 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1272 break;
c62cce2c
AV
1273 case SIOCGSKNS:
1274 err = -EPERM;
1275 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1276 break;
1277
1278 err = open_related_ns(&net->ns, get_net_ns);
1279 break;
0768e170
AB
1280 case SIOCGSTAMP_OLD:
1281 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1282 if (!sock->ops->gettstamp) {
1283 err = -ENOIOCTLCMD;
1284 break;
1285 }
1286 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1287 cmd == SIOCGSTAMP_OLD,
1288 !IS_ENABLED(CONFIG_64BIT));
60747828 1289 break;
0768e170
AB
1290 case SIOCGSTAMP_NEW:
1291 case SIOCGSTAMPNS_NEW:
1292 if (!sock->ops->gettstamp) {
1293 err = -ENOIOCTLCMD;
1294 break;
1295 }
1296 err = sock->ops->gettstamp(sock, argp,
1297 cmd == SIOCGSTAMP_NEW,
1298 false);
c7cbdbf2 1299 break;
876f0bf9
AB
1300
1301 case SIOCGIFCONF:
1302 err = dev_ifconf(net, argp);
1303 break;
1304
1da177e4 1305 default:
63ff03ab 1306 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1307 break;
89bddce5 1308 }
1da177e4
LT
1309 return err;
1310}
1311
8a3c245c
PT
1312/**
1313 * sock_create_lite - creates a socket
1314 * @family: protocol family (AF_INET, ...)
1315 * @type: communication type (SOCK_STREAM, ...)
1316 * @protocol: protocol (0, ...)
1317 * @res: new socket
1318 *
1319 * Creates a new socket and assigns it to @res, passing through LSM.
1320 * The new socket initialization is not complete, see kernel_accept().
1321 * Returns 0 or an error. On failure @res is set to %NULL.
1322 * This function internally uses GFP_KERNEL.
1323 */
1324
1da177e4
LT
1325int sock_create_lite(int family, int type, int protocol, struct socket **res)
1326{
1327 int err;
1328 struct socket *sock = NULL;
89bddce5 1329
1da177e4
LT
1330 err = security_socket_create(family, type, protocol, 1);
1331 if (err)
1332 goto out;
1333
1334 sock = sock_alloc();
1335 if (!sock) {
1336 err = -ENOMEM;
1337 goto out;
1338 }
1339
1da177e4 1340 sock->type = type;
7420ed23
VY
1341 err = security_socket_post_create(sock, family, type, protocol, 1);
1342 if (err)
1343 goto out_release;
1344
1da177e4
LT
1345out:
1346 *res = sock;
1347 return err;
7420ed23
VY
1348out_release:
1349 sock_release(sock);
1350 sock = NULL;
1351 goto out;
1da177e4 1352}
c6d409cf 1353EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1354
1355/* No kernel lock held - perfect */
ade994f4 1356static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1357{
3cafb376 1358 struct socket *sock = file->private_data;
a331de3b 1359 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1360
e88958e6
CH
1361 if (!sock->ops->poll)
1362 return 0;
f641f13b 1363
a331de3b
CH
1364 if (sk_can_busy_loop(sock->sk)) {
1365 /* poll once if requested by the syscall */
1366 if (events & POLL_BUSY_LOOP)
1367 sk_busy_loop(sock->sk, 1);
1368
1369 /* if this socket can poll_ll, tell the system call */
1370 flag = POLL_BUSY_LOOP;
1371 }
1372
1373 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1374}
1375
89bddce5 1376static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1377{
b69aee04 1378 struct socket *sock = file->private_data;
1da177e4
LT
1379
1380 return sock->ops->mmap(file, sock, vma);
1381}
1382
/*
 * release file operation for sockets: release the socket embedded in
 * @inode. Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1388
1389/*
1390 * Update the socket async list
1391 *
1392 * Fasync_list locking strategy.
1393 *
1394 * 1. fasync_list is modified only under process context socket lock
1395 * i.e. under semaphore.
1396 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1397 * or under socket lock
1da177e4
LT
1398 */
1399
1400static int sock_fasync(int fd, struct file *filp, int on)
1401{
989a2979
ED
1402 struct socket *sock = filp->private_data;
1403 struct sock *sk = sock->sk;
333f7909 1404 struct socket_wq *wq = &sock->wq;
1da177e4 1405
989a2979 1406 if (sk == NULL)
1da177e4 1407 return -EINVAL;
1da177e4
LT
1408
1409 lock_sock(sk);
eaefd110 1410 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1411
eaefd110 1412 if (!wq->fasync_list)
989a2979
ED
1413 sock_reset_flag(sk, SOCK_FASYNC);
1414 else
bcdce719 1415 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1416
989a2979 1417 release_sock(sk);
1da177e4
LT
1418 return 0;
1419}
1420
ceb5d58b 1421/* This function may be called only under rcu_lock */
1da177e4 1422
ceb5d58b 1423int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1424{
ceb5d58b 1425 if (!wq || !wq->fasync_list)
1da177e4 1426 return -1;
ceb5d58b 1427
89bddce5 1428 switch (how) {
8d8ad9d7 1429 case SOCK_WAKE_WAITD:
ceb5d58b 1430 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1431 break;
1432 goto call_kill;
8d8ad9d7 1433 case SOCK_WAKE_SPACE:
ceb5d58b 1434 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1435 break;
7c7ab580 1436 fallthrough;
8d8ad9d7 1437 case SOCK_WAKE_IO:
89bddce5 1438call_kill:
43815482 1439 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1440 break;
8d8ad9d7 1441 case SOCK_WAKE_URG:
43815482 1442 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1443 }
ceb5d58b 1444
1da177e4
LT
1445 return 0;
1446}
c6d409cf 1447EXPORT_SYMBOL(sock_wake_async);
1da177e4 1448
8a3c245c
PT
1449/**
1450 * __sock_create - creates a socket
1451 * @net: net namespace
1452 * @family: protocol family (AF_INET, ...)
1453 * @type: communication type (SOCK_STREAM, ...)
1454 * @protocol: protocol (0, ...)
1455 * @res: new socket
1456 * @kern: boolean for kernel space sockets
1457 *
1458 * Creates a new socket and assigns it to @res, passing through LSM.
1459 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1460 * be set to true if the socket resides in kernel space.
1461 * This function internally uses GFP_KERNEL.
1462 */
1463
721db93a 1464int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1465 struct socket **res, int kern)
1da177e4
LT
1466{
1467 int err;
1468 struct socket *sock;
55737fda 1469 const struct net_proto_family *pf;
1da177e4
LT
1470
1471 /*
89bddce5 1472 * Check protocol is in range
1da177e4
LT
1473 */
1474 if (family < 0 || family >= NPROTO)
1475 return -EAFNOSUPPORT;
1476 if (type < 0 || type >= SOCK_MAX)
1477 return -EINVAL;
1478
1479 /* Compatibility.
1480
1481 This uglymoron is moved from INET layer to here to avoid
1482 deadlock in module load.
1483 */
1484 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1485 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1486 current->comm);
1da177e4
LT
1487 family = PF_PACKET;
1488 }
1489
1490 err = security_socket_create(family, type, protocol, kern);
1491 if (err)
1492 return err;
89bddce5 1493
55737fda
SH
1494 /*
1495 * Allocate the socket and allow the family to set things up. if
1496 * the protocol is 0, the family is instructed to select an appropriate
1497 * default.
1498 */
1499 sock = sock_alloc();
1500 if (!sock) {
e87cc472 1501 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1502 return -ENFILE; /* Not exactly a match, but its the
1503 closest posix thing */
1504 }
1505
1506 sock->type = type;
1507
95a5afca 1508#ifdef CONFIG_MODULES
89bddce5
SH
1509 /* Attempt to load a protocol module if the find failed.
1510 *
1511 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1512 * requested real, full-featured networking support upon configuration.
1513 * Otherwise module support will break!
1514 */
190683a9 1515 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1516 request_module("net-pf-%d", family);
1da177e4
LT
1517#endif
1518
55737fda
SH
1519 rcu_read_lock();
1520 pf = rcu_dereference(net_families[family]);
1521 err = -EAFNOSUPPORT;
1522 if (!pf)
1523 goto out_release;
1da177e4
LT
1524
1525 /*
1526 * We will call the ->create function, that possibly is in a loadable
1527 * module, so we have to bump that loadable module refcnt first.
1528 */
55737fda 1529 if (!try_module_get(pf->owner))
1da177e4
LT
1530 goto out_release;
1531
55737fda
SH
1532 /* Now protected by module ref count */
1533 rcu_read_unlock();
1534
3f378b68 1535 err = pf->create(net, sock, protocol, kern);
55737fda 1536 if (err < 0)
1da177e4 1537 goto out_module_put;
a79af59e 1538
1da177e4
LT
1539 /*
1540 * Now to bump the refcnt of the [loadable] module that owns this
1541 * socket at sock_release time we decrement its refcnt.
1542 */
55737fda
SH
1543 if (!try_module_get(sock->ops->owner))
1544 goto out_module_busy;
1545
1da177e4
LT
1546 /*
1547 * Now that we're done with the ->create function, the [loadable]
1548 * module can have its refcnt decremented
1549 */
55737fda 1550 module_put(pf->owner);
7420ed23
VY
1551 err = security_socket_post_create(sock, family, type, protocol, kern);
1552 if (err)
3b185525 1553 goto out_sock_release;
55737fda 1554 *res = sock;
1da177e4 1555
55737fda
SH
1556 return 0;
1557
1558out_module_busy:
1559 err = -EAFNOSUPPORT;
1da177e4 1560out_module_put:
55737fda
SH
1561 sock->ops = NULL;
1562 module_put(pf->owner);
1563out_sock_release:
1da177e4 1564 sock_release(sock);
55737fda
SH
1565 return err;
1566
1567out_release:
1568 rcu_read_unlock();
1569 goto out_sock_release;
1da177e4 1570}
721db93a 1571EXPORT_SYMBOL(__sock_create);
1da177e4 1572
8a3c245c
PT
1573/**
1574 * sock_create - creates a socket
1575 * @family: protocol family (AF_INET, ...)
1576 * @type: communication type (SOCK_STREAM, ...)
1577 * @protocol: protocol (0, ...)
1578 * @res: new socket
1579 *
1580 * A wrapper around __sock_create().
1581 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1582 */
1583
1da177e4
LT
1584int sock_create(int family, int type, int protocol, struct socket **res)
1585{
1b8d7ae4 1586 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1587}
c6d409cf 1588EXPORT_SYMBOL(sock_create);
1da177e4 1589
8a3c245c
PT
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes 1 for the
 *	kernel-socket flag.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
c6d409cf 1606EXPORT_SYMBOL(sock_create_kern);
1da177e4 1607
da214a47 1608static struct socket *__sys_socket_create(int family, int type, int protocol)
1da177e4 1609{
1da177e4 1610 struct socket *sock;
da214a47 1611 int retval;
a677a039 1612
e38b36f3
UD
1613 /* Check the SOCK_* constants for consistency. */
1614 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1615 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1616 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1617 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1618
da214a47
JA
1619 if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1620 return ERR_PTR(-EINVAL);
a677a039 1621 type &= SOCK_TYPE_MASK;
1da177e4 1622
da214a47
JA
1623 retval = sock_create(family, type, protocol, &sock);
1624 if (retval < 0)
1625 return ERR_PTR(retval);
1626
1627 return sock;
1628}
1629
1630struct file *__sys_socket_file(int family, int type, int protocol)
1631{
1632 struct socket *sock;
da214a47
JA
1633 int flags;
1634
1635 sock = __sys_socket_create(family, type, protocol);
1636 if (IS_ERR(sock))
1637 return ERR_CAST(sock);
1638
1639 flags = type & ~SOCK_TYPE_MASK;
aaca0bdc
UD
1640 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1641 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1642
649c15c7 1643 return sock_alloc_file(sock, flags, NULL);
da214a47
JA
1644}
1645
1646int __sys_socket(int family, int type, int protocol)
1647{
1648 struct socket *sock;
1649 int flags;
1650
1651 sock = __sys_socket_create(family, type, protocol);
1652 if (IS_ERR(sock))
1653 return PTR_ERR(sock);
1654
1655 flags = type & ~SOCK_TYPE_MASK;
1656 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1657 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1da177e4 1658
8e1611e2 1659 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1660}
1661
9d6a15c3
DB
1662SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1663{
1664 return __sys_socket(family, type, protocol);
1665}
1666
1da177e4
LT
1667/*
1668 * Create a pair of connected sockets.
1669 */
1670
6debc8d8 1671int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1672{
1673 struct socket *sock1, *sock2;
1674 int fd1, fd2, err;
db349509 1675 struct file *newfile1, *newfile2;
a677a039
UD
1676 int flags;
1677
1678 flags = type & ~SOCK_TYPE_MASK;
77d27200 1679 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1680 return -EINVAL;
1681 type &= SOCK_TYPE_MASK;
1da177e4 1682
aaca0bdc
UD
1683 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1684 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1685
016a266b
AV
1686 /*
1687 * reserve descriptors and make sure we won't fail
1688 * to return them to userland.
1689 */
1690 fd1 = get_unused_fd_flags(flags);
1691 if (unlikely(fd1 < 0))
1692 return fd1;
1693
1694 fd2 = get_unused_fd_flags(flags);
1695 if (unlikely(fd2 < 0)) {
1696 put_unused_fd(fd1);
1697 return fd2;
1698 }
1699
1700 err = put_user(fd1, &usockvec[0]);
1701 if (err)
1702 goto out;
1703
1704 err = put_user(fd2, &usockvec[1]);
1705 if (err)
1706 goto out;
1707
1da177e4
LT
1708 /*
1709 * Obtain the first socket and check if the underlying protocol
1710 * supports the socketpair call.
1711 */
1712
1713 err = sock_create(family, type, protocol, &sock1);
016a266b 1714 if (unlikely(err < 0))
1da177e4
LT
1715 goto out;
1716
1717 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1718 if (unlikely(err < 0)) {
1719 sock_release(sock1);
1720 goto out;
bf3c23d1 1721 }
d73aa286 1722
d47cd945
DH
1723 err = security_socket_socketpair(sock1, sock2);
1724 if (unlikely(err)) {
1725 sock_release(sock2);
1726 sock_release(sock1);
1727 goto out;
1728 }
1729
016a266b
AV
1730 err = sock1->ops->socketpair(sock1, sock2);
1731 if (unlikely(err < 0)) {
1732 sock_release(sock2);
1733 sock_release(sock1);
1734 goto out;
28407630
AV
1735 }
1736
aab174f0 1737 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1738 if (IS_ERR(newfile1)) {
28407630 1739 err = PTR_ERR(newfile1);
016a266b
AV
1740 sock_release(sock2);
1741 goto out;
28407630
AV
1742 }
1743
aab174f0 1744 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1745 if (IS_ERR(newfile2)) {
1746 err = PTR_ERR(newfile2);
016a266b
AV
1747 fput(newfile1);
1748 goto out;
db349509
AV
1749 }
1750
157cf649 1751 audit_fd_pair(fd1, fd2);
d73aa286 1752
db349509
AV
1753 fd_install(fd1, newfile1);
1754 fd_install(fd2, newfile2);
d73aa286 1755 return 0;
1da177e4 1756
016a266b 1757out:
d73aa286 1758 put_unused_fd(fd2);
d73aa286 1759 put_unused_fd(fd1);
1da177e4
LT
1760 return err;
1761}
1762
6debc8d8
DB
1763SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1764 int __user *, usockvec)
1765{
1766 return __sys_socketpair(family, type, protocol, usockvec);
1767}
1768
1da177e4
LT
1769/*
1770 * Bind a name to a socket. Nothing much to do here since it's
1771 * the protocol's responsibility to handle the local address.
1772 *
1773 * We move the socket address to kernel space before we call
1774 * the protocol layer (having also checked the address is ok).
1775 */
1776
a87d35d8 1777int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1778{
1779 struct socket *sock;
230b1839 1780 struct sockaddr_storage address;
6cb153ca 1781 int err, fput_needed;
1da177e4 1782
89bddce5 1783 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1784 if (sock) {
43db362d 1785 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1786 if (!err) {
89bddce5 1787 err = security_socket_bind(sock,
230b1839 1788 (struct sockaddr *)&address,
89bddce5 1789 addrlen);
6cb153ca
BL
1790 if (!err)
1791 err = sock->ops->bind(sock,
89bddce5 1792 (struct sockaddr *)
230b1839 1793 &address, addrlen);
1da177e4 1794 }
6cb153ca 1795 fput_light(sock->file, fput_needed);
89bddce5 1796 }
1da177e4
LT
1797 return err;
1798}
1799
a87d35d8
DB
1800SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1801{
1802 return __sys_bind(fd, umyaddr, addrlen);
1803}
1804
1da177e4
LT
1805/*
1806 * Perform a listen. Basically, we allow the protocol to do anything
1807 * necessary for a listen, and if that works, we mark the socket as
1808 * ready for listening.
1809 */
1810
25e290ee 1811int __sys_listen(int fd, int backlog)
1da177e4
LT
1812{
1813 struct socket *sock;
6cb153ca 1814 int err, fput_needed;
b8e1f9b5 1815 int somaxconn;
89bddce5
SH
1816
1817 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1818 if (sock) {
3c9ba81d 1819 somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
95c96174 1820 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1821 backlog = somaxconn;
1da177e4
LT
1822
1823 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1824 if (!err)
1825 err = sock->ops->listen(sock, backlog);
1da177e4 1826
6cb153ca 1827 fput_light(sock->file, fput_needed);
1da177e4
LT
1828 }
1829 return err;
1830}
1831
25e290ee
DB
1832SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1833{
1834 return __sys_listen(fd, backlog);
1835}
1836
d32f89da 1837struct file *do_accept(struct file *file, unsigned file_flags,
de2ea4b6 1838 struct sockaddr __user *upeer_sockaddr,
d32f89da 1839 int __user *upeer_addrlen, int flags)
1da177e4
LT
1840{
1841 struct socket *sock, *newsock;
39d8c1b6 1842 struct file *newfile;
d32f89da 1843 int err, len;
230b1839 1844 struct sockaddr_storage address;
1da177e4 1845
dba4a925 1846 sock = sock_from_file(file);
d32f89da
PB
1847 if (!sock)
1848 return ERR_PTR(-ENOTSOCK);
1da177e4 1849
c6d409cf
ED
1850 newsock = sock_alloc();
1851 if (!newsock)
d32f89da 1852 return ERR_PTR(-ENFILE);
1da177e4
LT
1853
1854 newsock->type = sock->type;
1855 newsock->ops = sock->ops;
1856
1da177e4
LT
1857 /*
1858 * We don't need try_module_get here, as the listening socket (sock)
1859 * has the protocol module (sock->ops->owner) held.
1860 */
1861 __module_get(newsock->ops->owner);
1862
aab174f0 1863 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
d32f89da
PB
1864 if (IS_ERR(newfile))
1865 return newfile;
39d8c1b6 1866
a79af59e
FF
1867 err = security_socket_accept(sock, newsock);
1868 if (err)
39d8c1b6 1869 goto out_fd;
a79af59e 1870
de2ea4b6
JA
1871 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1872 false);
1da177e4 1873 if (err < 0)
39d8c1b6 1874 goto out_fd;
1da177e4
LT
1875
1876 if (upeer_sockaddr) {
9b2c45d4
DV
1877 len = newsock->ops->getname(newsock,
1878 (struct sockaddr *)&address, 2);
1879 if (len < 0) {
1da177e4 1880 err = -ECONNABORTED;
39d8c1b6 1881 goto out_fd;
1da177e4 1882 }
43db362d 1883 err = move_addr_to_user(&address,
230b1839 1884 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1885 if (err < 0)
39d8c1b6 1886 goto out_fd;
1da177e4
LT
1887 }
1888
1889 /* File flags are not inherited via accept() unlike another OSes. */
d32f89da 1890 return newfile;
39d8c1b6 1891out_fd:
9606a216 1892 fput(newfile);
d32f89da
PB
1893 return ERR_PTR(err);
1894}
1895
c0424532
YD
1896static int __sys_accept4_file(struct file *file, struct sockaddr __user *upeer_sockaddr,
1897 int __user *upeer_addrlen, int flags)
d32f89da
PB
1898{
1899 struct file *newfile;
1900 int newfd;
1901
1902 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1903 return -EINVAL;
1904
1905 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1906 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
de2ea4b6 1907
c0424532 1908 newfd = get_unused_fd_flags(flags);
d32f89da
PB
1909 if (unlikely(newfd < 0))
1910 return newfd;
1911
c0424532 1912 newfile = do_accept(file, 0, upeer_sockaddr, upeer_addrlen,
d32f89da
PB
1913 flags);
1914 if (IS_ERR(newfile)) {
1915 put_unused_fd(newfd);
1916 return PTR_ERR(newfile);
1917 }
1918 fd_install(newfd, newfile);
1919 return newfd;
de2ea4b6
JA
1920}
1921
1922/*
1923 * For accept, we attempt to create a new socket, set up the link
1924 * with the client, wake up the client, then return the new
1925 * connected fd. We collect the address of the connector in kernel
1926 * space and move it to user at the very end. This is unclean because
1927 * we open the socket then return an error.
1928 *
1929 * 1003.1g adds the ability to recvmsg() to query connection pending
1930 * status to recvmsg. We need to add that support in a way thats
1931 * clean when we restructure accept also.
1932 */
1933
1934int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1935 int __user *upeer_addrlen, int flags)
1936{
1937 int ret = -EBADF;
1938 struct fd f;
1939
1940 f = fdget(fd);
1941 if (f.file) {
c0424532
YD
1942 ret = __sys_accept4_file(f.file, upeer_sockaddr,
1943 upeer_addrlen, flags);
6b07edeb 1944 fdput(f);
de2ea4b6
JA
1945 }
1946
1947 return ret;
1da177e4
LT
1948}
1949
4541e805
DB
1950SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1951 int __user *, upeer_addrlen, int, flags)
1952{
1953 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1954}
1955
20f37034
HC
1956SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1957 int __user *, upeer_addrlen)
aaca0bdc 1958{
4541e805 1959 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1960}
1961
1da177e4
LT
1962/*
1963 * Attempt to connect to a socket with the server address. The address
1964 * is in user space so we verify it is OK and move it to kernel space.
1965 *
1966 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1967 * break bindings
1968 *
1969 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1970 * other SEQPACKET protocols that take time to connect() as it doesn't
1971 * include the -EINPROGRESS status for such sockets.
1972 */
1973
f499a021 1974int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 1975 int addrlen, int file_flags)
1da177e4
LT
1976{
1977 struct socket *sock;
bd3ded31 1978 int err;
1da177e4 1979
dba4a925
FR
1980 sock = sock_from_file(file);
1981 if (!sock) {
1982 err = -ENOTSOCK;
1da177e4 1983 goto out;
dba4a925 1984 }
1da177e4 1985
89bddce5 1986 err =
f499a021 1987 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 1988 if (err)
bd3ded31 1989 goto out;
1da177e4 1990
f499a021 1991 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
bd3ded31 1992 sock->file->f_flags | file_flags);
1da177e4
LT
1993out:
1994 return err;
1995}
1996
bd3ded31
JA
1997int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1998{
1999 int ret = -EBADF;
2000 struct fd f;
2001
2002 f = fdget(fd);
2003 if (f.file) {
f499a021
JA
2004 struct sockaddr_storage address;
2005
2006 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
2007 if (!ret)
2008 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 2009 fdput(f);
bd3ded31
JA
2010 }
2011
2012 return ret;
2013}
2014
1387c2c2
DB
2015SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
2016 int, addrlen)
2017{
2018 return __sys_connect(fd, uservaddr, addrlen);
2019}
2020
1da177e4
LT
2021/*
2022 * Get the local address ('name') of a socket object. Move the obtained
2023 * name to user space.
2024 */
2025
8882a107
DB
2026int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
2027 int __user *usockaddr_len)
1da177e4
LT
2028{
2029 struct socket *sock;
230b1839 2030 struct sockaddr_storage address;
9b2c45d4 2031 int err, fput_needed;
89bddce5 2032
6cb153ca 2033 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
2034 if (!sock)
2035 goto out;
2036
2037 err = security_socket_getsockname(sock);
2038 if (err)
2039 goto out_put;
2040
9b2c45d4
DV
2041 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
2042 if (err < 0)
1da177e4 2043 goto out_put;
e44ef1d4 2044 /* "err" is actually length in this case */
9b2c45d4 2045 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
2046
2047out_put:
6cb153ca 2048 fput_light(sock->file, fput_needed);
1da177e4
LT
2049out:
2050 return err;
2051}
2052
8882a107
DB
2053SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
2054 int __user *, usockaddr_len)
2055{
2056 return __sys_getsockname(fd, usockaddr, usockaddr_len);
2057}
2058
1da177e4
LT
2059/*
2060 * Get the remote address ('name') of a socket object. Move the obtained
2061 * name to user space.
2062 */
2063
b21c8f83
DB
2064int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
2065 int __user *usockaddr_len)
1da177e4
LT
2066{
2067 struct socket *sock;
230b1839 2068 struct sockaddr_storage address;
9b2c45d4 2069 int err, fput_needed;
1da177e4 2070
89bddce5
SH
2071 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2072 if (sock != NULL) {
1da177e4
LT
2073 err = security_socket_getpeername(sock);
2074 if (err) {
6cb153ca 2075 fput_light(sock->file, fput_needed);
1da177e4
LT
2076 return err;
2077 }
2078
9b2c45d4
DV
2079 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
2080 if (err >= 0)
2081 /* "err" is actually length in this case */
2082 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 2083 usockaddr_len);
6cb153ca 2084 fput_light(sock->file, fput_needed);
1da177e4
LT
2085 }
2086 return err;
2087}
2088
b21c8f83
DB
2089SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
2090 int __user *, usockaddr_len)
2091{
2092 return __sys_getpeername(fd, usockaddr, usockaddr_len);
2093}
2094
1da177e4
LT
2095/*
2096 * Send a datagram to a given address. We move the address into kernel
2097 * space and check the user space data area is readable before invoking
2098 * the protocol.
2099 */
211b634b
DB
2100int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
2101 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
2102{
2103 struct socket *sock;
230b1839 2104 struct sockaddr_storage address;
1da177e4
LT
2105 int err;
2106 struct msghdr msg;
2107 struct iovec iov;
6cb153ca 2108 int fput_needed;
6cb153ca 2109
de4eda9d 2110 err = import_single_range(ITER_SOURCE, buff, len, &iov, &msg.msg_iter);
602bd0e9
AV
2111 if (unlikely(err))
2112 return err;
de0fa95c
PE
2113 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2114 if (!sock)
4387ff75 2115 goto out;
6cb153ca 2116
89bddce5 2117 msg.msg_name = NULL;
89bddce5
SH
2118 msg.msg_control = NULL;
2119 msg.msg_controllen = 0;
2120 msg.msg_namelen = 0;
7c701d92 2121 msg.msg_ubuf = NULL;
6cb153ca 2122 if (addr) {
43db362d 2123 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
2124 if (err < 0)
2125 goto out_put;
230b1839 2126 msg.msg_name = (struct sockaddr *)&address;
89bddce5 2127 msg.msg_namelen = addr_len;
1da177e4 2128 }
b841b901 2129 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
1da177e4
LT
2130 if (sock->file->f_flags & O_NONBLOCK)
2131 flags |= MSG_DONTWAIT;
2132 msg.msg_flags = flags;
d8725c86 2133 err = sock_sendmsg(sock, &msg);
1da177e4 2134
89bddce5 2135out_put:
de0fa95c 2136 fput_light(sock->file, fput_needed);
4387ff75 2137out:
1da177e4
LT
2138 return err;
2139}
2140
211b634b
DB
2141SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2142 unsigned int, flags, struct sockaddr __user *, addr,
2143 int, addr_len)
2144{
2145 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2146}
2147
1da177e4 2148/*
89bddce5 2149 * Send a datagram down a socket.
1da177e4
LT
2150 */
2151
3e0fa65f 2152SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2153 unsigned int, flags)
1da177e4 2154{
211b634b 2155 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2156}
2157
2158/*
89bddce5 2159 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2160 * sender. We verify the buffers are writable and if needed move the
2161 * sender address from kernel to user space.
2162 */
7a09e1eb
DB
2163int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2164 struct sockaddr __user *addr, int __user *addr_len)
1da177e4 2165{
1228b34c
ED
2166 struct sockaddr_storage address;
2167 struct msghdr msg = {
2168 /* Save some cycles and don't copy the address if not needed */
2169 .msg_name = addr ? (struct sockaddr *)&address : NULL,
2170 };
1da177e4
LT
2171 struct socket *sock;
2172 struct iovec iov;
89bddce5 2173 int err, err2;
6cb153ca
BL
2174 int fput_needed;
2175
de4eda9d 2176 err = import_single_range(ITER_DEST, ubuf, size, &iov, &msg.msg_iter);
602bd0e9
AV
2177 if (unlikely(err))
2178 return err;
de0fa95c 2179 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2180 if (!sock)
de0fa95c 2181 goto out;
1da177e4 2182
1da177e4
LT
2183 if (sock->file->f_flags & O_NONBLOCK)
2184 flags |= MSG_DONTWAIT;
2da62906 2185 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2186
89bddce5 2187 if (err >= 0 && addr != NULL) {
43db362d 2188 err2 = move_addr_to_user(&address,
230b1839 2189 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2190 if (err2 < 0)
2191 err = err2;
1da177e4 2192 }
de0fa95c
PE
2193
2194 fput_light(sock->file, fput_needed);
4387ff75 2195out:
1da177e4
LT
2196 return err;
2197}
2198
7a09e1eb
DB
2199SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2200 unsigned int, flags, struct sockaddr __user *, addr,
2201 int __user *, addr_len)
2202{
2203 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2204}
2205
1da177e4 2206/*
89bddce5 2207 * Receive a datagram from a socket.
1da177e4
LT
2208 */
2209
b7c0ddf5
JG
2210SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2211 unsigned int, flags)
1da177e4 2212{
7a09e1eb 2213 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2214}
2215
83f0c10b
FW
2216static bool sock_use_custom_sol_socket(const struct socket *sock)
2217{
a5ef058d 2218 return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags);
83f0c10b
FW
2219}
2220
1da177e4
LT
2221/*
2222 * Set a socket option. Because we don't know the option lengths we have
2223 * to pass the user mode parameter for the protocols to sort out.
2224 */
a7b75c5a 2225int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
55db9c0e 2226 int optlen)
1da177e4 2227{
519a8a6c 2228 sockptr_t optval = USER_SOCKPTR(user_optval);
0d01da6a 2229 char *kernel_optval = NULL;
6cb153ca 2230 int err, fput_needed;
1da177e4
LT
2231 struct socket *sock;
2232
2233 if (optlen < 0)
2234 return -EINVAL;
89bddce5
SH
2235
2236 sock = sockfd_lookup_light(fd, &err, &fput_needed);
4a367299
CH
2237 if (!sock)
2238 return err;
1da177e4 2239
4a367299
CH
2240 err = security_socket_setsockopt(sock, level, optname);
2241 if (err)
2242 goto out_put;
0d01da6a 2243
55db9c0e
CH
2244 if (!in_compat_syscall())
2245 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
a7b75c5a 2246 user_optval, &optlen,
55db9c0e 2247 &kernel_optval);
4a367299
CH
2248 if (err < 0)
2249 goto out_put;
2250 if (err > 0) {
2251 err = 0;
2252 goto out_put;
2253 }
0d01da6a 2254
a7b75c5a
CH
2255 if (kernel_optval)
2256 optval = KERNEL_SOCKPTR(kernel_optval);
4a367299 2257 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2258 err = sock_setsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2259 else if (unlikely(!sock->ops->setsockopt))
2260 err = -EOPNOTSUPP;
4a367299
CH
2261 else
2262 err = sock->ops->setsockopt(sock, level, optname, optval,
89bddce5 2263 optlen);
a7b75c5a 2264 kfree(kernel_optval);
4a367299
CH
2265out_put:
2266 fput_light(sock->file, fput_needed);
1da177e4
LT
2267 return err;
2268}
2269
cc36dca0
DB
2270SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2271 char __user *, optval, int, optlen)
2272{
2273 return __sys_setsockopt(fd, level, optname, optval, optlen);
2274}
2275
9cacf81f
SF
2276INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2277 int optname));
2278
1da177e4
LT
2279/*
2280 * Get a socket option. Because we don't know the option lengths we have
2281 * to pass a user mode parameter for the protocols to sort out.
2282 */
55db9c0e
CH
2283int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2284 int __user *optlen)
1da177e4 2285{
ad4bf5f2 2286 int max_optlen __maybe_unused;
6cb153ca 2287 int err, fput_needed;
1da177e4
LT
2288 struct socket *sock;
2289
89bddce5 2290 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2291 if (!sock)
2292 return err;
2293
2294 err = security_socket_getsockopt(sock, level, optname);
2295 if (err)
2296 goto out_put;
1da177e4 2297
55db9c0e
CH
2298 if (!in_compat_syscall())
2299 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2300
d8a9b38f
CH
2301 if (level == SOL_SOCKET)
2302 err = sock_getsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2303 else if (unlikely(!sock->ops->getsockopt))
2304 err = -EOPNOTSUPP;
d8a9b38f
CH
2305 else
2306 err = sock->ops->getsockopt(sock, level, optname, optval,
89bddce5 2307 optlen);
0d01da6a 2308
55db9c0e
CH
2309 if (!in_compat_syscall())
2310 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2311 optval, optlen, max_optlen,
2312 err);
6cb153ca 2313out_put:
d8a9b38f 2314 fput_light(sock->file, fput_needed);
1da177e4
LT
2315 return err;
2316}
2317
13a2d70e
DB
2318SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2319 char __user *, optval, int __user *, optlen)
2320{
2321 return __sys_getsockopt(fd, level, optname, optval, optlen);
2322}
2323
1da177e4
LT
2324/*
2325 * Shutdown a socket.
2326 */
2327
b713c195
JA
2328int __sys_shutdown_sock(struct socket *sock, int how)
2329{
2330 int err;
2331
2332 err = security_socket_shutdown(sock, how);
2333 if (!err)
2334 err = sock->ops->shutdown(sock, how);
2335
2336 return err;
2337}
2338
005a1aea 2339int __sys_shutdown(int fd, int how)
1da177e4 2340{
6cb153ca 2341 int err, fput_needed;
1da177e4
LT
2342 struct socket *sock;
2343
89bddce5
SH
2344 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2345 if (sock != NULL) {
b713c195 2346 err = __sys_shutdown_sock(sock, how);
6cb153ca 2347 fput_light(sock->file, fput_needed);
1da177e4
LT
2348 }
2349 return err;
2350}
2351
005a1aea
DB
2352SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2353{
2354 return __sys_shutdown(fd, how);
2355}
2356
89bddce5 2357/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
2358 * fields which are the same type (int / unsigned) on our platforms.
2359 */
/*
 * COMPAT_MSG() yields the address of @member in either the compat or the
 * native message header.  The expansion is not hygienic: it reads a
 * variable named `flags` and a pointer named `<msg>_compat` from the
 * scope in which it is used.
 */
#define COMPAT_MSG(msg, member) \
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2363
c71d8ebe
TH
/*
 * Destination address remembered between the messages of one sendmmsg()
 * call, so that ____sys_sendmsg() can tell when a message goes to the
 * same address as the previous successful one.
 */
struct used_address {
	/* copy of the last destination address that was sent to */
	struct sockaddr_storage name;
	/* number of valid bytes in @name; sendmmsg() starts it at UINT_MAX */
	unsigned int name_len;
};
2368
7fa875b8
DY
2369int __copy_msghdr(struct msghdr *kmsg,
2370 struct user_msghdr *msg,
2371 struct sockaddr __user **save_addr)
1661bf36 2372{
08adb7da
AV
2373 ssize_t err;
2374
1f466e1f 2375 kmsg->msg_control_is_user = true;
1228b34c 2376 kmsg->msg_get_inq = 0;
7fa875b8
DY
2377 kmsg->msg_control_user = msg->msg_control;
2378 kmsg->msg_controllen = msg->msg_controllen;
2379 kmsg->msg_flags = msg->msg_flags;
ffb07550 2380
7fa875b8
DY
2381 kmsg->msg_namelen = msg->msg_namelen;
2382 if (!msg->msg_name)
6a2a2b3a
AS
2383 kmsg->msg_namelen = 0;
2384
dbb490b9
ML
2385 if (kmsg->msg_namelen < 0)
2386 return -EINVAL;
2387
1661bf36 2388 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2389 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2390
2391 if (save_addr)
7fa875b8 2392 *save_addr = msg->msg_name;
08adb7da 2393
7fa875b8 2394 if (msg->msg_name && kmsg->msg_namelen) {
08adb7da 2395 if (!save_addr) {
7fa875b8 2396 err = move_addr_to_kernel(msg->msg_name,
864d9664 2397 kmsg->msg_namelen,
08adb7da
AV
2398 kmsg->msg_name);
2399 if (err < 0)
2400 return err;
2401 }
2402 } else {
2403 kmsg->msg_name = NULL;
2404 kmsg->msg_namelen = 0;
2405 }
2406
7fa875b8 2407 if (msg->msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2408 return -EMSGSIZE;
2409
0345f931 2410 kmsg->msg_iocb = NULL;
7c701d92 2411 kmsg->msg_ubuf = NULL;
0a384abf
JA
2412 return 0;
2413}
2414
2415static int copy_msghdr_from_user(struct msghdr *kmsg,
2416 struct user_msghdr __user *umsg,
2417 struct sockaddr __user **save_addr,
2418 struct iovec **iov)
2419{
2420 struct user_msghdr msg;
2421 ssize_t err;
2422
7fa875b8
DY
2423 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
2424 return -EFAULT;
2425
2426 err = __copy_msghdr(kmsg, &msg, save_addr);
0a384abf
JA
2427 if (err)
2428 return err;
0345f931 2429
de4eda9d 2430 err = import_iovec(save_addr ? ITER_DEST : ITER_SOURCE,
ffb07550 2431 msg.msg_iov, msg.msg_iovlen,
da184284 2432 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2433 return err < 0 ? err : 0;
1661bf36
DC
2434}
2435
4257c8ca
JA
2436static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2437 unsigned int flags, struct used_address *used_address,
2438 unsigned int allowed_msghdr_flags)
1da177e4 2439{
b9d717a7 2440 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2441 __aligned(sizeof(__kernel_size_t));
89bddce5 2442 /* 20 is size of ipv6_pktinfo */
1da177e4 2443 unsigned char *ctl_buf = ctl;
d8725c86 2444 int ctl_len;
08adb7da 2445 ssize_t err;
89bddce5 2446
1da177e4
LT
2447 err = -ENOBUFS;
2448
228e548e 2449 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2450 goto out;
28a94d8f 2451 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2452 ctl_len = msg_sys->msg_controllen;
1da177e4 2453 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2454 err =
228e548e 2455 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2456 sizeof(ctl));
1da177e4 2457 if (err)
4257c8ca 2458 goto out;
228e548e
AB
2459 ctl_buf = msg_sys->msg_control;
2460 ctl_len = msg_sys->msg_controllen;
1da177e4 2461 } else if (ctl_len) {
ac4340fc
DM
2462 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2463 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2464 if (ctl_len > sizeof(ctl)) {
1da177e4 2465 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2466 if (ctl_buf == NULL)
4257c8ca 2467 goto out;
1da177e4
LT
2468 }
2469 err = -EFAULT;
1f466e1f 2470 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2471 goto out_freectl;
228e548e 2472 msg_sys->msg_control = ctl_buf;
1f466e1f 2473 msg_sys->msg_control_is_user = false;
1da177e4 2474 }
b841b901 2475 flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
228e548e 2476 msg_sys->msg_flags = flags;
1da177e4
LT
2477
2478 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2479 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2480 /*
2481 * If this is sendmmsg() and current destination address is same as
2482 * previously succeeded address, omit asking LSM's decision.
2483 * used_address->name_len is initialized to UINT_MAX so that the first
2484 * destination address never matches.
2485 */
bc909d9d
MD
2486 if (used_address && msg_sys->msg_name &&
2487 used_address->name_len == msg_sys->msg_namelen &&
2488 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2489 used_address->name_len)) {
d8725c86 2490 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2491 goto out_freectl;
2492 }
d8725c86 2493 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2494 /*
2495 * If this is sendmmsg() and sending to current destination address was
2496 * successful, remember it.
2497 */
2498 if (used_address && err >= 0) {
2499 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2500 if (msg_sys->msg_name)
2501 memcpy(&used_address->name, msg_sys->msg_name,
2502 used_address->name_len);
c71d8ebe 2503 }
1da177e4
LT
2504
2505out_freectl:
89bddce5 2506 if (ctl_buf != ctl)
1da177e4 2507 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2508out:
2509 return err;
2510}
2511
03b1230c
JA
2512int sendmsg_copy_msghdr(struct msghdr *msg,
2513 struct user_msghdr __user *umsg, unsigned flags,
2514 struct iovec **iov)
4257c8ca
JA
2515{
2516 int err;
2517
2518 if (flags & MSG_CMSG_COMPAT) {
2519 struct compat_msghdr __user *msg_compat;
2520
2521 msg_compat = (struct compat_msghdr __user *) umsg;
2522 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2523 } else {
2524 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2525 }
2526 if (err < 0)
2527 return err;
2528
2529 return 0;
2530}
2531
2532static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2533 struct msghdr *msg_sys, unsigned int flags,
2534 struct used_address *used_address,
2535 unsigned int allowed_msghdr_flags)
2536{
2537 struct sockaddr_storage address;
2538 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2539 ssize_t err;
2540
2541 msg_sys->msg_name = &address;
2542
2543 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2544 if (err < 0)
2545 return err;
2546
2547 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2548 allowed_msghdr_flags);
da184284 2549 kfree(iov);
228e548e
AB
2550 return err;
2551}
2552
2553/*
2554 * BSD sendmsg interface
2555 */
03b1230c 2556long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2557 unsigned int flags)
2558{
03b1230c 2559 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2560}
228e548e 2561
e1834a32
DB
2562long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2563 bool forbid_cmsg_compat)
228e548e
AB
2564{
2565 int fput_needed, err;
2566 struct msghdr msg_sys;
1be374a0
AL
2567 struct socket *sock;
2568
e1834a32
DB
2569 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2570 return -EINVAL;
2571
1be374a0 2572 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2573 if (!sock)
2574 goto out;
2575
28a94d8f 2576 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2577
6cb153ca 2578 fput_light(sock->file, fput_needed);
89bddce5 2579out:
1da177e4
LT
2580 return err;
2581}
2582
666547ff 2583SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2584{
e1834a32 2585 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2586}
2587
228e548e
AB
2588/*
2589 * Linux sendmmsg interface
2590 */
2591
2592int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2593 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2594{
2595 int fput_needed, err, datagrams;
2596 struct socket *sock;
2597 struct mmsghdr __user *entry;
2598 struct compat_mmsghdr __user *compat_entry;
2599 struct msghdr msg_sys;
c71d8ebe 2600 struct used_address used_address;
f092276d 2601 unsigned int oflags = flags;
228e548e 2602
e1834a32
DB
2603 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2604 return -EINVAL;
2605
98382f41
AB
2606 if (vlen > UIO_MAXIOV)
2607 vlen = UIO_MAXIOV;
228e548e
AB
2608
2609 datagrams = 0;
2610
2611 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2612 if (!sock)
2613 return err;
2614
c71d8ebe 2615 used_address.name_len = UINT_MAX;
228e548e
AB
2616 entry = mmsg;
2617 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2618 err = 0;
f092276d 2619 flags |= MSG_BATCH;
228e548e
AB
2620
2621 while (datagrams < vlen) {
f092276d
TH
2622 if (datagrams == vlen - 1)
2623 flags = oflags;
2624
228e548e 2625 if (MSG_CMSG_COMPAT & flags) {
666547ff 2626 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2627 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2628 if (err < 0)
2629 break;
2630 err = __put_user(err, &compat_entry->msg_len);
2631 ++compat_entry;
2632 } else {
a7526eb5 2633 err = ___sys_sendmsg(sock,
666547ff 2634 (struct user_msghdr __user *)entry,
28a94d8f 2635 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2636 if (err < 0)
2637 break;
2638 err = put_user(err, &entry->msg_len);
2639 ++entry;
2640 }
2641
2642 if (err)
2643 break;
2644 ++datagrams;
3023898b
SHY
2645 if (msg_data_left(&msg_sys))
2646 break;
a78cb84c 2647 cond_resched();
228e548e
AB
2648 }
2649
228e548e
AB
2650 fput_light(sock->file, fput_needed);
2651
728ffb86
AB
2652 /* We only return an error if no datagrams were able to be sent */
2653 if (datagrams != 0)
228e548e
AB
2654 return datagrams;
2655
228e548e
AB
2656 return err;
2657}
2658
2659SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2660 unsigned int, vlen, unsigned int, flags)
2661{
e1834a32 2662 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2663}
2664
03b1230c
JA
2665int recvmsg_copy_msghdr(struct msghdr *msg,
2666 struct user_msghdr __user *umsg, unsigned flags,
2667 struct sockaddr __user **uaddr,
2668 struct iovec **iov)
1da177e4 2669{
08adb7da 2670 ssize_t err;
1da177e4 2671
4257c8ca
JA
2672 if (MSG_CMSG_COMPAT & flags) {
2673 struct compat_msghdr __user *msg_compat;
1da177e4 2674
4257c8ca
JA
2675 msg_compat = (struct compat_msghdr __user *) umsg;
2676 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2677 } else {
2678 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2679 }
1da177e4 2680 if (err < 0)
da184284 2681 return err;
1da177e4 2682
4257c8ca
JA
2683 return 0;
2684}
2685
2686static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2687 struct user_msghdr __user *msg,
2688 struct sockaddr __user *uaddr,
2689 unsigned int flags, int nosec)
2690{
2691 struct compat_msghdr __user *msg_compat =
2692 (struct compat_msghdr __user *) msg;
2693 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2694 struct sockaddr_storage addr;
2695 unsigned long cmsg_ptr;
2696 int len;
2697 ssize_t err;
2698
2699 msg_sys->msg_name = &addr;
a2e27255
ACM
2700 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2701 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2702
f3d33426
HFS
2703 /* We assume all kernel code knows the size of sockaddr_storage */
2704 msg_sys->msg_namelen = 0;
2705
1da177e4
LT
2706 if (sock->file->f_flags & O_NONBLOCK)
2707 flags |= MSG_DONTWAIT;
1af66221
ED
2708
2709 if (unlikely(nosec))
2710 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2711 else
2712 err = sock_recvmsg(sock, msg_sys, flags);
2713
1da177e4 2714 if (err < 0)
4257c8ca 2715 goto out;
1da177e4
LT
2716 len = err;
2717
2718 if (uaddr != NULL) {
43db362d 2719 err = move_addr_to_user(&addr,
a2e27255 2720 msg_sys->msg_namelen, uaddr,
89bddce5 2721 uaddr_len);
1da177e4 2722 if (err < 0)
4257c8ca 2723 goto out;
1da177e4 2724 }
a2e27255 2725 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2726 COMPAT_FLAGS(msg));
1da177e4 2727 if (err)
4257c8ca 2728 goto out;
1da177e4 2729 if (MSG_CMSG_COMPAT & flags)
a2e27255 2730 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2731 &msg_compat->msg_controllen);
2732 else
a2e27255 2733 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2734 &msg->msg_controllen);
2735 if (err)
4257c8ca 2736 goto out;
1da177e4 2737 err = len;
4257c8ca
JA
2738out:
2739 return err;
2740}
2741
2742static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2743 struct msghdr *msg_sys, unsigned int flags, int nosec)
2744{
2745 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2746 /* user mode address pointers */
2747 struct sockaddr __user *uaddr;
2748 ssize_t err;
2749
2750 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2751 if (err < 0)
2752 return err;
1da177e4 2753
4257c8ca 2754 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2755 kfree(iov);
a2e27255
ACM
2756 return err;
2757}
2758
2759/*
2760 * BSD recvmsg interface
2761 */
2762
03b1230c
JA
2763long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2764 struct user_msghdr __user *umsg,
2765 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2766{
03b1230c 2767 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2768}
2769
e1834a32
DB
2770long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2771 bool forbid_cmsg_compat)
a2e27255
ACM
2772{
2773 int fput_needed, err;
2774 struct msghdr msg_sys;
1be374a0
AL
2775 struct socket *sock;
2776
e1834a32
DB
2777 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2778 return -EINVAL;
2779
1be374a0 2780 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2781 if (!sock)
2782 goto out;
2783
a7526eb5 2784 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2785
6cb153ca 2786 fput_light(sock->file, fput_needed);
1da177e4
LT
2787out:
2788 return err;
2789}
2790
666547ff 2791SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2792 unsigned int, flags)
2793{
e1834a32 2794 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2795}
2796
a2e27255
ACM
2797/*
2798 * Linux recvmmsg interface
2799 */
2800
e11d4284
AB
2801static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2802 unsigned int vlen, unsigned int flags,
2803 struct timespec64 *timeout)
a2e27255
ACM
2804{
2805 int fput_needed, err, datagrams;
2806 struct socket *sock;
2807 struct mmsghdr __user *entry;
d7256d0e 2808 struct compat_mmsghdr __user *compat_entry;
a2e27255 2809 struct msghdr msg_sys;
766b9f92
DD
2810 struct timespec64 end_time;
2811 struct timespec64 timeout64;
a2e27255
ACM
2812
2813 if (timeout &&
2814 poll_select_set_timeout(&end_time, timeout->tv_sec,
2815 timeout->tv_nsec))
2816 return -EINVAL;
2817
2818 datagrams = 0;
2819
2820 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2821 if (!sock)
2822 return err;
2823
7797dc41
SHY
2824 if (likely(!(flags & MSG_ERRQUEUE))) {
2825 err = sock_error(sock->sk);
2826 if (err) {
2827 datagrams = err;
2828 goto out_put;
2829 }
e623a9e9 2830 }
a2e27255
ACM
2831
2832 entry = mmsg;
d7256d0e 2833 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2834
2835 while (datagrams < vlen) {
2836 /*
2837 * No need to ask LSM for more than the first datagram.
2838 */
d7256d0e 2839 if (MSG_CMSG_COMPAT & flags) {
666547ff 2840 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2841 &msg_sys, flags & ~MSG_WAITFORONE,
2842 datagrams);
d7256d0e
JMG
2843 if (err < 0)
2844 break;
2845 err = __put_user(err, &compat_entry->msg_len);
2846 ++compat_entry;
2847 } else {
a7526eb5 2848 err = ___sys_recvmsg(sock,
666547ff 2849 (struct user_msghdr __user *)entry,
a7526eb5
AL
2850 &msg_sys, flags & ~MSG_WAITFORONE,
2851 datagrams);
d7256d0e
JMG
2852 if (err < 0)
2853 break;
2854 err = put_user(err, &entry->msg_len);
2855 ++entry;
2856 }
2857
a2e27255
ACM
2858 if (err)
2859 break;
a2e27255
ACM
2860 ++datagrams;
2861
71c5c159
BB
2862 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2863 if (flags & MSG_WAITFORONE)
2864 flags |= MSG_DONTWAIT;
2865
a2e27255 2866 if (timeout) {
766b9f92 2867 ktime_get_ts64(&timeout64);
c2e6c856 2868 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2869 if (timeout->tv_sec < 0) {
2870 timeout->tv_sec = timeout->tv_nsec = 0;
2871 break;
2872 }
2873
2874 /* Timeout, return less than vlen datagrams */
2875 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2876 break;
2877 }
2878
2879 /* Out of band data, return right away */
2880 if (msg_sys.msg_flags & MSG_OOB)
2881 break;
a78cb84c 2882 cond_resched();
a2e27255
ACM
2883 }
2884
a2e27255 2885 if (err == 0)
34b88a68
ACM
2886 goto out_put;
2887
2888 if (datagrams == 0) {
2889 datagrams = err;
2890 goto out_put;
2891 }
a2e27255 2892
34b88a68
ACM
2893 /*
2894 * We may return less entries than requested (vlen) if the
2895 * sock is non block and there aren't enough datagrams...
2896 */
2897 if (err != -EAGAIN) {
a2e27255 2898 /*
34b88a68
ACM
2899 * ... or if recvmsg returns an error after we
2900 * received some datagrams, where we record the
2901 * error to return on the next call or if the
2902 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2903 */
e05a5f51 2904 WRITE_ONCE(sock->sk->sk_err, -err);
a2e27255 2905 }
34b88a68
ACM
2906out_put:
2907 fput_light(sock->file, fput_needed);
a2e27255 2908
34b88a68 2909 return datagrams;
a2e27255
ACM
2910}
2911
e11d4284
AB
2912int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2913 unsigned int vlen, unsigned int flags,
2914 struct __kernel_timespec __user *timeout,
2915 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2916{
2917 int datagrams;
c2e6c856 2918 struct timespec64 timeout_sys;
a2e27255 2919
e11d4284
AB
2920 if (timeout && get_timespec64(&timeout_sys, timeout))
2921 return -EFAULT;
a2e27255 2922
e11d4284 2923 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2924 return -EFAULT;
2925
e11d4284
AB
2926 if (!timeout && !timeout32)
2927 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2928
2929 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2930
e11d4284
AB
2931 if (datagrams <= 0)
2932 return datagrams;
2933
2934 if (timeout && put_timespec64(&timeout_sys, timeout))
2935 datagrams = -EFAULT;
2936
2937 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2938 datagrams = -EFAULT;
2939
2940 return datagrams;
2941}
2942
1255e269
DB
2943SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2944 unsigned int, vlen, unsigned int, flags,
c2e6c856 2945 struct __kernel_timespec __user *, timeout)
1255e269 2946{
e11d4284
AB
2947 if (flags & MSG_CMSG_COMPAT)
2948 return -EINVAL;
2949
2950 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2951}
2952
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg system call variant taking an old_timespec32 timeout.  Callers
 * passing MSG_CMSG_COMPAT are rejected with -EINVAL.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2964
#ifdef __ARCH_WANT_SYS_SOCKETCALL
/*
 * Number of argument bytes each socketcall sub-call reads from user space,
 * indexed by the call number.  Entry 0 is never used: call numbers below 1
 * are rejected before the table is consulted.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	AL(4), AL(5), AL(4)
};

#undef AL

/*
 *	socketcall - multiplexed entry point for the socket system calls
 *
 *	@call selects the operation and @args points at a user-space array of
 *	unsigned longs holding that operation's arguments.  The arguments are
 *	copied in, reported to audit_socketcall(), and then handed to the
 *	matching __sys_*() helper.
 *
 *	Returns -EINVAL for an unknown @call (or an argument block larger than
 *	the local buffer), -EFAULT if the arguments cannot be copied, an error
 *	from audit_socketcall(), or otherwise the helper's result.
 */

SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
{
	unsigned long a[AUDITSC_ARGS];
	unsigned int nbytes;
	int err;

	if (call < 1 || call > SYS_SENDMMSG)
		return -EINVAL;
	/* Sanitize the index before it is used on nargs[]. */
	call = array_index_nospec(call, SYS_SENDMMSG + 1);

	nbytes = nargs[call];
	if (nbytes > sizeof(a))
		return -EINVAL;

	/* Only the first nbytes of a[] are filled in for this call. */
	if (copy_from_user(a, args, nbytes))
		return -EFAULT;

	err = audit_socketcall(nbytes / sizeof(unsigned long), a);
	if (err)
		return err;

	switch (call) {
	case SYS_SOCKET:
		err = __sys_socket(a[0], a[1], a[2]);
		break;
	case SYS_BIND:
		err = __sys_bind(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_CONNECT:
		err = __sys_connect(a[0], (struct sockaddr __user *)a[1],
				    a[2]);
		break;
	case SYS_LISTEN:
		err = __sys_listen(a[0], a[1]);
		break;
	case SYS_ACCEPT:
		/* Plain accept() is accept4() with no flags. */
		err = __sys_accept4(a[0], (struct sockaddr __user *)a[1],
				    (int __user *)a[2], 0);
		break;
	case SYS_GETSOCKNAME:
		err = __sys_getsockname(a[0], (struct sockaddr __user *)a[1],
					(int __user *)a[2]);
		break;
	case SYS_GETPEERNAME:
		err = __sys_getpeername(a[0], (struct sockaddr __user *)a[1],
					(int __user *)a[2]);
		break;
	case SYS_SOCKETPAIR:
		err = __sys_socketpair(a[0], a[1], a[2], (int __user *)a[3]);
		break;
	case SYS_SEND:
		/* send() is sendto() without a destination address. */
		err = __sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				   NULL, 0);
		break;
	case SYS_SENDTO:
		err = __sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				   (struct sockaddr __user *)a[4], a[5]);
		break;
	case SYS_RECV:
		/* recv() is recvfrom() without a source address buffer. */
		err = __sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				     NULL, NULL);
		break;
	case SYS_RECVFROM:
		err = __sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				     (struct sockaddr __user *)a[4],
				     (int __user *)a[5]);
		break;
	case SYS_SHUTDOWN:
		err = __sys_shutdown(a[0], a[1]);
		break;
	case SYS_SETSOCKOPT:
		err = __sys_setsockopt(a[0], a[1], a[2],
				       (char __user *)a[3], a[4]);
		break;
	case SYS_GETSOCKOPT:
		err = __sys_getsockopt(a[0], a[1], a[2],
				       (char __user *)a[3],
				       (int __user *)a[4]);
		break;
	case SYS_SENDMSG:
		err = __sys_sendmsg(a[0], (struct user_msghdr __user *)a[1],
				    a[2], true);
		break;
	case SYS_SENDMMSG:
		err = __sys_sendmmsg(a[0], (struct mmsghdr __user *)a[1],
				     a[2], a[3], true);
		break;
	case SYS_RECVMSG:
		err = __sys_recvmsg(a[0], (struct user_msghdr __user *)a[1],
				    a[2], true);
		break;
	case SYS_RECVMMSG:
		/* The timeout layout depends on the kernel word size. */
		if (IS_ENABLED(CONFIG_64BIT))
			err = __sys_recvmmsg(a[0],
					     (struct mmsghdr __user *)a[1],
					     a[2], a[3],
					     (struct __kernel_timespec __user *)a[4],
					     NULL);
		else
			err = __sys_recvmmsg(a[0],
					     (struct mmsghdr __user *)a[1],
					     a[2], a[3], NULL,
					     (struct old_timespec32 __user *)a[4]);
		break;
	case SYS_ACCEPT4:
		err = __sys_accept4(a[0], (struct sockaddr __user *)a[1],
				    (int __user *)a[2], a[3]);
		break;
	default:
		err = -EINVAL;
		break;
	}
	return err;
}

#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 3105
55737fda
SH
3106/**
3107 * sock_register - add a socket protocol handler
3108 * @ops: description of protocol
3109 *
1da177e4
LT
3110 * This function is called by a protocol handler that wants to
3111 * advertise its address family, and have it linked into the
e793c0f7 3112 * socket interface. The value ops->family corresponds to the
55737fda 3113 * socket system call protocol family.
1da177e4 3114 */
f0fd27d4 3115int sock_register(const struct net_proto_family *ops)
1da177e4
LT
3116{
3117 int err;
3118
3119 if (ops->family >= NPROTO) {
3410f22e 3120 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
3121 return -ENOBUFS;
3122 }
55737fda
SH
3123
3124 spin_lock(&net_family_lock);
190683a9
ED
3125 if (rcu_dereference_protected(net_families[ops->family],
3126 lockdep_is_held(&net_family_lock)))
55737fda
SH
3127 err = -EEXIST;
3128 else {
cf778b00 3129 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
3130 err = 0;
3131 }
55737fda
SH
3132 spin_unlock(&net_family_lock);
3133
fe0bdbde 3134 pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
1da177e4
LT
3135 return err;
3136}
c6d409cf 3137EXPORT_SYMBOL(sock_register);
1da177e4 3138
55737fda
SH
3139/**
3140 * sock_unregister - remove a protocol handler
3141 * @family: protocol family to remove
3142 *
1da177e4
LT
3143 * This function is called by a protocol handler that wants to
3144 * remove its address family, and have it unlinked from the
55737fda
SH
3145 * new socket creation.
3146 *
3147 * If protocol handler is a module, then it can use module reference
3148 * counts to protect against new references. If protocol handler is not
3149 * a module then it needs to provide its own protection in
3150 * the ops->create routine.
1da177e4 3151 */
f0fd27d4 3152void sock_unregister(int family)
1da177e4 3153{
f0fd27d4 3154 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3155
55737fda 3156 spin_lock(&net_family_lock);
a9b3cd7f 3157 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3158 spin_unlock(&net_family_lock);
3159
3160 synchronize_rcu();
3161
fe0bdbde 3162 pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
1da177e4 3163}
c6d409cf 3164EXPORT_SYMBOL(sock_unregister);
1da177e4 3165
bf2ae2e4
XL
3166bool sock_is_registered(int family)
3167{
66b51b0a 3168 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3169}
3170
77d76ea3 3171static int __init sock_init(void)
1da177e4 3172{
b3e19d92 3173 int err;
2ca794e5
EB
3174 /*
3175 * Initialize the network sysctl infrastructure.
3176 */
3177 err = net_sysctl_init();
3178 if (err)
3179 goto out;
b3e19d92 3180
1da177e4 3181 /*
89bddce5 3182 * Initialize skbuff SLAB cache
1da177e4
LT
3183 */
3184 skb_init();
1da177e4
LT
3185
3186 /*
89bddce5 3187 * Initialize the protocols module.
1da177e4
LT
3188 */
3189
3190 init_inodecache();
b3e19d92
NP
3191
3192 err = register_filesystem(&sock_fs_type);
3193 if (err)
47260ba9 3194 goto out;
1da177e4 3195 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3196 if (IS_ERR(sock_mnt)) {
3197 err = PTR_ERR(sock_mnt);
3198 goto out_mount;
3199 }
77d76ea3
AK
3200
3201 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3202 */
3203
3204#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3205 err = netfilter_init();
3206 if (err)
3207 goto out;
1da177e4 3208#endif
cbeb321a 3209
408eccce 3210 ptp_classifier_init();
c1f19b51 3211
b3e19d92
NP
3212out:
3213 return err;
3214
3215out_mount:
3216 unregister_filesystem(&sock_fs_type);
b3e19d92 3217 goto out;
1da177e4
LT
3218}
3219
77d76ea3
AK
3220core_initcall(sock_init); /* early initcall */
3221
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Emit the "sockets: used N" line into @seq.  The count comes from
 * sock_inuse_get() applied to seq->private (presumably the struct net the
 * seq file was opened for -- confirm against the caller).
 */
void socket_seq_show(struct seq_file *seq)
{
	struct net *net = seq->private;

	seq_printf(seq, "sockets: used %d\n", sock_inuse_get(net));
}
#endif				/* CONFIG_PROC_FS */
1da177e4 3229
29c49648
AB
3230/* Handle the fact that while struct ifreq has the same *layout* on
3231 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3232 * which are handled elsewhere, it still has different *size* due to
3233 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3234 * resulting in struct ifreq being 32 and 40 bytes respectively).
3235 * As a result, if the struct happens to be at the end of a page and
3236 * the next page isn't readable/writable, we get a fault. To prevent
3237 * that, copy back and forth to the full size.
3238 */
3239int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
7a229387 3240{
29c49648
AB
3241 if (in_compat_syscall()) {
3242 struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
7a229387 3243
29c49648
AB
3244 memset(ifr, 0, sizeof(*ifr));
3245 if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
3246 return -EFAULT;
7a229387 3247
29c49648
AB
3248 if (ifrdata)
3249 *ifrdata = compat_ptr(ifr32->ifr_data);
7a229387 3250
29c49648
AB
3251 return 0;
3252 }
7a229387 3253
29c49648 3254 if (copy_from_user(ifr, arg, sizeof(*ifr)))
7a229387
AB
3255 return -EFAULT;
3256
29c49648
AB
3257 if (ifrdata)
3258 *ifrdata = ifr->ifr_data;
3259
7a229387
AB
3260 return 0;
3261}
29c49648 3262EXPORT_SYMBOL(get_user_ifreq);
7a229387 3263
29c49648 3264int put_user_ifreq(struct ifreq *ifr, void __user *arg)
7a229387 3265{
29c49648 3266 size_t size = sizeof(*ifr);
7a229387 3267
29c49648
AB
3268 if (in_compat_syscall())
3269 size = sizeof(struct compat_ifreq);
7a229387 3270
29c49648 3271 if (copy_to_user(arg, ifr, size))
7a229387
AB
3272 return -EFAULT;
3273
3a7da39d 3274 return 0;
7a229387 3275}
29c49648 3276EXPORT_SYMBOL(put_user_ifreq);
7a229387 3277
89bbfc95 3278#ifdef CONFIG_COMPAT
7a50a240
AB
3279static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3280{
7a50a240 3281 compat_uptr_t uptr32;
44c02a2c
AV
3282 struct ifreq ifr;
3283 void __user *saved;
3284 int err;
7a50a240 3285
29c49648 3286 if (get_user_ifreq(&ifr, NULL, uifr32))
7a50a240
AB
3287 return -EFAULT;
3288
3289 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3290 return -EFAULT;
3291
44c02a2c
AV
3292 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3293 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3294
a554bf96 3295 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
44c02a2c
AV
3296 if (!err) {
3297 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
29c49648 3298 if (put_user_ifreq(&ifr, uifr32))
44c02a2c 3299 err = -EFAULT;
ccbd6a5a 3300 }
44c02a2c 3301 return err;
7a229387
AB
3302}
3303
590d4693
BH
3304/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3305static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3306 struct compat_ifreq __user *u_ifreq32)
7a229387 3307{
44c02a2c 3308 struct ifreq ifreq;
a554bf96 3309 void __user *data;
7a229387 3310
d0efb162
PC
3311 if (!is_socket_ioctl_cmd(cmd))
3312 return -ENOTTY;
a554bf96 3313 if (get_user_ifreq(&ifreq, &data, u_ifreq32))
7a229387 3314 return -EFAULT;
a554bf96 3315 ifreq.ifr_data = data;
7a229387 3316
a554bf96 3317 return dev_ioctl(net, cmd, &ifreq, data, NULL);
a2116ed2
AB
3318}
3319
6b96018b
AB
3320static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3321 unsigned int cmd, unsigned long arg)
3322{
3323 void __user *argp = compat_ptr(arg);
3324 struct sock *sk = sock->sk;
3325 struct net *net = sock_net(sk);
7a229387 3326
6b96018b 3327 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
88fc023f 3328 return sock_ioctl(file, cmd, (unsigned long)argp);
6b96018b
AB
3329
3330 switch (cmd) {
7a50a240
AB
3331 case SIOCWANDEV:
3332 return compat_siocwandev(net, argp);
0768e170
AB
3333 case SIOCGSTAMP_OLD:
3334 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3335 if (!sock->ops->gettstamp)
3336 return -ENOIOCTLCMD;
0768e170 3337 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3338 !COMPAT_USE_64BIT_TIME);
3339
dd98d289 3340 case SIOCETHTOOL:
590d4693
BH
3341 case SIOCBONDSLAVEINFOQUERY:
3342 case SIOCBONDINFOQUERY:
a2116ed2 3343 case SIOCSHWTSTAMP:
fd468c74 3344 case SIOCGHWTSTAMP:
590d4693 3345 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3346
3347 case FIOSETOWN:
3348 case SIOCSPGRP:
3349 case FIOGETOWN:
3350 case SIOCGPGRP:
3351 case SIOCBRADDBR:
3352 case SIOCBRDELBR:
3353 case SIOCGIFVLAN:
3354 case SIOCSIFVLAN:
c62cce2c 3355 case SIOCGSKNS:
0768e170
AB
3356 case SIOCGSTAMP_NEW:
3357 case SIOCGSTAMPNS_NEW:
876f0bf9 3358 case SIOCGIFCONF:
fd3a4590
RP
3359 case SIOCSIFBR:
3360 case SIOCGIFBR:
6b96018b
AB
3361 return sock_ioctl(file, cmd, arg);
3362
3363 case SIOCGIFFLAGS:
3364 case SIOCSIFFLAGS:
709566d7
AB
3365 case SIOCGIFMAP:
3366 case SIOCSIFMAP:
6b96018b
AB
3367 case SIOCGIFMETRIC:
3368 case SIOCSIFMETRIC:
3369 case SIOCGIFMTU:
3370 case SIOCSIFMTU:
3371 case SIOCGIFMEM:
3372 case SIOCSIFMEM:
3373 case SIOCGIFHWADDR:
3374 case SIOCSIFHWADDR:
3375 case SIOCADDMULTI:
3376 case SIOCDELMULTI:
3377 case SIOCGIFINDEX:
6b96018b
AB
3378 case SIOCGIFADDR:
3379 case SIOCSIFADDR:
3380 case SIOCSIFHWBROADCAST:
6b96018b 3381 case SIOCDIFADDR:
6b96018b
AB
3382 case SIOCGIFBRDADDR:
3383 case SIOCSIFBRDADDR:
3384 case SIOCGIFDSTADDR:
3385 case SIOCSIFDSTADDR:
3386 case SIOCGIFNETMASK:
3387 case SIOCSIFNETMASK:
3388 case SIOCSIFPFLAGS:
3389 case SIOCGIFPFLAGS:
3390 case SIOCGIFTXQLEN:
3391 case SIOCSIFTXQLEN:
3392 case SIOCBRADDIF:
3393 case SIOCBRDELIF:
c6c9fee3 3394 case SIOCGIFNAME:
9177efd3
AB
3395 case SIOCSIFNAME:
3396 case SIOCGMIIPHY:
3397 case SIOCGMIIREG:
3398 case SIOCSMIIREG:
f92d4fc9
AV
3399 case SIOCBONDENSLAVE:
3400 case SIOCBONDRELEASE:
3401 case SIOCBONDSETHWADDR:
3402 case SIOCBONDCHANGEACTIVE:
6b96018b
AB
3403 case SIOCSARP:
3404 case SIOCGARP:
3405 case SIOCDARP:
c7dc504e 3406 case SIOCOUTQ:
9d7bf41f 3407 case SIOCOUTQNSD:
6b96018b 3408 case SIOCATMARK:
63ff03ab 3409 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3410 }
3411
6b96018b
AB
3412 return -ENOIOCTLCMD;
3413}
7a229387 3414
95c96174 3415static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3416 unsigned long arg)
89bbfc95
SP
3417{
3418 struct socket *sock = file->private_data;
3419 int ret = -ENOIOCTLCMD;
87de87d5
DM
3420 struct sock *sk;
3421 struct net *net;
3422
3423 sk = sock->sk;
3424 net = sock_net(sk);
89bbfc95
SP
3425
3426 if (sock->ops->compat_ioctl)
3427 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3428
87de87d5
DM
3429 if (ret == -ENOIOCTLCMD &&
3430 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3431 ret = compat_wext_handle_ioctl(net, cmd, arg);
3432
6b96018b
AB
3433 if (ret == -ENOIOCTLCMD)
3434 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3435
89bbfc95
SP
3436 return ret;
3437}
3438#endif
3439
8a3c245c
PT
3440/**
3441 * kernel_bind - bind an address to a socket (kernel space)
3442 * @sock: socket
3443 * @addr: address
3444 * @addrlen: length of address
3445 *
3446 * Returns 0 or an error.
3447 */
3448
ac5a488e
SS
3449int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3450{
3451 return sock->ops->bind(sock, addr, addrlen);
3452}
c6d409cf 3453EXPORT_SYMBOL(kernel_bind);
ac5a488e 3454
8a3c245c
PT
3455/**
3456 * kernel_listen - move socket to listening state (kernel space)
3457 * @sock: socket
3458 * @backlog: pending connections queue size
3459 *
3460 * Returns 0 or an error.
3461 */
3462
ac5a488e
SS
3463int kernel_listen(struct socket *sock, int backlog)
3464{
3465 return sock->ops->listen(sock, backlog);
3466}
c6d409cf 3467EXPORT_SYMBOL(kernel_listen);
ac5a488e 3468
8a3c245c
PT
3469/**
3470 * kernel_accept - accept a connection (kernel space)
3471 * @sock: listening socket
3472 * @newsock: new connected socket
3473 * @flags: flags
3474 *
3475 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3476 * If it fails, @newsock is guaranteed to be %NULL.
3477 * Returns 0 or an error.
3478 */
3479
ac5a488e
SS
3480int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3481{
3482 struct sock *sk = sock->sk;
3483 int err;
3484
3485 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3486 newsock);
3487 if (err < 0)
3488 goto done;
3489
cdfbabfb 3490 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3491 if (err < 0) {
3492 sock_release(*newsock);
fa8705b0 3493 *newsock = NULL;
ac5a488e
SS
3494 goto done;
3495 }
3496
3497 (*newsock)->ops = sock->ops;
1b08534e 3498 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3499
3500done:
3501 return err;
3502}
c6d409cf 3503EXPORT_SYMBOL(kernel_accept);
ac5a488e 3504
8a3c245c
PT
3505/**
3506 * kernel_connect - connect a socket (kernel space)
3507 * @sock: socket
3508 * @addr: address
3509 * @addrlen: address length
3510 * @flags: flags (O_NONBLOCK, ...)
3511 *
f1dcffcc 3512 * For datagram sockets, @addr is the address to which datagrams are sent
8a3c245c
PT
3513 * by default, and the only address from which datagrams are received.
3514 * For stream sockets, attempts to connect to @addr.
3515 * Returns 0 or an error code.
3516 */
3517
ac5a488e 3518int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3519 int flags)
ac5a488e
SS
3520{
3521 return sock->ops->connect(sock, addr, addrlen, flags);
3522}
c6d409cf 3523EXPORT_SYMBOL(kernel_connect);
ac5a488e 3524
8a3c245c
PT
3525/**
3526 * kernel_getsockname - get the address which the socket is bound (kernel space)
3527 * @sock: socket
3528 * @addr: address holder
3529 *
3530 * Fills the @addr pointer with the address which the socket is bound.
0fc95dec 3531 * Returns the length of the address in bytes or an error code.
8a3c245c
PT
3532 */
3533
9b2c45d4 3534int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3535{
9b2c45d4 3536 return sock->ops->getname(sock, addr, 0);
ac5a488e 3537}
c6d409cf 3538EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3539
8a3c245c 3540/**
645f0897 3541 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3542 * @sock: socket
3543 * @addr: address holder
3544 *
3545 * Fills the @addr pointer with the address which the socket is connected.
0fc95dec 3546 * Returns the length of the address in bytes or an error code.
8a3c245c
PT
3547 */
3548
9b2c45d4 3549int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3550{
9b2c45d4 3551 return sock->ops->getname(sock, addr, 1);
ac5a488e 3552}
c6d409cf 3553EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3554
8a3c245c
PT
3555/**
3556 * kernel_sendpage - send a &page through a socket (kernel space)
3557 * @sock: socket
3558 * @page: page
3559 * @offset: page offset
3560 * @size: total size in bytes
3561 * @flags: flags (MSG_DONTWAIT, ...)
3562 *
3563 * Returns the total amount sent in bytes or an error.
3564 */
3565
ac5a488e
SS
3566int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3567 size_t size, int flags)
3568{
7b62d31d
CL
3569 if (sock->ops->sendpage) {
3570 /* Warn in case the improper page to zero-copy send */
3571 WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
ac5a488e 3572 return sock->ops->sendpage(sock, page, offset, size, flags);
7b62d31d 3573 }
ac5a488e
SS
3574 return sock_no_sendpage(sock, page, offset, size, flags);
3575}
c6d409cf 3576EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3577
8a3c245c
PT
3578/**
3579 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3580 * @sk: sock
3581 * @page: page
3582 * @offset: page offset
3583 * @size: total size in bytes
3584 * @flags: flags (MSG_DONTWAIT, ...)
3585 *
3586 * Returns the total amount sent in bytes or an error.
3587 * Caller must hold @sk.
3588 */
3589
306b13eb
TH
3590int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3591 size_t size, int flags)
3592{
3593 struct socket *sock = sk->sk_socket;
3594
3595 if (sock->ops->sendpage_locked)
3596 return sock->ops->sendpage_locked(sk, page, offset, size,
3597 flags);
3598
3599 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3600}
3601EXPORT_SYMBOL(kernel_sendpage_locked);
3602
8a3c245c 3603/**
645f0897 3604 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3605 * @sock: socket
3606 * @how: connection part
3607 *
3608 * Returns 0 or an error.
3609 */
3610
91cf45f0
TM
3611int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3612{
3613 return sock->ops->shutdown(sock, how);
3614}
91cf45f0 3615EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3616
8a3c245c
PT
3617/**
3618 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3619 * @sk: socket
3620 *
3621 * This routine returns the IP overhead imposed by a socket i.e.
3622 * the length of the underlying IP header, depending on whether
3623 * this is an IPv4 or IPv6 socket and the length from IP options turned
3624 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3625 */
8a3c245c 3626
113c3075
P
3627u32 kernel_sock_ip_overhead(struct sock *sk)
3628{
3629 struct inet_sock *inet;
3630 struct ip_options_rcu *opt;
3631 u32 overhead = 0;
113c3075
P
3632#if IS_ENABLED(CONFIG_IPV6)
3633 struct ipv6_pinfo *np;
3634 struct ipv6_txoptions *optv6 = NULL;
3635#endif /* IS_ENABLED(CONFIG_IPV6) */
3636
3637 if (!sk)
3638 return overhead;
3639
113c3075
P
3640 switch (sk->sk_family) {
3641 case AF_INET:
3642 inet = inet_sk(sk);
3643 overhead += sizeof(struct iphdr);
3644 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3645 sock_owned_by_user(sk));
113c3075
P
3646 if (opt)
3647 overhead += opt->opt.optlen;
3648 return overhead;
3649#if IS_ENABLED(CONFIG_IPV6)
3650 case AF_INET6:
3651 np = inet6_sk(sk);
3652 overhead += sizeof(struct ipv6hdr);
3653 if (np)
3654 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3655 sock_owned_by_user(sk));
113c3075
P
3656 if (optv6)
3657 overhead += (optv6->opt_flen + optv6->opt_nflen);
3658 return overhead;
3659#endif /* IS_ENABLED(CONFIG_IPV6) */
3660 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3661 return overhead;
3662 }
3663}
3664EXPORT_SYMBOL(kernel_sock_ip_overhead);