Merge tag 'auxdisplay-for-linus-v5.15-rc1' of git://github.com/ojeda/linux
[linux-block.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
cc69837f 55#include <linux/ethtool.h>
1da177e4 56#include <linux/mm.h>
1da177e4
LT
57#include <linux/socket.h>
58#include <linux/file.h>
59#include <linux/net.h>
60#include <linux/interrupt.h>
aaca0bdc 61#include <linux/thread_info.h>
55737fda 62#include <linux/rcupdate.h>
1da177e4
LT
63#include <linux/netdevice.h>
64#include <linux/proc_fs.h>
65#include <linux/seq_file.h>
4a3e2f71 66#include <linux/mutex.h>
1da177e4 67#include <linux/if_bridge.h>
20380731 68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
c7dc504e 103#include <linux/termios.h>
6b96018b 104#include <linux/sockios.h>
076bb0c8 105#include <net/busy_poll.h>
f24b9be5 106#include <linux/errqueue.h>
d7c08826 107#include <linux/ptp_clock_kernel.h>
06021292 108
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Only defined when CONFIG_NET_RX_BUSY_POLL is enabled. */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif /* CONFIG_NET_RX_BUSY_POLL */
6b96018b 113
8ae5e030
AV
114static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
115static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 116static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
117
118static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
119static __poll_t sock_poll(struct file *file,
120 struct poll_table_struct *wait);
89bddce5 121static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
122#ifdef CONFIG_COMPAT
123static long compat_sock_ioctl(struct file *file,
89bddce5 124 unsigned int cmd, unsigned long arg);
89bbfc95 125#endif
1da177e4 126static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
127static ssize_t sock_sendpage(struct file *file, struct page *page,
128 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 129static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 130 struct pipe_inode_info *pipe, size_t len,
9c55e01c 131 unsigned int flags);
542d3065
AB
132
#ifdef CONFIG_PROC_FS
/*
 * Forward an fdinfo request to the socket's ops->show_fdinfo hook.
 * Nothing is emitted when the hook is not provided.  The socket is taken
 * from @f->private_data.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;

	if (!sock->ops->show_fdinfo)
		return;

	sock->ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
#endif
1da177e4 144
1da177e4
LT
145/*
146 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
147 * in the operation structures but are done directly via the socketcall() multiplexor.
148 */
149
da7071d7 150static const struct file_operations socket_file_ops = {
1da177e4
LT
151 .owner = THIS_MODULE,
152 .llseek = no_llseek,
8ae5e030
AV
153 .read_iter = sock_read_iter,
154 .write_iter = sock_write_iter,
1da177e4
LT
155 .poll = sock_poll,
156 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
157#ifdef CONFIG_COMPAT
158 .compat_ioctl = compat_sock_ioctl,
159#endif
1da177e4 160 .mmap = sock_mmap,
1da177e4
LT
161 .release = sock_close,
162 .fasync = sock_fasync,
5274f052
JA
163 .sendpage = sock_sendpage,
164 .splice_write = generic_splice_sendpage,
9c55e01c 165 .splice_read = sock_splice_read,
b4653342 166 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
167};
168
fe0bdbde
YD
169static const char * const pf_family_names[] = {
170 [PF_UNSPEC] = "PF_UNSPEC",
171 [PF_UNIX] = "PF_UNIX/PF_LOCAL",
172 [PF_INET] = "PF_INET",
173 [PF_AX25] = "PF_AX25",
174 [PF_IPX] = "PF_IPX",
175 [PF_APPLETALK] = "PF_APPLETALK",
176 [PF_NETROM] = "PF_NETROM",
177 [PF_BRIDGE] = "PF_BRIDGE",
178 [PF_ATMPVC] = "PF_ATMPVC",
179 [PF_X25] = "PF_X25",
180 [PF_INET6] = "PF_INET6",
181 [PF_ROSE] = "PF_ROSE",
182 [PF_DECnet] = "PF_DECnet",
183 [PF_NETBEUI] = "PF_NETBEUI",
184 [PF_SECURITY] = "PF_SECURITY",
185 [PF_KEY] = "PF_KEY",
186 [PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
187 [PF_PACKET] = "PF_PACKET",
188 [PF_ASH] = "PF_ASH",
189 [PF_ECONET] = "PF_ECONET",
190 [PF_ATMSVC] = "PF_ATMSVC",
191 [PF_RDS] = "PF_RDS",
192 [PF_SNA] = "PF_SNA",
193 [PF_IRDA] = "PF_IRDA",
194 [PF_PPPOX] = "PF_PPPOX",
195 [PF_WANPIPE] = "PF_WANPIPE",
196 [PF_LLC] = "PF_LLC",
197 [PF_IB] = "PF_IB",
198 [PF_MPLS] = "PF_MPLS",
199 [PF_CAN] = "PF_CAN",
200 [PF_TIPC] = "PF_TIPC",
201 [PF_BLUETOOTH] = "PF_BLUETOOTH",
202 [PF_IUCV] = "PF_IUCV",
203 [PF_RXRPC] = "PF_RXRPC",
204 [PF_ISDN] = "PF_ISDN",
205 [PF_PHONET] = "PF_PHONET",
206 [PF_IEEE802154] = "PF_IEEE802154",
207 [PF_CAIF] = "PF_CAIF",
208 [PF_ALG] = "PF_ALG",
209 [PF_NFC] = "PF_NFC",
210 [PF_VSOCK] = "PF_VSOCK",
211 [PF_KCM] = "PF_KCM",
212 [PF_QIPCRTR] = "PF_QIPCRTR",
213 [PF_SMC] = "PF_SMC",
214 [PF_XDP] = "PF_XDP",
bc49d816 215 [PF_MCTP] = "PF_MCTP",
fe0bdbde
YD
216};
217
1da177e4
LT
218/*
219 * The protocol list. Each protocol is registered in here.
220 */
221
1da177e4 222static DEFINE_SPINLOCK(net_family_lock);
190683a9 223static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 224
1da177e4 225/*
89bddce5
SH
226 * Support routines.
227 * Move socket addresses back and forth across the kernel/user
228 * divide and look after the messy bits.
1da177e4
LT
229 */
230
1da177e4
LT
231/**
232 * move_addr_to_kernel - copy a socket address into kernel space
233 * @uaddr: Address in user space
234 * @kaddr: Address in kernel space
235 * @ulen: Length in user space
236 *
237 * The address is copied into kernel space. If the provided address is
238 * too long an error code of -EINVAL is returned. If the copy gives
239 * invalid addresses -EFAULT is returned. On a success 0 is returned.
240 */
241
43db362d 242int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 243{
230b1839 244 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 245 return -EINVAL;
89bddce5 246 if (ulen == 0)
1da177e4 247 return 0;
89bddce5 248 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 249 return -EFAULT;
3ec3b2fb 250 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
251}
252
253/**
254 * move_addr_to_user - copy an address to user space
255 * @kaddr: kernel space address
256 * @klen: length of address in kernel
257 * @uaddr: user space address
258 * @ulen: pointer to user length field
259 *
260 * The value pointed to by ulen on entry is the buffer length available.
261 * This is overwritten with the buffer space used. -EINVAL is returned
262 * if an overlong buffer is specified or a negative buffer size. -EFAULT
263 * is returned if either the buffer or the length field are not
264 * accessible.
265 * After copying the data up to the limit the user specifies, the true
266 * length of the data is written over the length limit the user
267 * specified. Zero is returned for a success.
268 */
89bddce5 269
43db362d 270static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 271 void __user *uaddr, int __user *ulen)
1da177e4
LT
272{
273 int err;
274 int len;
275
68c6beb3 276 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
277 err = get_user(len, ulen);
278 if (err)
1da177e4 279 return err;
89bddce5
SH
280 if (len > klen)
281 len = klen;
68c6beb3 282 if (len < 0)
1da177e4 283 return -EINVAL;
89bddce5 284 if (len) {
d6fe3945
SG
285 if (audit_sockaddr(klen, kaddr))
286 return -ENOMEM;
89bddce5 287 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
288 return -EFAULT;
289 }
290 /*
89bddce5
SH
291 * "fromlen shall refer to the value before truncation.."
292 * 1003.1g
1da177e4
LT
293 */
294 return __put_user(klen, ulen);
295}
296
08009a76 297static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
298
299static struct inode *sock_alloc_inode(struct super_block *sb)
300{
301 struct socket_alloc *ei;
89bddce5 302
e94b1766 303 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
304 if (!ei)
305 return NULL;
333f7909
AV
306 init_waitqueue_head(&ei->socket.wq.wait);
307 ei->socket.wq.fasync_list = NULL;
308 ei->socket.wq.flags = 0;
89bddce5 309
1da177e4
LT
310 ei->socket.state = SS_UNCONNECTED;
311 ei->socket.flags = 0;
312 ei->socket.ops = NULL;
313 ei->socket.sk = NULL;
314 ei->socket.file = NULL;
1da177e4
LT
315
316 return &ei->vfs_inode;
317}
318
6d7855c5 319static void sock_free_inode(struct inode *inode)
1da177e4 320{
43815482
ED
321 struct socket_alloc *ei;
322
323 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 324 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
325}
326
51cc5068 327static void init_once(void *foo)
1da177e4 328{
89bddce5 329 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 330
a35afb83 331 inode_init_once(&ei->vfs_inode);
1da177e4 332}
89bddce5 333
1e911632 334static void init_inodecache(void)
1da177e4
LT
335{
336 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
337 sizeof(struct socket_alloc),
338 0,
339 (SLAB_HWCACHE_ALIGN |
340 SLAB_RECLAIM_ACCOUNT |
5d097056 341 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 342 init_once);
1e911632 343 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
344}
345
b87221de 346static const struct super_operations sockfs_ops = {
c6d409cf 347 .alloc_inode = sock_alloc_inode,
6d7855c5 348 .free_inode = sock_free_inode,
c6d409cf 349 .statfs = simple_statfs,
1da177e4
LT
350};
351
c23fbb6b
ED
352/*
353 * sockfs_dname() is called from d_path().
354 */
355static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
356{
357 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 358 d_inode(dentry)->i_ino);
c23fbb6b
ED
359}
360
3ba13d17 361static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 362 .d_dname = sockfs_dname,
1da177e4
LT
363};
364
bba0bd31
AG
365static int sockfs_xattr_get(const struct xattr_handler *handler,
366 struct dentry *dentry, struct inode *inode,
367 const char *suffix, void *value, size_t size)
368{
369 if (value) {
370 if (dentry->d_name.len + 1 > size)
371 return -ERANGE;
372 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
373 }
374 return dentry->d_name.len + 1;
375}
376
377#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
378#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
379#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
380
381static const struct xattr_handler sockfs_xattr_handler = {
382 .name = XATTR_NAME_SOCKPROTONAME,
383 .get = sockfs_xattr_get,
384};
385
4a590153 386static int sockfs_security_xattr_set(const struct xattr_handler *handler,
e65ce2a5 387 struct user_namespace *mnt_userns,
4a590153
AG
388 struct dentry *dentry, struct inode *inode,
389 const char *suffix, const void *value,
390 size_t size, int flags)
391{
392 /* Handled by LSM. */
393 return -EAGAIN;
394}
395
396static const struct xattr_handler sockfs_security_xattr_handler = {
397 .prefix = XATTR_SECURITY_PREFIX,
398 .set = sockfs_security_xattr_set,
399};
400
bba0bd31
AG
401static const struct xattr_handler *sockfs_xattr_handlers[] = {
402 &sockfs_xattr_handler,
4a590153 403 &sockfs_security_xattr_handler,
bba0bd31
AG
404 NULL
405};
406
fba9be49 407static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 408{
fba9be49
DH
409 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
410 if (!ctx)
411 return -ENOMEM;
412 ctx->ops = &sockfs_ops;
413 ctx->dops = &sockfs_dentry_operations;
414 ctx->xattr = sockfs_xattr_handlers;
415 return 0;
c74a1cbb
AV
416}
417
418static struct vfsmount *sock_mnt __read_mostly;
419
420static struct file_system_type sock_fs_type = {
421 .name = "sockfs",
fba9be49 422 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
423 .kill_sb = kill_anon_super,
424};
425
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success the file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we no longer
 *	refer to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable with shared fd spaces; we cannot
 *	solve it inside the kernel, but we still take care of internal
 *	coherence.
 */
442
8a3c245c
PT
443/**
444 * sock_alloc_file - Bind a &socket to a &file
445 * @sock: socket
446 * @flags: file status flags
447 * @dname: protocol name
448 *
449 * Returns the &file bound with @sock, implicitly storing it
450 * in sock->file. If dname is %NULL, sets to "".
451 * On failure the return is a ERR pointer (see linux/err.h).
452 * This function uses GFP_KERNEL internally.
453 */
454
aab174f0 455struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 456{
7cbe66b6 457 struct file *file;
1da177e4 458
d93aa9d8
AV
459 if (!dname)
460 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 461
d93aa9d8
AV
462 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
463 O_RDWR | (flags & O_NONBLOCK),
464 &socket_file_ops);
b5ffe634 465 if (IS_ERR(file)) {
8e1611e2 466 sock_release(sock);
39b65252 467 return file;
cc3808f8
AV
468 }
469
470 sock->file = file;
39d8c1b6 471 file->private_data = sock;
d8e464ec 472 stream_open(SOCK_INODE(sock), file);
28407630 473 return file;
39d8c1b6 474}
56b31d1c 475EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 476
56b31d1c 477static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
478{
479 struct file *newfile;
28407630 480 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
481 if (unlikely(fd < 0)) {
482 sock_release(sock);
28407630 483 return fd;
ce4bb04c 484 }
39d8c1b6 485
aab174f0 486 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 487 if (!IS_ERR(newfile)) {
39d8c1b6 488 fd_install(fd, newfile);
28407630
AV
489 return fd;
490 }
7cbe66b6 491
28407630
AV
492 put_unused_fd(fd);
493 return PTR_ERR(newfile);
1da177e4
LT
494}
495
8a3c245c
PT
496/**
497 * sock_from_file - Return the &socket bounded to @file.
498 * @file: file
8a3c245c 499 *
dba4a925 500 * On failure returns %NULL.
8a3c245c
PT
501 */
502
dba4a925 503struct socket *sock_from_file(struct file *file)
6cb153ca 504{
6cb153ca
BL
505 if (file->f_op == &socket_file_ops)
506 return file->private_data; /* set in sock_map_fd */
507
23bb80d2 508 return NULL;
6cb153ca 509}
406a3c63 510EXPORT_SYMBOL(sock_from_file);
6cb153ca 511
1da177e4 512/**
c6d409cf 513 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
514 * @fd: file handle
515 * @err: pointer to an error code return
516 *
517 * The file handle passed in is locked and the socket it is bound
241c4667 518 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
519 * with a negative errno code and NULL is returned. The function checks
520 * for both invalid handles and passing a handle which is not a socket.
521 *
522 * On a success the socket object pointer is returned.
523 */
524
525struct socket *sockfd_lookup(int fd, int *err)
526{
527 struct file *file;
1da177e4
LT
528 struct socket *sock;
529
89bddce5
SH
530 file = fget(fd);
531 if (!file) {
1da177e4
LT
532 *err = -EBADF;
533 return NULL;
534 }
89bddce5 535
dba4a925
FR
536 sock = sock_from_file(file);
537 if (!sock) {
538 *err = -ENOTSOCK;
1da177e4 539 fput(file);
dba4a925 540 }
6cb153ca
BL
541 return sock;
542}
c6d409cf 543EXPORT_SYMBOL(sockfd_lookup);
1da177e4 544
6cb153ca
BL
545static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
546{
00e188ef 547 struct fd f = fdget(fd);
6cb153ca
BL
548 struct socket *sock;
549
3672558c 550 *err = -EBADF;
00e188ef 551 if (f.file) {
dba4a925 552 sock = sock_from_file(f.file);
00e188ef 553 if (likely(sock)) {
ce787a5a 554 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 555 return sock;
00e188ef 556 }
dba4a925 557 *err = -ENOTSOCK;
00e188ef 558 fdput(f);
1da177e4 559 }
6cb153ca 560 return NULL;
1da177e4
LT
561}
562
600e1779
MY
563static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
564 size_t size)
565{
566 ssize_t len;
567 ssize_t used = 0;
568
c5ef6035 569 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
570 if (len < 0)
571 return len;
572 used += len;
573 if (buffer) {
574 if (size < used)
575 return -ERANGE;
576 buffer += len;
577 }
578
579 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
580 used += len;
581 if (buffer) {
582 if (size < used)
583 return -ERANGE;
584 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
585 buffer += len;
586 }
587
588 return used;
589}
590
549c7297
CB
591static int sockfs_setattr(struct user_namespace *mnt_userns,
592 struct dentry *dentry, struct iattr *iattr)
86741ec2 593{
549c7297 594 int err = simple_setattr(&init_user_ns, dentry, iattr);
86741ec2 595
e1a3a60a 596 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
597 struct socket *sock = SOCKET_I(d_inode(dentry));
598
6d8c50dc
CW
599 if (sock->sk)
600 sock->sk->sk_uid = iattr->ia_uid;
601 else
602 err = -ENOENT;
86741ec2
LC
603 }
604
605 return err;
606}
607
600e1779 608static const struct inode_operations sockfs_inode_ops = {
600e1779 609 .listxattr = sockfs_listxattr,
86741ec2 610 .setattr = sockfs_setattr,
600e1779
MY
611};
612
1da177e4 613/**
8a3c245c 614 * sock_alloc - allocate a socket
89bddce5 615 *
1da177e4
LT
616 * Allocate a new inode and socket object. The two are bound together
617 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 618 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
619 */
620
f4a00aac 621struct socket *sock_alloc(void)
1da177e4 622{
89bddce5
SH
623 struct inode *inode;
624 struct socket *sock;
1da177e4 625
a209dfc7 626 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
627 if (!inode)
628 return NULL;
629
630 sock = SOCKET_I(inode);
631
85fe4025 632 inode->i_ino = get_next_ino();
89bddce5 633 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
634 inode->i_uid = current_fsuid();
635 inode->i_gid = current_fsgid();
600e1779 636 inode->i_op = &sockfs_inode_ops;
1da177e4 637
1da177e4
LT
638 return sock;
639}
f4a00aac 640EXPORT_SYMBOL(sock_alloc);
1da177e4 641
6d8c50dc 642static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
643{
644 if (sock->ops) {
645 struct module *owner = sock->ops->owner;
646
6d8c50dc
CW
647 if (inode)
648 inode_lock(inode);
1da177e4 649 sock->ops->release(sock);
ff7b11aa 650 sock->sk = NULL;
6d8c50dc
CW
651 if (inode)
652 inode_unlock(inode);
1da177e4
LT
653 sock->ops = NULL;
654 module_put(owner);
655 }
656
333f7909 657 if (sock->wq.fasync_list)
3410f22e 658 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 659
1da177e4
LT
660 if (!sock->file) {
661 iput(SOCK_INODE(sock));
662 return;
663 }
89bddce5 664 sock->file = NULL;
1da177e4 665}
6d8c50dc 666
9a8ad9ac
AL
667/**
668 * sock_release - close a socket
669 * @sock: socket to close
670 *
671 * The socket is released from the protocol stack if it has a release
672 * callback, and the inode is then released if the socket is bound to
673 * an inode not a file.
674 */
6d8c50dc
CW
675void sock_release(struct socket *sock)
676{
677 __sock_release(sock, NULL);
678}
c6d409cf 679EXPORT_SYMBOL(sock_release);
1da177e4 680
c14ac945 681void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 682{
140c55d4
ED
683 u8 flags = *tx_flags;
684
c14ac945 685 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
686 flags |= SKBTX_HW_TSTAMP;
687
c14ac945 688 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
689 flags |= SKBTX_SW_TSTAMP;
690
c14ac945 691 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
692 flags |= SKBTX_SCHED_TSTAMP;
693
140c55d4 694 *tx_flags = flags;
20d49473 695}
67cc0d40 696EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 697
8c3c447b
PA
698INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
699 size_t));
a648a592
PA
700INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
701 size_t));
d8725c86 702static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 703{
a648a592
PA
704 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
705 inet_sendmsg, sock, msg,
706 msg_data_left(msg));
d8725c86
AV
707 BUG_ON(ret == -EIOCBQUEUED);
708 return ret;
1da177e4
LT
709}
710
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM: if
 *	security_socket_sendmsg() returns an error, that error is returned
 *	and nothing is sent.
 *	Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 727
8a3c245c
PT
728/**
729 * kernel_sendmsg - send a message through @sock (kernel-space)
730 * @sock: socket
731 * @msg: message header
732 * @vec: kernel vec
733 * @num: vec array length
734 * @size: total message data size
735 *
736 * Builds the message data with @vec and sends it through @sock.
737 * Returns the number of bytes sent, or an error code.
738 */
739
1da177e4
LT
740int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
741 struct kvec *vec, size_t num, size_t size)
742{
aa563d7b 743 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 744 return sock_sendmsg(sock, msg);
1da177e4 745}
c6d409cf 746EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 747
8a3c245c
PT
748/**
749 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
750 * @sk: sock
751 * @msg: message header
752 * @vec: output s/g array
753 * @num: output s/g array length
754 * @size: total message data size
755 *
756 * Builds the message data with @vec and sends it through @sock.
757 * Returns the number of bytes sent, or an error code.
758 * Caller must hold @sk.
759 */
760
306b13eb
TH
761int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
762 struct kvec *vec, size_t num, size_t size)
763{
764 struct socket *sock = sk->sk_socket;
765
766 if (!sock->ops->sendmsg_locked)
db5980d8 767 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 768
aa563d7b 769 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
770
771 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
772}
773EXPORT_SYMBOL(kernel_sendmsg_locked);
774
8605330a
SHY
775static bool skb_is_err_queue(const struct sk_buff *skb)
776{
777 /* pkt_type of skbs enqueued on the error queue are set to
778 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
779 * in recvmsg, since skbs received on a local socket will never
780 * have a pkt_type of PACKET_OUTGOING.
781 */
782 return skb->pkt_type == PACKET_OUTGOING;
783}
784
b50a5c70
ML
785/* On transmit, software and hardware timestamps are returned independently.
786 * As the two skb clones share the hardware timestamp, which may be updated
787 * before the software timestamp is received, a hardware TX timestamp may be
788 * returned only if there is no software TX timestamp. Ignore false software
789 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 790 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
791 * hardware timestamp.
792 */
793static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
794{
795 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
796}
797
aad9c8c4
ML
798static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
799{
800 struct scm_ts_pktinfo ts_pktinfo;
801 struct net_device *orig_dev;
802
803 if (!skb_mac_header_was_set(skb))
804 return;
805
806 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
807
808 rcu_read_lock();
809 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
810 if (orig_dev)
811 ts_pktinfo.if_index = orig_dev->ifindex;
812 rcu_read_unlock();
813
814 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
815 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
816 sizeof(ts_pktinfo), &ts_pktinfo);
817}
818
92f37fd2
ED
819/*
820 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
821 */
822void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
823 struct sk_buff *skb)
824{
20d49473 825 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 826 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
827 struct scm_timestamping_internal tss;
828
b50a5c70 829 int empty = 1, false_tstamp = 0;
20d49473
PO
830 struct skb_shared_hwtstamps *shhwtstamps =
831 skb_hwtstamps(skb);
832
833 /* Race occurred between timestamp enabling and packet
834 receiving. Fill in the current time for now. */
b50a5c70 835 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 836 __net_timestamp(skb);
b50a5c70
ML
837 false_tstamp = 1;
838 }
20d49473
PO
839
840 if (need_software_tstamp) {
841 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
842 if (new_tstamp) {
843 struct __kernel_sock_timeval tv;
844
845 skb_get_new_timestamp(skb, &tv);
846 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
847 sizeof(tv), &tv);
848 } else {
849 struct __kernel_old_timeval tv;
850
851 skb_get_timestamp(skb, &tv);
852 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
853 sizeof(tv), &tv);
854 }
20d49473 855 } else {
887feae3
DD
856 if (new_tstamp) {
857 struct __kernel_timespec ts;
858
859 skb_get_new_timestampns(skb, &ts);
860 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
861 sizeof(ts), &ts);
862 } else {
df1b4ba9 863 struct __kernel_old_timespec ts;
887feae3
DD
864
865 skb_get_timestampns(skb, &ts);
866 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
867 sizeof(ts), &ts);
868 }
20d49473
PO
869 }
870 }
871
f24b9be5 872 memset(&tss, 0, sizeof(tss));
c199105d 873 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 874 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 875 empty = 0;
4d276eb6 876 if (shhwtstamps &&
b9f40e21 877 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
d7c08826
YL
878 !skb_is_swtx_tstamp(skb, false_tstamp)) {
879 if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
880 ptp_convert_timestamp(shhwtstamps, sk->sk_bind_phc);
881
882 if (ktime_to_timespec64_cond(shhwtstamps->hwtstamp,
883 tss.ts + 2)) {
884 empty = 0;
885
886 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
887 !skb_is_err_queue(skb))
888 put_ts_pktinfo(msg, skb);
889 }
aad9c8c4 890 }
1c885808 891 if (!empty) {
9718475e
DD
892 if (sock_flag(sk, SOCK_TSTAMP_NEW))
893 put_cmsg_scm_timestamping64(msg, &tss);
894 else
895 put_cmsg_scm_timestamping(msg, &tss);
1c885808 896
8605330a 897 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 898 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
899 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
900 skb->len, skb->data);
901 }
92f37fd2 902}
7c81fd8b
ACM
903EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
904
6e3e939f
JB
905void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
906 struct sk_buff *skb)
907{
908 int ack;
909
910 if (!sock_flag(sk, SOCK_WIFI_STATUS))
911 return;
912 if (!skb->wifi_acked_valid)
913 return;
914
915 ack = skb->wifi_acked;
916
917 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
918}
919EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
920
11165f14 921static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
922 struct sk_buff *skb)
3b885787 923{
744d5a3e 924 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 925 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 926 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
927}
928
/*
 * Emit the receive timestamp control messages for @skb, followed by the
 * drop counter message.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 936
8c3c447b 937INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
938 size_t, int));
939INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
940 size_t, int));
1b784140 941static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 942 int flags)
1da177e4 943{
a648a592
PA
944 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
945 inet_recvmsg, sock, msg, msg_data_left(msg),
946 flags);
1da177e4
LT
947}
948
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM: if
 *	security_socket_recvmsg() returns an error, that error is returned
 *	and nothing is received.  Returns the total number of bytes
 *	received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 965
c1249c0a 966/**
8a3c245c
PT
967 * kernel_recvmsg - Receive a message from a socket (kernel space)
968 * @sock: The socket to receive the message from
969 * @msg: Received message
970 * @vec: Input s/g array for message data
971 * @num: Size of input s/g array
972 * @size: Number of bytes to read
973 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 974 *
8a3c245c
PT
975 * On return the msg structure contains the scatter/gather array passed in the
976 * vec argument. The array is modified so that it consists of the unfilled
977 * portion of the original array.
c1249c0a 978 *
8a3c245c 979 * The returned value is the total number of bytes received, or an error.
c1249c0a 980 */
8a3c245c 981
89bddce5
SH
982int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
983 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4 984{
1f466e1f 985 msg->msg_control_is_user = false;
aa563d7b 986 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1f466e1f 987 return sock_recvmsg(sock, msg, flags);
1da177e4 988}
c6d409cf 989EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 990
ce1d4d3e
CH
991static ssize_t sock_sendpage(struct file *file, struct page *page,
992 int offset, size_t size, loff_t *ppos, int more)
1da177e4 993{
1da177e4
LT
994 struct socket *sock;
995 int flags;
996
ce1d4d3e
CH
997 sock = file->private_data;
998
35f9c09f
ED
999 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
1000 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
1001 flags |= more;
ce1d4d3e 1002
e6949583 1003 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 1004}
1da177e4 1005
9c55e01c 1006static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 1007 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
1008 unsigned int flags)
1009{
1010 struct socket *sock = file->private_data;
1011
997b37da 1012 if (unlikely(!sock->ops->splice_read))
95506588 1013 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 1014
9c55e01c
JA
1015 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
1016}
1017
8ae5e030 1018static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 1019{
6d652330
AV
1020 struct file *file = iocb->ki_filp;
1021 struct socket *sock = file->private_data;
0345f931 1022 struct msghdr msg = {.msg_iter = *to,
1023 .msg_iocb = iocb};
8ae5e030 1024 ssize_t res;
ce1d4d3e 1025
ebfcd895 1026 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1027 msg.msg_flags = MSG_DONTWAIT;
1028
1029 if (iocb->ki_pos != 0)
1da177e4 1030 return -ESPIPE;
027445c3 1031
66ee59af 1032 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
1033 return 0;
1034
2da62906 1035 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
1036 *to = msg.msg_iter;
1037 return res;
1da177e4
LT
1038}
1039
8ae5e030 1040static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 1041{
6d652330
AV
1042 struct file *file = iocb->ki_filp;
1043 struct socket *sock = file->private_data;
0345f931 1044 struct msghdr msg = {.msg_iter = *from,
1045 .msg_iocb = iocb};
8ae5e030 1046 ssize_t res;
1da177e4 1047
8ae5e030 1048 if (iocb->ki_pos != 0)
ce1d4d3e 1049 return -ESPIPE;
027445c3 1050
ebfcd895 1051 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1052 msg.msg_flags = MSG_DONTWAIT;
1053
6d652330
AV
1054 if (sock->type == SOCK_SEQPACKET)
1055 msg.msg_flags |= MSG_EOR;
1056
d8725c86 1057 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1058 *from = msg.msg_iter;
1059 return res;
1da177e4
LT
1060}
1061
1da177e4
LT
1062/*
1063 * Atomic setting of ioctl hooks to avoid race
1064 * with module unload.
1065 */
1066
4a3e2f71 1067static DEFINE_MUTEX(br_ioctl_mutex);
ad2f99ae
AB
1068static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br,
1069 unsigned int cmd, struct ifreq *ifr,
1070 void __user *uarg);
1da177e4 1071
ad2f99ae
AB
1072void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
1073 unsigned int cmd, struct ifreq *ifr,
1074 void __user *uarg))
1da177e4 1075{
4a3e2f71 1076 mutex_lock(&br_ioctl_mutex);
1da177e4 1077 br_ioctl_hook = hook;
4a3e2f71 1078 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1079}
1080EXPORT_SYMBOL(brioctl_set);
1081
ad2f99ae
AB
1082int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
1083 struct ifreq *ifr, void __user *uarg)
1084{
1085 int err = -ENOPKG;
1086
1087 if (!br_ioctl_hook)
1088 request_module("bridge");
1089
1090 mutex_lock(&br_ioctl_mutex);
1091 if (br_ioctl_hook)
1092 err = br_ioctl_hook(net, br, cmd, ifr, uarg);
1093 mutex_unlock(&br_ioctl_mutex);
1094
1095 return err;
1096}
1097
4a3e2f71 1098static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1099static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1100
881d966b 1101void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1102{
4a3e2f71 1103 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1104 vlan_ioctl_hook = hook;
4a3e2f71 1105 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1106}
1107EXPORT_SYMBOL(vlan_ioctl_set);
1108
6b96018b 1109static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1110 unsigned int cmd, unsigned long arg)
6b96018b 1111{
876f0bf9
AB
1112 struct ifreq ifr;
1113 bool need_copyout;
6b96018b
AB
1114 int err;
1115 void __user *argp = (void __user *)arg;
a554bf96 1116 void __user *data;
6b96018b
AB
1117
1118 err = sock->ops->ioctl(sock, cmd, arg);
1119
1120 /*
1121 * If this ioctl is unknown try to hand it down
1122 * to the NIC driver.
1123 */
36fd633e
AV
1124 if (err != -ENOIOCTLCMD)
1125 return err;
6b96018b 1126
29ce8f97
JK
1127 if (!is_socket_ioctl_cmd(cmd))
1128 return -ENOTTY;
1129
a554bf96 1130 if (get_user_ifreq(&ifr, &data, argp))
876f0bf9 1131 return -EFAULT;
a554bf96 1132 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
876f0bf9 1133 if (!err && need_copyout)
a554bf96 1134 if (put_user_ifreq(&ifr, argp))
44c02a2c 1135 return -EFAULT;
876f0bf9 1136
6b96018b
AB
1137 return err;
1138}
1139
1da177e4
LT
1140/*
1141 * With an ioctl, arg may well be a user mode pointer, but we don't know
1142 * what to do with it - that's up to the protocol still.
1143 */
1144
1145static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1146{
1147 struct socket *sock;
881d966b 1148 struct sock *sk;
1da177e4
LT
1149 void __user *argp = (void __user *)arg;
1150 int pid, err;
881d966b 1151 struct net *net;
1da177e4 1152
b69aee04 1153 sock = file->private_data;
881d966b 1154 sk = sock->sk;
3b1e0a65 1155 net = sock_net(sk);
44c02a2c
AV
1156 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1157 struct ifreq ifr;
a554bf96 1158 void __user *data;
44c02a2c 1159 bool need_copyout;
a554bf96 1160 if (get_user_ifreq(&ifr, &data, argp))
44c02a2c 1161 return -EFAULT;
a554bf96 1162 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
44c02a2c 1163 if (!err && need_copyout)
a554bf96 1164 if (put_user_ifreq(&ifr, argp))
44c02a2c 1165 return -EFAULT;
1da177e4 1166 } else
3d23e349 1167#ifdef CONFIG_WEXT_CORE
1da177e4 1168 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1169 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1170 } else
3d23e349 1171#endif
89bddce5 1172 switch (cmd) {
1da177e4
LT
1173 case FIOSETOWN:
1174 case SIOCSPGRP:
1175 err = -EFAULT;
1176 if (get_user(pid, (int __user *)argp))
1177 break;
393cc3f5 1178 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1179 break;
1180 case FIOGETOWN:
1181 case SIOCGPGRP:
609d7fa9 1182 err = put_user(f_getown(sock->file),
89bddce5 1183 (int __user *)argp);
1da177e4
LT
1184 break;
1185 case SIOCGIFBR:
1186 case SIOCSIFBR:
1187 case SIOCBRADDBR:
1188 case SIOCBRDELBR:
ad2f99ae 1189 err = br_ioctl_call(net, NULL, cmd, NULL, argp);
1da177e4
LT
1190 break;
1191 case SIOCGIFVLAN:
1192 case SIOCSIFVLAN:
1193 err = -ENOPKG;
1194 if (!vlan_ioctl_hook)
1195 request_module("8021q");
1196
4a3e2f71 1197 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1198 if (vlan_ioctl_hook)
881d966b 1199 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1200 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1201 break;
c62cce2c
AV
1202 case SIOCGSKNS:
1203 err = -EPERM;
1204 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1205 break;
1206
1207 err = open_related_ns(&net->ns, get_net_ns);
1208 break;
0768e170
AB
1209 case SIOCGSTAMP_OLD:
1210 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1211 if (!sock->ops->gettstamp) {
1212 err = -ENOIOCTLCMD;
1213 break;
1214 }
1215 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1216 cmd == SIOCGSTAMP_OLD,
1217 !IS_ENABLED(CONFIG_64BIT));
60747828 1218 break;
0768e170
AB
1219 case SIOCGSTAMP_NEW:
1220 case SIOCGSTAMPNS_NEW:
1221 if (!sock->ops->gettstamp) {
1222 err = -ENOIOCTLCMD;
1223 break;
1224 }
1225 err = sock->ops->gettstamp(sock, argp,
1226 cmd == SIOCGSTAMP_NEW,
1227 false);
c7cbdbf2 1228 break;
876f0bf9
AB
1229
1230 case SIOCGIFCONF:
1231 err = dev_ifconf(net, argp);
1232 break;
1233
1da177e4 1234 default:
63ff03ab 1235 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1236 break;
89bddce5 1237 }
1da177e4
LT
1238 return err;
1239}
1240
8a3c245c
PT
1241/**
1242 * sock_create_lite - creates a socket
1243 * @family: protocol family (AF_INET, ...)
1244 * @type: communication type (SOCK_STREAM, ...)
1245 * @protocol: protocol (0, ...)
1246 * @res: new socket
1247 *
1248 * Creates a new socket and assigns it to @res, passing through LSM.
1249 * The new socket initialization is not complete, see kernel_accept().
1250 * Returns 0 or an error. On failure @res is set to %NULL.
1251 * This function internally uses GFP_KERNEL.
1252 */
1253
1da177e4
LT
1254int sock_create_lite(int family, int type, int protocol, struct socket **res)
1255{
1256 int err;
1257 struct socket *sock = NULL;
89bddce5 1258
1da177e4
LT
1259 err = security_socket_create(family, type, protocol, 1);
1260 if (err)
1261 goto out;
1262
1263 sock = sock_alloc();
1264 if (!sock) {
1265 err = -ENOMEM;
1266 goto out;
1267 }
1268
1da177e4 1269 sock->type = type;
7420ed23
VY
1270 err = security_socket_post_create(sock, family, type, protocol, 1);
1271 if (err)
1272 goto out_release;
1273
1da177e4
LT
1274out:
1275 *res = sock;
1276 return err;
7420ed23
VY
1277out_release:
1278 sock_release(sock);
1279 sock = NULL;
1280 goto out;
1da177e4 1281}
c6d409cf 1282EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1283
1284/* No kernel lock held - perfect */
ade994f4 1285static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1286{
3cafb376 1287 struct socket *sock = file->private_data;
a331de3b 1288 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1289
e88958e6
CH
1290 if (!sock->ops->poll)
1291 return 0;
f641f13b 1292
a331de3b
CH
1293 if (sk_can_busy_loop(sock->sk)) {
1294 /* poll once if requested by the syscall */
1295 if (events & POLL_BUSY_LOOP)
1296 sk_busy_loop(sock->sk, 1);
1297
1298 /* if this socket can poll_ll, tell the system call */
1299 flag = POLL_BUSY_LOOP;
1300 }
1301
1302 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1303}
1304
89bddce5 1305static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1306{
b69aee04 1307 struct socket *sock = file->private_data;
1da177e4
LT
1308
1309 return sock->ops->mmap(file, sock, vma);
1310}
1311
/*
 * release file operation for sockets: release the socket embedded in
 * @inode.  @filp is unused.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1317
1318/*
1319 * Update the socket async list
1320 *
1321 * Fasync_list locking strategy.
1322 *
1323 * 1. fasync_list is modified only under process context socket lock
1324 * i.e. under semaphore.
1325 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1326 * or under socket lock
1da177e4
LT
1327 */
1328
1329static int sock_fasync(int fd, struct file *filp, int on)
1330{
989a2979
ED
1331 struct socket *sock = filp->private_data;
1332 struct sock *sk = sock->sk;
333f7909 1333 struct socket_wq *wq = &sock->wq;
1da177e4 1334
989a2979 1335 if (sk == NULL)
1da177e4 1336 return -EINVAL;
1da177e4
LT
1337
1338 lock_sock(sk);
eaefd110 1339 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1340
eaefd110 1341 if (!wq->fasync_list)
989a2979
ED
1342 sock_reset_flag(sk, SOCK_FASYNC);
1343 else
bcdce719 1344 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1345
989a2979 1346 release_sock(sk);
1da177e4
LT
1347 return 0;
1348}
1349
ceb5d58b 1350/* This function may be called only under rcu_lock */
1da177e4 1351
ceb5d58b 1352int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1353{
ceb5d58b 1354 if (!wq || !wq->fasync_list)
1da177e4 1355 return -1;
ceb5d58b 1356
89bddce5 1357 switch (how) {
8d8ad9d7 1358 case SOCK_WAKE_WAITD:
ceb5d58b 1359 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1360 break;
1361 goto call_kill;
8d8ad9d7 1362 case SOCK_WAKE_SPACE:
ceb5d58b 1363 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1364 break;
7c7ab580 1365 fallthrough;
8d8ad9d7 1366 case SOCK_WAKE_IO:
89bddce5 1367call_kill:
43815482 1368 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1369 break;
8d8ad9d7 1370 case SOCK_WAKE_URG:
43815482 1371 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1372 }
ceb5d58b 1373
1da177e4
LT
1374 return 0;
1375}
c6d409cf 1376EXPORT_SYMBOL(sock_wake_async);
1da177e4 1377
8a3c245c
PT
1378/**
1379 * __sock_create - creates a socket
1380 * @net: net namespace
1381 * @family: protocol family (AF_INET, ...)
1382 * @type: communication type (SOCK_STREAM, ...)
1383 * @protocol: protocol (0, ...)
1384 * @res: new socket
1385 * @kern: boolean for kernel space sockets
1386 *
1387 * Creates a new socket and assigns it to @res, passing through LSM.
1388 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1389 * be set to true if the socket resides in kernel space.
1390 * This function internally uses GFP_KERNEL.
1391 */
1392
721db93a 1393int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1394 struct socket **res, int kern)
1da177e4
LT
1395{
1396 int err;
1397 struct socket *sock;
55737fda 1398 const struct net_proto_family *pf;
1da177e4
LT
1399
1400 /*
89bddce5 1401 * Check protocol is in range
1da177e4
LT
1402 */
1403 if (family < 0 || family >= NPROTO)
1404 return -EAFNOSUPPORT;
1405 if (type < 0 || type >= SOCK_MAX)
1406 return -EINVAL;
1407
1408 /* Compatibility.
1409
1410 This uglymoron is moved from INET layer to here to avoid
1411 deadlock in module load.
1412 */
1413 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1414 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1415 current->comm);
1da177e4
LT
1416 family = PF_PACKET;
1417 }
1418
1419 err = security_socket_create(family, type, protocol, kern);
1420 if (err)
1421 return err;
89bddce5 1422
55737fda
SH
1423 /*
1424 * Allocate the socket and allow the family to set things up. if
1425 * the protocol is 0, the family is instructed to select an appropriate
1426 * default.
1427 */
1428 sock = sock_alloc();
1429 if (!sock) {
e87cc472 1430 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1431 return -ENFILE; /* Not exactly a match, but its the
1432 closest posix thing */
1433 }
1434
1435 sock->type = type;
1436
95a5afca 1437#ifdef CONFIG_MODULES
89bddce5
SH
1438 /* Attempt to load a protocol module if the find failed.
1439 *
1440 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1441 * requested real, full-featured networking support upon configuration.
1442 * Otherwise module support will break!
1443 */
190683a9 1444 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1445 request_module("net-pf-%d", family);
1da177e4
LT
1446#endif
1447
55737fda
SH
1448 rcu_read_lock();
1449 pf = rcu_dereference(net_families[family]);
1450 err = -EAFNOSUPPORT;
1451 if (!pf)
1452 goto out_release;
1da177e4
LT
1453
1454 /*
1455 * We will call the ->create function, that possibly is in a loadable
1456 * module, so we have to bump that loadable module refcnt first.
1457 */
55737fda 1458 if (!try_module_get(pf->owner))
1da177e4
LT
1459 goto out_release;
1460
55737fda
SH
1461 /* Now protected by module ref count */
1462 rcu_read_unlock();
1463
3f378b68 1464 err = pf->create(net, sock, protocol, kern);
55737fda 1465 if (err < 0)
1da177e4 1466 goto out_module_put;
a79af59e 1467
1da177e4
LT
1468 /*
1469 * Now to bump the refcnt of the [loadable] module that owns this
1470 * socket at sock_release time we decrement its refcnt.
1471 */
55737fda
SH
1472 if (!try_module_get(sock->ops->owner))
1473 goto out_module_busy;
1474
1da177e4
LT
1475 /*
1476 * Now that we're done with the ->create function, the [loadable]
1477 * module can have its refcnt decremented
1478 */
55737fda 1479 module_put(pf->owner);
7420ed23
VY
1480 err = security_socket_post_create(sock, family, type, protocol, kern);
1481 if (err)
3b185525 1482 goto out_sock_release;
55737fda 1483 *res = sock;
1da177e4 1484
55737fda
SH
1485 return 0;
1486
1487out_module_busy:
1488 err = -EAFNOSUPPORT;
1da177e4 1489out_module_put:
55737fda
SH
1490 sock->ops = NULL;
1491 module_put(pf->owner);
1492out_sock_release:
1da177e4 1493 sock_release(sock);
55737fda
SH
1494 return err;
1495
1496out_release:
1497 rcu_read_unlock();
1498 goto out_sock_release;
1da177e4 1499}
721db93a 1500EXPORT_SYMBOL(__sock_create);
1da177e4 1501
8a3c245c
PT
1502/**
1503 * sock_create - creates a socket
1504 * @family: protocol family (AF_INET, ...)
1505 * @type: communication type (SOCK_STREAM, ...)
1506 * @protocol: protocol (0, ...)
1507 * @res: new socket
1508 *
1509 * A wrapper around __sock_create().
1510 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1511 */
1512
1da177e4
LT
1513int sock_create(int family, int type, int protocol, struct socket **res)
1514{
1b8d7ae4 1515 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1516}
c6d409cf 1517EXPORT_SYMBOL(sock_create);
1da177e4 1518
8a3c245c
PT
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes @kern == 1.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1536
9d6a15c3 1537int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1538{
1539 int retval;
1540 struct socket *sock;
a677a039
UD
1541 int flags;
1542
e38b36f3
UD
1543 /* Check the SOCK_* constants for consistency. */
1544 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1545 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1546 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1547 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1548
a677a039 1549 flags = type & ~SOCK_TYPE_MASK;
77d27200 1550 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1551 return -EINVAL;
1552 type &= SOCK_TYPE_MASK;
1da177e4 1553
aaca0bdc
UD
1554 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1555 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1556
1da177e4
LT
1557 retval = sock_create(family, type, protocol, &sock);
1558 if (retval < 0)
8e1611e2 1559 return retval;
1da177e4 1560
8e1611e2 1561 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1562}
1563
9d6a15c3
DB
1564SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1565{
1566 return __sys_socket(family, type, protocol);
1567}
1568
1da177e4
LT
1569/*
1570 * Create a pair of connected sockets.
1571 */
1572
6debc8d8 1573int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1574{
1575 struct socket *sock1, *sock2;
1576 int fd1, fd2, err;
db349509 1577 struct file *newfile1, *newfile2;
a677a039
UD
1578 int flags;
1579
1580 flags = type & ~SOCK_TYPE_MASK;
77d27200 1581 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1582 return -EINVAL;
1583 type &= SOCK_TYPE_MASK;
1da177e4 1584
aaca0bdc
UD
1585 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1586 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1587
016a266b
AV
1588 /*
1589 * reserve descriptors and make sure we won't fail
1590 * to return them to userland.
1591 */
1592 fd1 = get_unused_fd_flags(flags);
1593 if (unlikely(fd1 < 0))
1594 return fd1;
1595
1596 fd2 = get_unused_fd_flags(flags);
1597 if (unlikely(fd2 < 0)) {
1598 put_unused_fd(fd1);
1599 return fd2;
1600 }
1601
1602 err = put_user(fd1, &usockvec[0]);
1603 if (err)
1604 goto out;
1605
1606 err = put_user(fd2, &usockvec[1]);
1607 if (err)
1608 goto out;
1609
1da177e4
LT
1610 /*
1611 * Obtain the first socket and check if the underlying protocol
1612 * supports the socketpair call.
1613 */
1614
1615 err = sock_create(family, type, protocol, &sock1);
016a266b 1616 if (unlikely(err < 0))
1da177e4
LT
1617 goto out;
1618
1619 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1620 if (unlikely(err < 0)) {
1621 sock_release(sock1);
1622 goto out;
bf3c23d1 1623 }
d73aa286 1624
d47cd945
DH
1625 err = security_socket_socketpair(sock1, sock2);
1626 if (unlikely(err)) {
1627 sock_release(sock2);
1628 sock_release(sock1);
1629 goto out;
1630 }
1631
016a266b
AV
1632 err = sock1->ops->socketpair(sock1, sock2);
1633 if (unlikely(err < 0)) {
1634 sock_release(sock2);
1635 sock_release(sock1);
1636 goto out;
28407630
AV
1637 }
1638
aab174f0 1639 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1640 if (IS_ERR(newfile1)) {
28407630 1641 err = PTR_ERR(newfile1);
016a266b
AV
1642 sock_release(sock2);
1643 goto out;
28407630
AV
1644 }
1645
aab174f0 1646 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1647 if (IS_ERR(newfile2)) {
1648 err = PTR_ERR(newfile2);
016a266b
AV
1649 fput(newfile1);
1650 goto out;
db349509
AV
1651 }
1652
157cf649 1653 audit_fd_pair(fd1, fd2);
d73aa286 1654
db349509
AV
1655 fd_install(fd1, newfile1);
1656 fd_install(fd2, newfile2);
d73aa286 1657 return 0;
1da177e4 1658
016a266b 1659out:
d73aa286 1660 put_unused_fd(fd2);
d73aa286 1661 put_unused_fd(fd1);
1da177e4
LT
1662 return err;
1663}
1664
6debc8d8
DB
1665SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1666 int __user *, usockvec)
1667{
1668 return __sys_socketpair(family, type, protocol, usockvec);
1669}
1670
1da177e4
LT
1671/*
1672 * Bind a name to a socket. Nothing much to do here since it's
1673 * the protocol's responsibility to handle the local address.
1674 *
1675 * We move the socket address to kernel space before we call
1676 * the protocol layer (having also checked the address is ok).
1677 */
1678
a87d35d8 1679int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1680{
1681 struct socket *sock;
230b1839 1682 struct sockaddr_storage address;
6cb153ca 1683 int err, fput_needed;
1da177e4 1684
89bddce5 1685 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1686 if (sock) {
43db362d 1687 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1688 if (!err) {
89bddce5 1689 err = security_socket_bind(sock,
230b1839 1690 (struct sockaddr *)&address,
89bddce5 1691 addrlen);
6cb153ca
BL
1692 if (!err)
1693 err = sock->ops->bind(sock,
89bddce5 1694 (struct sockaddr *)
230b1839 1695 &address, addrlen);
1da177e4 1696 }
6cb153ca 1697 fput_light(sock->file, fput_needed);
89bddce5 1698 }
1da177e4
LT
1699 return err;
1700}
1701
a87d35d8
DB
1702SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1703{
1704 return __sys_bind(fd, umyaddr, addrlen);
1705}
1706
1da177e4
LT
1707/*
1708 * Perform a listen. Basically, we allow the protocol to do anything
1709 * necessary for a listen, and if that works, we mark the socket as
1710 * ready for listening.
1711 */
1712
25e290ee 1713int __sys_listen(int fd, int backlog)
1da177e4
LT
1714{
1715 struct socket *sock;
6cb153ca 1716 int err, fput_needed;
b8e1f9b5 1717 int somaxconn;
89bddce5
SH
1718
1719 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1720 if (sock) {
8efa6e93 1721 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1722 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1723 backlog = somaxconn;
1da177e4
LT
1724
1725 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1726 if (!err)
1727 err = sock->ops->listen(sock, backlog);
1da177e4 1728
6cb153ca 1729 fput_light(sock->file, fput_needed);
1da177e4
LT
1730 }
1731 return err;
1732}
1733
25e290ee
DB
1734SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1735{
1736 return __sys_listen(fd, backlog);
1737}
1738
d32f89da 1739struct file *do_accept(struct file *file, unsigned file_flags,
de2ea4b6 1740 struct sockaddr __user *upeer_sockaddr,
d32f89da 1741 int __user *upeer_addrlen, int flags)
1da177e4
LT
1742{
1743 struct socket *sock, *newsock;
39d8c1b6 1744 struct file *newfile;
d32f89da 1745 int err, len;
230b1839 1746 struct sockaddr_storage address;
1da177e4 1747
dba4a925 1748 sock = sock_from_file(file);
d32f89da
PB
1749 if (!sock)
1750 return ERR_PTR(-ENOTSOCK);
1da177e4 1751
c6d409cf
ED
1752 newsock = sock_alloc();
1753 if (!newsock)
d32f89da 1754 return ERR_PTR(-ENFILE);
1da177e4
LT
1755
1756 newsock->type = sock->type;
1757 newsock->ops = sock->ops;
1758
1da177e4
LT
1759 /*
1760 * We don't need try_module_get here, as the listening socket (sock)
1761 * has the protocol module (sock->ops->owner) held.
1762 */
1763 __module_get(newsock->ops->owner);
1764
aab174f0 1765 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
d32f89da
PB
1766 if (IS_ERR(newfile))
1767 return newfile;
39d8c1b6 1768
a79af59e
FF
1769 err = security_socket_accept(sock, newsock);
1770 if (err)
39d8c1b6 1771 goto out_fd;
a79af59e 1772
de2ea4b6
JA
1773 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1774 false);
1da177e4 1775 if (err < 0)
39d8c1b6 1776 goto out_fd;
1da177e4
LT
1777
1778 if (upeer_sockaddr) {
9b2c45d4
DV
1779 len = newsock->ops->getname(newsock,
1780 (struct sockaddr *)&address, 2);
1781 if (len < 0) {
1da177e4 1782 err = -ECONNABORTED;
39d8c1b6 1783 goto out_fd;
1da177e4 1784 }
43db362d 1785 err = move_addr_to_user(&address,
230b1839 1786 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1787 if (err < 0)
39d8c1b6 1788 goto out_fd;
1da177e4
LT
1789 }
1790
1791 /* File flags are not inherited via accept() unlike another OSes. */
d32f89da 1792 return newfile;
39d8c1b6 1793out_fd:
9606a216 1794 fput(newfile);
d32f89da
PB
1795 return ERR_PTR(err);
1796}
1797
1798int __sys_accept4_file(struct file *file, unsigned file_flags,
1799 struct sockaddr __user *upeer_sockaddr,
1800 int __user *upeer_addrlen, int flags,
1801 unsigned long nofile)
1802{
1803 struct file *newfile;
1804 int newfd;
1805
1806 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1807 return -EINVAL;
1808
1809 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1810 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
de2ea4b6 1811
d32f89da
PB
1812 newfd = __get_unused_fd_flags(flags, nofile);
1813 if (unlikely(newfd < 0))
1814 return newfd;
1815
1816 newfile = do_accept(file, file_flags, upeer_sockaddr, upeer_addrlen,
1817 flags);
1818 if (IS_ERR(newfile)) {
1819 put_unused_fd(newfd);
1820 return PTR_ERR(newfile);
1821 }
1822 fd_install(newfd, newfile);
1823 return newfd;
de2ea4b6
JA
1824}
1825
1826/*
1827 * For accept, we attempt to create a new socket, set up the link
1828 * with the client, wake up the client, then return the new
1829 * connected fd. We collect the address of the connector in kernel
1830 * space and move it to user at the very end. This is unclean because
1831 * we open the socket then return an error.
1832 *
1833 * 1003.1g adds the ability to recvmsg() to query connection pending
1834 * status to recvmsg. We need to add that support in a way thats
1835 * clean when we restructure accept also.
1836 */
1837
1838int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1839 int __user *upeer_addrlen, int flags)
1840{
1841 int ret = -EBADF;
1842 struct fd f;
1843
1844 f = fdget(fd);
1845 if (f.file) {
1846 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
09952e3e
JA
1847 upeer_addrlen, flags,
1848 rlimit(RLIMIT_NOFILE));
6b07edeb 1849 fdput(f);
de2ea4b6
JA
1850 }
1851
1852 return ret;
1da177e4
LT
1853}
1854
4541e805
DB
1855SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1856 int __user *, upeer_addrlen, int, flags)
1857{
1858 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1859}
1860
20f37034
HC
1861SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1862 int __user *, upeer_addrlen)
aaca0bdc 1863{
4541e805 1864 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1865}
1866
1da177e4
LT
1867/*
1868 * Attempt to connect to a socket with the server address. The address
1869 * is in user space so we verify it is OK and move it to kernel space.
1870 *
1871 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1872 * break bindings
1873 *
1874 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1875 * other SEQPACKET protocols that take time to connect() as it doesn't
1876 * include the -EINPROGRESS status for such sockets.
1877 */
1878
f499a021 1879int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 1880 int addrlen, int file_flags)
1da177e4
LT
1881{
1882 struct socket *sock;
bd3ded31 1883 int err;
1da177e4 1884
dba4a925
FR
1885 sock = sock_from_file(file);
1886 if (!sock) {
1887 err = -ENOTSOCK;
1da177e4 1888 goto out;
dba4a925 1889 }
1da177e4 1890
89bddce5 1891 err =
f499a021 1892 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 1893 if (err)
bd3ded31 1894 goto out;
1da177e4 1895
f499a021 1896 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
bd3ded31 1897 sock->file->f_flags | file_flags);
1da177e4
LT
1898out:
1899 return err;
1900}
1901
bd3ded31
JA
1902int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1903{
1904 int ret = -EBADF;
1905 struct fd f;
1906
1907 f = fdget(fd);
1908 if (f.file) {
f499a021
JA
1909 struct sockaddr_storage address;
1910
1911 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1912 if (!ret)
1913 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 1914 fdput(f);
bd3ded31
JA
1915 }
1916
1917 return ret;
1918}
1919
1387c2c2
DB
1920SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1921 int, addrlen)
1922{
1923 return __sys_connect(fd, uservaddr, addrlen);
1924}
1925
1da177e4
LT
1926/*
1927 * Get the local address ('name') of a socket object. Move the obtained
1928 * name to user space.
1929 */
1930
8882a107
DB
1931int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1932 int __user *usockaddr_len)
1da177e4
LT
1933{
1934 struct socket *sock;
230b1839 1935 struct sockaddr_storage address;
9b2c45d4 1936 int err, fput_needed;
89bddce5 1937
6cb153ca 1938 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1939 if (!sock)
1940 goto out;
1941
1942 err = security_socket_getsockname(sock);
1943 if (err)
1944 goto out_put;
1945
9b2c45d4
DV
1946 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1947 if (err < 0)
1da177e4 1948 goto out_put;
9b2c45d4
DV
1949 /* "err" is actually length in this case */
1950 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1951
1952out_put:
6cb153ca 1953 fput_light(sock->file, fput_needed);
1da177e4
LT
1954out:
1955 return err;
1956}
1957
8882a107
DB
1958SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1959 int __user *, usockaddr_len)
1960{
1961 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1962}
1963
1da177e4
LT
1964/*
1965 * Get the remote address ('name') of a socket object. Move the obtained
1966 * name to user space.
1967 */
1968
b21c8f83
DB
1969int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1970 int __user *usockaddr_len)
1da177e4
LT
1971{
1972 struct socket *sock;
230b1839 1973 struct sockaddr_storage address;
9b2c45d4 1974 int err, fput_needed;
1da177e4 1975
89bddce5
SH
1976 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1977 if (sock != NULL) {
1da177e4
LT
1978 err = security_socket_getpeername(sock);
1979 if (err) {
6cb153ca 1980 fput_light(sock->file, fput_needed);
1da177e4
LT
1981 return err;
1982 }
1983
9b2c45d4
DV
1984 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1985 if (err >= 0)
1986 /* "err" is actually length in this case */
1987 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1988 usockaddr_len);
6cb153ca 1989 fput_light(sock->file, fput_needed);
1da177e4
LT
1990 }
1991 return err;
1992}
1993
b21c8f83
DB
1994SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1995 int __user *, usockaddr_len)
1996{
1997 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1998}
1999
1da177e4
LT
2000/*
2001 * Send a datagram to a given address. We move the address into kernel
2002 * space and check the user space data area is readable before invoking
2003 * the protocol.
2004 */
211b634b
DB
2005int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
2006 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
2007{
2008 struct socket *sock;
230b1839 2009 struct sockaddr_storage address;
1da177e4
LT
2010 int err;
2011 struct msghdr msg;
2012 struct iovec iov;
6cb153ca 2013 int fput_needed;
6cb153ca 2014
602bd0e9
AV
2015 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
2016 if (unlikely(err))
2017 return err;
de0fa95c
PE
2018 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2019 if (!sock)
4387ff75 2020 goto out;
6cb153ca 2021
89bddce5 2022 msg.msg_name = NULL;
89bddce5
SH
2023 msg.msg_control = NULL;
2024 msg.msg_controllen = 0;
2025 msg.msg_namelen = 0;
6cb153ca 2026 if (addr) {
43db362d 2027 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
2028 if (err < 0)
2029 goto out_put;
230b1839 2030 msg.msg_name = (struct sockaddr *)&address;
89bddce5 2031 msg.msg_namelen = addr_len;
1da177e4
LT
2032 }
2033 if (sock->file->f_flags & O_NONBLOCK)
2034 flags |= MSG_DONTWAIT;
2035 msg.msg_flags = flags;
d8725c86 2036 err = sock_sendmsg(sock, &msg);
1da177e4 2037
89bddce5 2038out_put:
de0fa95c 2039 fput_light(sock->file, fput_needed);
4387ff75 2040out:
1da177e4
LT
2041 return err;
2042}
2043
211b634b
DB
2044SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2045 unsigned int, flags, struct sockaddr __user *, addr,
2046 int, addr_len)
2047{
2048 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2049}
2050
1da177e4 2051/*
89bddce5 2052 * Send a datagram down a socket.
1da177e4
LT
2053 */
2054
3e0fa65f 2055SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2056 unsigned int, flags)
1da177e4 2057{
211b634b 2058 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2059}
2060
2061/*
89bddce5 2062 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2063 * sender. We verify the buffers are writable and if needed move the
2064 * sender address from kernel to user space.
2065 */
7a09e1eb
DB
2066int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2067 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2068{
2069 struct socket *sock;
2070 struct iovec iov;
2071 struct msghdr msg;
230b1839 2072 struct sockaddr_storage address;
89bddce5 2073 int err, err2;
6cb153ca
BL
2074 int fput_needed;
2075
602bd0e9
AV
2076 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2077 if (unlikely(err))
2078 return err;
de0fa95c 2079 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2080 if (!sock)
de0fa95c 2081 goto out;
1da177e4 2082
89bddce5
SH
2083 msg.msg_control = NULL;
2084 msg.msg_controllen = 0;
f3d33426
HFS
2085 /* Save some cycles and don't copy the address if not needed */
2086 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2087 /* We assume all kernel code knows the size of sockaddr_storage */
2088 msg.msg_namelen = 0;
130ed5d1 2089 msg.msg_iocb = NULL;
9f138fa6 2090 msg.msg_flags = 0;
1da177e4
LT
2091 if (sock->file->f_flags & O_NONBLOCK)
2092 flags |= MSG_DONTWAIT;
2da62906 2093 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2094
89bddce5 2095 if (err >= 0 && addr != NULL) {
43db362d 2096 err2 = move_addr_to_user(&address,
230b1839 2097 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2098 if (err2 < 0)
2099 err = err2;
1da177e4 2100 }
de0fa95c
PE
2101
2102 fput_light(sock->file, fput_needed);
4387ff75 2103out:
1da177e4
LT
2104 return err;
2105}
2106
7a09e1eb
DB
2107SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2108 unsigned int, flags, struct sockaddr __user *, addr,
2109 int __user *, addr_len)
2110{
2111 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2112}
2113
1da177e4 2114/*
89bddce5 2115 * Receive a datagram from a socket.
1da177e4
LT
2116 */
2117
b7c0ddf5
JG
2118SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2119 unsigned int, flags)
1da177e4 2120{
7a09e1eb 2121 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2122}
2123
83f0c10b
FW
2124static bool sock_use_custom_sol_socket(const struct socket *sock)
2125{
2126 const struct sock *sk = sock->sk;
2127
2128 /* Use sock->ops->setsockopt() for MPTCP */
2129 return IS_ENABLED(CONFIG_MPTCP) &&
2130 sk->sk_protocol == IPPROTO_MPTCP &&
2131 sk->sk_type == SOCK_STREAM &&
2132 (sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
2133}
2134
1da177e4
LT
2135/*
2136 * Set a socket option. Because we don't know the option lengths we have
2137 * to pass the user mode parameter for the protocols to sort out.
2138 */
a7b75c5a 2139int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
55db9c0e 2140 int optlen)
1da177e4 2141{
519a8a6c 2142 sockptr_t optval = USER_SOCKPTR(user_optval);
0d01da6a 2143 char *kernel_optval = NULL;
6cb153ca 2144 int err, fput_needed;
1da177e4
LT
2145 struct socket *sock;
2146
2147 if (optlen < 0)
2148 return -EINVAL;
89bddce5
SH
2149
2150 sock = sockfd_lookup_light(fd, &err, &fput_needed);
4a367299
CH
2151 if (!sock)
2152 return err;
1da177e4 2153
4a367299
CH
2154 err = security_socket_setsockopt(sock, level, optname);
2155 if (err)
2156 goto out_put;
0d01da6a 2157
55db9c0e
CH
2158 if (!in_compat_syscall())
2159 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
a7b75c5a 2160 user_optval, &optlen,
55db9c0e 2161 &kernel_optval);
4a367299
CH
2162 if (err < 0)
2163 goto out_put;
2164 if (err > 0) {
2165 err = 0;
2166 goto out_put;
2167 }
0d01da6a 2168
a7b75c5a
CH
2169 if (kernel_optval)
2170 optval = KERNEL_SOCKPTR(kernel_optval);
4a367299 2171 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2172 err = sock_setsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2173 else if (unlikely(!sock->ops->setsockopt))
2174 err = -EOPNOTSUPP;
4a367299
CH
2175 else
2176 err = sock->ops->setsockopt(sock, level, optname, optval,
89bddce5 2177 optlen);
a7b75c5a 2178 kfree(kernel_optval);
4a367299
CH
2179out_put:
2180 fput_light(sock->file, fput_needed);
1da177e4
LT
2181 return err;
2182}
2183
cc36dca0
DB
2184SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2185 char __user *, optval, int, optlen)
2186{
2187 return __sys_setsockopt(fd, level, optname, optval, optlen);
2188}
2189
9cacf81f
SF
2190INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2191 int optname));
2192
1da177e4
LT
2193/*
2194 * Get a socket option. Because we don't know the option lengths we have
2195 * to pass a user mode parameter for the protocols to sort out.
2196 */
55db9c0e
CH
2197int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2198 int __user *optlen)
1da177e4 2199{
6cb153ca 2200 int err, fput_needed;
1da177e4 2201 struct socket *sock;
0d01da6a 2202 int max_optlen;
1da177e4 2203
89bddce5 2204 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2205 if (!sock)
2206 return err;
2207
2208 err = security_socket_getsockopt(sock, level, optname);
2209 if (err)
2210 goto out_put;
1da177e4 2211
55db9c0e
CH
2212 if (!in_compat_syscall())
2213 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2214
d8a9b38f
CH
2215 if (level == SOL_SOCKET)
2216 err = sock_getsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2217 else if (unlikely(!sock->ops->getsockopt))
2218 err = -EOPNOTSUPP;
d8a9b38f
CH
2219 else
2220 err = sock->ops->getsockopt(sock, level, optname, optval,
89bddce5 2221 optlen);
0d01da6a 2222
55db9c0e
CH
2223 if (!in_compat_syscall())
2224 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2225 optval, optlen, max_optlen,
2226 err);
6cb153ca 2227out_put:
d8a9b38f 2228 fput_light(sock->file, fput_needed);
1da177e4
LT
2229 return err;
2230}
2231
13a2d70e
DB
2232SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2233 char __user *, optval, int __user *, optlen)
2234{
2235 return __sys_getsockopt(fd, level, optname, optval, optlen);
2236}
2237
1da177e4
LT
2238/*
2239 * Shutdown a socket.
2240 */
2241
b713c195
JA
2242int __sys_shutdown_sock(struct socket *sock, int how)
2243{
2244 int err;
2245
2246 err = security_socket_shutdown(sock, how);
2247 if (!err)
2248 err = sock->ops->shutdown(sock, how);
2249
2250 return err;
2251}
2252
005a1aea 2253int __sys_shutdown(int fd, int how)
1da177e4 2254{
6cb153ca 2255 int err, fput_needed;
1da177e4
LT
2256 struct socket *sock;
2257
89bddce5
SH
2258 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2259 if (sock != NULL) {
b713c195 2260 err = __sys_shutdown_sock(sock, how);
6cb153ca 2261 fput_light(sock->file, fput_needed);
1da177e4
LT
2262 }
2263 return err;
2264}
2265
005a1aea
DB
2266SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2267{
2268 return __sys_shutdown(fd, how);
2269}
2270
/*
 * Helpers that yield the address of a msghdr field in either the native or
 * the compat layout; the fields involved have the same type (int / unsigned)
 * on our platforms.
 *
 * The macros expand in place and rely on three names being in scope where
 * they are used: 'flags', the native pointer passed as 'msg', and a compat
 * pointer named '<msg>_compat'.
 */
#define COMPAT_MSG(msg, member)					\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2277
c71d8ebe
TH
2278struct used_address {
2279 struct sockaddr_storage name;
2280 unsigned int name_len;
2281};
2282
0a384abf
JA
2283int __copy_msghdr_from_user(struct msghdr *kmsg,
2284 struct user_msghdr __user *umsg,
2285 struct sockaddr __user **save_addr,
2286 struct iovec __user **uiov, size_t *nsegs)
1661bf36 2287{
ffb07550 2288 struct user_msghdr msg;
08adb7da
AV
2289 ssize_t err;
2290
ffb07550 2291 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2292 return -EFAULT;
dbb490b9 2293
1f466e1f
CH
2294 kmsg->msg_control_is_user = true;
2295 kmsg->msg_control_user = msg.msg_control;
ffb07550
AV
2296 kmsg->msg_controllen = msg.msg_controllen;
2297 kmsg->msg_flags = msg.msg_flags;
2298
2299 kmsg->msg_namelen = msg.msg_namelen;
2300 if (!msg.msg_name)
6a2a2b3a
AS
2301 kmsg->msg_namelen = 0;
2302
dbb490b9
ML
2303 if (kmsg->msg_namelen < 0)
2304 return -EINVAL;
2305
1661bf36 2306 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2307 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2308
2309 if (save_addr)
ffb07550 2310 *save_addr = msg.msg_name;
08adb7da 2311
ffb07550 2312 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2313 if (!save_addr) {
864d9664
PA
2314 err = move_addr_to_kernel(msg.msg_name,
2315 kmsg->msg_namelen,
08adb7da
AV
2316 kmsg->msg_name);
2317 if (err < 0)
2318 return err;
2319 }
2320 } else {
2321 kmsg->msg_name = NULL;
2322 kmsg->msg_namelen = 0;
2323 }
2324
ffb07550 2325 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2326 return -EMSGSIZE;
2327
0345f931 2328 kmsg->msg_iocb = NULL;
0a384abf
JA
2329 *uiov = msg.msg_iov;
2330 *nsegs = msg.msg_iovlen;
2331 return 0;
2332}
2333
2334static int copy_msghdr_from_user(struct msghdr *kmsg,
2335 struct user_msghdr __user *umsg,
2336 struct sockaddr __user **save_addr,
2337 struct iovec **iov)
2338{
2339 struct user_msghdr msg;
2340 ssize_t err;
2341
2342 err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
2343 &msg.msg_iovlen);
2344 if (err)
2345 return err;
0345f931 2346
87e5e6da 2347 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2348 msg.msg_iov, msg.msg_iovlen,
da184284 2349 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2350 return err < 0 ? err : 0;
1661bf36
DC
2351}
2352
4257c8ca
JA
2353static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2354 unsigned int flags, struct used_address *used_address,
2355 unsigned int allowed_msghdr_flags)
1da177e4 2356{
b9d717a7 2357 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2358 __aligned(sizeof(__kernel_size_t));
89bddce5 2359 /* 20 is size of ipv6_pktinfo */
1da177e4 2360 unsigned char *ctl_buf = ctl;
d8725c86 2361 int ctl_len;
08adb7da 2362 ssize_t err;
89bddce5 2363
1da177e4
LT
2364 err = -ENOBUFS;
2365
228e548e 2366 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2367 goto out;
28a94d8f 2368 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2369 ctl_len = msg_sys->msg_controllen;
1da177e4 2370 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2371 err =
228e548e 2372 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2373 sizeof(ctl));
1da177e4 2374 if (err)
4257c8ca 2375 goto out;
228e548e
AB
2376 ctl_buf = msg_sys->msg_control;
2377 ctl_len = msg_sys->msg_controllen;
1da177e4 2378 } else if (ctl_len) {
ac4340fc
DM
2379 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2380 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2381 if (ctl_len > sizeof(ctl)) {
1da177e4 2382 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2383 if (ctl_buf == NULL)
4257c8ca 2384 goto out;
1da177e4
LT
2385 }
2386 err = -EFAULT;
1f466e1f 2387 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2388 goto out_freectl;
228e548e 2389 msg_sys->msg_control = ctl_buf;
1f466e1f 2390 msg_sys->msg_control_is_user = false;
1da177e4 2391 }
228e548e 2392 msg_sys->msg_flags = flags;
1da177e4
LT
2393
2394 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2395 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2396 /*
2397 * If this is sendmmsg() and current destination address is same as
2398 * previously succeeded address, omit asking LSM's decision.
2399 * used_address->name_len is initialized to UINT_MAX so that the first
2400 * destination address never matches.
2401 */
bc909d9d
MD
2402 if (used_address && msg_sys->msg_name &&
2403 used_address->name_len == msg_sys->msg_namelen &&
2404 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2405 used_address->name_len)) {
d8725c86 2406 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2407 goto out_freectl;
2408 }
d8725c86 2409 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2410 /*
2411 * If this is sendmmsg() and sending to current destination address was
2412 * successful, remember it.
2413 */
2414 if (used_address && err >= 0) {
2415 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2416 if (msg_sys->msg_name)
2417 memcpy(&used_address->name, msg_sys->msg_name,
2418 used_address->name_len);
c71d8ebe 2419 }
1da177e4
LT
2420
2421out_freectl:
89bddce5 2422 if (ctl_buf != ctl)
1da177e4 2423 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2424out:
2425 return err;
2426}
2427
03b1230c
JA
2428int sendmsg_copy_msghdr(struct msghdr *msg,
2429 struct user_msghdr __user *umsg, unsigned flags,
2430 struct iovec **iov)
4257c8ca
JA
2431{
2432 int err;
2433
2434 if (flags & MSG_CMSG_COMPAT) {
2435 struct compat_msghdr __user *msg_compat;
2436
2437 msg_compat = (struct compat_msghdr __user *) umsg;
2438 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2439 } else {
2440 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2441 }
2442 if (err < 0)
2443 return err;
2444
2445 return 0;
2446}
2447
2448static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2449 struct msghdr *msg_sys, unsigned int flags,
2450 struct used_address *used_address,
2451 unsigned int allowed_msghdr_flags)
2452{
2453 struct sockaddr_storage address;
2454 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2455 ssize_t err;
2456
2457 msg_sys->msg_name = &address;
2458
2459 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2460 if (err < 0)
2461 return err;
2462
2463 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2464 allowed_msghdr_flags);
da184284 2465 kfree(iov);
228e548e
AB
2466 return err;
2467}
2468
2469/*
2470 * BSD sendmsg interface
2471 */
03b1230c 2472long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2473 unsigned int flags)
2474{
03b1230c 2475 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2476}
228e548e 2477
e1834a32
DB
2478long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2479 bool forbid_cmsg_compat)
228e548e
AB
2480{
2481 int fput_needed, err;
2482 struct msghdr msg_sys;
1be374a0
AL
2483 struct socket *sock;
2484
e1834a32
DB
2485 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2486 return -EINVAL;
2487
1be374a0 2488 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2489 if (!sock)
2490 goto out;
2491
28a94d8f 2492 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2493
6cb153ca 2494 fput_light(sock->file, fput_needed);
89bddce5 2495out:
1da177e4
LT
2496 return err;
2497}
2498
666547ff 2499SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2500{
e1834a32 2501 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2502}
2503
228e548e
AB
2504/*
2505 * Linux sendmmsg interface
2506 */
2507
2508int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2509 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2510{
2511 int fput_needed, err, datagrams;
2512 struct socket *sock;
2513 struct mmsghdr __user *entry;
2514 struct compat_mmsghdr __user *compat_entry;
2515 struct msghdr msg_sys;
c71d8ebe 2516 struct used_address used_address;
f092276d 2517 unsigned int oflags = flags;
228e548e 2518
e1834a32
DB
2519 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2520 return -EINVAL;
2521
98382f41
AB
2522 if (vlen > UIO_MAXIOV)
2523 vlen = UIO_MAXIOV;
228e548e
AB
2524
2525 datagrams = 0;
2526
2527 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2528 if (!sock)
2529 return err;
2530
c71d8ebe 2531 used_address.name_len = UINT_MAX;
228e548e
AB
2532 entry = mmsg;
2533 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2534 err = 0;
f092276d 2535 flags |= MSG_BATCH;
228e548e
AB
2536
2537 while (datagrams < vlen) {
f092276d
TH
2538 if (datagrams == vlen - 1)
2539 flags = oflags;
2540
228e548e 2541 if (MSG_CMSG_COMPAT & flags) {
666547ff 2542 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2543 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2544 if (err < 0)
2545 break;
2546 err = __put_user(err, &compat_entry->msg_len);
2547 ++compat_entry;
2548 } else {
a7526eb5 2549 err = ___sys_sendmsg(sock,
666547ff 2550 (struct user_msghdr __user *)entry,
28a94d8f 2551 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2552 if (err < 0)
2553 break;
2554 err = put_user(err, &entry->msg_len);
2555 ++entry;
2556 }
2557
2558 if (err)
2559 break;
2560 ++datagrams;
3023898b
SHY
2561 if (msg_data_left(&msg_sys))
2562 break;
a78cb84c 2563 cond_resched();
228e548e
AB
2564 }
2565
228e548e
AB
2566 fput_light(sock->file, fput_needed);
2567
728ffb86
AB
2568 /* We only return an error if no datagrams were able to be sent */
2569 if (datagrams != 0)
228e548e
AB
2570 return datagrams;
2571
228e548e
AB
2572 return err;
2573}
2574
2575SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2576 unsigned int, vlen, unsigned int, flags)
2577{
e1834a32 2578 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2579}
2580
03b1230c
JA
2581int recvmsg_copy_msghdr(struct msghdr *msg,
2582 struct user_msghdr __user *umsg, unsigned flags,
2583 struct sockaddr __user **uaddr,
2584 struct iovec **iov)
1da177e4 2585{
08adb7da 2586 ssize_t err;
1da177e4 2587
4257c8ca
JA
2588 if (MSG_CMSG_COMPAT & flags) {
2589 struct compat_msghdr __user *msg_compat;
1da177e4 2590
4257c8ca
JA
2591 msg_compat = (struct compat_msghdr __user *) umsg;
2592 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2593 } else {
2594 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2595 }
1da177e4 2596 if (err < 0)
da184284 2597 return err;
1da177e4 2598
4257c8ca
JA
2599 return 0;
2600}
2601
2602static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2603 struct user_msghdr __user *msg,
2604 struct sockaddr __user *uaddr,
2605 unsigned int flags, int nosec)
2606{
2607 struct compat_msghdr __user *msg_compat =
2608 (struct compat_msghdr __user *) msg;
2609 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2610 struct sockaddr_storage addr;
2611 unsigned long cmsg_ptr;
2612 int len;
2613 ssize_t err;
2614
2615 msg_sys->msg_name = &addr;
a2e27255
ACM
2616 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2617 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2618
f3d33426
HFS
2619 /* We assume all kernel code knows the size of sockaddr_storage */
2620 msg_sys->msg_namelen = 0;
2621
1da177e4
LT
2622 if (sock->file->f_flags & O_NONBLOCK)
2623 flags |= MSG_DONTWAIT;
1af66221
ED
2624
2625 if (unlikely(nosec))
2626 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2627 else
2628 err = sock_recvmsg(sock, msg_sys, flags);
2629
1da177e4 2630 if (err < 0)
4257c8ca 2631 goto out;
1da177e4
LT
2632 len = err;
2633
2634 if (uaddr != NULL) {
43db362d 2635 err = move_addr_to_user(&addr,
a2e27255 2636 msg_sys->msg_namelen, uaddr,
89bddce5 2637 uaddr_len);
1da177e4 2638 if (err < 0)
4257c8ca 2639 goto out;
1da177e4 2640 }
a2e27255 2641 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2642 COMPAT_FLAGS(msg));
1da177e4 2643 if (err)
4257c8ca 2644 goto out;
1da177e4 2645 if (MSG_CMSG_COMPAT & flags)
a2e27255 2646 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2647 &msg_compat->msg_controllen);
2648 else
a2e27255 2649 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2650 &msg->msg_controllen);
2651 if (err)
4257c8ca 2652 goto out;
1da177e4 2653 err = len;
4257c8ca
JA
2654out:
2655 return err;
2656}
2657
2658static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2659 struct msghdr *msg_sys, unsigned int flags, int nosec)
2660{
2661 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2662 /* user mode address pointers */
2663 struct sockaddr __user *uaddr;
2664 ssize_t err;
2665
2666 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2667 if (err < 0)
2668 return err;
1da177e4 2669
4257c8ca 2670 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2671 kfree(iov);
a2e27255
ACM
2672 return err;
2673}
2674
2675/*
2676 * BSD recvmsg interface
2677 */
2678
03b1230c
JA
2679long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2680 struct user_msghdr __user *umsg,
2681 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2682{
03b1230c 2683 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2684}
2685
e1834a32
DB
2686long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2687 bool forbid_cmsg_compat)
a2e27255
ACM
2688{
2689 int fput_needed, err;
2690 struct msghdr msg_sys;
1be374a0
AL
2691 struct socket *sock;
2692
e1834a32
DB
2693 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2694 return -EINVAL;
2695
1be374a0 2696 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2697 if (!sock)
2698 goto out;
2699
a7526eb5 2700 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2701
6cb153ca 2702 fput_light(sock->file, fput_needed);
1da177e4
LT
2703out:
2704 return err;
2705}
2706
666547ff 2707SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2708 unsigned int, flags)
2709{
e1834a32 2710 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2711}
2712
a2e27255
ACM
2713/*
2714 * Linux recvmmsg interface
2715 */
2716
e11d4284
AB
2717static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2718 unsigned int vlen, unsigned int flags,
2719 struct timespec64 *timeout)
a2e27255
ACM
2720{
2721 int fput_needed, err, datagrams;
2722 struct socket *sock;
2723 struct mmsghdr __user *entry;
d7256d0e 2724 struct compat_mmsghdr __user *compat_entry;
a2e27255 2725 struct msghdr msg_sys;
766b9f92
DD
2726 struct timespec64 end_time;
2727 struct timespec64 timeout64;
a2e27255
ACM
2728
2729 if (timeout &&
2730 poll_select_set_timeout(&end_time, timeout->tv_sec,
2731 timeout->tv_nsec))
2732 return -EINVAL;
2733
2734 datagrams = 0;
2735
2736 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2737 if (!sock)
2738 return err;
2739
7797dc41
SHY
2740 if (likely(!(flags & MSG_ERRQUEUE))) {
2741 err = sock_error(sock->sk);
2742 if (err) {
2743 datagrams = err;
2744 goto out_put;
2745 }
e623a9e9 2746 }
a2e27255
ACM
2747
2748 entry = mmsg;
d7256d0e 2749 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2750
2751 while (datagrams < vlen) {
2752 /*
2753 * No need to ask LSM for more than the first datagram.
2754 */
d7256d0e 2755 if (MSG_CMSG_COMPAT & flags) {
666547ff 2756 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2757 &msg_sys, flags & ~MSG_WAITFORONE,
2758 datagrams);
d7256d0e
JMG
2759 if (err < 0)
2760 break;
2761 err = __put_user(err, &compat_entry->msg_len);
2762 ++compat_entry;
2763 } else {
a7526eb5 2764 err = ___sys_recvmsg(sock,
666547ff 2765 (struct user_msghdr __user *)entry,
a7526eb5
AL
2766 &msg_sys, flags & ~MSG_WAITFORONE,
2767 datagrams);
d7256d0e
JMG
2768 if (err < 0)
2769 break;
2770 err = put_user(err, &entry->msg_len);
2771 ++entry;
2772 }
2773
a2e27255
ACM
2774 if (err)
2775 break;
a2e27255
ACM
2776 ++datagrams;
2777
71c5c159
BB
2778 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2779 if (flags & MSG_WAITFORONE)
2780 flags |= MSG_DONTWAIT;
2781
a2e27255 2782 if (timeout) {
766b9f92 2783 ktime_get_ts64(&timeout64);
c2e6c856 2784 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2785 if (timeout->tv_sec < 0) {
2786 timeout->tv_sec = timeout->tv_nsec = 0;
2787 break;
2788 }
2789
2790 /* Timeout, return less than vlen datagrams */
2791 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2792 break;
2793 }
2794
2795 /* Out of band data, return right away */
2796 if (msg_sys.msg_flags & MSG_OOB)
2797 break;
a78cb84c 2798 cond_resched();
a2e27255
ACM
2799 }
2800
a2e27255 2801 if (err == 0)
34b88a68
ACM
2802 goto out_put;
2803
2804 if (datagrams == 0) {
2805 datagrams = err;
2806 goto out_put;
2807 }
a2e27255 2808
34b88a68
ACM
2809 /*
2810 * We may return less entries than requested (vlen) if the
2811 * sock is non block and there aren't enough datagrams...
2812 */
2813 if (err != -EAGAIN) {
a2e27255 2814 /*
34b88a68
ACM
2815 * ... or if recvmsg returns an error after we
2816 * received some datagrams, where we record the
2817 * error to return on the next call or if the
2818 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2819 */
34b88a68 2820 sock->sk->sk_err = -err;
a2e27255 2821 }
34b88a68
ACM
2822out_put:
2823 fput_light(sock->file, fput_needed);
a2e27255 2824
34b88a68 2825 return datagrams;
a2e27255
ACM
2826}
2827
e11d4284
AB
2828int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2829 unsigned int vlen, unsigned int flags,
2830 struct __kernel_timespec __user *timeout,
2831 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2832{
2833 int datagrams;
c2e6c856 2834 struct timespec64 timeout_sys;
a2e27255 2835
e11d4284
AB
2836 if (timeout && get_timespec64(&timeout_sys, timeout))
2837 return -EFAULT;
a2e27255 2838
e11d4284 2839 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2840 return -EFAULT;
2841
e11d4284
AB
2842 if (!timeout && !timeout32)
2843 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2844
2845 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2846
e11d4284
AB
2847 if (datagrams <= 0)
2848 return datagrams;
2849
2850 if (timeout && put_timespec64(&timeout_sys, timeout))
2851 datagrams = -EFAULT;
2852
2853 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2854 datagrams = -EFAULT;
2855
2856 return datagrams;
2857}
2858
1255e269
DB
2859SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2860 unsigned int, vlen, unsigned int, flags,
c2e6c856 2861 struct __kernel_timespec __user *, timeout)
1255e269 2862{
e11d4284
AB
2863 if (flags & MSG_CMSG_COMPAT)
2864 return -EINVAL;
2865
2866 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2867}
2868
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg(2) entry point taking an old_timespec32 timeout.
 * MSG_CMSG_COMPAT is refused with -EINVAL.
 */
SYSCALL_DEFINE5(recvmmsg_time32,
		int, fd,
		struct mmsghdr __user *, mmsg,
		unsigned int, vlen,
		unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2880
a2e27255 2881#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call number.
 * Each entry is the number of unsigned long arguments the call takes times
 * sizeof(unsigned long).  The per-entry labels follow the SYS_* numbering
 * from <linux/net.h>.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0: unused        */
	AL(3),	/*  1: socket        */
	AL(3),	/*  2: bind          */
	AL(3),	/*  3: connect       */
	AL(2),	/*  4: listen        */
	AL(3),	/*  5: accept        */
	AL(3),	/*  6: getsockname   */
	AL(3),	/*  7: getpeername   */
	AL(4),	/*  8: socketpair    */
	AL(4),	/*  9: send          */
	AL(4),	/* 10: recv          */
	AL(6),	/* 11: sendto        */
	AL(6),	/* 12: recvfrom      */
	AL(2),	/* 13: shutdown      */
	AL(5),	/* 14: setsockopt    */
	AL(5),	/* 15: getsockopt    */
	AL(3),	/* 16: sendmsg       */
	AL(3),	/* 17: recvmsg       */
	AL(4),	/* 18: accept4       */
	AL(5),	/* 19: recvmmsg      */
	AL(4)	/* 20: sendmmsg      */
};

#undef AL
2892
2893/*
89bddce5 2894 * System call vectors.
1da177e4
LT
2895 *
2896 * Argument checking cleaned up. Saved 20% in size.
2897 * This function doesn't need to set the kernel lock because
89bddce5 2898 * it is set by the callees.
1da177e4
LT
2899 */
2900
3e0fa65f 2901SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2902{
2950fa9d 2903 unsigned long a[AUDITSC_ARGS];
89bddce5 2904 unsigned long a0, a1;
1da177e4 2905 int err;
47379052 2906 unsigned int len;
1da177e4 2907
228e548e 2908 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2909 return -EINVAL;
c8e8cd57 2910 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2911
47379052
AV
2912 len = nargs[call];
2913 if (len > sizeof(a))
2914 return -EINVAL;
2915
1da177e4 2916 /* copy_from_user should be SMP safe. */
47379052 2917 if (copy_from_user(a, args, len))
1da177e4 2918 return -EFAULT;
3ec3b2fb 2919
2950fa9d
CG
2920 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2921 if (err)
2922 return err;
3ec3b2fb 2923
89bddce5
SH
2924 a0 = a[0];
2925 a1 = a[1];
2926
2927 switch (call) {
2928 case SYS_SOCKET:
9d6a15c3 2929 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2930 break;
2931 case SYS_BIND:
a87d35d8 2932 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2933 break;
2934 case SYS_CONNECT:
1387c2c2 2935 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2936 break;
2937 case SYS_LISTEN:
25e290ee 2938 err = __sys_listen(a0, a1);
89bddce5
SH
2939 break;
2940 case SYS_ACCEPT:
4541e805
DB
2941 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2942 (int __user *)a[2], 0);
89bddce5
SH
2943 break;
2944 case SYS_GETSOCKNAME:
2945 err =
8882a107
DB
2946 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2947 (int __user *)a[2]);
89bddce5
SH
2948 break;
2949 case SYS_GETPEERNAME:
2950 err =
b21c8f83
DB
2951 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2952 (int __user *)a[2]);
89bddce5
SH
2953 break;
2954 case SYS_SOCKETPAIR:
6debc8d8 2955 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2956 break;
2957 case SYS_SEND:
f3bf896b
DB
2958 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2959 NULL, 0);
89bddce5
SH
2960 break;
2961 case SYS_SENDTO:
211b634b
DB
2962 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2963 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2964 break;
2965 case SYS_RECV:
d27e9afc
DB
2966 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2967 NULL, NULL);
89bddce5
SH
2968 break;
2969 case SYS_RECVFROM:
7a09e1eb
DB
2970 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2971 (struct sockaddr __user *)a[4],
2972 (int __user *)a[5]);
89bddce5
SH
2973 break;
2974 case SYS_SHUTDOWN:
005a1aea 2975 err = __sys_shutdown(a0, a1);
89bddce5
SH
2976 break;
2977 case SYS_SETSOCKOPT:
cc36dca0
DB
2978 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2979 a[4]);
89bddce5
SH
2980 break;
2981 case SYS_GETSOCKOPT:
2982 err =
13a2d70e
DB
2983 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2984 (int __user *)a[4]);
89bddce5
SH
2985 break;
2986 case SYS_SENDMSG:
e1834a32
DB
2987 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2988 a[2], true);
89bddce5 2989 break;
228e548e 2990 case SYS_SENDMMSG:
e1834a32
DB
2991 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2992 a[3], true);
228e548e 2993 break;
89bddce5 2994 case SYS_RECVMSG:
e1834a32
DB
2995 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2996 a[2], true);
89bddce5 2997 break;
a2e27255 2998 case SYS_RECVMMSG:
3ca47e95 2999 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
3000 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3001 a[2], a[3],
3002 (struct __kernel_timespec __user *)a[4],
3003 NULL);
3004 else
3005 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3006 a[2], a[3], NULL,
3007 (struct old_timespec32 __user *)a[4]);
a2e27255 3008 break;
de11defe 3009 case SYS_ACCEPT4:
4541e805
DB
3010 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3011 (int __user *)a[2], a[3]);
aaca0bdc 3012 break;
89bddce5
SH
3013 default:
3014 err = -EINVAL;
3015 break;
1da177e4
LT
3016 }
3017 return err;
3018}
3019
89bddce5 3020#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 3021
55737fda
SH
3022/**
3023 * sock_register - add a socket protocol handler
3024 * @ops: description of protocol
3025 *
1da177e4
LT
3026 * This function is called by a protocol handler that wants to
3027 * advertise its address family, and have it linked into the
e793c0f7 3028 * socket interface. The value ops->family corresponds to the
55737fda 3029 * socket system call protocol family.
1da177e4 3030 */
f0fd27d4 3031int sock_register(const struct net_proto_family *ops)
1da177e4
LT
3032{
3033 int err;
3034
3035 if (ops->family >= NPROTO) {
3410f22e 3036 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
3037 return -ENOBUFS;
3038 }
55737fda
SH
3039
3040 spin_lock(&net_family_lock);
190683a9
ED
3041 if (rcu_dereference_protected(net_families[ops->family],
3042 lockdep_is_held(&net_family_lock)))
55737fda
SH
3043 err = -EEXIST;
3044 else {
cf778b00 3045 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
3046 err = 0;
3047 }
55737fda
SH
3048 spin_unlock(&net_family_lock);
3049
fe0bdbde 3050 pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
1da177e4
LT
3051 return err;
3052}
c6d409cf 3053EXPORT_SYMBOL(sock_register);
1da177e4 3054
55737fda
SH
3055/**
3056 * sock_unregister - remove a protocol handler
3057 * @family: protocol family to remove
3058 *
1da177e4
LT
3059 * This function is called by a protocol handler that wants to
3060 * remove its address family, and have it unlinked from the
55737fda
SH
3061 * new socket creation.
3062 *
3063 * If protocol handler is a module, then it can use module reference
3064 * counts to protect against new references. If protocol handler is not
3065 * a module then it needs to provide its own protection in
3066 * the ops->create routine.
1da177e4 3067 */
f0fd27d4 3068void sock_unregister(int family)
1da177e4 3069{
f0fd27d4 3070 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3071
55737fda 3072 spin_lock(&net_family_lock);
a9b3cd7f 3073 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3074 spin_unlock(&net_family_lock);
3075
3076 synchronize_rcu();
3077
fe0bdbde 3078 pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
1da177e4 3079}
c6d409cf 3080EXPORT_SYMBOL(sock_unregister);
1da177e4 3081
bf2ae2e4
XL
3082bool sock_is_registered(int family)
3083{
66b51b0a 3084 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3085}
3086
77d76ea3 3087static int __init sock_init(void)
1da177e4 3088{
b3e19d92 3089 int err;
2ca794e5
EB
3090 /*
3091 * Initialize the network sysctl infrastructure.
3092 */
3093 err = net_sysctl_init();
3094 if (err)
3095 goto out;
b3e19d92 3096
1da177e4 3097 /*
89bddce5 3098 * Initialize skbuff SLAB cache
1da177e4
LT
3099 */
3100 skb_init();
1da177e4
LT
3101
3102 /*
89bddce5 3103 * Initialize the protocols module.
1da177e4
LT
3104 */
3105
3106 init_inodecache();
b3e19d92
NP
3107
3108 err = register_filesystem(&sock_fs_type);
3109 if (err)
47260ba9 3110 goto out;
1da177e4 3111 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3112 if (IS_ERR(sock_mnt)) {
3113 err = PTR_ERR(sock_mnt);
3114 goto out_mount;
3115 }
77d76ea3
AK
3116
3117 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3118 */
3119
3120#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3121 err = netfilter_init();
3122 if (err)
3123 goto out;
1da177e4 3124#endif
cbeb321a 3125
408eccce 3126 ptp_classifier_init();
c1f19b51 3127
b3e19d92
NP
3128out:
3129 return err;
3130
3131out_mount:
3132 unregister_filesystem(&sock_fs_type);
b3e19d92 3133 goto out;
1da177e4
LT
3134}
3135
77d76ea3
AK
3136core_initcall(sock_init); /* early initcall */
3137
1da177e4
LT
3138#ifdef CONFIG_PROC_FS
3139void socket_seq_show(struct seq_file *seq)
3140{
648845ab
TZ
3141 seq_printf(seq, "sockets: used %d\n",
3142 sock_inuse_get(seq->private));
1da177e4 3143}
89bddce5 3144#endif /* CONFIG_PROC_FS */
1da177e4 3145
29c49648
AB
3146/* Handle the fact that while struct ifreq has the same *layout* on
3147 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3148 * which are handled elsewhere, it still has different *size* due to
3149 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3150 * resulting in struct ifreq being 32 and 40 bytes respectively).
3151 * As a result, if the struct happens to be at the end of a page and
3152 * the next page isn't readable/writable, we get a fault. To prevent
3153 * that, copy back and forth to the full size.
3154 */
3155int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
7a229387 3156{
29c49648
AB
3157 if (in_compat_syscall()) {
3158 struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
7a229387 3159
29c49648
AB
3160 memset(ifr, 0, sizeof(*ifr));
3161 if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
3162 return -EFAULT;
7a229387 3163
29c49648
AB
3164 if (ifrdata)
3165 *ifrdata = compat_ptr(ifr32->ifr_data);
7a229387 3166
29c49648
AB
3167 return 0;
3168 }
7a229387 3169
29c49648 3170 if (copy_from_user(ifr, arg, sizeof(*ifr)))
7a229387
AB
3171 return -EFAULT;
3172
29c49648
AB
3173 if (ifrdata)
3174 *ifrdata = ifr->ifr_data;
3175
7a229387
AB
3176 return 0;
3177}
29c49648 3178EXPORT_SYMBOL(get_user_ifreq);
7a229387 3179
29c49648 3180int put_user_ifreq(struct ifreq *ifr, void __user *arg)
7a229387 3181{
29c49648 3182 size_t size = sizeof(*ifr);
7a229387 3183
29c49648
AB
3184 if (in_compat_syscall())
3185 size = sizeof(struct compat_ifreq);
7a229387 3186
29c49648 3187 if (copy_to_user(arg, ifr, size))
7a229387
AB
3188 return -EFAULT;
3189
3a7da39d 3190 return 0;
7a229387 3191}
29c49648 3192EXPORT_SYMBOL(put_user_ifreq);
7a229387 3193
89bbfc95 3194#ifdef CONFIG_COMPAT
7a50a240
AB
3195static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3196{
7a50a240 3197 compat_uptr_t uptr32;
44c02a2c
AV
3198 struct ifreq ifr;
3199 void __user *saved;
3200 int err;
7a50a240 3201
29c49648 3202 if (get_user_ifreq(&ifr, NULL, uifr32))
7a50a240
AB
3203 return -EFAULT;
3204
3205 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3206 return -EFAULT;
3207
44c02a2c
AV
3208 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3209 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3210
a554bf96 3211 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
44c02a2c
AV
3212 if (!err) {
3213 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
29c49648 3214 if (put_user_ifreq(&ifr, uifr32))
44c02a2c 3215 err = -EFAULT;
ccbd6a5a 3216 }
44c02a2c 3217 return err;
7a229387
AB
3218}
3219
590d4693
BH
3220/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3221static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3222 struct compat_ifreq __user *u_ifreq32)
7a229387 3223{
44c02a2c 3224 struct ifreq ifreq;
a554bf96 3225 void __user *data;
7a229387 3226
d0efb162
PC
3227 if (!is_socket_ioctl_cmd(cmd))
3228 return -ENOTTY;
a554bf96 3229 if (get_user_ifreq(&ifreq, &data, u_ifreq32))
7a229387 3230 return -EFAULT;
a554bf96 3231 ifreq.ifr_data = data;
7a229387 3232
a554bf96 3233 return dev_ioctl(net, cmd, &ifreq, data, NULL);
a2116ed2
AB
3234}
3235
7a229387
AB
3236/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3237 * for some operations; this forces use of the newer bridge-utils that
25985edc 3238 * use compatible ioctls
7a229387 3239 */
6b96018b 3240static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3241{
6b96018b 3242 compat_ulong_t tmp;
7a229387 3243
6b96018b 3244 if (get_user(tmp, argp))
7a229387
AB
3245 return -EFAULT;
3246 if (tmp == BRCTL_GET_VERSION)
3247 return BRCTL_VERSION + 1;
3248 return -EINVAL;
3249}
3250
6b96018b
AB
3251static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3252 unsigned int cmd, unsigned long arg)
3253{
3254 void __user *argp = compat_ptr(arg);
3255 struct sock *sk = sock->sk;
3256 struct net *net = sock_net(sk);
7a229387 3257
6b96018b 3258 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
88fc023f 3259 return sock_ioctl(file, cmd, (unsigned long)argp);
6b96018b
AB
3260
3261 switch (cmd) {
3262 case SIOCSIFBR:
3263 case SIOCGIFBR:
3264 return old_bridge_ioctl(argp);
7a50a240
AB
3265 case SIOCWANDEV:
3266 return compat_siocwandev(net, argp);
0768e170
AB
3267 case SIOCGSTAMP_OLD:
3268 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3269 if (!sock->ops->gettstamp)
3270 return -ENOIOCTLCMD;
0768e170 3271 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3272 !COMPAT_USE_64BIT_TIME);
3273
dd98d289 3274 case SIOCETHTOOL:
590d4693
BH
3275 case SIOCBONDSLAVEINFOQUERY:
3276 case SIOCBONDINFOQUERY:
a2116ed2 3277 case SIOCSHWTSTAMP:
fd468c74 3278 case SIOCGHWTSTAMP:
590d4693 3279 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3280
3281 case FIOSETOWN:
3282 case SIOCSPGRP:
3283 case FIOGETOWN:
3284 case SIOCGPGRP:
3285 case SIOCBRADDBR:
3286 case SIOCBRDELBR:
3287 case SIOCGIFVLAN:
3288 case SIOCSIFVLAN:
c62cce2c 3289 case SIOCGSKNS:
0768e170
AB
3290 case SIOCGSTAMP_NEW:
3291 case SIOCGSTAMPNS_NEW:
876f0bf9 3292 case SIOCGIFCONF:
6b96018b
AB
3293 return sock_ioctl(file, cmd, arg);
3294
3295 case SIOCGIFFLAGS:
3296 case SIOCSIFFLAGS:
709566d7
AB
3297 case SIOCGIFMAP:
3298 case SIOCSIFMAP:
6b96018b
AB
3299 case SIOCGIFMETRIC:
3300 case SIOCSIFMETRIC:
3301 case SIOCGIFMTU:
3302 case SIOCSIFMTU:
3303 case SIOCGIFMEM:
3304 case SIOCSIFMEM:
3305 case SIOCGIFHWADDR:
3306 case SIOCSIFHWADDR:
3307 case SIOCADDMULTI:
3308 case SIOCDELMULTI:
3309 case SIOCGIFINDEX:
6b96018b
AB
3310 case SIOCGIFADDR:
3311 case SIOCSIFADDR:
3312 case SIOCSIFHWBROADCAST:
6b96018b 3313 case SIOCDIFADDR:
6b96018b
AB
3314 case SIOCGIFBRDADDR:
3315 case SIOCSIFBRDADDR:
3316 case SIOCGIFDSTADDR:
3317 case SIOCSIFDSTADDR:
3318 case SIOCGIFNETMASK:
3319 case SIOCSIFNETMASK:
3320 case SIOCSIFPFLAGS:
3321 case SIOCGIFPFLAGS:
3322 case SIOCGIFTXQLEN:
3323 case SIOCSIFTXQLEN:
3324 case SIOCBRADDIF:
3325 case SIOCBRDELIF:
c6c9fee3 3326 case SIOCGIFNAME:
9177efd3
AB
3327 case SIOCSIFNAME:
3328 case SIOCGMIIPHY:
3329 case SIOCGMIIREG:
3330 case SIOCSMIIREG:
f92d4fc9
AV
3331 case SIOCBONDENSLAVE:
3332 case SIOCBONDRELEASE:
3333 case SIOCBONDSETHWADDR:
3334 case SIOCBONDCHANGEACTIVE:
6b96018b
AB
3335 case SIOCSARP:
3336 case SIOCGARP:
3337 case SIOCDARP:
c7dc504e 3338 case SIOCOUTQ:
9d7bf41f 3339 case SIOCOUTQNSD:
6b96018b 3340 case SIOCATMARK:
63ff03ab 3341 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3342 }
3343
6b96018b
AB
3344 return -ENOIOCTLCMD;
3345}
7a229387 3346
95c96174 3347static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3348 unsigned long arg)
89bbfc95
SP
3349{
3350 struct socket *sock = file->private_data;
3351 int ret = -ENOIOCTLCMD;
87de87d5
DM
3352 struct sock *sk;
3353 struct net *net;
3354
3355 sk = sock->sk;
3356 net = sock_net(sk);
89bbfc95
SP
3357
3358 if (sock->ops->compat_ioctl)
3359 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3360
87de87d5
DM
3361 if (ret == -ENOIOCTLCMD &&
3362 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3363 ret = compat_wext_handle_ioctl(net, cmd, arg);
3364
6b96018b
AB
3365 if (ret == -ENOIOCTLCMD)
3366 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3367
89bbfc95
SP
3368 return ret;
3369}
3370#endif
3371
8a3c245c
PT
3372/**
3373 * kernel_bind - bind an address to a socket (kernel space)
3374 * @sock: socket
3375 * @addr: address
3376 * @addrlen: length of address
3377 *
3378 * Returns 0 or an error.
3379 */
3380
ac5a488e
SS
3381int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3382{
3383 return sock->ops->bind(sock, addr, addrlen);
3384}
c6d409cf 3385EXPORT_SYMBOL(kernel_bind);
ac5a488e 3386
8a3c245c
PT
3387/**
3388 * kernel_listen - move socket to listening state (kernel space)
3389 * @sock: socket
3390 * @backlog: pending connections queue size
3391 *
3392 * Returns 0 or an error.
3393 */
3394
ac5a488e
SS
3395int kernel_listen(struct socket *sock, int backlog)
3396{
3397 return sock->ops->listen(sock, backlog);
3398}
c6d409cf 3399EXPORT_SYMBOL(kernel_listen);
ac5a488e 3400
8a3c245c
PT
3401/**
3402 * kernel_accept - accept a connection (kernel space)
3403 * @sock: listening socket
3404 * @newsock: new connected socket
3405 * @flags: flags
3406 *
3407 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3408 * If it fails, @newsock is guaranteed to be %NULL.
3409 * Returns 0 or an error.
3410 */
3411
ac5a488e
SS
3412int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3413{
3414 struct sock *sk = sock->sk;
3415 int err;
3416
3417 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3418 newsock);
3419 if (err < 0)
3420 goto done;
3421
cdfbabfb 3422 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3423 if (err < 0) {
3424 sock_release(*newsock);
fa8705b0 3425 *newsock = NULL;
ac5a488e
SS
3426 goto done;
3427 }
3428
3429 (*newsock)->ops = sock->ops;
1b08534e 3430 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3431
3432done:
3433 return err;
3434}
c6d409cf 3435EXPORT_SYMBOL(kernel_accept);
ac5a488e 3436
8a3c245c
PT
3437/**
3438 * kernel_connect - connect a socket (kernel space)
3439 * @sock: socket
3440 * @addr: address
3441 * @addrlen: address length
3442 * @flags: flags (O_NONBLOCK, ...)
3443 *
f1dcffcc 3444 * For datagram sockets, @addr is the address to which datagrams are sent
8a3c245c
PT
3445 * by default, and the only address from which datagrams are received.
3446 * For stream sockets, attempts to connect to @addr.
3447 * Returns 0 or an error code.
3448 */
3449
ac5a488e 3450int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3451 int flags)
ac5a488e
SS
3452{
3453 return sock->ops->connect(sock, addr, addrlen, flags);
3454}
c6d409cf 3455EXPORT_SYMBOL(kernel_connect);
ac5a488e 3456
8a3c245c
PT
3457/**
3458 * kernel_getsockname - get the address which the socket is bound (kernel space)
3459 * @sock: socket
3460 * @addr: address holder
3461 *
3462 * Fills the @addr pointer with the address which the socket is bound.
3463 * Returns 0 or an error code.
3464 */
3465
9b2c45d4 3466int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3467{
9b2c45d4 3468 return sock->ops->getname(sock, addr, 0);
ac5a488e 3469}
c6d409cf 3470EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3471
8a3c245c 3472/**
645f0897 3473 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3474 * @sock: socket
3475 * @addr: address holder
3476 *
3477 * Fills the @addr pointer with the address which the socket is connected.
3478 * Returns 0 or an error code.
3479 */
3480
9b2c45d4 3481int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3482{
9b2c45d4 3483 return sock->ops->getname(sock, addr, 1);
ac5a488e 3484}
c6d409cf 3485EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3486
8a3c245c
PT
3487/**
3488 * kernel_sendpage - send a &page through a socket (kernel space)
3489 * @sock: socket
3490 * @page: page
3491 * @offset: page offset
3492 * @size: total size in bytes
3493 * @flags: flags (MSG_DONTWAIT, ...)
3494 *
3495 * Returns the total amount sent in bytes or an error.
3496 */
3497
ac5a488e
SS
3498int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3499 size_t size, int flags)
3500{
7b62d31d
CL
3501 if (sock->ops->sendpage) {
3502 /* Warn in case the improper page to zero-copy send */
3503 WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
ac5a488e 3504 return sock->ops->sendpage(sock, page, offset, size, flags);
7b62d31d 3505 }
ac5a488e
SS
3506 return sock_no_sendpage(sock, page, offset, size, flags);
3507}
c6d409cf 3508EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3509
8a3c245c
PT
3510/**
3511 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3512 * @sk: sock
3513 * @page: page
3514 * @offset: page offset
3515 * @size: total size in bytes
3516 * @flags: flags (MSG_DONTWAIT, ...)
3517 *
3518 * Returns the total amount sent in bytes or an error.
3519 * Caller must hold @sk.
3520 */
3521
306b13eb
TH
3522int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3523 size_t size, int flags)
3524{
3525 struct socket *sock = sk->sk_socket;
3526
3527 if (sock->ops->sendpage_locked)
3528 return sock->ops->sendpage_locked(sk, page, offset, size,
3529 flags);
3530
3531 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3532}
3533EXPORT_SYMBOL(kernel_sendpage_locked);
3534
8a3c245c 3535/**
645f0897 3536 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3537 * @sock: socket
3538 * @how: connection part
3539 *
3540 * Returns 0 or an error.
3541 */
3542
91cf45f0
TM
3543int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3544{
3545 return sock->ops->shutdown(sock, how);
3546}
91cf45f0 3547EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3548
8a3c245c
PT
3549/**
3550 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3551 * @sk: socket
3552 *
3553 * This routine returns the IP overhead imposed by a socket i.e.
3554 * the length of the underlying IP header, depending on whether
3555 * this is an IPv4 or IPv6 socket and the length from IP options turned
3556 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3557 */
8a3c245c 3558
113c3075
P
3559u32 kernel_sock_ip_overhead(struct sock *sk)
3560{
3561 struct inet_sock *inet;
3562 struct ip_options_rcu *opt;
3563 u32 overhead = 0;
113c3075
P
3564#if IS_ENABLED(CONFIG_IPV6)
3565 struct ipv6_pinfo *np;
3566 struct ipv6_txoptions *optv6 = NULL;
3567#endif /* IS_ENABLED(CONFIG_IPV6) */
3568
3569 if (!sk)
3570 return overhead;
3571
113c3075
P
3572 switch (sk->sk_family) {
3573 case AF_INET:
3574 inet = inet_sk(sk);
3575 overhead += sizeof(struct iphdr);
3576 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3577 sock_owned_by_user(sk));
113c3075
P
3578 if (opt)
3579 overhead += opt->opt.optlen;
3580 return overhead;
3581#if IS_ENABLED(CONFIG_IPV6)
3582 case AF_INET6:
3583 np = inet6_sk(sk);
3584 overhead += sizeof(struct ipv6hdr);
3585 if (np)
3586 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3587 sock_owned_by_user(sk));
113c3075
P
3588 if (optv6)
3589 overhead += (optv6->opt_flen + optv6->opt_nflen);
3590 return overhead;
3591#endif /* IS_ENABLED(CONFIG_IPV6) */
3592 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3593 return overhead;
3594 }
3595}
3596EXPORT_SYMBOL(kernel_sock_ip_overhead);