ALSA: hda - Update descriptions about new position_fix values
[linux-2.6-block.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
1da177e4 55#include <linux/mm.h>
1da177e4
LT
56#include <linux/socket.h>
57#include <linux/file.h>
58#include <linux/net.h>
59#include <linux/interrupt.h>
aaca0bdc 60#include <linux/thread_info.h>
55737fda 61#include <linux/rcupdate.h>
1da177e4
LT
62#include <linux/netdevice.h>
63#include <linux/proc_fs.h>
64#include <linux/seq_file.h>
4a3e2f71 65#include <linux/mutex.h>
1da177e4 66#include <linux/if_bridge.h>
20380731
ACM
67#include <linux/if_frad.h>
68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
6b96018b 103#include <linux/sockios.h>
076bb0c8 104#include <net/busy_poll.h>
f24b9be5 105#include <linux/errqueue.h>
06021292 106
/* Busy-poll tunables; only built when CONFIG_NET_RX_BUSY_POLL is enabled. */
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_poll __read_mostly;
unsigned int sysctl_net_busy_read __read_mostly;
#endif
6b96018b 111
8ae5e030
AV
112static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
113static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 114static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
115
116static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
117static __poll_t sock_poll(struct file *file,
118 struct poll_table_struct *wait);
89bddce5 119static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
120#ifdef CONFIG_COMPAT
121static long compat_sock_ioctl(struct file *file,
89bddce5 122 unsigned int cmd, unsigned long arg);
89bbfc95 123#endif
1da177e4 124static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
125static ssize_t sock_sendpage(struct file *file, struct page *page,
126 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 127static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 128 struct pipe_inode_info *pipe, size_t len,
9c55e01c 129 unsigned int flags);
1da177e4 130
1da177e4
LT
131/*
132 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
133 * in the operation structures but are done directly via the socketcall() multiplexor.
134 */
135
da7071d7 136static const struct file_operations socket_file_ops = {
1da177e4
LT
137 .owner = THIS_MODULE,
138 .llseek = no_llseek,
8ae5e030
AV
139 .read_iter = sock_read_iter,
140 .write_iter = sock_write_iter,
1da177e4
LT
141 .poll = sock_poll,
142 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
143#ifdef CONFIG_COMPAT
144 .compat_ioctl = compat_sock_ioctl,
145#endif
1da177e4 146 .mmap = sock_mmap,
1da177e4
LT
147 .release = sock_close,
148 .fasync = sock_fasync,
5274f052
JA
149 .sendpage = sock_sendpage,
150 .splice_write = generic_splice_sendpage,
9c55e01c 151 .splice_read = sock_splice_read,
1da177e4
LT
152};
153
154/*
155 * The protocol list. Each protocol is registered in here.
156 */
157
1da177e4 158static DEFINE_SPINLOCK(net_family_lock);
190683a9 159static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 160
1da177e4 161/*
89bddce5
SH
162 * Support routines.
163 * Move socket addresses back and forth across the kernel/user
164 * divide and look after the messy bits.
1da177e4
LT
165 */
166
1da177e4
LT
167/**
168 * move_addr_to_kernel - copy a socket address into kernel space
169 * @uaddr: Address in user space
170 * @kaddr: Address in kernel space
171 * @ulen: Length in user space
172 *
173 * The address is copied into kernel space. If the provided address is
174 * too long an error code of -EINVAL is returned. If the copy gives
175 * invalid addresses -EFAULT is returned. On a success 0 is returned.
176 */
177
43db362d 178int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 179{
230b1839 180 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 181 return -EINVAL;
89bddce5 182 if (ulen == 0)
1da177e4 183 return 0;
89bddce5 184 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 185 return -EFAULT;
3ec3b2fb 186 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
187}
188
189/**
190 * move_addr_to_user - copy an address to user space
191 * @kaddr: kernel space address
192 * @klen: length of address in kernel
193 * @uaddr: user space address
194 * @ulen: pointer to user length field
195 *
196 * The value pointed to by ulen on entry is the buffer length available.
197 * This is overwritten with the buffer space used. -EINVAL is returned
198 * if an overlong buffer is specified or a negative buffer size. -EFAULT
199 * is returned if either the buffer or the length field are not
200 * accessible.
201 * After copying the data up to the limit the user specifies, the true
202 * length of the data is written over the length limit the user
203 * specified. Zero is returned for a success.
204 */
89bddce5 205
43db362d 206static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 207 void __user *uaddr, int __user *ulen)
1da177e4
LT
208{
209 int err;
210 int len;
211
68c6beb3 212 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
213 err = get_user(len, ulen);
214 if (err)
1da177e4 215 return err;
89bddce5
SH
216 if (len > klen)
217 len = klen;
68c6beb3 218 if (len < 0)
1da177e4 219 return -EINVAL;
89bddce5 220 if (len) {
d6fe3945
SG
221 if (audit_sockaddr(klen, kaddr))
222 return -ENOMEM;
89bddce5 223 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
224 return -EFAULT;
225 }
226 /*
89bddce5
SH
227 * "fromlen shall refer to the value before truncation.."
228 * 1003.1g
1da177e4
LT
229 */
230 return __put_user(klen, ulen);
231}
232
08009a76 233static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
234
235static struct inode *sock_alloc_inode(struct super_block *sb)
236{
237 struct socket_alloc *ei;
89bddce5 238
e94b1766 239 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
240 if (!ei)
241 return NULL;
333f7909
AV
242 init_waitqueue_head(&ei->socket.wq.wait);
243 ei->socket.wq.fasync_list = NULL;
244 ei->socket.wq.flags = 0;
89bddce5 245
1da177e4
LT
246 ei->socket.state = SS_UNCONNECTED;
247 ei->socket.flags = 0;
248 ei->socket.ops = NULL;
249 ei->socket.sk = NULL;
250 ei->socket.file = NULL;
1da177e4
LT
251
252 return &ei->vfs_inode;
253}
254
6d7855c5 255static void sock_free_inode(struct inode *inode)
1da177e4 256{
43815482
ED
257 struct socket_alloc *ei;
258
259 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 260 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
261}
262
51cc5068 263static void init_once(void *foo)
1da177e4 264{
89bddce5 265 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 266
a35afb83 267 inode_init_once(&ei->vfs_inode);
1da177e4 268}
89bddce5 269
1e911632 270static void init_inodecache(void)
1da177e4
LT
271{
272 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
273 sizeof(struct socket_alloc),
274 0,
275 (SLAB_HWCACHE_ALIGN |
276 SLAB_RECLAIM_ACCOUNT |
5d097056 277 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 278 init_once);
1e911632 279 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
280}
281
b87221de 282static const struct super_operations sockfs_ops = {
c6d409cf 283 .alloc_inode = sock_alloc_inode,
6d7855c5 284 .free_inode = sock_free_inode,
c6d409cf 285 .statfs = simple_statfs,
1da177e4
LT
286};
287
c23fbb6b
ED
288/*
289 * sockfs_dname() is called from d_path().
290 */
291static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
292{
293 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 294 d_inode(dentry)->i_ino);
c23fbb6b
ED
295}
296
3ba13d17 297static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 298 .d_dname = sockfs_dname,
1da177e4
LT
299};
300
bba0bd31
AG
301static int sockfs_xattr_get(const struct xattr_handler *handler,
302 struct dentry *dentry, struct inode *inode,
303 const char *suffix, void *value, size_t size)
304{
305 if (value) {
306 if (dentry->d_name.len + 1 > size)
307 return -ERANGE;
308 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
309 }
310 return dentry->d_name.len + 1;
311}
312
313#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
314#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
315#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
316
317static const struct xattr_handler sockfs_xattr_handler = {
318 .name = XATTR_NAME_SOCKPROTONAME,
319 .get = sockfs_xattr_get,
320};
321
/*
 * ->set() for security.* xattrs on sockfs. Nothing is stored here: the
 * attribute is handled by the LSM, so this always returns -EAGAIN.
 */
static int sockfs_security_xattr_set(const struct xattr_handler *handler,
				     struct dentry *dentry, struct inode *inode,
				     const char *suffix, const void *value,
				     size_t size, int flags)
{
	const int handled_by_lsm = -EAGAIN;

	return handled_by_lsm;
}
330
331static const struct xattr_handler sockfs_security_xattr_handler = {
332 .prefix = XATTR_SECURITY_PREFIX,
333 .set = sockfs_security_xattr_set,
334};
335
bba0bd31
AG
336static const struct xattr_handler *sockfs_xattr_handlers[] = {
337 &sockfs_xattr_handler,
4a590153 338 &sockfs_security_xattr_handler,
bba0bd31
AG
339 NULL
340};
341
fba9be49 342static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 343{
fba9be49
DH
344 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
345 if (!ctx)
346 return -ENOMEM;
347 ctx->ops = &sockfs_ops;
348 ctx->dops = &sockfs_dentry_operations;
349 ctx->xattr = sockfs_xattr_handlers;
350 return 0;
c74a1cbb
AV
351}
352
353static struct vfsmount *sock_mnt __read_mostly;
354
355static struct file_system_type sock_fs_type = {
356 .name = "sockfs",
fba9be49 357 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
358 .kill_sb = kill_anon_super,
359};
360
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success they return the file descriptor,
 *	with the file struct implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we do not refer
 *	to the socket after mapping. If one day we need to, this function
 *	will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable with shared fd spaces; we cannot
 *	solve it inside the kernel, but we still take care of internal
 *	coherence.
 */
377
8a3c245c
PT
378/**
379 * sock_alloc_file - Bind a &socket to a &file
380 * @sock: socket
381 * @flags: file status flags
382 * @dname: protocol name
383 *
384 * Returns the &file bound with @sock, implicitly storing it
385 * in sock->file. If dname is %NULL, sets to "".
386 * On failure the return is a ERR pointer (see linux/err.h).
387 * This function uses GFP_KERNEL internally.
388 */
389
aab174f0 390struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 391{
7cbe66b6 392 struct file *file;
1da177e4 393
d93aa9d8
AV
394 if (!dname)
395 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 396
d93aa9d8
AV
397 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
398 O_RDWR | (flags & O_NONBLOCK),
399 &socket_file_ops);
b5ffe634 400 if (IS_ERR(file)) {
8e1611e2 401 sock_release(sock);
39b65252 402 return file;
cc3808f8
AV
403 }
404
405 sock->file = file;
39d8c1b6 406 file->private_data = sock;
28407630 407 return file;
39d8c1b6 408}
56b31d1c 409EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 410
56b31d1c 411static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
412{
413 struct file *newfile;
28407630 414 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
415 if (unlikely(fd < 0)) {
416 sock_release(sock);
28407630 417 return fd;
ce4bb04c 418 }
39d8c1b6 419
aab174f0 420 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 421 if (!IS_ERR(newfile)) {
39d8c1b6 422 fd_install(fd, newfile);
28407630
AV
423 return fd;
424 }
7cbe66b6 425
28407630
AV
426 put_unused_fd(fd);
427 return PTR_ERR(newfile);
1da177e4
LT
428}
429
8a3c245c
PT
430/**
431 * sock_from_file - Return the &socket bounded to @file.
432 * @file: file
433 * @err: pointer to an error code return
434 *
435 * On failure returns %NULL and assigns -ENOTSOCK to @err.
436 */
437
406a3c63 438struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 439{
6cb153ca
BL
440 if (file->f_op == &socket_file_ops)
441 return file->private_data; /* set in sock_map_fd */
442
23bb80d2
ED
443 *err = -ENOTSOCK;
444 return NULL;
6cb153ca 445}
406a3c63 446EXPORT_SYMBOL(sock_from_file);
6cb153ca 447
1da177e4 448/**
c6d409cf 449 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
450 * @fd: file handle
451 * @err: pointer to an error code return
452 *
453 * The file handle passed in is locked and the socket it is bound
241c4667 454 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
455 * with a negative errno code and NULL is returned. The function checks
456 * for both invalid handles and passing a handle which is not a socket.
457 *
458 * On a success the socket object pointer is returned.
459 */
460
461struct socket *sockfd_lookup(int fd, int *err)
462{
463 struct file *file;
1da177e4
LT
464 struct socket *sock;
465
89bddce5
SH
466 file = fget(fd);
467 if (!file) {
1da177e4
LT
468 *err = -EBADF;
469 return NULL;
470 }
89bddce5 471
6cb153ca
BL
472 sock = sock_from_file(file, err);
473 if (!sock)
1da177e4 474 fput(file);
6cb153ca
BL
475 return sock;
476}
c6d409cf 477EXPORT_SYMBOL(sockfd_lookup);
1da177e4 478
6cb153ca
BL
479static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
480{
00e188ef 481 struct fd f = fdget(fd);
6cb153ca
BL
482 struct socket *sock;
483
3672558c 484 *err = -EBADF;
00e188ef
AV
485 if (f.file) {
486 sock = sock_from_file(f.file, err);
487 if (likely(sock)) {
488 *fput_needed = f.flags;
6cb153ca 489 return sock;
00e188ef
AV
490 }
491 fdput(f);
1da177e4 492 }
6cb153ca 493 return NULL;
1da177e4
LT
494}
495
600e1779
MY
496static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
497 size_t size)
498{
499 ssize_t len;
500 ssize_t used = 0;
501
c5ef6035 502 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
503 if (len < 0)
504 return len;
505 used += len;
506 if (buffer) {
507 if (size < used)
508 return -ERANGE;
509 buffer += len;
510 }
511
512 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
513 used += len;
514 if (buffer) {
515 if (size < used)
516 return -ERANGE;
517 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
518 buffer += len;
519 }
520
521 return used;
522}
523
dc647ec8 524static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
525{
526 int err = simple_setattr(dentry, iattr);
527
e1a3a60a 528 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
529 struct socket *sock = SOCKET_I(d_inode(dentry));
530
6d8c50dc
CW
531 if (sock->sk)
532 sock->sk->sk_uid = iattr->ia_uid;
533 else
534 err = -ENOENT;
86741ec2
LC
535 }
536
537 return err;
538}
539
600e1779 540static const struct inode_operations sockfs_inode_ops = {
600e1779 541 .listxattr = sockfs_listxattr,
86741ec2 542 .setattr = sockfs_setattr,
600e1779
MY
543};
544
1da177e4 545/**
8a3c245c 546 * sock_alloc - allocate a socket
89bddce5 547 *
1da177e4
LT
548 * Allocate a new inode and socket object. The two are bound together
549 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 550 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
551 */
552
f4a00aac 553struct socket *sock_alloc(void)
1da177e4 554{
89bddce5
SH
555 struct inode *inode;
556 struct socket *sock;
1da177e4 557
a209dfc7 558 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
559 if (!inode)
560 return NULL;
561
562 sock = SOCKET_I(inode);
563
85fe4025 564 inode->i_ino = get_next_ino();
89bddce5 565 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
566 inode->i_uid = current_fsuid();
567 inode->i_gid = current_fsgid();
600e1779 568 inode->i_op = &sockfs_inode_ops;
1da177e4 569
1da177e4
LT
570 return sock;
571}
f4a00aac 572EXPORT_SYMBOL(sock_alloc);
1da177e4 573
1da177e4 574/**
8a3c245c 575 * sock_release - close a socket
1da177e4
LT
576 * @sock: socket to close
577 *
578 * The socket is released from the protocol stack if it has a release
579 * callback, and the inode is then released if the socket is bound to
89bddce5 580 * an inode not a file.
1da177e4 581 */
89bddce5 582
6d8c50dc 583static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
584{
585 if (sock->ops) {
586 struct module *owner = sock->ops->owner;
587
6d8c50dc
CW
588 if (inode)
589 inode_lock(inode);
1da177e4 590 sock->ops->release(sock);
ff7b11aa 591 sock->sk = NULL;
6d8c50dc
CW
592 if (inode)
593 inode_unlock(inode);
1da177e4
LT
594 sock->ops = NULL;
595 module_put(owner);
596 }
597
333f7909 598 if (sock->wq.fasync_list)
3410f22e 599 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 600
1da177e4
LT
601 if (!sock->file) {
602 iput(SOCK_INODE(sock));
603 return;
604 }
89bddce5 605 sock->file = NULL;
1da177e4 606}
6d8c50dc
CW
607
608void sock_release(struct socket *sock)
609{
610 __sock_release(sock, NULL);
611}
c6d409cf 612EXPORT_SYMBOL(sock_release);
1da177e4 613
c14ac945 614void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 615{
140c55d4
ED
616 u8 flags = *tx_flags;
617
c14ac945 618 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
619 flags |= SKBTX_HW_TSTAMP;
620
c14ac945 621 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
622 flags |= SKBTX_SW_TSTAMP;
623
c14ac945 624 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
625 flags |= SKBTX_SCHED_TSTAMP;
626
140c55d4 627 *tx_flags = flags;
20d49473 628}
67cc0d40 629EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 630
8c3c447b
PA
631INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
632 size_t));
a648a592
PA
633INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
634 size_t));
d8725c86 635static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 636{
a648a592
PA
637 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
638 inet_sendmsg, sock, msg,
639 msg_data_left(msg));
d8725c86
AV
640 BUG_ON(ret == -EIOCBQUEUED);
641 return ret;
1da177e4
LT
642}
643
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM: if
 *	security_socket_sendmsg() returns an error, that error is returned
 *	and nothing is sent.
 *	Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 660
8a3c245c
PT
661/**
662 * kernel_sendmsg - send a message through @sock (kernel-space)
663 * @sock: socket
664 * @msg: message header
665 * @vec: kernel vec
666 * @num: vec array length
667 * @size: total message data size
668 *
669 * Builds the message data with @vec and sends it through @sock.
670 * Returns the number of bytes sent, or an error code.
671 */
672
1da177e4
LT
673int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
674 struct kvec *vec, size_t num, size_t size)
675{
aa563d7b 676 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 677 return sock_sendmsg(sock, msg);
1da177e4 678}
c6d409cf 679EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 680
8a3c245c
PT
681/**
682 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
683 * @sk: sock
684 * @msg: message header
685 * @vec: output s/g array
686 * @num: output s/g array length
687 * @size: total message data size
688 *
689 * Builds the message data with @vec and sends it through @sock.
690 * Returns the number of bytes sent, or an error code.
691 * Caller must hold @sk.
692 */
693
306b13eb
TH
694int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
695 struct kvec *vec, size_t num, size_t size)
696{
697 struct socket *sock = sk->sk_socket;
698
699 if (!sock->ops->sendmsg_locked)
db5980d8 700 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 701
aa563d7b 702 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
703
704 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
705}
706EXPORT_SYMBOL(kernel_sendmsg_locked);
707
8605330a
SHY
708static bool skb_is_err_queue(const struct sk_buff *skb)
709{
710 /* pkt_type of skbs enqueued on the error queue are set to
711 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
712 * in recvmsg, since skbs received on a local socket will never
713 * have a pkt_type of PACKET_OUTGOING.
714 */
715 return skb->pkt_type == PACKET_OUTGOING;
716}
717
b50a5c70
ML
718/* On transmit, software and hardware timestamps are returned independently.
719 * As the two skb clones share the hardware timestamp, which may be updated
720 * before the software timestamp is received, a hardware TX timestamp may be
721 * returned only if there is no software TX timestamp. Ignore false software
722 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 723 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
724 * hardware timestamp.
725 */
726static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
727{
728 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
729}
730
aad9c8c4
ML
731static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
732{
733 struct scm_ts_pktinfo ts_pktinfo;
734 struct net_device *orig_dev;
735
736 if (!skb_mac_header_was_set(skb))
737 return;
738
739 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
740
741 rcu_read_lock();
742 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
743 if (orig_dev)
744 ts_pktinfo.if_index = orig_dev->ifindex;
745 rcu_read_unlock();
746
747 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
748 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
749 sizeof(ts_pktinfo), &ts_pktinfo);
750}
751
92f37fd2
ED
752/*
753 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
754 */
755void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
756 struct sk_buff *skb)
757{
20d49473 758 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 759 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
760 struct scm_timestamping_internal tss;
761
b50a5c70 762 int empty = 1, false_tstamp = 0;
20d49473
PO
763 struct skb_shared_hwtstamps *shhwtstamps =
764 skb_hwtstamps(skb);
765
766 /* Race occurred between timestamp enabling and packet
767 receiving. Fill in the current time for now. */
b50a5c70 768 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 769 __net_timestamp(skb);
b50a5c70
ML
770 false_tstamp = 1;
771 }
20d49473
PO
772
773 if (need_software_tstamp) {
774 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
775 if (new_tstamp) {
776 struct __kernel_sock_timeval tv;
777
778 skb_get_new_timestamp(skb, &tv);
779 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
780 sizeof(tv), &tv);
781 } else {
782 struct __kernel_old_timeval tv;
783
784 skb_get_timestamp(skb, &tv);
785 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
786 sizeof(tv), &tv);
787 }
20d49473 788 } else {
887feae3
DD
789 if (new_tstamp) {
790 struct __kernel_timespec ts;
791
792 skb_get_new_timestampns(skb, &ts);
793 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
794 sizeof(ts), &ts);
795 } else {
796 struct timespec ts;
797
798 skb_get_timestampns(skb, &ts);
799 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
800 sizeof(ts), &ts);
801 }
20d49473
PO
802 }
803 }
804
f24b9be5 805 memset(&tss, 0, sizeof(tss));
c199105d 806 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 807 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 808 empty = 0;
4d276eb6 809 if (shhwtstamps &&
b9f40e21 810 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 811 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 812 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 813 empty = 0;
aad9c8c4
ML
814 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
815 !skb_is_err_queue(skb))
816 put_ts_pktinfo(msg, skb);
817 }
1c885808 818 if (!empty) {
9718475e
DD
819 if (sock_flag(sk, SOCK_TSTAMP_NEW))
820 put_cmsg_scm_timestamping64(msg, &tss);
821 else
822 put_cmsg_scm_timestamping(msg, &tss);
1c885808 823
8605330a 824 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 825 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
826 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
827 skb->len, skb->data);
828 }
92f37fd2 829}
7c81fd8b
ACM
830EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
831
6e3e939f
JB
832void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
833 struct sk_buff *skb)
834{
835 int ack;
836
837 if (!sock_flag(sk, SOCK_WIFI_STATUS))
838 return;
839 if (!skb->wifi_acked_valid)
840 return;
841
842 ack = skb->wifi_acked;
843
844 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
845}
846EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
847
11165f14 848static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
849 struct sk_buff *skb)
3b885787 850{
744d5a3e 851 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 852 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 853 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
854}
855
/* Attach the timestamp cmsgs first, then the drop-count cmsg, to @msg. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 863
8c3c447b 864INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
865 size_t, int));
866INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
867 size_t, int));
1b784140 868static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 869 int flags)
1da177e4 870{
a648a592
PA
871 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
872 inet_recvmsg, sock, msg, msg_data_left(msg),
873 flags);
1da177e4
LT
874}
875
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM: if
 *	security_socket_recvmsg() returns an error, that error is returned
 *	and nothing is received. Returns the total number of bytes
 *	received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 892
c1249c0a 893/**
8a3c245c
PT
894 * kernel_recvmsg - Receive a message from a socket (kernel space)
895 * @sock: The socket to receive the message from
896 * @msg: Received message
897 * @vec: Input s/g array for message data
898 * @num: Size of input s/g array
899 * @size: Number of bytes to read
900 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 901 *
8a3c245c
PT
902 * On return the msg structure contains the scatter/gather array passed in the
903 * vec argument. The array is modified so that it consists of the unfilled
904 * portion of the original array.
c1249c0a 905 *
8a3c245c 906 * The returned value is the total number of bytes received, or an error.
c1249c0a 907 */
8a3c245c 908
89bddce5
SH
909int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
910 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
911{
912 mm_segment_t oldfs = get_fs();
913 int result;
914
aa563d7b 915 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 916 set_fs(KERNEL_DS);
2da62906 917 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
918 set_fs(oldfs);
919 return result;
920}
c6d409cf 921EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 922
ce1d4d3e
CH
923static ssize_t sock_sendpage(struct file *file, struct page *page,
924 int offset, size_t size, loff_t *ppos, int more)
1da177e4 925{
1da177e4
LT
926 struct socket *sock;
927 int flags;
928
ce1d4d3e
CH
929 sock = file->private_data;
930
35f9c09f
ED
931 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
932 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
933 flags |= more;
ce1d4d3e 934
e6949583 935 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 936}
1da177e4 937
9c55e01c 938static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 939 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
940 unsigned int flags)
941{
942 struct socket *sock = file->private_data;
943
997b37da 944 if (unlikely(!sock->ops->splice_read))
95506588 945 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 946
9c55e01c
JA
947 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
948}
949
8ae5e030 950static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 951{
6d652330
AV
952 struct file *file = iocb->ki_filp;
953 struct socket *sock = file->private_data;
0345f931 954 struct msghdr msg = {.msg_iter = *to,
955 .msg_iocb = iocb};
8ae5e030 956 ssize_t res;
ce1d4d3e 957
8ae5e030
AV
958 if (file->f_flags & O_NONBLOCK)
959 msg.msg_flags = MSG_DONTWAIT;
960
961 if (iocb->ki_pos != 0)
1da177e4 962 return -ESPIPE;
027445c3 963
66ee59af 964 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
965 return 0;
966
2da62906 967 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
968 *to = msg.msg_iter;
969 return res;
1da177e4
LT
970}
971
8ae5e030 972static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 973{
6d652330
AV
974 struct file *file = iocb->ki_filp;
975 struct socket *sock = file->private_data;
0345f931 976 struct msghdr msg = {.msg_iter = *from,
977 .msg_iocb = iocb};
8ae5e030 978 ssize_t res;
1da177e4 979
8ae5e030 980 if (iocb->ki_pos != 0)
ce1d4d3e 981 return -ESPIPE;
027445c3 982
8ae5e030
AV
983 if (file->f_flags & O_NONBLOCK)
984 msg.msg_flags = MSG_DONTWAIT;
985
6d652330
AV
986 if (sock->type == SOCK_SEQPACKET)
987 msg.msg_flags |= MSG_EOR;
988
d8725c86 989 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
990 *from = msg.msg_iter;
991 return res;
1da177e4
LT
992}
993
1da177e4
LT
994/*
995 * Atomic setting of ioctl hooks to avoid race
996 * with module unload.
997 */
998
4a3e2f71 999static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1000static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1001
/*
 * Publish @hook as the handler that sock_ioctl() invokes for the bridge
 * ioctls.  The pointer is stored while holding br_ioctl_mutex, the same
 * mutex sock_ioctl() holds around the call through the hook.
 */
881d966b 1002void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1003{
4a3e2f71 1004 mutex_lock(&br_ioctl_mutex);
1da177e4 1005 br_ioctl_hook = hook;
4a3e2f71 1006 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1007}
1008EXPORT_SYMBOL(brioctl_set);
1009
4a3e2f71 1010static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1011static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1012
/*
 * Publish @hook as the handler that sock_ioctl() invokes for the VLAN
 * ioctls.  The pointer is stored while holding vlan_ioctl_mutex, the same
 * mutex sock_ioctl() holds around the call through the hook.
 */
881d966b 1013void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1014{
4a3e2f71 1015 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1016 vlan_ioctl_hook = hook;
4a3e2f71 1017 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1018}
1019EXPORT_SYMBOL(vlan_ioctl_set);
1020
4a3e2f71 1021static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1022static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1023
/*
 * Publish @hook as the handler that sock_ioctl() invokes for the DLCI
 * ioctls.  The pointer is stored while holding dlci_ioctl_mutex, the same
 * mutex sock_ioctl() holds around the call through the hook.
 */
89bddce5 1024void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1025{
4a3e2f71 1026 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1027 dlci_ioctl_hook = hook;
4a3e2f71 1028 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1029}
1030EXPORT_SYMBOL(dlci_ioctl_set);
1031
6b96018b 1032static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1033 unsigned int cmd, unsigned long arg)
6b96018b
AB
1034{
1035 int err;
1036 void __user *argp = (void __user *)arg;
1037
1038 err = sock->ops->ioctl(sock, cmd, arg);
1039
1040 /*
1041 * If this ioctl is unknown try to hand it down
1042 * to the NIC driver.
1043 */
36fd633e
AV
1044 if (err != -ENOIOCTLCMD)
1045 return err;
6b96018b 1046
36fd633e
AV
1047 if (cmd == SIOCGIFCONF) {
1048 struct ifconf ifc;
1049 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1050 return -EFAULT;
1051 rtnl_lock();
1052 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1053 rtnl_unlock();
1054 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1055 err = -EFAULT;
44c02a2c
AV
1056 } else {
1057 struct ifreq ifr;
1058 bool need_copyout;
63ff03ab 1059 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1060 return -EFAULT;
1061 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1062 if (!err && need_copyout)
63ff03ab 1063 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1064 return -EFAULT;
36fd633e 1065 }
6b96018b
AB
1066 return err;
1067}
1068
1da177e4
LT
1069/*
1070 * With an ioctl, arg may well be a user mode pointer, but we don't know
1071 * what to do with it - that's up to the protocol still.
1072 */
1073
8a3c245c
PT
1074/**
1075 * get_net_ns - increment the refcount of the network namespace
1076 * @ns: common namespace (net)
1077 *
1078 * Returns the net's common namespace.
1079 */
1080
d8d211a2 1081struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1082{
1083 return &get_net(container_of(ns, struct net, ns))->ns;
1084}
d8d211a2 1085EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1086
1da177e4
LT
1087static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1088{
1089 struct socket *sock;
881d966b 1090 struct sock *sk;
1da177e4
LT
1091 void __user *argp = (void __user *)arg;
1092 int pid, err;
881d966b 1093 struct net *net;
1da177e4 1094
b69aee04 1095 sock = file->private_data;
881d966b 1096 sk = sock->sk;
3b1e0a65 1097 net = sock_net(sk);
44c02a2c
AV
1098 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1099 struct ifreq ifr;
1100 bool need_copyout;
1101 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1102 return -EFAULT;
1103 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1104 if (!err && need_copyout)
1105 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1106 return -EFAULT;
1da177e4 1107 } else
3d23e349 1108#ifdef CONFIG_WEXT_CORE
1da177e4 1109 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1110 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1111 } else
3d23e349 1112#endif
89bddce5 1113 switch (cmd) {
1da177e4
LT
1114 case FIOSETOWN:
1115 case SIOCSPGRP:
1116 err = -EFAULT;
1117 if (get_user(pid, (int __user *)argp))
1118 break;
393cc3f5 1119 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1120 break;
1121 case FIOGETOWN:
1122 case SIOCGPGRP:
609d7fa9 1123 err = put_user(f_getown(sock->file),
89bddce5 1124 (int __user *)argp);
1da177e4
LT
1125 break;
1126 case SIOCGIFBR:
1127 case SIOCSIFBR:
1128 case SIOCBRADDBR:
1129 case SIOCBRDELBR:
1130 err = -ENOPKG;
1131 if (!br_ioctl_hook)
1132 request_module("bridge");
1133
4a3e2f71 1134 mutex_lock(&br_ioctl_mutex);
89bddce5 1135 if (br_ioctl_hook)
881d966b 1136 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1137 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1138 break;
1139 case SIOCGIFVLAN:
1140 case SIOCSIFVLAN:
1141 err = -ENOPKG;
1142 if (!vlan_ioctl_hook)
1143 request_module("8021q");
1144
4a3e2f71 1145 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1146 if (vlan_ioctl_hook)
881d966b 1147 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1148 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1149 break;
1da177e4
LT
1150 case SIOCADDDLCI:
1151 case SIOCDELDLCI:
1152 err = -ENOPKG;
1153 if (!dlci_ioctl_hook)
1154 request_module("dlci");
1155
7512cbf6
PE
1156 mutex_lock(&dlci_ioctl_mutex);
1157 if (dlci_ioctl_hook)
1da177e4 1158 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1159 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1160 break;
c62cce2c
AV
1161 case SIOCGSKNS:
1162 err = -EPERM;
1163 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1164 break;
1165
1166 err = open_related_ns(&net->ns, get_net_ns);
1167 break;
0768e170
AB
1168 case SIOCGSTAMP_OLD:
1169 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1170 if (!sock->ops->gettstamp) {
1171 err = -ENOIOCTLCMD;
1172 break;
1173 }
1174 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1175 cmd == SIOCGSTAMP_OLD,
1176 !IS_ENABLED(CONFIG_64BIT));
60747828 1177 break;
0768e170
AB
1178 case SIOCGSTAMP_NEW:
1179 case SIOCGSTAMPNS_NEW:
1180 if (!sock->ops->gettstamp) {
1181 err = -ENOIOCTLCMD;
1182 break;
1183 }
1184 err = sock->ops->gettstamp(sock, argp,
1185 cmd == SIOCGSTAMP_NEW,
1186 false);
c7cbdbf2 1187 break;
1da177e4 1188 default:
63ff03ab 1189 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1190 break;
89bddce5 1191 }
1da177e4
LT
1192 return err;
1193}
1194
8a3c245c
PT
1195/**
1196 * sock_create_lite - creates a socket
1197 * @family: protocol family (AF_INET, ...)
1198 * @type: communication type (SOCK_STREAM, ...)
1199 * @protocol: protocol (0, ...)
1200 * @res: new socket
1201 *
1202 * Creates a new socket and assigns it to @res, passing through LSM.
1203 * The new socket initialization is not complete, see kernel_accept().
1204 * Returns 0 or an error. On failure @res is set to %NULL.
1205 * This function internally uses GFP_KERNEL.
1206 */
1207
1da177e4
LT
1208int sock_create_lite(int family, int type, int protocol, struct socket **res)
1209{
1210 int err;
1211 struct socket *sock = NULL;
89bddce5 1212
1da177e4
LT
1213 err = security_socket_create(family, type, protocol, 1);
1214 if (err)
1215 goto out;
1216
1217 sock = sock_alloc();
1218 if (!sock) {
1219 err = -ENOMEM;
1220 goto out;
1221 }
1222
1da177e4 1223 sock->type = type;
7420ed23
VY
1224 err = security_socket_post_create(sock, family, type, protocol, 1);
1225 if (err)
1226 goto out_release;
1227
1da177e4
LT
1228out:
1229 *res = sock;
1230 return err;
7420ed23
VY
1231out_release:
1232 sock_release(sock);
1233 sock = NULL;
1234 goto out;
1da177e4 1235}
c6d409cf 1236EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1237
1238/* No kernel lock held - perfect */
ade994f4 1239static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1240{
3cafb376 1241 struct socket *sock = file->private_data;
a331de3b 1242 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1243
e88958e6
CH
1244 if (!sock->ops->poll)
1245 return 0;
f641f13b 1246
a331de3b
CH
1247 if (sk_can_busy_loop(sock->sk)) {
1248 /* poll once if requested by the syscall */
1249 if (events & POLL_BUSY_LOOP)
1250 sk_busy_loop(sock->sk, 1);
1251
1252 /* if this socket can poll_ll, tell the system call */
1253 flag = POLL_BUSY_LOOP;
1254 }
1255
1256 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1257}
1258
/*
 * mmap on a socket file: forward the request to the protocol's ->mmap
 * handler.  The handler is called unconditionally, without a NULL check.
 */
89bddce5 1259static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1260{
b69aee04 1261 struct socket *sock = file->private_data;
1da177e4
LT
1262
1263 return sock->ops->mmap(file, sock, vma);
1264}
1265
/*
 * Close a socket file: hand the socket obtained from @inode via SOCKET_I()
 * to __sock_release().  @filp is unused and the function always returns 0.
 */
20380731 1266static int sock_close(struct inode *inode, struct file *filp)
1da177e4 1267{
6d8c50dc 1268 __sock_release(SOCKET_I(inode), inode);
1da177e4
LT
1269 return 0;
1270}
1271
1272/*
1273 * Update the socket async list
1274 *
1275 * Fasync_list locking strategy.
1276 *
1277 * 1. fasync_list is modified only under process context socket lock
1278 * i.e. under semaphore.
1279 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1280 * or under socket lock
1da177e4
LT
1281 */
1282
1283static int sock_fasync(int fd, struct file *filp, int on)
1284{
989a2979
ED
1285 struct socket *sock = filp->private_data;
1286 struct sock *sk = sock->sk;
333f7909 1287 struct socket_wq *wq = &sock->wq;
1da177e4 1288
989a2979 1289 if (sk == NULL)
1da177e4 1290 return -EINVAL;
1da177e4
LT
1291
1292 lock_sock(sk);
eaefd110 1293 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1294
eaefd110 1295 if (!wq->fasync_list)
989a2979
ED
1296 sock_reset_flag(sk, SOCK_FASYNC);
1297 else
bcdce719 1298 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1299
989a2979 1300 release_sock(sk);
1da177e4
LT
1301 return 0;
1302}
1303
ceb5d58b 1304/* This function may be called only under rcu_lock */
1da177e4 1305
ceb5d58b 1306int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1307{
ceb5d58b 1308 if (!wq || !wq->fasync_list)
1da177e4 1309 return -1;
ceb5d58b 1310
89bddce5 1311 switch (how) {
8d8ad9d7 1312 case SOCK_WAKE_WAITD:
ceb5d58b 1313 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1314 break;
1315 goto call_kill;
8d8ad9d7 1316 case SOCK_WAKE_SPACE:
ceb5d58b 1317 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1318 break;
1319 /* fall through */
8d8ad9d7 1320 case SOCK_WAKE_IO:
89bddce5 1321call_kill:
43815482 1322 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1323 break;
8d8ad9d7 1324 case SOCK_WAKE_URG:
43815482 1325 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1326 }
ceb5d58b 1327
1da177e4
LT
1328 return 0;
1329}
c6d409cf 1330EXPORT_SYMBOL(sock_wake_async);
1da177e4 1331
8a3c245c
PT
1332/**
1333 * __sock_create - creates a socket
1334 * @net: net namespace
1335 * @family: protocol family (AF_INET, ...)
1336 * @type: communication type (SOCK_STREAM, ...)
1337 * @protocol: protocol (0, ...)
1338 * @res: new socket
1339 * @kern: boolean for kernel space sockets
1340 *
1341 * Creates a new socket and assigns it to @res, passing through LSM.
1342 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1343 * be set to true if the socket resides in kernel space.
1344 * This function internally uses GFP_KERNEL.
1345 */
1346
721db93a 1347int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1348 struct socket **res, int kern)
1da177e4
LT
1349{
1350 int err;
1351 struct socket *sock;
55737fda 1352 const struct net_proto_family *pf;
1da177e4
LT
1353
1354 /*
89bddce5 1355 * Check protocol is in range
1da177e4
LT
1356 */
1357 if (family < 0 || family >= NPROTO)
1358 return -EAFNOSUPPORT;
1359 if (type < 0 || type >= SOCK_MAX)
1360 return -EINVAL;
1361
1362 /* Compatibility.
1363
1364 This uglymoron is moved from INET layer to here to avoid
1365 deadlock in module load.
1366 */
1367 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1368 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1369 current->comm);
1da177e4
LT
1370 family = PF_PACKET;
1371 }
1372
1373 err = security_socket_create(family, type, protocol, kern);
1374 if (err)
1375 return err;
89bddce5 1376
55737fda
SH
1377 /*
1378 * Allocate the socket and allow the family to set things up. if
1379 * the protocol is 0, the family is instructed to select an appropriate
1380 * default.
1381 */
1382 sock = sock_alloc();
1383 if (!sock) {
e87cc472 1384 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1385 return -ENFILE; /* Not exactly a match, but its the
1386 closest posix thing */
1387 }
1388
1389 sock->type = type;
1390
95a5afca 1391#ifdef CONFIG_MODULES
89bddce5
SH
1392 /* Attempt to load a protocol module if the find failed.
1393 *
1394 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1395 * requested real, full-featured networking support upon configuration.
1396 * Otherwise module support will break!
1397 */
190683a9 1398 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1399 request_module("net-pf-%d", family);
1da177e4
LT
1400#endif
1401
55737fda
SH
1402 rcu_read_lock();
1403 pf = rcu_dereference(net_families[family]);
1404 err = -EAFNOSUPPORT;
1405 if (!pf)
1406 goto out_release;
1da177e4
LT
1407
1408 /*
1409 * We will call the ->create function, that possibly is in a loadable
1410 * module, so we have to bump that loadable module refcnt first.
1411 */
55737fda 1412 if (!try_module_get(pf->owner))
1da177e4
LT
1413 goto out_release;
1414
55737fda
SH
1415 /* Now protected by module ref count */
1416 rcu_read_unlock();
1417
3f378b68 1418 err = pf->create(net, sock, protocol, kern);
55737fda 1419 if (err < 0)
1da177e4 1420 goto out_module_put;
a79af59e 1421
1da177e4
LT
1422 /*
1423 * Now to bump the refcnt of the [loadable] module that owns this
1424 * socket at sock_release time we decrement its refcnt.
1425 */
55737fda
SH
1426 if (!try_module_get(sock->ops->owner))
1427 goto out_module_busy;
1428
1da177e4
LT
1429 /*
1430 * Now that we're done with the ->create function, the [loadable]
1431 * module can have its refcnt decremented
1432 */
55737fda 1433 module_put(pf->owner);
7420ed23
VY
1434 err = security_socket_post_create(sock, family, type, protocol, kern);
1435 if (err)
3b185525 1436 goto out_sock_release;
55737fda 1437 *res = sock;
1da177e4 1438
55737fda
SH
1439 return 0;
1440
1441out_module_busy:
1442 err = -EAFNOSUPPORT;
1da177e4 1443out_module_put:
55737fda
SH
1444 sock->ops = NULL;
1445 module_put(pf->owner);
1446out_sock_release:
1da177e4 1447 sock_release(sock);
55737fda
SH
1448 return err;
1449
1450out_release:
1451 rcu_read_unlock();
1452 goto out_sock_release;
1da177e4 1453}
721db93a 1454EXPORT_SYMBOL(__sock_create);
1da177e4 1455
8a3c245c
PT
1456/**
1457 * sock_create - creates a socket
1458 * @family: protocol family (AF_INET, ...)
1459 * @type: communication type (SOCK_STREAM, ...)
1460 * @protocol: protocol (0, ...)
1461 * @res: new socket
1462 *
1463 * A wrapper around __sock_create().
1464 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1465 */
1466
1da177e4
LT
1467int sock_create(int family, int type, int protocol, struct socket **res)
1468{
/* Use the calling task's network namespace and pass kern = 0. */
1b8d7ae4 1469 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1470}
c6d409cf 1471EXPORT_SYMBOL(sock_create);
1da177e4 1472
8a3c245c
PT
1473/**
1474 * sock_create_kern - creates a socket (kernel space)
1475 * @net: net namespace
1476 * @family: protocol family (AF_INET, ...)
1477 * @type: communication type (SOCK_STREAM, ...)
1478 * @protocol: protocol (0, ...)
1479 * @res: new socket
1480 *
1481 * A wrapper around __sock_create().
1482 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1483 */
1484
eeb1bd5c 1485int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
1da177e4 1486{
/* Same as sock_create() but in the caller-supplied @net and with kern = 1. */
eeb1bd5c 1487 return __sock_create(net, family, type, protocol, res, 1);
1da177e4 1488}
c6d409cf 1489EXPORT_SYMBOL(sock_create_kern);
1da177e4 1490
9d6a15c3 1491int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1492{
1493 int retval;
1494 struct socket *sock;
a677a039
UD
1495 int flags;
1496
e38b36f3
UD
1497 /* Check the SOCK_* constants for consistency. */
1498 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1499 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1500 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1501 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1502
a677a039 1503 flags = type & ~SOCK_TYPE_MASK;
77d27200 1504 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1505 return -EINVAL;
1506 type &= SOCK_TYPE_MASK;
1da177e4 1507
aaca0bdc
UD
1508 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1509 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1510
1da177e4
LT
1511 retval = sock_create(family, type, protocol, &sock);
1512 if (retval < 0)
8e1611e2 1513 return retval;
1da177e4 1514
8e1611e2 1515 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1516}
1517
9d6a15c3
DB
/* socket system call: thin wrapper that forwards to __sys_socket(). */
1518SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1519{
1520 return __sys_socket(family, type, protocol);
1521}
1522
1da177e4
LT
1523/*
1524 * Create a pair of connected sockets.
1525 */
1526
6debc8d8 1527int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1528{
1529 struct socket *sock1, *sock2;
1530 int fd1, fd2, err;
db349509 1531 struct file *newfile1, *newfile2;
a677a039
UD
1532 int flags;
1533
1534 flags = type & ~SOCK_TYPE_MASK;
77d27200 1535 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1536 return -EINVAL;
1537 type &= SOCK_TYPE_MASK;
1da177e4 1538
aaca0bdc
UD
1539 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1540 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1541
016a266b
AV
1542 /*
1543 * reserve descriptors and make sure we won't fail
1544 * to return them to userland.
1545 */
1546 fd1 = get_unused_fd_flags(flags);
1547 if (unlikely(fd1 < 0))
1548 return fd1;
1549
1550 fd2 = get_unused_fd_flags(flags);
1551 if (unlikely(fd2 < 0)) {
1552 put_unused_fd(fd1);
1553 return fd2;
1554 }
1555
1556 err = put_user(fd1, &usockvec[0]);
1557 if (err)
1558 goto out;
1559
1560 err = put_user(fd2, &usockvec[1]);
1561 if (err)
1562 goto out;
1563
1da177e4
LT
1564 /*
1565 * Obtain the first socket and check if the underlying protocol
1566 * supports the socketpair call.
1567 */
1568
1569 err = sock_create(family, type, protocol, &sock1);
016a266b 1570 if (unlikely(err < 0))
1da177e4
LT
1571 goto out;
1572
1573 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1574 if (unlikely(err < 0)) {
1575 sock_release(sock1);
1576 goto out;
bf3c23d1 1577 }
d73aa286 1578
d47cd945
DH
1579 err = security_socket_socketpair(sock1, sock2);
1580 if (unlikely(err)) {
1581 sock_release(sock2);
1582 sock_release(sock1);
1583 goto out;
1584 }
1585
016a266b
AV
1586 err = sock1->ops->socketpair(sock1, sock2);
1587 if (unlikely(err < 0)) {
1588 sock_release(sock2);
1589 sock_release(sock1);
1590 goto out;
28407630
AV
1591 }
1592
aab174f0 1593 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1594 if (IS_ERR(newfile1)) {
28407630 1595 err = PTR_ERR(newfile1);
016a266b
AV
1596 sock_release(sock2);
1597 goto out;
28407630
AV
1598 }
1599
aab174f0 1600 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1601 if (IS_ERR(newfile2)) {
1602 err = PTR_ERR(newfile2);
016a266b
AV
1603 fput(newfile1);
1604 goto out;
db349509
AV
1605 }
1606
157cf649 1607 audit_fd_pair(fd1, fd2);
d73aa286 1608
db349509
AV
1609 fd_install(fd1, newfile1);
1610 fd_install(fd2, newfile2);
d73aa286 1611 return 0;
1da177e4 1612
016a266b 1613out:
d73aa286 1614 put_unused_fd(fd2);
d73aa286 1615 put_unused_fd(fd1);
1da177e4
LT
1616 return err;
1617}
1618
6debc8d8
DB
/* socketpair system call: thin wrapper that forwards to __sys_socketpair(). */
1619SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1620 int __user *, usockvec)
1621{
1622 return __sys_socketpair(family, type, protocol, usockvec);
1623}
1624
1da177e4
LT
1625/*
1626 * Bind a name to a socket. Nothing much to do here since it's
1627 * the protocol's responsibility to handle the local address.
1628 *
1629 * We move the socket address to kernel space before we call
1630 * the protocol layer (having also checked the address is ok).
1631 */
1632
a87d35d8 1633int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1634{
1635 struct socket *sock;
230b1839 1636 struct sockaddr_storage address;
6cb153ca 1637 int err, fput_needed;
1da177e4 1638
89bddce5 1639 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1640 if (sock) {
43db362d 1641 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1642 if (!err) {
89bddce5 1643 err = security_socket_bind(sock,
230b1839 1644 (struct sockaddr *)&address,
89bddce5 1645 addrlen);
6cb153ca
BL
1646 if (!err)
1647 err = sock->ops->bind(sock,
89bddce5 1648 (struct sockaddr *)
230b1839 1649 &address, addrlen);
1da177e4 1650 }
6cb153ca 1651 fput_light(sock->file, fput_needed);
89bddce5 1652 }
1da177e4
LT
1653 return err;
1654}
1655
a87d35d8
DB
/* bind system call: thin wrapper that forwards to __sys_bind(). */
1656SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1657{
1658 return __sys_bind(fd, umyaddr, addrlen);
1659}
1660
1da177e4
LT
1661/*
1662 * Perform a listen. Basically, we allow the protocol to do anything
1663 * necessary for a listen, and if that works, we mark the socket as
1664 * ready for listening.
1665 */
1666
25e290ee 1667int __sys_listen(int fd, int backlog)
1da177e4
LT
1668{
1669 struct socket *sock;
6cb153ca 1670 int err, fput_needed;
b8e1f9b5 1671 int somaxconn;
89bddce5
SH
1672
1673 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1674 if (sock) {
8efa6e93 1675 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1676 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1677 backlog = somaxconn;
1da177e4
LT
1678
1679 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1680 if (!err)
1681 err = sock->ops->listen(sock, backlog);
1da177e4 1682
6cb153ca 1683 fput_light(sock->file, fput_needed);
1da177e4
LT
1684 }
1685 return err;
1686}
1687
25e290ee
DB
/* listen system call: thin wrapper that forwards to __sys_listen(). */
1688SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1689{
1690 return __sys_listen(fd, backlog);
1691}
1692
1da177e4
LT
1693/*
1694 * For accept, we attempt to create a new socket, set up the link
1695 * with the client, wake up the client, then return the new
1696 * connected fd. We collect the address of the connector in kernel
1697 * space and move it to user at the very end. This is unclean because
1698 * we open the socket then return an error.
1699 *
1700 * 1003.1g adds the ability for recvmsg() to query connection pending
1701 * status. We need to add that support in a way that's
b903036a 1702 * clean when we restructure accept also.
1da177e4
LT
1703 */
1704
4541e805
DB
1705int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1706 int __user *upeer_addrlen, int flags)
1da177e4
LT
1707{
1708 struct socket *sock, *newsock;
39d8c1b6 1709 struct file *newfile;
6cb153ca 1710 int err, len, newfd, fput_needed;
230b1839 1711 struct sockaddr_storage address;
1da177e4 1712
77d27200 1713 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1714 return -EINVAL;
1715
1716 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1717 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1718
6cb153ca 1719 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1720 if (!sock)
1721 goto out;
1722
1723 err = -ENFILE;
c6d409cf
ED
1724 newsock = sock_alloc();
1725 if (!newsock)
1da177e4
LT
1726 goto out_put;
1727
1728 newsock->type = sock->type;
1729 newsock->ops = sock->ops;
1730
1da177e4
LT
1731 /*
1732 * We don't need try_module_get here, as the listening socket (sock)
1733 * has the protocol module (sock->ops->owner) held.
1734 */
1735 __module_get(newsock->ops->owner);
1736
28407630 1737 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1738 if (unlikely(newfd < 0)) {
1739 err = newfd;
9a1875e6
DM
1740 sock_release(newsock);
1741 goto out_put;
39d8c1b6 1742 }
aab174f0 1743 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1744 if (IS_ERR(newfile)) {
28407630
AV
1745 err = PTR_ERR(newfile);
1746 put_unused_fd(newfd);
28407630
AV
1747 goto out_put;
1748 }
39d8c1b6 1749
a79af59e
FF
1750 err = security_socket_accept(sock, newsock);
1751 if (err)
39d8c1b6 1752 goto out_fd;
a79af59e 1753
cdfbabfb 1754 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1755 if (err < 0)
39d8c1b6 1756 goto out_fd;
1da177e4
LT
1757
1758 if (upeer_sockaddr) {
9b2c45d4
DV
1759 len = newsock->ops->getname(newsock,
1760 (struct sockaddr *)&address, 2);
1761 if (len < 0) {
1da177e4 1762 err = -ECONNABORTED;
39d8c1b6 1763 goto out_fd;
1da177e4 1764 }
43db362d 1765 err = move_addr_to_user(&address,
230b1839 1766 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1767 if (err < 0)
39d8c1b6 1768 goto out_fd;
1da177e4
LT
1769 }
1770
1771 /* File flags are not inherited via accept() unlike another OSes. */
1772
39d8c1b6
DM
1773 fd_install(newfd, newfile);
1774 err = newfd;
1da177e4 1775
1da177e4 1776out_put:
6cb153ca 1777 fput_light(sock->file, fput_needed);
1da177e4
LT
1778out:
1779 return err;
39d8c1b6 1780out_fd:
9606a216 1781 fput(newfile);
39d8c1b6 1782 put_unused_fd(newfd);
1da177e4
LT
1783 goto out_put;
1784}
1785
4541e805
DB
/* accept4 system call: thin wrapper that forwards to __sys_accept4(). */
1786SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1787 int __user *, upeer_addrlen, int, flags)
1788{
1789 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1790}
1791
20f37034
HC
/* accept system call: same as accept4 with a flags argument of 0. */
1792SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1793 int __user *, upeer_addrlen)
aaca0bdc 1794{
4541e805 1795 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1796}
1797
1da177e4
LT
1798/*
1799 * Attempt to connect to a socket with the server address. The address
1800 * is in user space so we verify it is OK and move it to kernel space.
1801 *
1802 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1803 * break bindings
1804 *
1805 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1806 * other SEQPACKET protocols that take time to connect() as it doesn't
1807 * include the -EINPROGRESS status for such sockets.
1808 */
1809
1387c2c2 1810int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1811{
1812 struct socket *sock;
230b1839 1813 struct sockaddr_storage address;
6cb153ca 1814 int err, fput_needed;
1da177e4 1815
6cb153ca 1816 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1817 if (!sock)
1818 goto out;
43db362d 1819 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1820 if (err < 0)
1821 goto out_put;
1822
89bddce5 1823 err =
230b1839 1824 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1825 if (err)
1826 goto out_put;
1827
230b1839 1828 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1829 sock->file->f_flags);
1830out_put:
6cb153ca 1831 fput_light(sock->file, fput_needed);
1da177e4
LT
1832out:
1833 return err;
1834}
1835
1387c2c2
DB
/* connect system call: thin wrapper that forwards to __sys_connect(). */
1836SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1837 int, addrlen)
1838{
1839 return __sys_connect(fd, uservaddr, addrlen);
1840}
1841
1da177e4
LT
1842/*
1843 * Get the local address ('name') of a socket object. Move the obtained
1844 * name to user space.
1845 */
1846
8882a107
DB
1847int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1848 int __user *usockaddr_len)
1da177e4
LT
1849{
1850 struct socket *sock;
230b1839 1851 struct sockaddr_storage address;
9b2c45d4 1852 int err, fput_needed;
89bddce5 1853
6cb153ca 1854 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1855 if (!sock)
1856 goto out;
1857
1858 err = security_socket_getsockname(sock);
1859 if (err)
1860 goto out_put;
1861
9b2c45d4
DV
1862 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1863 if (err < 0)
1da177e4 1864 goto out_put;
9b2c45d4
DV
1865 /* "err" is actually length in this case */
1866 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1867
1868out_put:
6cb153ca 1869 fput_light(sock->file, fput_needed);
1da177e4
LT
1870out:
1871 return err;
1872}
1873
8882a107
DB
/* getsockname system call: thin wrapper that forwards to __sys_getsockname(). */
1874SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1875 int __user *, usockaddr_len)
1876{
1877 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1878}
1879
1da177e4
LT
1880/*
1881 * Get the remote address ('name') of a socket object. Move the obtained
1882 * name to user space.
1883 */
1884
b21c8f83
DB
1885int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1886 int __user *usockaddr_len)
1da177e4
LT
1887{
1888 struct socket *sock;
230b1839 1889 struct sockaddr_storage address;
9b2c45d4 1890 int err, fput_needed;
1da177e4 1891
89bddce5
SH
1892 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1893 if (sock != NULL) {
1da177e4
LT
1894 err = security_socket_getpeername(sock);
1895 if (err) {
6cb153ca 1896 fput_light(sock->file, fput_needed);
1da177e4
LT
1897 return err;
1898 }
1899
9b2c45d4
DV
1900 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1901 if (err >= 0)
1902 /* "err" is actually length in this case */
1903 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1904 usockaddr_len);
6cb153ca 1905 fput_light(sock->file, fput_needed);
1da177e4
LT
1906 }
1907 return err;
1908}
1909
b21c8f83
DB
/* getpeername system call: thin wrapper that forwards to __sys_getpeername(). */
1910SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1911 int __user *, usockaddr_len)
1912{
1913 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1914}
1915
1da177e4
LT
1916/*
1917 * Send a datagram to a given address. We move the address into kernel
1918 * space and check the user space data area is readable before invoking
1919 * the protocol.
1920 */
211b634b
DB
1921int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1922 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1923{
1924 struct socket *sock;
230b1839 1925 struct sockaddr_storage address;
1da177e4
LT
1926 int err;
1927 struct msghdr msg;
1928 struct iovec iov;
6cb153ca 1929 int fput_needed;
6cb153ca 1930
602bd0e9
AV
1931 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1932 if (unlikely(err))
1933 return err;
de0fa95c
PE
1934 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1935 if (!sock)
4387ff75 1936 goto out;
6cb153ca 1937
89bddce5 1938 msg.msg_name = NULL;
89bddce5
SH
1939 msg.msg_control = NULL;
1940 msg.msg_controllen = 0;
1941 msg.msg_namelen = 0;
6cb153ca 1942 if (addr) {
43db362d 1943 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1944 if (err < 0)
1945 goto out_put;
230b1839 1946 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1947 msg.msg_namelen = addr_len;
1da177e4
LT
1948 }
1949 if (sock->file->f_flags & O_NONBLOCK)
1950 flags |= MSG_DONTWAIT;
1951 msg.msg_flags = flags;
d8725c86 1952 err = sock_sendmsg(sock, &msg);
1da177e4 1953
89bddce5 1954out_put:
de0fa95c 1955 fput_light(sock->file, fput_needed);
4387ff75 1956out:
1da177e4
LT
1957 return err;
1958}
1959
211b634b
DB
1960SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1961 unsigned int, flags, struct sockaddr __user *, addr,
1962 int, addr_len)
1963{
1964 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1965}
1966
1da177e4 1967/*
89bddce5 1968 * Send a datagram down a socket.
1da177e4
LT
1969 */
1970
3e0fa65f 1971SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1972 unsigned int, flags)
1da177e4 1973{
211b634b 1974 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1975}
1976
1977/*
89bddce5 1978 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1979 * sender. We verify the buffers are writable and if needed move the
1980 * sender address from kernel to user space.
1981 */
7a09e1eb
DB
1982int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
1983 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
1984{
1985 struct socket *sock;
1986 struct iovec iov;
1987 struct msghdr msg;
230b1839 1988 struct sockaddr_storage address;
89bddce5 1989 int err, err2;
6cb153ca
BL
1990 int fput_needed;
1991
602bd0e9
AV
1992 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1993 if (unlikely(err))
1994 return err;
de0fa95c 1995 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1996 if (!sock)
de0fa95c 1997 goto out;
1da177e4 1998
89bddce5
SH
1999 msg.msg_control = NULL;
2000 msg.msg_controllen = 0;
f3d33426
HFS
2001 /* Save some cycles and don't copy the address if not needed */
2002 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2003 /* We assume all kernel code knows the size of sockaddr_storage */
2004 msg.msg_namelen = 0;
130ed5d1 2005 msg.msg_iocb = NULL;
9f138fa6 2006 msg.msg_flags = 0;
1da177e4
LT
2007 if (sock->file->f_flags & O_NONBLOCK)
2008 flags |= MSG_DONTWAIT;
2da62906 2009 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2010
89bddce5 2011 if (err >= 0 && addr != NULL) {
43db362d 2012 err2 = move_addr_to_user(&address,
230b1839 2013 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2014 if (err2 < 0)
2015 err = err2;
1da177e4 2016 }
de0fa95c
PE
2017
2018 fput_light(sock->file, fput_needed);
4387ff75 2019out:
1da177e4
LT
2020 return err;
2021}
2022
7a09e1eb
DB
2023SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2024 unsigned int, flags, struct sockaddr __user *, addr,
2025 int __user *, addr_len)
2026{
2027 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2028}
2029
1da177e4 2030/*
89bddce5 2031 * Receive a datagram from a socket.
1da177e4
LT
2032 */
2033
b7c0ddf5
JG
2034SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2035 unsigned int, flags)
1da177e4 2036{
7a09e1eb 2037 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2038}
2039
2040/*
2041 * Set a socket option. Because we don't know the option lengths we have
2042 * to pass the user mode parameter for the protocols to sort out.
2043 */
2044
cc36dca0
DB
2045static int __sys_setsockopt(int fd, int level, int optname,
2046 char __user *optval, int optlen)
1da177e4 2047{
0d01da6a
SF
2048 mm_segment_t oldfs = get_fs();
2049 char *kernel_optval = NULL;
6cb153ca 2050 int err, fput_needed;
1da177e4
LT
2051 struct socket *sock;
2052
2053 if (optlen < 0)
2054 return -EINVAL;
89bddce5
SH
2055
2056 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2057 if (sock != NULL) {
2058 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2059 if (err)
2060 goto out_put;
1da177e4 2061
0d01da6a
SF
2062 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
2063 &optname, optval, &optlen,
2064 &kernel_optval);
2065
2066 if (err < 0) {
2067 goto out_put;
2068 } else if (err > 0) {
2069 err = 0;
2070 goto out_put;
2071 }
2072
2073 if (kernel_optval) {
2074 set_fs(KERNEL_DS);
2075 optval = (char __user __force *)kernel_optval;
2076 }
2077
1da177e4 2078 if (level == SOL_SOCKET)
89bddce5
SH
2079 err =
2080 sock_setsockopt(sock, level, optname, optval,
2081 optlen);
1da177e4 2082 else
89bddce5
SH
2083 err =
2084 sock->ops->setsockopt(sock, level, optname, optval,
2085 optlen);
0d01da6a
SF
2086
2087 if (kernel_optval) {
2088 set_fs(oldfs);
2089 kfree(kernel_optval);
2090 }
6cb153ca
BL
2091out_put:
2092 fput_light(sock->file, fput_needed);
1da177e4
LT
2093 }
2094 return err;
2095}
2096
cc36dca0
DB
2097SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2098 char __user *, optval, int, optlen)
2099{
2100 return __sys_setsockopt(fd, level, optname, optval, optlen);
2101}
2102
1da177e4
LT
2103/*
2104 * Get a socket option. Because we don't know the option lengths we have
2105 * to pass a user mode parameter for the protocols to sort out.
2106 */
2107
13a2d70e
DB
2108static int __sys_getsockopt(int fd, int level, int optname,
2109 char __user *optval, int __user *optlen)
1da177e4 2110{
6cb153ca 2111 int err, fput_needed;
1da177e4 2112 struct socket *sock;
0d01da6a 2113 int max_optlen;
1da177e4 2114
89bddce5
SH
2115 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2116 if (sock != NULL) {
6cb153ca
BL
2117 err = security_socket_getsockopt(sock, level, optname);
2118 if (err)
2119 goto out_put;
1da177e4 2120
0d01da6a
SF
2121 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
2122
1da177e4 2123 if (level == SOL_SOCKET)
89bddce5
SH
2124 err =
2125 sock_getsockopt(sock, level, optname, optval,
2126 optlen);
1da177e4 2127 else
89bddce5
SH
2128 err =
2129 sock->ops->getsockopt(sock, level, optname, optval,
2130 optlen);
0d01da6a
SF
2131
2132 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2133 optval, optlen,
2134 max_optlen, err);
6cb153ca
BL
2135out_put:
2136 fput_light(sock->file, fput_needed);
1da177e4
LT
2137 }
2138 return err;
2139}
2140
13a2d70e
DB
2141SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2142 char __user *, optval, int __user *, optlen)
2143{
2144 return __sys_getsockopt(fd, level, optname, optval, optlen);
2145}
2146
1da177e4
LT
2147/*
2148 * Shutdown a socket.
2149 */
2150
005a1aea 2151int __sys_shutdown(int fd, int how)
1da177e4 2152{
6cb153ca 2153 int err, fput_needed;
1da177e4
LT
2154 struct socket *sock;
2155
89bddce5
SH
2156 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2157 if (sock != NULL) {
1da177e4 2158 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2159 if (!err)
2160 err = sock->ops->shutdown(sock, how);
2161 fput_light(sock->file, fput_needed);
1da177e4
LT
2162 }
2163 return err;
2164}
2165
005a1aea
DB
2166SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2167{
2168 return __sys_shutdown(fd, how);
2169}
2170
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG() is not self-contained: it reads a variable named "flags"
 * and a pointer named "<msg>_compat", both of which must be in scope
 * wherever the macro is expanded.
 */
#define COMPAT_MSG(msg, member)	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2177
c71d8ebe
TH
2178struct used_address {
2179 struct sockaddr_storage name;
2180 unsigned int name_len;
2181};
2182
da184284
AV
2183static int copy_msghdr_from_user(struct msghdr *kmsg,
2184 struct user_msghdr __user *umsg,
2185 struct sockaddr __user **save_addr,
2186 struct iovec **iov)
1661bf36 2187{
ffb07550 2188 struct user_msghdr msg;
08adb7da
AV
2189 ssize_t err;
2190
ffb07550 2191 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2192 return -EFAULT;
dbb490b9 2193
864d9664 2194 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2195 kmsg->msg_controllen = msg.msg_controllen;
2196 kmsg->msg_flags = msg.msg_flags;
2197
2198 kmsg->msg_namelen = msg.msg_namelen;
2199 if (!msg.msg_name)
6a2a2b3a
AS
2200 kmsg->msg_namelen = 0;
2201
dbb490b9
ML
2202 if (kmsg->msg_namelen < 0)
2203 return -EINVAL;
2204
1661bf36 2205 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2206 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2207
2208 if (save_addr)
ffb07550 2209 *save_addr = msg.msg_name;
08adb7da 2210
ffb07550 2211 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2212 if (!save_addr) {
864d9664
PA
2213 err = move_addr_to_kernel(msg.msg_name,
2214 kmsg->msg_namelen,
08adb7da
AV
2215 kmsg->msg_name);
2216 if (err < 0)
2217 return err;
2218 }
2219 } else {
2220 kmsg->msg_name = NULL;
2221 kmsg->msg_namelen = 0;
2222 }
2223
ffb07550 2224 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2225 return -EMSGSIZE;
2226
0345f931 2227 kmsg->msg_iocb = NULL;
2228
87e5e6da 2229 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2230 msg.msg_iov, msg.msg_iovlen,
da184284 2231 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2232 return err < 0 ? err : 0;
1661bf36
DC
2233}
2234
666547ff 2235static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2236 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2237 struct used_address *used_address,
2238 unsigned int allowed_msghdr_flags)
1da177e4 2239{
89bddce5
SH
2240 struct compat_msghdr __user *msg_compat =
2241 (struct compat_msghdr __user *)msg;
230b1839 2242 struct sockaddr_storage address;
1da177e4 2243 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2244 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2245 __aligned(sizeof(__kernel_size_t));
89bddce5 2246 /* 20 is size of ipv6_pktinfo */
1da177e4 2247 unsigned char *ctl_buf = ctl;
d8725c86 2248 int ctl_len;
08adb7da 2249 ssize_t err;
89bddce5 2250
08adb7da 2251 msg_sys->msg_name = &address;
1da177e4 2252
08449320 2253 if (MSG_CMSG_COMPAT & flags)
08adb7da 2254 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2255 else
08adb7da 2256 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2257 if (err < 0)
da184284 2258 return err;
1da177e4
LT
2259
2260 err = -ENOBUFS;
2261
228e548e 2262 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2263 goto out_freeiov;
28a94d8f 2264 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2265 ctl_len = msg_sys->msg_controllen;
1da177e4 2266 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2267 err =
228e548e 2268 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2269 sizeof(ctl));
1da177e4
LT
2270 if (err)
2271 goto out_freeiov;
228e548e
AB
2272 ctl_buf = msg_sys->msg_control;
2273 ctl_len = msg_sys->msg_controllen;
1da177e4 2274 } else if (ctl_len) {
ac4340fc
DM
2275 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2276 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2277 if (ctl_len > sizeof(ctl)) {
1da177e4 2278 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2279 if (ctl_buf == NULL)
1da177e4
LT
2280 goto out_freeiov;
2281 }
2282 err = -EFAULT;
2283 /*
228e548e 2284 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2285 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2286 * checking falls down on this.
2287 */
fb8621bb 2288 if (copy_from_user(ctl_buf,
228e548e 2289 (void __user __force *)msg_sys->msg_control,
89bddce5 2290 ctl_len))
1da177e4 2291 goto out_freectl;
228e548e 2292 msg_sys->msg_control = ctl_buf;
1da177e4 2293 }
228e548e 2294 msg_sys->msg_flags = flags;
1da177e4
LT
2295
2296 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2297 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2298 /*
2299 * If this is sendmmsg() and current destination address is same as
2300 * previously succeeded address, omit asking LSM's decision.
2301 * used_address->name_len is initialized to UINT_MAX so that the first
2302 * destination address never matches.
2303 */
bc909d9d
MD
2304 if (used_address && msg_sys->msg_name &&
2305 used_address->name_len == msg_sys->msg_namelen &&
2306 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2307 used_address->name_len)) {
d8725c86 2308 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2309 goto out_freectl;
2310 }
d8725c86 2311 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2312 /*
2313 * If this is sendmmsg() and sending to current destination address was
2314 * successful, remember it.
2315 */
2316 if (used_address && err >= 0) {
2317 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2318 if (msg_sys->msg_name)
2319 memcpy(&used_address->name, msg_sys->msg_name,
2320 used_address->name_len);
c71d8ebe 2321 }
1da177e4
LT
2322
2323out_freectl:
89bddce5 2324 if (ctl_buf != ctl)
1da177e4
LT
2325 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2326out_freeiov:
da184284 2327 kfree(iov);
228e548e
AB
2328 return err;
2329}
2330
2331/*
2332 * BSD sendmsg interface
2333 */
0fa03c62
JA
2334long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *msg,
2335 unsigned int flags)
2336{
2337 struct msghdr msg_sys;
2338
2339 return ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
2340}
228e548e 2341
e1834a32
DB
2342long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2343 bool forbid_cmsg_compat)
228e548e
AB
2344{
2345 int fput_needed, err;
2346 struct msghdr msg_sys;
1be374a0
AL
2347 struct socket *sock;
2348
e1834a32
DB
2349 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2350 return -EINVAL;
2351
1be374a0 2352 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2353 if (!sock)
2354 goto out;
2355
28a94d8f 2356 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2357
6cb153ca 2358 fput_light(sock->file, fput_needed);
89bddce5 2359out:
1da177e4
LT
2360 return err;
2361}
2362
666547ff 2363SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2364{
e1834a32 2365 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2366}
2367
228e548e
AB
2368/*
2369 * Linux sendmmsg interface
2370 */
2371
2372int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2373 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2374{
2375 int fput_needed, err, datagrams;
2376 struct socket *sock;
2377 struct mmsghdr __user *entry;
2378 struct compat_mmsghdr __user *compat_entry;
2379 struct msghdr msg_sys;
c71d8ebe 2380 struct used_address used_address;
f092276d 2381 unsigned int oflags = flags;
228e548e 2382
e1834a32
DB
2383 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2384 return -EINVAL;
2385
98382f41
AB
2386 if (vlen > UIO_MAXIOV)
2387 vlen = UIO_MAXIOV;
228e548e
AB
2388
2389 datagrams = 0;
2390
2391 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2392 if (!sock)
2393 return err;
2394
c71d8ebe 2395 used_address.name_len = UINT_MAX;
228e548e
AB
2396 entry = mmsg;
2397 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2398 err = 0;
f092276d 2399 flags |= MSG_BATCH;
228e548e
AB
2400
2401 while (datagrams < vlen) {
f092276d
TH
2402 if (datagrams == vlen - 1)
2403 flags = oflags;
2404
228e548e 2405 if (MSG_CMSG_COMPAT & flags) {
666547ff 2406 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2407 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2408 if (err < 0)
2409 break;
2410 err = __put_user(err, &compat_entry->msg_len);
2411 ++compat_entry;
2412 } else {
a7526eb5 2413 err = ___sys_sendmsg(sock,
666547ff 2414 (struct user_msghdr __user *)entry,
28a94d8f 2415 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2416 if (err < 0)
2417 break;
2418 err = put_user(err, &entry->msg_len);
2419 ++entry;
2420 }
2421
2422 if (err)
2423 break;
2424 ++datagrams;
3023898b
SHY
2425 if (msg_data_left(&msg_sys))
2426 break;
a78cb84c 2427 cond_resched();
228e548e
AB
2428 }
2429
228e548e
AB
2430 fput_light(sock->file, fput_needed);
2431
728ffb86
AB
2432 /* We only return an error if no datagrams were able to be sent */
2433 if (datagrams != 0)
228e548e
AB
2434 return datagrams;
2435
228e548e
AB
2436 return err;
2437}
2438
2439SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2440 unsigned int, vlen, unsigned int, flags)
2441{
e1834a32 2442 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2443}
2444
666547ff 2445static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2446 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2447{
89bddce5
SH
2448 struct compat_msghdr __user *msg_compat =
2449 (struct compat_msghdr __user *)msg;
1da177e4 2450 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2451 struct iovec *iov = iovstack;
1da177e4 2452 unsigned long cmsg_ptr;
2da62906 2453 int len;
08adb7da 2454 ssize_t err;
1da177e4
LT
2455
2456 /* kernel mode address */
230b1839 2457 struct sockaddr_storage addr;
1da177e4
LT
2458
2459 /* user mode address pointers */
2460 struct sockaddr __user *uaddr;
08adb7da 2461 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2462
08adb7da 2463 msg_sys->msg_name = &addr;
1da177e4 2464
f3d33426 2465 if (MSG_CMSG_COMPAT & flags)
08adb7da 2466 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2467 else
08adb7da 2468 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2469 if (err < 0)
da184284 2470 return err;
1da177e4 2471
a2e27255
ACM
2472 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2473 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2474
f3d33426
HFS
2475 /* We assume all kernel code knows the size of sockaddr_storage */
2476 msg_sys->msg_namelen = 0;
2477
1da177e4
LT
2478 if (sock->file->f_flags & O_NONBLOCK)
2479 flags |= MSG_DONTWAIT;
2da62906 2480 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2481 if (err < 0)
2482 goto out_freeiov;
2483 len = err;
2484
2485 if (uaddr != NULL) {
43db362d 2486 err = move_addr_to_user(&addr,
a2e27255 2487 msg_sys->msg_namelen, uaddr,
89bddce5 2488 uaddr_len);
1da177e4
LT
2489 if (err < 0)
2490 goto out_freeiov;
2491 }
a2e27255 2492 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2493 COMPAT_FLAGS(msg));
1da177e4
LT
2494 if (err)
2495 goto out_freeiov;
2496 if (MSG_CMSG_COMPAT & flags)
a2e27255 2497 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2498 &msg_compat->msg_controllen);
2499 else
a2e27255 2500 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2501 &msg->msg_controllen);
2502 if (err)
2503 goto out_freeiov;
2504 err = len;
2505
2506out_freeiov:
da184284 2507 kfree(iov);
a2e27255
ACM
2508 return err;
2509}
2510
2511/*
2512 * BSD recvmsg interface
2513 */
2514
aa1fa28f
JA
2515long __sys_recvmsg_sock(struct socket *sock, struct user_msghdr __user *msg,
2516 unsigned int flags)
2517{
2518 struct msghdr msg_sys;
2519
2520 return ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2521}
2522
e1834a32
DB
2523long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2524 bool forbid_cmsg_compat)
a2e27255
ACM
2525{
2526 int fput_needed, err;
2527 struct msghdr msg_sys;
1be374a0
AL
2528 struct socket *sock;
2529
e1834a32
DB
2530 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2531 return -EINVAL;
2532
1be374a0 2533 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2534 if (!sock)
2535 goto out;
2536
a7526eb5 2537 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2538
6cb153ca 2539 fput_light(sock->file, fput_needed);
1da177e4
LT
2540out:
2541 return err;
2542}
2543
666547ff 2544SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2545 unsigned int, flags)
2546{
e1834a32 2547 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2548}
2549
a2e27255
ACM
2550/*
2551 * Linux recvmmsg interface
2552 */
2553
e11d4284
AB
2554static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2555 unsigned int vlen, unsigned int flags,
2556 struct timespec64 *timeout)
a2e27255
ACM
2557{
2558 int fput_needed, err, datagrams;
2559 struct socket *sock;
2560 struct mmsghdr __user *entry;
d7256d0e 2561 struct compat_mmsghdr __user *compat_entry;
a2e27255 2562 struct msghdr msg_sys;
766b9f92
DD
2563 struct timespec64 end_time;
2564 struct timespec64 timeout64;
a2e27255
ACM
2565
2566 if (timeout &&
2567 poll_select_set_timeout(&end_time, timeout->tv_sec,
2568 timeout->tv_nsec))
2569 return -EINVAL;
2570
2571 datagrams = 0;
2572
2573 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2574 if (!sock)
2575 return err;
2576
7797dc41
SHY
2577 if (likely(!(flags & MSG_ERRQUEUE))) {
2578 err = sock_error(sock->sk);
2579 if (err) {
2580 datagrams = err;
2581 goto out_put;
2582 }
e623a9e9 2583 }
a2e27255
ACM
2584
2585 entry = mmsg;
d7256d0e 2586 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2587
2588 while (datagrams < vlen) {
2589 /*
2590 * No need to ask LSM for more than the first datagram.
2591 */
d7256d0e 2592 if (MSG_CMSG_COMPAT & flags) {
666547ff 2593 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2594 &msg_sys, flags & ~MSG_WAITFORONE,
2595 datagrams);
d7256d0e
JMG
2596 if (err < 0)
2597 break;
2598 err = __put_user(err, &compat_entry->msg_len);
2599 ++compat_entry;
2600 } else {
a7526eb5 2601 err = ___sys_recvmsg(sock,
666547ff 2602 (struct user_msghdr __user *)entry,
a7526eb5
AL
2603 &msg_sys, flags & ~MSG_WAITFORONE,
2604 datagrams);
d7256d0e
JMG
2605 if (err < 0)
2606 break;
2607 err = put_user(err, &entry->msg_len);
2608 ++entry;
2609 }
2610
a2e27255
ACM
2611 if (err)
2612 break;
a2e27255
ACM
2613 ++datagrams;
2614
71c5c159
BB
2615 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2616 if (flags & MSG_WAITFORONE)
2617 flags |= MSG_DONTWAIT;
2618
a2e27255 2619 if (timeout) {
766b9f92 2620 ktime_get_ts64(&timeout64);
c2e6c856 2621 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2622 if (timeout->tv_sec < 0) {
2623 timeout->tv_sec = timeout->tv_nsec = 0;
2624 break;
2625 }
2626
2627 /* Timeout, return less than vlen datagrams */
2628 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2629 break;
2630 }
2631
2632 /* Out of band data, return right away */
2633 if (msg_sys.msg_flags & MSG_OOB)
2634 break;
a78cb84c 2635 cond_resched();
a2e27255
ACM
2636 }
2637
a2e27255 2638 if (err == 0)
34b88a68
ACM
2639 goto out_put;
2640
2641 if (datagrams == 0) {
2642 datagrams = err;
2643 goto out_put;
2644 }
a2e27255 2645
34b88a68
ACM
2646 /*
2647 * We may return less entries than requested (vlen) if the
2648 * sock is non block and there aren't enough datagrams...
2649 */
2650 if (err != -EAGAIN) {
a2e27255 2651 /*
34b88a68
ACM
2652 * ... or if recvmsg returns an error after we
2653 * received some datagrams, where we record the
2654 * error to return on the next call or if the
2655 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2656 */
34b88a68 2657 sock->sk->sk_err = -err;
a2e27255 2658 }
34b88a68
ACM
2659out_put:
2660 fput_light(sock->file, fput_needed);
a2e27255 2661
34b88a68 2662 return datagrams;
a2e27255
ACM
2663}
2664
e11d4284
AB
2665int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2666 unsigned int vlen, unsigned int flags,
2667 struct __kernel_timespec __user *timeout,
2668 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2669{
2670 int datagrams;
c2e6c856 2671 struct timespec64 timeout_sys;
a2e27255 2672
e11d4284
AB
2673 if (timeout && get_timespec64(&timeout_sys, timeout))
2674 return -EFAULT;
a2e27255 2675
e11d4284 2676 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2677 return -EFAULT;
2678
e11d4284
AB
2679 if (!timeout && !timeout32)
2680 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2681
2682 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2683
e11d4284
AB
2684 if (datagrams <= 0)
2685 return datagrams;
2686
2687 if (timeout && put_timespec64(&timeout_sys, timeout))
2688 datagrams = -EFAULT;
2689
2690 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2691 datagrams = -EFAULT;
2692
2693 return datagrams;
2694}
2695
1255e269
DB
2696SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2697 unsigned int, vlen, unsigned int, flags,
c2e6c856 2698 struct __kernel_timespec __user *, timeout)
1255e269 2699{
e11d4284
AB
2700 if (flags & MSG_CMSG_COMPAT)
2701 return -EINVAL;
2702
2703 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2704}
2705
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg variant taking a struct old_timespec32 timeout.
 * MSG_CMSG_COMPAT in @flags is rejected with -EINVAL.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2717
a2e27255 2718#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by the call
 * number. AL(n) is the size of n unsigned long arguments; the helper macro
 * is only needed while the table is being built.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3),
	AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2),
	AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2729
2730/*
89bddce5 2731 * System call vectors.
1da177e4
LT
2732 *
2733 * Argument checking cleaned up. Saved 20% in size.
2734 * This function doesn't need to set the kernel lock because
89bddce5 2735 * it is set by the callees.
1da177e4
LT
2736 */
2737
3e0fa65f 2738SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2739{
2950fa9d 2740 unsigned long a[AUDITSC_ARGS];
89bddce5 2741 unsigned long a0, a1;
1da177e4 2742 int err;
47379052 2743 unsigned int len;
1da177e4 2744
228e548e 2745 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2746 return -EINVAL;
c8e8cd57 2747 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2748
47379052
AV
2749 len = nargs[call];
2750 if (len > sizeof(a))
2751 return -EINVAL;
2752
1da177e4 2753 /* copy_from_user should be SMP safe. */
47379052 2754 if (copy_from_user(a, args, len))
1da177e4 2755 return -EFAULT;
3ec3b2fb 2756
2950fa9d
CG
2757 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2758 if (err)
2759 return err;
3ec3b2fb 2760
89bddce5
SH
2761 a0 = a[0];
2762 a1 = a[1];
2763
2764 switch (call) {
2765 case SYS_SOCKET:
9d6a15c3 2766 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2767 break;
2768 case SYS_BIND:
a87d35d8 2769 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2770 break;
2771 case SYS_CONNECT:
1387c2c2 2772 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2773 break;
2774 case SYS_LISTEN:
25e290ee 2775 err = __sys_listen(a0, a1);
89bddce5
SH
2776 break;
2777 case SYS_ACCEPT:
4541e805
DB
2778 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2779 (int __user *)a[2], 0);
89bddce5
SH
2780 break;
2781 case SYS_GETSOCKNAME:
2782 err =
8882a107
DB
2783 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2784 (int __user *)a[2]);
89bddce5
SH
2785 break;
2786 case SYS_GETPEERNAME:
2787 err =
b21c8f83
DB
2788 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2789 (int __user *)a[2]);
89bddce5
SH
2790 break;
2791 case SYS_SOCKETPAIR:
6debc8d8 2792 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2793 break;
2794 case SYS_SEND:
f3bf896b
DB
2795 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2796 NULL, 0);
89bddce5
SH
2797 break;
2798 case SYS_SENDTO:
211b634b
DB
2799 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2800 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2801 break;
2802 case SYS_RECV:
d27e9afc
DB
2803 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2804 NULL, NULL);
89bddce5
SH
2805 break;
2806 case SYS_RECVFROM:
7a09e1eb
DB
2807 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2808 (struct sockaddr __user *)a[4],
2809 (int __user *)a[5]);
89bddce5
SH
2810 break;
2811 case SYS_SHUTDOWN:
005a1aea 2812 err = __sys_shutdown(a0, a1);
89bddce5
SH
2813 break;
2814 case SYS_SETSOCKOPT:
cc36dca0
DB
2815 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2816 a[4]);
89bddce5
SH
2817 break;
2818 case SYS_GETSOCKOPT:
2819 err =
13a2d70e
DB
2820 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2821 (int __user *)a[4]);
89bddce5
SH
2822 break;
2823 case SYS_SENDMSG:
e1834a32
DB
2824 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2825 a[2], true);
89bddce5 2826 break;
228e548e 2827 case SYS_SENDMMSG:
e1834a32
DB
2828 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2829 a[3], true);
228e548e 2830 break;
89bddce5 2831 case SYS_RECVMSG:
e1834a32
DB
2832 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2833 a[2], true);
89bddce5 2834 break;
a2e27255 2835 case SYS_RECVMMSG:
e11d4284
AB
2836 if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
2837 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2838 a[2], a[3],
2839 (struct __kernel_timespec __user *)a[4],
2840 NULL);
2841 else
2842 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2843 a[2], a[3], NULL,
2844 (struct old_timespec32 __user *)a[4]);
a2e27255 2845 break;
de11defe 2846 case SYS_ACCEPT4:
4541e805
DB
2847 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2848 (int __user *)a[2], a[3]);
aaca0bdc 2849 break;
89bddce5
SH
2850 default:
2851 err = -EINVAL;
2852 break;
1da177e4
LT
2853 }
2854 return err;
2855}
2856
89bddce5 2857#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2858
55737fda
SH
2859/**
2860 * sock_register - add a socket protocol handler
2861 * @ops: description of protocol
2862 *
1da177e4
LT
2863 * This function is called by a protocol handler that wants to
2864 * advertise its address family, and have it linked into the
e793c0f7 2865 * socket interface. The value ops->family corresponds to the
55737fda 2866 * socket system call protocol family.
1da177e4 2867 */
f0fd27d4 2868int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2869{
2870 int err;
2871
2872 if (ops->family >= NPROTO) {
3410f22e 2873 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2874 return -ENOBUFS;
2875 }
55737fda
SH
2876
2877 spin_lock(&net_family_lock);
190683a9
ED
2878 if (rcu_dereference_protected(net_families[ops->family],
2879 lockdep_is_held(&net_family_lock)))
55737fda
SH
2880 err = -EEXIST;
2881 else {
cf778b00 2882 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2883 err = 0;
2884 }
55737fda
SH
2885 spin_unlock(&net_family_lock);
2886
3410f22e 2887 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2888 return err;
2889}
c6d409cf 2890EXPORT_SYMBOL(sock_register);
1da177e4 2891
55737fda
SH
2892/**
2893 * sock_unregister - remove a protocol handler
2894 * @family: protocol family to remove
2895 *
1da177e4
LT
2896 * This function is called by a protocol handler that wants to
2897 * remove its address family, and have it unlinked from the
55737fda
SH
2898 * new socket creation.
2899 *
2900 * If protocol handler is a module, then it can use module reference
2901 * counts to protect against new references. If protocol handler is not
2902 * a module then it needs to provide its own protection in
2903 * the ops->create routine.
1da177e4 2904 */
f0fd27d4 2905void sock_unregister(int family)
1da177e4 2906{
f0fd27d4 2907 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2908
55737fda 2909 spin_lock(&net_family_lock);
a9b3cd7f 2910 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2911 spin_unlock(&net_family_lock);
2912
2913 synchronize_rcu();
2914
3410f22e 2915 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2916}
c6d409cf 2917EXPORT_SYMBOL(sock_unregister);
1da177e4 2918
bf2ae2e4
XL
2919bool sock_is_registered(int family)
2920{
66b51b0a 2921 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
2922}
2923
77d76ea3 2924static int __init sock_init(void)
1da177e4 2925{
b3e19d92 2926 int err;
2ca794e5
EB
2927 /*
2928 * Initialize the network sysctl infrastructure.
2929 */
2930 err = net_sysctl_init();
2931 if (err)
2932 goto out;
b3e19d92 2933
1da177e4 2934 /*
89bddce5 2935 * Initialize skbuff SLAB cache
1da177e4
LT
2936 */
2937 skb_init();
1da177e4
LT
2938
2939 /*
89bddce5 2940 * Initialize the protocols module.
1da177e4
LT
2941 */
2942
2943 init_inodecache();
b3e19d92
NP
2944
2945 err = register_filesystem(&sock_fs_type);
2946 if (err)
2947 goto out_fs;
1da177e4 2948 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2949 if (IS_ERR(sock_mnt)) {
2950 err = PTR_ERR(sock_mnt);
2951 goto out_mount;
2952 }
77d76ea3
AK
2953
2954 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2955 */
2956
2957#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2958 err = netfilter_init();
2959 if (err)
2960 goto out;
1da177e4 2961#endif
cbeb321a 2962
408eccce 2963 ptp_classifier_init();
c1f19b51 2964
b3e19d92
NP
2965out:
2966 return err;
2967
2968out_mount:
2969 unregister_filesystem(&sock_fs_type);
2970out_fs:
2971 goto out;
1da177e4
LT
2972}
2973
77d76ea3
AK
2974core_initcall(sock_init); /* early initcall */
2975
1da177e4
LT
2976#ifdef CONFIG_PROC_FS
2977void socket_seq_show(struct seq_file *seq)
2978{
648845ab
TZ
2979 seq_printf(seq, "sockets: used %d\n",
2980 sock_inuse_get(seq->private));
1da177e4 2981}
89bddce5 2982#endif /* CONFIG_PROC_FS */
1da177e4 2983
89bbfc95 2984#ifdef CONFIG_COMPAT
36fd633e 2985static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2986{
6b96018b 2987 struct compat_ifconf ifc32;
7a229387 2988 struct ifconf ifc;
7a229387
AB
2989 int err;
2990
6b96018b 2991 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2992 return -EFAULT;
2993
36fd633e
AV
2994 ifc.ifc_len = ifc32.ifc_len;
2995 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 2996
36fd633e
AV
2997 rtnl_lock();
2998 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2999 rtnl_unlock();
7a229387
AB
3000 if (err)
3001 return err;
3002
36fd633e 3003 ifc32.ifc_len = ifc.ifc_len;
6b96018b 3004 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3005 return -EFAULT;
3006
3007 return 0;
3008}
3009
6b96018b 3010static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 3011{
3a7da39d
BH
3012 struct compat_ethtool_rxnfc __user *compat_rxnfc;
3013 bool convert_in = false, convert_out = false;
44c02a2c
AV
3014 size_t buf_size = 0;
3015 struct ethtool_rxnfc __user *rxnfc = NULL;
3016 struct ifreq ifr;
3a7da39d
BH
3017 u32 rule_cnt = 0, actual_rule_cnt;
3018 u32 ethcmd;
7a229387 3019 u32 data;
3a7da39d 3020 int ret;
7a229387 3021
3a7da39d
BH
3022 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
3023 return -EFAULT;
7a229387 3024
3a7da39d
BH
3025 compat_rxnfc = compat_ptr(data);
3026
3027 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
3028 return -EFAULT;
3029
3a7da39d
BH
3030 /* Most ethtool structures are defined without padding.
3031 * Unfortunately struct ethtool_rxnfc is an exception.
3032 */
3033 switch (ethcmd) {
3034 default:
3035 break;
3036 case ETHTOOL_GRXCLSRLALL:
3037 /* Buffer size is variable */
3038 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3039 return -EFAULT;
3040 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3041 return -ENOMEM;
3042 buf_size += rule_cnt * sizeof(u32);
3043 /* fall through */
3044 case ETHTOOL_GRXRINGS:
3045 case ETHTOOL_GRXCLSRLCNT:
3046 case ETHTOOL_GRXCLSRULE:
55664f32 3047 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3048 convert_out = true;
3049 /* fall through */
3050 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3051 buf_size += sizeof(struct ethtool_rxnfc);
3052 convert_in = true;
44c02a2c 3053 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3054 break;
3055 }
3056
44c02a2c 3057 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3058 return -EFAULT;
3059
44c02a2c 3060 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3061
3a7da39d 3062 if (convert_in) {
127fe533 3063 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3064 * fs.ring_cookie and at the end of fs, but nowhere else.
3065 */
127fe533
AD
3066 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3067 sizeof(compat_rxnfc->fs.m_ext) !=
3068 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3069 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3070 BUILD_BUG_ON(
3071 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3072 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3073 offsetof(struct ethtool_rxnfc, fs.location) -
3074 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3075
3076 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3077 (void __user *)(&rxnfc->fs.m_ext + 1) -
3078 (void __user *)rxnfc) ||
3a7da39d
BH
3079 copy_in_user(&rxnfc->fs.ring_cookie,
3080 &compat_rxnfc->fs.ring_cookie,
954b1244 3081 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3082 (void __user *)&rxnfc->fs.ring_cookie))
3083 return -EFAULT;
3084 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3085 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3086 return -EFAULT;
3087 } else if (copy_in_user(&rxnfc->rule_cnt,
3088 &compat_rxnfc->rule_cnt,
3089 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3090 return -EFAULT;
3091 }
3092
44c02a2c 3093 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3094 if (ret)
3095 return ret;
3096
3097 if (convert_out) {
3098 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3099 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3100 (const void __user *)rxnfc) ||
3a7da39d
BH
3101 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3102 &rxnfc->fs.ring_cookie,
954b1244
SH
3103 (const void __user *)(&rxnfc->fs.location + 1) -
3104 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3105 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3106 sizeof(rxnfc->rule_cnt)))
3107 return -EFAULT;
3108
3109 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3110 /* As an optimisation, we only copy the actual
3111 * number of rules that the underlying
3112 * function returned. Since Mallory might
3113 * change the rule count in user memory, we
3114 * check that it is less than the rule count
3115 * originally given (as the user buffer size),
3116 * which has been range-checked.
3117 */
3118 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3119 return -EFAULT;
3120 if (actual_rule_cnt < rule_cnt)
3121 rule_cnt = actual_rule_cnt;
3122 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3123 &rxnfc->rule_locs[0],
3124 rule_cnt * sizeof(u32)))
3125 return -EFAULT;
3126 }
3127 }
3128
3129 return 0;
7a229387
AB
3130}
3131
7a50a240
AB
3132static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3133{
7a50a240 3134 compat_uptr_t uptr32;
44c02a2c
AV
3135 struct ifreq ifr;
3136 void __user *saved;
3137 int err;
7a50a240 3138
44c02a2c 3139 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3140 return -EFAULT;
3141
3142 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3143 return -EFAULT;
3144
44c02a2c
AV
3145 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3146 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3147
44c02a2c
AV
3148 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3149 if (!err) {
3150 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3151 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3152 err = -EFAULT;
ccbd6a5a 3153 }
44c02a2c 3154 return err;
7a229387
AB
3155}
3156
590d4693
BH
3157/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3158static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3159 struct compat_ifreq __user *u_ifreq32)
7a229387 3160{
44c02a2c 3161 struct ifreq ifreq;
7a229387
AB
3162 u32 data32;
3163
44c02a2c 3164 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3165 return -EFAULT;
44c02a2c 3166 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3167 return -EFAULT;
44c02a2c 3168 ifreq.ifr_data = compat_ptr(data32);
7a229387 3169
44c02a2c 3170 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3171}
3172
37ac39bd
JB
3173static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3174 unsigned int cmd,
3175 struct compat_ifreq __user *uifr32)
3176{
3177 struct ifreq __user *uifr;
3178 int err;
3179
3180 /* Handle the fact that while struct ifreq has the same *layout* on
3181 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3182 * which are handled elsewhere, it still has different *size* due to
3183 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3184 * resulting in struct ifreq being 32 and 40 bytes respectively).
3185 * As a result, if the struct happens to be at the end of a page and
3186 * the next page isn't readable/writable, we get a fault. To prevent
3187 * that, copy back and forth to the full size.
3188 */
3189
3190 uifr = compat_alloc_user_space(sizeof(*uifr));
3191 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3192 return -EFAULT;
3193
3194 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3195
3196 if (!err) {
3197 switch (cmd) {
3198 case SIOCGIFFLAGS:
3199 case SIOCGIFMETRIC:
3200 case SIOCGIFMTU:
3201 case SIOCGIFMEM:
3202 case SIOCGIFHWADDR:
3203 case SIOCGIFINDEX:
3204 case SIOCGIFADDR:
3205 case SIOCGIFBRDADDR:
3206 case SIOCGIFDSTADDR:
3207 case SIOCGIFNETMASK:
3208 case SIOCGIFPFLAGS:
3209 case SIOCGIFTXQLEN:
3210 case SIOCGMIIPHY:
3211 case SIOCGMIIREG:
c6c9fee3 3212 case SIOCGIFNAME:
37ac39bd
JB
3213 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3214 err = -EFAULT;
3215 break;
3216 }
3217 }
3218 return err;
3219}
3220
a2116ed2
AB
3221static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3222 struct compat_ifreq __user *uifr32)
3223{
3224 struct ifreq ifr;
3225 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3226 int err;
3227
3228 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3229 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3230 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3231 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3232 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3233 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3234 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3235 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3236 if (err)
3237 return -EFAULT;
3238
44c02a2c 3239 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3240
3241 if (cmd == SIOCGIFMAP && !err) {
3242 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3243 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3244 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3245 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3246 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3247 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3248 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3249 if (err)
3250 err = -EFAULT;
3251 }
3252 return err;
3253}
3254
7a229387 3255struct rtentry32 {
c6d409cf 3256 u32 rt_pad1;
7a229387
AB
3257 struct sockaddr rt_dst; /* target address */
3258 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3259 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3260 unsigned short rt_flags;
3261 short rt_pad2;
3262 u32 rt_pad3;
3263 unsigned char rt_tos;
3264 unsigned char rt_class;
3265 short rt_pad4;
3266 short rt_metric; /* +1 for binary compatibility! */
7a229387 3267 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3268 u32 rt_mtu; /* per route MTU/Window */
3269 u32 rt_window; /* Window clamping */
7a229387
AB
3270 unsigned short rt_irtt; /* Initial RTT */
3271};
3272
3273struct in6_rtmsg32 {
3274 struct in6_addr rtmsg_dst;
3275 struct in6_addr rtmsg_src;
3276 struct in6_addr rtmsg_gateway;
3277 u32 rtmsg_type;
3278 u16 rtmsg_dst_len;
3279 u16 rtmsg_src_len;
3280 u32 rtmsg_metric;
3281 u32 rtmsg_info;
3282 u32 rtmsg_flags;
3283 s32 rtmsg_ifindex;
3284};
3285
6b96018b
AB
3286static int routing_ioctl(struct net *net, struct socket *sock,
3287 unsigned int cmd, void __user *argp)
7a229387
AB
3288{
3289 int ret;
3290 void *r = NULL;
3291 struct in6_rtmsg r6;
3292 struct rtentry r4;
3293 char devname[16];
3294 u32 rtdev;
3295 mm_segment_t old_fs = get_fs();
3296
6b96018b
AB
3297 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3298 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3299 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3300 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3301 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3302 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3303 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3304 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3305 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3306 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3307 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3308
3309 r = (void *) &r6;
3310 } else { /* ipv4 */
6b96018b 3311 struct rtentry32 __user *ur4 = argp;
c6d409cf 3312 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3313 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3314 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3315 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3316 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3317 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3318 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3319 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3320 if (rtdev) {
c6d409cf 3321 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3322 r4.rt_dev = (char __user __force *)devname;
3323 devname[15] = 0;
7a229387
AB
3324 } else
3325 r4.rt_dev = NULL;
3326
3327 r = (void *) &r4;
3328 }
3329
3330 if (ret) {
3331 ret = -EFAULT;
3332 goto out;
3333 }
3334
c6d409cf 3335 set_fs(KERNEL_DS);
63ff03ab 3336 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3337 set_fs(old_fs);
7a229387
AB
3338
3339out:
7a229387
AB
3340 return ret;
3341}
3342
3343/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3344 * for some operations; this forces use of the newer bridge-utils that
25985edc 3345 * use compatible ioctls
7a229387 3346 */
6b96018b 3347static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3348{
6b96018b 3349 compat_ulong_t tmp;
7a229387 3350
6b96018b 3351 if (get_user(tmp, argp))
7a229387
AB
3352 return -EFAULT;
3353 if (tmp == BRCTL_GET_VERSION)
3354 return BRCTL_VERSION + 1;
3355 return -EINVAL;
3356}
3357
6b96018b
AB
3358static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3359 unsigned int cmd, unsigned long arg)
3360{
3361 void __user *argp = compat_ptr(arg);
3362 struct sock *sk = sock->sk;
3363 struct net *net = sock_net(sk);
7a229387 3364
6b96018b 3365 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3366 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3367
3368 switch (cmd) {
3369 case SIOCSIFBR:
3370 case SIOCGIFBR:
3371 return old_bridge_ioctl(argp);
6b96018b 3372 case SIOCGIFCONF:
36fd633e 3373 return compat_dev_ifconf(net, argp);
6b96018b
AB
3374 case SIOCETHTOOL:
3375 return ethtool_ioctl(net, argp);
7a50a240
AB
3376 case SIOCWANDEV:
3377 return compat_siocwandev(net, argp);
a2116ed2
AB
3378 case SIOCGIFMAP:
3379 case SIOCSIFMAP:
3380 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3381 case SIOCADDRT:
3382 case SIOCDELRT:
3383 return routing_ioctl(net, sock, cmd, argp);
0768e170
AB
3384 case SIOCGSTAMP_OLD:
3385 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3386 if (!sock->ops->gettstamp)
3387 return -ENOIOCTLCMD;
0768e170 3388 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3389 !COMPAT_USE_64BIT_TIME);
3390
590d4693
BH
3391 case SIOCBONDSLAVEINFOQUERY:
3392 case SIOCBONDINFOQUERY:
a2116ed2 3393 case SIOCSHWTSTAMP:
fd468c74 3394 case SIOCGHWTSTAMP:
590d4693 3395 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3396
3397 case FIOSETOWN:
3398 case SIOCSPGRP:
3399 case FIOGETOWN:
3400 case SIOCGPGRP:
3401 case SIOCBRADDBR:
3402 case SIOCBRDELBR:
3403 case SIOCGIFVLAN:
3404 case SIOCSIFVLAN:
3405 case SIOCADDDLCI:
3406 case SIOCDELDLCI:
c62cce2c 3407 case SIOCGSKNS:
0768e170
AB
3408 case SIOCGSTAMP_NEW:
3409 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3410 return sock_ioctl(file, cmd, arg);
3411
3412 case SIOCGIFFLAGS:
3413 case SIOCSIFFLAGS:
3414 case SIOCGIFMETRIC:
3415 case SIOCSIFMETRIC:
3416 case SIOCGIFMTU:
3417 case SIOCSIFMTU:
3418 case SIOCGIFMEM:
3419 case SIOCSIFMEM:
3420 case SIOCGIFHWADDR:
3421 case SIOCSIFHWADDR:
3422 case SIOCADDMULTI:
3423 case SIOCDELMULTI:
3424 case SIOCGIFINDEX:
6b96018b
AB
3425 case SIOCGIFADDR:
3426 case SIOCSIFADDR:
3427 case SIOCSIFHWBROADCAST:
6b96018b 3428 case SIOCDIFADDR:
6b96018b
AB
3429 case SIOCGIFBRDADDR:
3430 case SIOCSIFBRDADDR:
3431 case SIOCGIFDSTADDR:
3432 case SIOCSIFDSTADDR:
3433 case SIOCGIFNETMASK:
3434 case SIOCSIFNETMASK:
3435 case SIOCSIFPFLAGS:
3436 case SIOCGIFPFLAGS:
3437 case SIOCGIFTXQLEN:
3438 case SIOCSIFTXQLEN:
3439 case SIOCBRADDIF:
3440 case SIOCBRDELIF:
c6c9fee3 3441 case SIOCGIFNAME:
9177efd3
AB
3442 case SIOCSIFNAME:
3443 case SIOCGMIIPHY:
3444 case SIOCGMIIREG:
3445 case SIOCSMIIREG:
f92d4fc9
AV
3446 case SIOCBONDENSLAVE:
3447 case SIOCBONDRELEASE:
3448 case SIOCBONDSETHWADDR:
3449 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3450 return compat_ifreq_ioctl(net, sock, cmd, argp);
3451
6b96018b
AB
3452 case SIOCSARP:
3453 case SIOCGARP:
3454 case SIOCDARP:
6b96018b 3455 case SIOCATMARK:
63ff03ab 3456 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3457 }
3458
6b96018b
AB
3459 return -ENOIOCTLCMD;
3460}
7a229387 3461
95c96174 3462static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3463 unsigned long arg)
89bbfc95
SP
3464{
3465 struct socket *sock = file->private_data;
3466 int ret = -ENOIOCTLCMD;
87de87d5
DM
3467 struct sock *sk;
3468 struct net *net;
3469
3470 sk = sock->sk;
3471 net = sock_net(sk);
89bbfc95
SP
3472
3473 if (sock->ops->compat_ioctl)
3474 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3475
87de87d5
DM
3476 if (ret == -ENOIOCTLCMD &&
3477 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3478 ret = compat_wext_handle_ioctl(net, cmd, arg);
3479
6b96018b
AB
3480 if (ret == -ENOIOCTLCMD)
3481 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3482
89bbfc95
SP
3483 return ret;
3484}
3485#endif
3486
8a3c245c
PT
3487/**
3488 * kernel_bind - bind an address to a socket (kernel space)
3489 * @sock: socket
3490 * @addr: address
3491 * @addrlen: length of address
3492 *
3493 * Returns 0 or an error.
3494 */
3495
ac5a488e
SS
3496int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3497{
3498 return sock->ops->bind(sock, addr, addrlen);
3499}
c6d409cf 3500EXPORT_SYMBOL(kernel_bind);
ac5a488e 3501
8a3c245c
PT
3502/**
3503 * kernel_listen - move socket to listening state (kernel space)
3504 * @sock: socket
3505 * @backlog: pending connections queue size
3506 *
3507 * Returns 0 or an error.
3508 */
3509
ac5a488e
SS
3510int kernel_listen(struct socket *sock, int backlog)
3511{
3512 return sock->ops->listen(sock, backlog);
3513}
c6d409cf 3514EXPORT_SYMBOL(kernel_listen);
ac5a488e 3515
8a3c245c
PT
3516/**
3517 * kernel_accept - accept a connection (kernel space)
3518 * @sock: listening socket
3519 * @newsock: new connected socket
3520 * @flags: flags
3521 *
3522 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3523 * If it fails, @newsock is guaranteed to be %NULL.
3524 * Returns 0 or an error.
3525 */
3526
ac5a488e
SS
3527int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3528{
3529 struct sock *sk = sock->sk;
3530 int err;
3531
3532 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3533 newsock);
3534 if (err < 0)
3535 goto done;
3536
cdfbabfb 3537 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3538 if (err < 0) {
3539 sock_release(*newsock);
fa8705b0 3540 *newsock = NULL;
ac5a488e
SS
3541 goto done;
3542 }
3543
3544 (*newsock)->ops = sock->ops;
1b08534e 3545 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3546
3547done:
3548 return err;
3549}
c6d409cf 3550EXPORT_SYMBOL(kernel_accept);
ac5a488e 3551
8a3c245c
PT
3552/**
3553 * kernel_connect - connect a socket (kernel space)
3554 * @sock: socket
3555 * @addr: address
3556 * @addrlen: address length
3557 * @flags: flags (O_NONBLOCK, ...)
3558 *
3559 * For datagram sockets, @addr is the addres to which datagrams are sent
3560 * by default, and the only address from which datagrams are received.
3561 * For stream sockets, attempts to connect to @addr.
3562 * Returns 0 or an error code.
3563 */
3564
ac5a488e 3565int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3566 int flags)
ac5a488e
SS
3567{
3568 return sock->ops->connect(sock, addr, addrlen, flags);
3569}
c6d409cf 3570EXPORT_SYMBOL(kernel_connect);
ac5a488e 3571
8a3c245c
PT
3572/**
3573 * kernel_getsockname - get the address which the socket is bound (kernel space)
3574 * @sock: socket
3575 * @addr: address holder
3576 *
3577 * Fills the @addr pointer with the address which the socket is bound.
3578 * Returns 0 or an error code.
3579 */
3580
9b2c45d4 3581int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3582{
9b2c45d4 3583 return sock->ops->getname(sock, addr, 0);
ac5a488e 3584}
c6d409cf 3585EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3586
8a3c245c
PT
3587/**
3588 * kernel_peername - get the address which the socket is connected (kernel space)
3589 * @sock: socket
3590 * @addr: address holder
3591 *
3592 * Fills the @addr pointer with the address which the socket is connected.
3593 * Returns 0 or an error code.
3594 */
3595
9b2c45d4 3596int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3597{
9b2c45d4 3598 return sock->ops->getname(sock, addr, 1);
ac5a488e 3599}
c6d409cf 3600EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3601
8a3c245c
PT
3602/**
3603 * kernel_getsockopt - get a socket option (kernel space)
3604 * @sock: socket
3605 * @level: API level (SOL_SOCKET, ...)
3606 * @optname: option tag
3607 * @optval: option value
3608 * @optlen: option length
3609 *
3610 * Assigns the option length to @optlen.
3611 * Returns 0 or an error.
3612 */
3613
ac5a488e
SS
3614int kernel_getsockopt(struct socket *sock, int level, int optname,
3615 char *optval, int *optlen)
3616{
3617 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3618 char __user *uoptval;
3619 int __user *uoptlen;
ac5a488e
SS
3620 int err;
3621
fb8621bb
NK
3622 uoptval = (char __user __force *) optval;
3623 uoptlen = (int __user __force *) optlen;
3624
ac5a488e
SS
3625 set_fs(KERNEL_DS);
3626 if (level == SOL_SOCKET)
fb8621bb 3627 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3628 else
fb8621bb
NK
3629 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3630 uoptlen);
ac5a488e
SS
3631 set_fs(oldfs);
3632 return err;
3633}
c6d409cf 3634EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3635
8a3c245c
PT
3636/**
3637 * kernel_setsockopt - set a socket option (kernel space)
3638 * @sock: socket
3639 * @level: API level (SOL_SOCKET, ...)
3640 * @optname: option tag
3641 * @optval: option value
3642 * @optlen: option length
3643 *
3644 * Returns 0 or an error.
3645 */
3646
ac5a488e 3647int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3648 char *optval, unsigned int optlen)
ac5a488e
SS
3649{
3650 mm_segment_t oldfs = get_fs();
fb8621bb 3651 char __user *uoptval;
ac5a488e
SS
3652 int err;
3653
fb8621bb
NK
3654 uoptval = (char __user __force *) optval;
3655
ac5a488e
SS
3656 set_fs(KERNEL_DS);
3657 if (level == SOL_SOCKET)
fb8621bb 3658 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3659 else
fb8621bb 3660 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3661 optlen);
3662 set_fs(oldfs);
3663 return err;
3664}
c6d409cf 3665EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3666
8a3c245c
PT
3667/**
3668 * kernel_sendpage - send a &page through a socket (kernel space)
3669 * @sock: socket
3670 * @page: page
3671 * @offset: page offset
3672 * @size: total size in bytes
3673 * @flags: flags (MSG_DONTWAIT, ...)
3674 *
3675 * Returns the total amount sent in bytes or an error.
3676 */
3677
ac5a488e
SS
3678int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3679 size_t size, int flags)
3680{
3681 if (sock->ops->sendpage)
3682 return sock->ops->sendpage(sock, page, offset, size, flags);
3683
3684 return sock_no_sendpage(sock, page, offset, size, flags);
3685}
c6d409cf 3686EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3687
8a3c245c
PT
3688/**
3689 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3690 * @sk: sock
3691 * @page: page
3692 * @offset: page offset
3693 * @size: total size in bytes
3694 * @flags: flags (MSG_DONTWAIT, ...)
3695 *
3696 * Returns the total amount sent in bytes or an error.
3697 * Caller must hold @sk.
3698 */
3699
306b13eb
TH
3700int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3701 size_t size, int flags)
3702{
3703 struct socket *sock = sk->sk_socket;
3704
3705 if (sock->ops->sendpage_locked)
3706 return sock->ops->sendpage_locked(sk, page, offset, size,
3707 flags);
3708
3709 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3710}
3711EXPORT_SYMBOL(kernel_sendpage_locked);
3712
8a3c245c
PT
3713/**
3714 * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3715 * @sock: socket
3716 * @how: connection part
3717 *
3718 * Returns 0 or an error.
3719 */
3720
91cf45f0
TM
3721int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3722{
3723 return sock->ops->shutdown(sock, how);
3724}
91cf45f0 3725EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3726
8a3c245c
PT
3727/**
3728 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3729 * @sk: socket
3730 *
3731 * This routine returns the IP overhead imposed by a socket i.e.
3732 * the length of the underlying IP header, depending on whether
3733 * this is an IPv4 or IPv6 socket and the length from IP options turned
3734 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3735 */
8a3c245c 3736
113c3075
P
3737u32 kernel_sock_ip_overhead(struct sock *sk)
3738{
3739 struct inet_sock *inet;
3740 struct ip_options_rcu *opt;
3741 u32 overhead = 0;
113c3075
P
3742#if IS_ENABLED(CONFIG_IPV6)
3743 struct ipv6_pinfo *np;
3744 struct ipv6_txoptions *optv6 = NULL;
3745#endif /* IS_ENABLED(CONFIG_IPV6) */
3746
3747 if (!sk)
3748 return overhead;
3749
113c3075
P
3750 switch (sk->sk_family) {
3751 case AF_INET:
3752 inet = inet_sk(sk);
3753 overhead += sizeof(struct iphdr);
3754 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3755 sock_owned_by_user(sk));
113c3075
P
3756 if (opt)
3757 overhead += opt->opt.optlen;
3758 return overhead;
3759#if IS_ENABLED(CONFIG_IPV6)
3760 case AF_INET6:
3761 np = inet6_sk(sk);
3762 overhead += sizeof(struct ipv6hdr);
3763 if (np)
3764 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3765 sock_owned_by_user(sk));
113c3075
P
3766 if (optv6)
3767 overhead += (optv6->opt_flen + optv6->opt_nflen);
3768 return overhead;
3769#endif /* IS_ENABLED(CONFIG_IPV6) */
3770 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3771 return overhead;
3772 }
3773}
3774EXPORT_SYMBOL(kernel_sock_ip_overhead);