vfs: Convert sockfs to use the new mount API
[linux-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4 81#include <linux/mount.h>
fba9be49 82#include <linux/pseudo_fs.h>
1da177e4
LT
83#include <linux/security.h>
84#include <linux/syscalls.h>
85#include <linux/compat.h>
86#include <linux/kmod.h>
3ec3b2fb 87#include <linux/audit.h>
d86b5e0e 88#include <linux/wireless.h>
1b8d7ae4 89#include <linux/nsproxy.h>
1fd7317d 90#include <linux/magic.h>
5a0e3ad6 91#include <linux/slab.h>
600e1779 92#include <linux/xattr.h>
c8e8cd57 93#include <linux/nospec.h>
8c3c447b 94#include <linux/indirect_call_wrapper.h>
1da177e4 95
7c0f6ba6 96#include <linux/uaccess.h>
1da177e4
LT
97#include <asm/unistd.h>
98
99#include <net/compat.h>
87de87d5 100#include <net/wext.h>
f8451725 101#include <net/cls_cgroup.h>
1da177e4
LT
102
103#include <net/sock.h>
104#include <linux/netfilter.h>
105
6b96018b
AB
106#include <linux/if_tun.h>
107#include <linux/ipv6_route.h>
108#include <linux/route.h>
6b96018b 109#include <linux/sockios.h>
076bb0c8 110#include <net/busy_poll.h>
f24b9be5 111#include <linux/errqueue.h>
06021292 112
8c3c447b
PA
113/* proto_ops for ipv4 and ipv6 use the same {recv,send}msg function */
114#if IS_ENABLED(CONFIG_INET)
115#define INDIRECT_CALL_INET4(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
116#else
117#define INDIRECT_CALL_INET4(f, f1, ...) f(__VA_ARGS__)
118#endif
119
#if defined(CONFIG_NET_RX_BUSY_POLL)
/* Only built when CONFIG_NET_RX_BUSY_POLL is set; both start out as zero. */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif /* CONFIG_NET_RX_BUSY_POLL */
6b96018b 124
8ae5e030
AV
125static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
126static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 127static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
128
129static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
130static __poll_t sock_poll(struct file *file,
131 struct poll_table_struct *wait);
89bddce5 132static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
133#ifdef CONFIG_COMPAT
134static long compat_sock_ioctl(struct file *file,
89bddce5 135 unsigned int cmd, unsigned long arg);
89bbfc95 136#endif
1da177e4 137static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
138static ssize_t sock_sendpage(struct file *file, struct page *page,
139 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 140static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 141 struct pipe_inode_info *pipe, size_t len,
9c55e01c 142 unsigned int flags);
1da177e4 143
1da177e4
LT
144/*
145 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
146 * in the operation structures but are done directly via the socketcall() multiplexor.
147 */
148
da7071d7 149static const struct file_operations socket_file_ops = {
1da177e4
LT
150 .owner = THIS_MODULE,
151 .llseek = no_llseek,
8ae5e030
AV
152 .read_iter = sock_read_iter,
153 .write_iter = sock_write_iter,
1da177e4
LT
154 .poll = sock_poll,
155 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
156#ifdef CONFIG_COMPAT
157 .compat_ioctl = compat_sock_ioctl,
158#endif
1da177e4 159 .mmap = sock_mmap,
1da177e4
LT
160 .release = sock_close,
161 .fasync = sock_fasync,
5274f052
JA
162 .sendpage = sock_sendpage,
163 .splice_write = generic_splice_sendpage,
9c55e01c 164 .splice_read = sock_splice_read,
1da177e4
LT
165};
166
167/*
168 * The protocol list. Each protocol is registered in here.
169 */
170
1da177e4 171static DEFINE_SPINLOCK(net_family_lock);
190683a9 172static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 173
1da177e4 174/*
89bddce5
SH
175 * Support routines.
176 * Move socket addresses back and forth across the kernel/user
177 * divide and look after the messy bits.
1da177e4
LT
178 */
179
1da177e4
LT
180/**
181 * move_addr_to_kernel - copy a socket address into kernel space
182 * @uaddr: Address in user space
183 * @kaddr: Address in kernel space
184 * @ulen: Length in user space
185 *
186 * The address is copied into kernel space. If the provided address is
187 * too long an error code of -EINVAL is returned. If the copy gives
188 * invalid addresses -EFAULT is returned. On a success 0 is returned.
189 */
190
43db362d 191int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 192{
230b1839 193 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 194 return -EINVAL;
89bddce5 195 if (ulen == 0)
1da177e4 196 return 0;
89bddce5 197 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 198 return -EFAULT;
3ec3b2fb 199 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
200}
201
202/**
203 * move_addr_to_user - copy an address to user space
204 * @kaddr: kernel space address
205 * @klen: length of address in kernel
206 * @uaddr: user space address
207 * @ulen: pointer to user length field
208 *
209 * The value pointed to by ulen on entry is the buffer length available.
210 * This is overwritten with the buffer space used. -EINVAL is returned
211 * if an overlong buffer is specified or a negative buffer size. -EFAULT
212 * is returned if either the buffer or the length field are not
213 * accessible.
214 * After copying the data up to the limit the user specifies, the true
215 * length of the data is written over the length limit the user
216 * specified. Zero is returned for a success.
217 */
89bddce5 218
43db362d 219static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 220 void __user *uaddr, int __user *ulen)
1da177e4
LT
221{
222 int err;
223 int len;
224
68c6beb3 225 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
226 err = get_user(len, ulen);
227 if (err)
1da177e4 228 return err;
89bddce5
SH
229 if (len > klen)
230 len = klen;
68c6beb3 231 if (len < 0)
1da177e4 232 return -EINVAL;
89bddce5 233 if (len) {
d6fe3945
SG
234 if (audit_sockaddr(klen, kaddr))
235 return -ENOMEM;
89bddce5 236 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
237 return -EFAULT;
238 }
239 /*
89bddce5
SH
240 * "fromlen shall refer to the value before truncation.."
241 * 1003.1g
1da177e4
LT
242 */
243 return __put_user(klen, ulen);
244}
245
08009a76 246static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
247
248static struct inode *sock_alloc_inode(struct super_block *sb)
249{
250 struct socket_alloc *ei;
eaefd110 251 struct socket_wq *wq;
89bddce5 252
e94b1766 253 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
254 if (!ei)
255 return NULL;
eaefd110
ED
256 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
257 if (!wq) {
43815482
ED
258 kmem_cache_free(sock_inode_cachep, ei);
259 return NULL;
260 }
eaefd110
ED
261 init_waitqueue_head(&wq->wait);
262 wq->fasync_list = NULL;
574aab1e 263 wq->flags = 0;
e6476c21 264 ei->socket.wq = wq;
89bddce5 265
1da177e4
LT
266 ei->socket.state = SS_UNCONNECTED;
267 ei->socket.flags = 0;
268 ei->socket.ops = NULL;
269 ei->socket.sk = NULL;
270 ei->socket.file = NULL;
1da177e4
LT
271
272 return &ei->vfs_inode;
273}
274
275static void sock_destroy_inode(struct inode *inode)
276{
43815482
ED
277 struct socket_alloc *ei;
278
279 ei = container_of(inode, struct socket_alloc, vfs_inode);
e6476c21 280 kfree_rcu(ei->socket.wq, rcu);
43815482 281 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
282}
283
51cc5068 284static void init_once(void *foo)
1da177e4 285{
89bddce5 286 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 287
a35afb83 288 inode_init_once(&ei->vfs_inode);
1da177e4 289}
89bddce5 290
1e911632 291static void init_inodecache(void)
1da177e4
LT
292{
293 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
294 sizeof(struct socket_alloc),
295 0,
296 (SLAB_HWCACHE_ALIGN |
297 SLAB_RECLAIM_ACCOUNT |
5d097056 298 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 299 init_once);
1e911632 300 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
301}
302
b87221de 303static const struct super_operations sockfs_ops = {
c6d409cf
ED
304 .alloc_inode = sock_alloc_inode,
305 .destroy_inode = sock_destroy_inode,
306 .statfs = simple_statfs,
1da177e4
LT
307};
308
c23fbb6b
ED
309/*
310 * sockfs_dname() is called from d_path().
311 */
312static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
313{
314 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 315 d_inode(dentry)->i_ino);
c23fbb6b
ED
316}
317
3ba13d17 318static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 319 .d_dname = sockfs_dname,
1da177e4
LT
320};
321
bba0bd31
AG
322static int sockfs_xattr_get(const struct xattr_handler *handler,
323 struct dentry *dentry, struct inode *inode,
324 const char *suffix, void *value, size_t size)
325{
326 if (value) {
327 if (dentry->d_name.len + 1 > size)
328 return -ERANGE;
329 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
330 }
331 return dentry->d_name.len + 1;
332}
333
334#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
335#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
336#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
337
338static const struct xattr_handler sockfs_xattr_handler = {
339 .name = XATTR_NAME_SOCKPROTONAME,
340 .get = sockfs_xattr_get,
341};
342
4a590153
AG
343static int sockfs_security_xattr_set(const struct xattr_handler *handler,
344 struct dentry *dentry, struct inode *inode,
345 const char *suffix, const void *value,
346 size_t size, int flags)
347{
348 /* Handled by LSM. */
349 return -EAGAIN;
350}
351
352static const struct xattr_handler sockfs_security_xattr_handler = {
353 .prefix = XATTR_SECURITY_PREFIX,
354 .set = sockfs_security_xattr_set,
355};
356
bba0bd31
AG
357static const struct xattr_handler *sockfs_xattr_handlers[] = {
358 &sockfs_xattr_handler,
4a590153 359 &sockfs_security_xattr_handler,
bba0bd31
AG
360 NULL
361};
362
fba9be49 363static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 364{
fba9be49
DH
365 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
366 if (!ctx)
367 return -ENOMEM;
368 ctx->ops = &sockfs_ops;
369 ctx->dops = &sockfs_dentry_operations;
370 ctx->xattr = sockfs_xattr_handlers;
371 return 0;
c74a1cbb
AV
372}
373
374static struct vfsmount *sock_mnt __read_mostly;
375
376static struct file_system_type sock_fs_type = {
377 .name = "sockfs",
fba9be49 378 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
379 .kill_sb = kill_anon_super,
380};
381
1da177e4
LT
382/*
383 * Obtains the first available file descriptor and sets it up for use.
384 *
39d8c1b6
DM
385 * These functions create file structures and map them to fd space
386 * of the current process. On success it returns file descriptor
1da177e4
LT
387 * and file struct implicitly stored in sock->file.
388 * Note that another thread may close file descriptor before we return
389 * from this function. We use the fact that now we do not refer
390 * to socket after mapping. If one day we will need it, this
391 * function will increment ref. count on file by 1.
392 *
393 * In any case returned fd MAY BE not valid!
394 * This race condition is unavoidable
395 * with shared fd spaces, we cannot solve it inside kernel,
396 * but we take care of internal coherence yet.
397 */
398
8a3c245c
PT
399/**
400 * sock_alloc_file - Bind a &socket to a &file
401 * @sock: socket
402 * @flags: file status flags
403 * @dname: protocol name
404 *
405 * Returns the &file bound with @sock, implicitly storing it
406 * in sock->file. If dname is %NULL, sets to "".
407 * On failure the return is a ERR pointer (see linux/err.h).
408 * This function uses GFP_KERNEL internally.
409 */
410
aab174f0 411struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 412{
7cbe66b6 413 struct file *file;
1da177e4 414
d93aa9d8
AV
415 if (!dname)
416 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 417
d93aa9d8
AV
418 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
419 O_RDWR | (flags & O_NONBLOCK),
420 &socket_file_ops);
b5ffe634 421 if (IS_ERR(file)) {
8e1611e2 422 sock_release(sock);
39b65252 423 return file;
cc3808f8
AV
424 }
425
426 sock->file = file;
39d8c1b6 427 file->private_data = sock;
28407630 428 return file;
39d8c1b6 429}
56b31d1c 430EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 431
56b31d1c 432static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
433{
434 struct file *newfile;
28407630 435 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
436 if (unlikely(fd < 0)) {
437 sock_release(sock);
28407630 438 return fd;
ce4bb04c 439 }
39d8c1b6 440
aab174f0 441 newfile = sock_alloc_file(sock, flags, NULL);
28407630 442 if (likely(!IS_ERR(newfile))) {
39d8c1b6 443 fd_install(fd, newfile);
28407630
AV
444 return fd;
445 }
7cbe66b6 446
28407630
AV
447 put_unused_fd(fd);
448 return PTR_ERR(newfile);
1da177e4
LT
449}
450
8a3c245c
PT
451/**
452 * sock_from_file - Return the &socket bound to @file.
453 * @file: file
454 * @err: pointer to an error code return
455 *
456 * On failure returns %NULL and assigns -ENOTSOCK to @err.
457 */
458
406a3c63 459struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 460{
6cb153ca
BL
461 if (file->f_op == &socket_file_ops)
462 return file->private_data; /* set in sock_map_fd */
463
23bb80d2
ED
464 *err = -ENOTSOCK;
465 return NULL;
6cb153ca 466}
406a3c63 467EXPORT_SYMBOL(sock_from_file);
6cb153ca 468
1da177e4 469/**
c6d409cf 470 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
471 * @fd: file handle
472 * @err: pointer to an error code return
473 *
474 * The file handle passed in is locked and the socket it is bound
241c4667 475 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
476 * with a negative errno code and NULL is returned. The function checks
477 * for both invalid handles and passing a handle which is not a socket.
478 *
479 * On a success the socket object pointer is returned.
480 */
481
482struct socket *sockfd_lookup(int fd, int *err)
483{
484 struct file *file;
1da177e4
LT
485 struct socket *sock;
486
89bddce5
SH
487 file = fget(fd);
488 if (!file) {
1da177e4
LT
489 *err = -EBADF;
490 return NULL;
491 }
89bddce5 492
6cb153ca
BL
493 sock = sock_from_file(file, err);
494 if (!sock)
1da177e4 495 fput(file);
6cb153ca
BL
496 return sock;
497}
c6d409cf 498EXPORT_SYMBOL(sockfd_lookup);
1da177e4 499
6cb153ca
BL
500static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
501{
00e188ef 502 struct fd f = fdget(fd);
6cb153ca
BL
503 struct socket *sock;
504
3672558c 505 *err = -EBADF;
00e188ef
AV
506 if (f.file) {
507 sock = sock_from_file(f.file, err);
508 if (likely(sock)) {
509 *fput_needed = f.flags;
6cb153ca 510 return sock;
00e188ef
AV
511 }
512 fdput(f);
1da177e4 513 }
6cb153ca 514 return NULL;
1da177e4
LT
515}
516
600e1779
MY
517static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
518 size_t size)
519{
520 ssize_t len;
521 ssize_t used = 0;
522
c5ef6035 523 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
524 if (len < 0)
525 return len;
526 used += len;
527 if (buffer) {
528 if (size < used)
529 return -ERANGE;
530 buffer += len;
531 }
532
533 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
534 used += len;
535 if (buffer) {
536 if (size < used)
537 return -ERANGE;
538 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
539 buffer += len;
540 }
541
542 return used;
543}
544
dc647ec8 545static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
546{
547 int err = simple_setattr(dentry, iattr);
548
e1a3a60a 549 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
550 struct socket *sock = SOCKET_I(d_inode(dentry));
551
6d8c50dc
CW
552 if (sock->sk)
553 sock->sk->sk_uid = iattr->ia_uid;
554 else
555 err = -ENOENT;
86741ec2
LC
556 }
557
558 return err;
559}
560
600e1779 561static const struct inode_operations sockfs_inode_ops = {
600e1779 562 .listxattr = sockfs_listxattr,
86741ec2 563 .setattr = sockfs_setattr,
600e1779
MY
564};
565
1da177e4 566/**
8a3c245c 567 * sock_alloc - allocate a socket
89bddce5 568 *
1da177e4
LT
569 * Allocate a new inode and socket object. The two are bound together
570 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 571 * NULL is returned. This function uses GFP_KERNEL internally.
1da177e4
LT
572 */
573
f4a00aac 574struct socket *sock_alloc(void)
1da177e4 575{
89bddce5
SH
576 struct inode *inode;
577 struct socket *sock;
1da177e4 578
a209dfc7 579 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
580 if (!inode)
581 return NULL;
582
583 sock = SOCKET_I(inode);
584
85fe4025 585 inode->i_ino = get_next_ino();
89bddce5 586 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
587 inode->i_uid = current_fsuid();
588 inode->i_gid = current_fsgid();
600e1779 589 inode->i_op = &sockfs_inode_ops;
1da177e4 590
1da177e4
LT
591 return sock;
592}
f4a00aac 593EXPORT_SYMBOL(sock_alloc);
1da177e4 594
1da177e4 595/**
8a3c245c 596 * sock_release - close a socket
1da177e4
LT
597 * @sock: socket to close
598 *
599 * The socket is released from the protocol stack if it has a release
600 * callback, and the inode is then released if the socket is bound to
89bddce5 601 * an inode not a file.
1da177e4 602 */
89bddce5 603
6d8c50dc 604static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
605{
606 if (sock->ops) {
607 struct module *owner = sock->ops->owner;
608
6d8c50dc
CW
609 if (inode)
610 inode_lock(inode);
1da177e4 611 sock->ops->release(sock);
ff7b11aa 612 sock->sk = NULL;
6d8c50dc
CW
613 if (inode)
614 inode_unlock(inode);
1da177e4
LT
615 sock->ops = NULL;
616 module_put(owner);
617 }
618
e6476c21 619 if (sock->wq->fasync_list)
3410f22e 620 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 621
1da177e4
LT
622 if (!sock->file) {
623 iput(SOCK_INODE(sock));
624 return;
625 }
89bddce5 626 sock->file = NULL;
1da177e4 627}
6d8c50dc
CW
628
629void sock_release(struct socket *sock)
630{
631 __sock_release(sock, NULL);
632}
c6d409cf 633EXPORT_SYMBOL(sock_release);
1da177e4 634
c14ac945 635void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 636{
140c55d4
ED
637 u8 flags = *tx_flags;
638
c14ac945 639 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
640 flags |= SKBTX_HW_TSTAMP;
641
c14ac945 642 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
643 flags |= SKBTX_SW_TSTAMP;
644
c14ac945 645 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
646 flags |= SKBTX_SCHED_TSTAMP;
647
140c55d4 648 *tx_flags = flags;
20d49473 649}
67cc0d40 650EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 651
8a3c245c
PT
652/**
653 * sock_sendmsg - send a message through @sock
654 * @sock: socket
655 * @msg: message to send
656 *
657 * Sends @msg through @sock, passing through LSM.
658 * Returns the number of bytes sent, or an error code.
659 */
8c3c447b
PA
660INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
661 size_t));
d8725c86 662static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 663{
8c3c447b
PA
664 int ret = INDIRECT_CALL_INET4(sock->ops->sendmsg, inet_sendmsg, sock,
665 msg, msg_data_left(msg));
d8725c86
AV
666 BUG_ON(ret == -EIOCBQUEUED);
667 return ret;
1da177e4
LT
668}
669
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	/* A non-zero result from the security hook short-circuits the send. */
	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 678
8a3c245c
PT
679/**
680 * kernel_sendmsg - send a message through @sock (kernel-space)
681 * @sock: socket
682 * @msg: message header
683 * @vec: kernel vec
684 * @num: vec array length
685 * @size: total message data size
686 *
687 * Builds the message data with @vec and sends it through @sock.
688 * Returns the number of bytes sent, or an error code.
689 */
690
1da177e4
LT
691int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
692 struct kvec *vec, size_t num, size_t size)
693{
aa563d7b 694 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 695 return sock_sendmsg(sock, msg);
1da177e4 696}
c6d409cf 697EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 698
8a3c245c
PT
699/**
700 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
701 * @sk: sock
702 * @msg: message header
703 * @vec: output s/g array
704 * @num: output s/g array length
705 * @size: total message data size
706 *
707 * Builds the message data with @vec and sends it through the socket attached to @sk.
708 * Returns the number of bytes sent, or an error code.
709 * Caller must hold @sk.
710 */
711
306b13eb
TH
712int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
713 struct kvec *vec, size_t num, size_t size)
714{
715 struct socket *sock = sk->sk_socket;
716
717 if (!sock->ops->sendmsg_locked)
db5980d8 718 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 719
aa563d7b 720 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
721
722 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
723}
724EXPORT_SYMBOL(kernel_sendmsg_locked);
725
8605330a
SHY
726static bool skb_is_err_queue(const struct sk_buff *skb)
727{
728 /* pkt_type of skbs enqueued on the error queue are set to
729 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
730 * in recvmsg, since skbs received on a local socket will never
731 * have a pkt_type of PACKET_OUTGOING.
732 */
733 return skb->pkt_type == PACKET_OUTGOING;
734}
735
b50a5c70
ML
736/* On transmit, software and hardware timestamps are returned independently.
737 * As the two skb clones share the hardware timestamp, which may be updated
738 * before the software timestamp is received, a hardware TX timestamp may be
739 * returned only if there is no software TX timestamp. Ignore false software
740 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 741 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
742 * hardware timestamp.
743 */
744static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
745{
746 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
747}
748
aad9c8c4
ML
749static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
750{
751 struct scm_ts_pktinfo ts_pktinfo;
752 struct net_device *orig_dev;
753
754 if (!skb_mac_header_was_set(skb))
755 return;
756
757 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
758
759 rcu_read_lock();
760 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
761 if (orig_dev)
762 ts_pktinfo.if_index = orig_dev->ifindex;
763 rcu_read_unlock();
764
765 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
766 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
767 sizeof(ts_pktinfo), &ts_pktinfo);
768}
769
92f37fd2
ED
770/*
771 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
772 */
773void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
774 struct sk_buff *skb)
775{
20d49473 776 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 777 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
778 struct scm_timestamping_internal tss;
779
b50a5c70 780 int empty = 1, false_tstamp = 0;
20d49473
PO
781 struct skb_shared_hwtstamps *shhwtstamps =
782 skb_hwtstamps(skb);
783
784 /* Race occurred between timestamp enabling and packet
785 receiving. Fill in the current time for now. */
b50a5c70 786 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 787 __net_timestamp(skb);
b50a5c70
ML
788 false_tstamp = 1;
789 }
20d49473
PO
790
791 if (need_software_tstamp) {
792 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
793 if (new_tstamp) {
794 struct __kernel_sock_timeval tv;
795
796 skb_get_new_timestamp(skb, &tv);
797 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
798 sizeof(tv), &tv);
799 } else {
800 struct __kernel_old_timeval tv;
801
802 skb_get_timestamp(skb, &tv);
803 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
804 sizeof(tv), &tv);
805 }
20d49473 806 } else {
887feae3
DD
807 if (new_tstamp) {
808 struct __kernel_timespec ts;
809
810 skb_get_new_timestampns(skb, &ts);
811 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
812 sizeof(ts), &ts);
813 } else {
814 struct timespec ts;
815
816 skb_get_timestampns(skb, &ts);
817 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
818 sizeof(ts), &ts);
819 }
20d49473
PO
820 }
821 }
822
f24b9be5 823 memset(&tss, 0, sizeof(tss));
c199105d 824 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 825 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 826 empty = 0;
4d276eb6 827 if (shhwtstamps &&
b9f40e21 828 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 829 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 830 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 831 empty = 0;
aad9c8c4
ML
832 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
833 !skb_is_err_queue(skb))
834 put_ts_pktinfo(msg, skb);
835 }
1c885808 836 if (!empty) {
9718475e
DD
837 if (sock_flag(sk, SOCK_TSTAMP_NEW))
838 put_cmsg_scm_timestamping64(msg, &tss);
839 else
840 put_cmsg_scm_timestamping(msg, &tss);
1c885808 841
8605330a 842 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 843 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
844 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
845 skb->len, skb->data);
846 }
92f37fd2 847}
7c81fd8b
ACM
848EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
849
6e3e939f
JB
850void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
851 struct sk_buff *skb)
852{
853 int ack;
854
855 if (!sock_flag(sk, SOCK_WIFI_STATUS))
856 return;
857 if (!skb->wifi_acked_valid)
858 return;
859
860 ack = skb->wifi_acked;
861
862 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
863}
864EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
865
11165f14 866static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
867 struct sk_buff *skb)
3b885787 868{
744d5a3e 869 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 870 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 871 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
872}
873
/* Emit timestamp control messages first, then the drop-count message. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 881
8a3c245c
PT
882/**
883 * sock_recvmsg - receive a message from @sock
884 * @sock: socket
885 * @msg: message to receive
886 * @flags: message flags
887 *
888 * Receives @msg from @sock, passing through LSM. Returns the total number
889 * of bytes received, or an error.
890 */
8c3c447b
PA
891INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
892 size_t , int ));
1b784140 893static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 894 int flags)
1da177e4 895{
8c3c447b
PA
896 return INDIRECT_CALL_INET4(sock->ops->recvmsg, inet_recvmsg, sock, msg,
897 msg_data_left(msg), flags);
1da177e4
LT
898}
899
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	/* A non-zero result from the security hook short-circuits the receive. */
	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 907
c1249c0a 908/**
8a3c245c
PT
909 * kernel_recvmsg - Receive a message from a socket (kernel space)
910 * @sock: The socket to receive the message from
911 * @msg: Received message
912 * @vec: Input s/g array for message data
913 * @num: Size of input s/g array
914 * @size: Number of bytes to read
915 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 916 *
8a3c245c
PT
917 * On return the msg structure contains the scatter/gather array passed in the
918 * vec argument. The array is modified so that it consists of the unfilled
919 * portion of the original array.
c1249c0a 920 *
8a3c245c 921 * The returned value is the total number of bytes received, or an error.
c1249c0a 922 */
8a3c245c 923
89bddce5
SH
924int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
925 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
926{
927 mm_segment_t oldfs = get_fs();
928 int result;
929
aa563d7b 930 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 931 set_fs(KERNEL_DS);
2da62906 932 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
933 set_fs(oldfs);
934 return result;
935}
c6d409cf 936EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 937
ce1d4d3e
CH
938static ssize_t sock_sendpage(struct file *file, struct page *page,
939 int offset, size_t size, loff_t *ppos, int more)
1da177e4 940{
1da177e4
LT
941 struct socket *sock;
942 int flags;
943
ce1d4d3e
CH
944 sock = file->private_data;
945
35f9c09f
ED
946 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
947 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
948 flags |= more;
ce1d4d3e 949
e6949583 950 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 951}
1da177e4 952
9c55e01c 953static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 954 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
955 unsigned int flags)
956{
957 struct socket *sock = file->private_data;
958
997b37da 959 if (unlikely(!sock->ops->splice_read))
95506588 960 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 961
9c55e01c
JA
962 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
963}
964
8ae5e030 965static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 966{
6d652330
AV
967 struct file *file = iocb->ki_filp;
968 struct socket *sock = file->private_data;
0345f931 969 struct msghdr msg = {.msg_iter = *to,
970 .msg_iocb = iocb};
8ae5e030 971 ssize_t res;
ce1d4d3e 972
8ae5e030
AV
973 if (file->f_flags & O_NONBLOCK)
974 msg.msg_flags = MSG_DONTWAIT;
975
976 if (iocb->ki_pos != 0)
1da177e4 977 return -ESPIPE;
027445c3 978
66ee59af 979 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
980 return 0;
981
2da62906 982 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
983 *to = msg.msg_iter;
984 return res;
1da177e4
LT
985}
986
8ae5e030 987static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 988{
6d652330
AV
989 struct file *file = iocb->ki_filp;
990 struct socket *sock = file->private_data;
0345f931 991 struct msghdr msg = {.msg_iter = *from,
992 .msg_iocb = iocb};
8ae5e030 993 ssize_t res;
1da177e4 994
8ae5e030 995 if (iocb->ki_pos != 0)
ce1d4d3e 996 return -ESPIPE;
027445c3 997
8ae5e030
AV
998 if (file->f_flags & O_NONBLOCK)
999 msg.msg_flags = MSG_DONTWAIT;
1000
6d652330
AV
1001 if (sock->type == SOCK_SEQPACKET)
1002 msg.msg_flags |= MSG_EOR;
1003
d8725c86 1004 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1005 *from = msg.msg_iter;
1006 return res;
1da177e4
LT
1007}
1008
1da177e4
LT
1009/*
1010 * Atomic setting of ioctl hooks to avoid race
1011 * with module unload.
1012 */
1013
4a3e2f71 1014static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1015static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1016
881d966b 1017void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1018{
4a3e2f71 1019 mutex_lock(&br_ioctl_mutex);
1da177e4 1020 br_ioctl_hook = hook;
4a3e2f71 1021 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1022}
1023EXPORT_SYMBOL(brioctl_set);
1024
4a3e2f71 1025static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1026static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1027
881d966b 1028void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1029{
4a3e2f71 1030 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1031 vlan_ioctl_hook = hook;
4a3e2f71 1032 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1033}
1034EXPORT_SYMBOL(vlan_ioctl_set);
1035
4a3e2f71 1036static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1037static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1038
89bddce5 1039void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1040{
4a3e2f71 1041 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1042 dlci_ioctl_hook = hook;
4a3e2f71 1043 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1044}
1045EXPORT_SYMBOL(dlci_ioctl_set);
1046
6b96018b 1047static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1048 unsigned int cmd, unsigned long arg)
6b96018b
AB
1049{
1050 int err;
1051 void __user *argp = (void __user *)arg;
1052
1053 err = sock->ops->ioctl(sock, cmd, arg);
1054
1055 /*
1056 * If this ioctl is unknown try to hand it down
1057 * to the NIC driver.
1058 */
36fd633e
AV
1059 if (err != -ENOIOCTLCMD)
1060 return err;
6b96018b 1061
36fd633e
AV
1062 if (cmd == SIOCGIFCONF) {
1063 struct ifconf ifc;
1064 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1065 return -EFAULT;
1066 rtnl_lock();
1067 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1068 rtnl_unlock();
1069 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1070 err = -EFAULT;
44c02a2c
AV
1071 } else {
1072 struct ifreq ifr;
1073 bool need_copyout;
63ff03ab 1074 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1075 return -EFAULT;
1076 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1077 if (!err && need_copyout)
63ff03ab 1078 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1079 return -EFAULT;
36fd633e 1080 }
6b96018b
AB
1081 return err;
1082}
1083
1da177e4
LT
1084/*
1085 * With an ioctl, arg may well be a user mode pointer, but we don't know
1086 * what to do with it - that's up to the protocol still.
1087 */
1088
8a3c245c
PT
1089/**
1090 * get_net_ns - increment the refcount of the network namespace
1091 * @ns: common namespace (net)
1092 *
1093 * Returns the net's common namespace.
1094 */
1095
d8d211a2 1096struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1097{
1098 return &get_net(container_of(ns, struct net, ns))->ns;
1099}
d8d211a2 1100EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1101
1da177e4
LT
1102static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1103{
1104 struct socket *sock;
881d966b 1105 struct sock *sk;
1da177e4
LT
1106 void __user *argp = (void __user *)arg;
1107 int pid, err;
881d966b 1108 struct net *net;
1da177e4 1109
b69aee04 1110 sock = file->private_data;
881d966b 1111 sk = sock->sk;
3b1e0a65 1112 net = sock_net(sk);
44c02a2c
AV
1113 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1114 struct ifreq ifr;
1115 bool need_copyout;
1116 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1117 return -EFAULT;
1118 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1119 if (!err && need_copyout)
1120 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1121 return -EFAULT;
1da177e4 1122 } else
3d23e349 1123#ifdef CONFIG_WEXT_CORE
1da177e4 1124 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1125 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1126 } else
3d23e349 1127#endif
89bddce5 1128 switch (cmd) {
1da177e4
LT
1129 case FIOSETOWN:
1130 case SIOCSPGRP:
1131 err = -EFAULT;
1132 if (get_user(pid, (int __user *)argp))
1133 break;
393cc3f5 1134 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1135 break;
1136 case FIOGETOWN:
1137 case SIOCGPGRP:
609d7fa9 1138 err = put_user(f_getown(sock->file),
89bddce5 1139 (int __user *)argp);
1da177e4
LT
1140 break;
1141 case SIOCGIFBR:
1142 case SIOCSIFBR:
1143 case SIOCBRADDBR:
1144 case SIOCBRDELBR:
1145 err = -ENOPKG;
1146 if (!br_ioctl_hook)
1147 request_module("bridge");
1148
4a3e2f71 1149 mutex_lock(&br_ioctl_mutex);
89bddce5 1150 if (br_ioctl_hook)
881d966b 1151 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1152 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1153 break;
1154 case SIOCGIFVLAN:
1155 case SIOCSIFVLAN:
1156 err = -ENOPKG;
1157 if (!vlan_ioctl_hook)
1158 request_module("8021q");
1159
4a3e2f71 1160 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1161 if (vlan_ioctl_hook)
881d966b 1162 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1163 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1164 break;
1da177e4
LT
1165 case SIOCADDDLCI:
1166 case SIOCDELDLCI:
1167 err = -ENOPKG;
1168 if (!dlci_ioctl_hook)
1169 request_module("dlci");
1170
7512cbf6
PE
1171 mutex_lock(&dlci_ioctl_mutex);
1172 if (dlci_ioctl_hook)
1da177e4 1173 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1174 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1175 break;
c62cce2c
AV
1176 case SIOCGSKNS:
1177 err = -EPERM;
1178 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1179 break;
1180
1181 err = open_related_ns(&net->ns, get_net_ns);
1182 break;
0768e170
AB
1183 case SIOCGSTAMP_OLD:
1184 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1185 if (!sock->ops->gettstamp) {
1186 err = -ENOIOCTLCMD;
1187 break;
1188 }
1189 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1190 cmd == SIOCGSTAMP_OLD,
1191 !IS_ENABLED(CONFIG_64BIT));
60747828 1192 break;
0768e170
AB
1193 case SIOCGSTAMP_NEW:
1194 case SIOCGSTAMPNS_NEW:
1195 if (!sock->ops->gettstamp) {
1196 err = -ENOIOCTLCMD;
1197 break;
1198 }
1199 err = sock->ops->gettstamp(sock, argp,
1200 cmd == SIOCGSTAMP_NEW,
1201 false);
c7cbdbf2 1202 break;
1da177e4 1203 default:
63ff03ab 1204 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1205 break;
89bddce5 1206 }
1da177e4
LT
1207 return err;
1208}
1209
8a3c245c
PT
1210/**
1211 * sock_create_lite - creates a socket
1212 * @family: protocol family (AF_INET, ...)
1213 * @type: communication type (SOCK_STREAM, ...)
1214 * @protocol: protocol (0, ...)
1215 * @res: new socket
1216 *
1217 * Creates a new socket and assigns it to @res, passing through LSM.
1218 * The new socket initialization is not complete, see kernel_accept().
1219 * Returns 0 or an error. On failure @res is set to %NULL.
1220 * This function internally uses GFP_KERNEL.
1221 */
1222
1da177e4
LT
1223int sock_create_lite(int family, int type, int protocol, struct socket **res)
1224{
1225 int err;
1226 struct socket *sock = NULL;
89bddce5 1227
1da177e4
LT
1228 err = security_socket_create(family, type, protocol, 1);
1229 if (err)
1230 goto out;
1231
1232 sock = sock_alloc();
1233 if (!sock) {
1234 err = -ENOMEM;
1235 goto out;
1236 }
1237
1da177e4 1238 sock->type = type;
7420ed23
VY
1239 err = security_socket_post_create(sock, family, type, protocol, 1);
1240 if (err)
1241 goto out_release;
1242
1da177e4
LT
1243out:
1244 *res = sock;
1245 return err;
7420ed23
VY
1246out_release:
1247 sock_release(sock);
1248 sock = NULL;
1249 goto out;
1da177e4 1250}
c6d409cf 1251EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1252
1253/* No kernel lock held - perfect */
ade994f4 1254static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1255{
3cafb376 1256 struct socket *sock = file->private_data;
a331de3b 1257 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1258
e88958e6
CH
1259 if (!sock->ops->poll)
1260 return 0;
f641f13b 1261
a331de3b
CH
1262 if (sk_can_busy_loop(sock->sk)) {
1263 /* poll once if requested by the syscall */
1264 if (events & POLL_BUSY_LOOP)
1265 sk_busy_loop(sock->sk, 1);
1266
1267 /* if this socket can poll_ll, tell the system call */
1268 flag = POLL_BUSY_LOOP;
1269 }
1270
1271 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1272}
1273
89bddce5 1274static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1275{
b69aee04 1276 struct socket *sock = file->private_data;
1da177e4
LT
1277
1278 return sock->ops->mmap(file, sock, vma);
1279}
1280
/* Release the socket that lives in @inode; @filp is not used. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1286
1287/*
1288 * Update the socket async list
1289 *
1290 * Fasync_list locking strategy.
1291 *
1292 * 1. fasync_list is modified only under process context socket lock
1293 * i.e. under semaphore.
1294 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1295 * or under socket lock
1da177e4
LT
1296 */
1297
1298static int sock_fasync(int fd, struct file *filp, int on)
1299{
989a2979
ED
1300 struct socket *sock = filp->private_data;
1301 struct sock *sk = sock->sk;
eaefd110 1302 struct socket_wq *wq;
1da177e4 1303
989a2979 1304 if (sk == NULL)
1da177e4 1305 return -EINVAL;
1da177e4
LT
1306
1307 lock_sock(sk);
e6476c21 1308 wq = sock->wq;
eaefd110 1309 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1310
eaefd110 1311 if (!wq->fasync_list)
989a2979
ED
1312 sock_reset_flag(sk, SOCK_FASYNC);
1313 else
bcdce719 1314 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1315
989a2979 1316 release_sock(sk);
1da177e4
LT
1317 return 0;
1318}
1319
ceb5d58b 1320/* This function may be called only under rcu_lock */
1da177e4 1321
ceb5d58b 1322int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1323{
ceb5d58b 1324 if (!wq || !wq->fasync_list)
1da177e4 1325 return -1;
ceb5d58b 1326
89bddce5 1327 switch (how) {
8d8ad9d7 1328 case SOCK_WAKE_WAITD:
ceb5d58b 1329 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1330 break;
1331 goto call_kill;
8d8ad9d7 1332 case SOCK_WAKE_SPACE:
ceb5d58b 1333 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1334 break;
1335 /* fall through */
8d8ad9d7 1336 case SOCK_WAKE_IO:
89bddce5 1337call_kill:
43815482 1338 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1339 break;
8d8ad9d7 1340 case SOCK_WAKE_URG:
43815482 1341 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1342 }
ceb5d58b 1343
1da177e4
LT
1344 return 0;
1345}
c6d409cf 1346EXPORT_SYMBOL(sock_wake_async);
1da177e4 1347
8a3c245c
PT
1348/**
1349 * __sock_create - creates a socket
1350 * @net: net namespace
1351 * @family: protocol family (AF_INET, ...)
1352 * @type: communication type (SOCK_STREAM, ...)
1353 * @protocol: protocol (0, ...)
1354 * @res: new socket
1355 * @kern: boolean for kernel space sockets
1356 *
1357 * Creates a new socket and assigns it to @res, passing through LSM.
1358 * Returns 0 or an error. On failure @res is left unmodified. @kern must
1359 * be set to true if the socket resides in kernel space.
1360 * This function internally uses GFP_KERNEL.
1361 */
1362
721db93a 1363int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1364 struct socket **res, int kern)
1da177e4
LT
1365{
1366 int err;
1367 struct socket *sock;
55737fda 1368 const struct net_proto_family *pf;
1da177e4
LT
1369
1370 /*
89bddce5 1371 * Check protocol is in range
1da177e4
LT
1372 */
1373 if (family < 0 || family >= NPROTO)
1374 return -EAFNOSUPPORT;
1375 if (type < 0 || type >= SOCK_MAX)
1376 return -EINVAL;
1377
1378 /* Compatibility.
1379
1380 This uglymoron is moved from INET layer to here to avoid
1381 deadlock in module load.
1382 */
1383 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1384 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1385 current->comm);
1da177e4
LT
1386 family = PF_PACKET;
1387 }
1388
1389 err = security_socket_create(family, type, protocol, kern);
1390 if (err)
1391 return err;
89bddce5 1392
55737fda
SH
1393 /*
1394 * Allocate the socket and allow the family to set things up. if
1395 * the protocol is 0, the family is instructed to select an appropriate
1396 * default.
1397 */
1398 sock = sock_alloc();
1399 if (!sock) {
e87cc472 1400 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1401 return -ENFILE; /* Not exactly a match, but its the
1402 closest posix thing */
1403 }
1404
1405 sock->type = type;
1406
95a5afca 1407#ifdef CONFIG_MODULES
89bddce5
SH
1408 /* Attempt to load a protocol module if the find failed.
1409 *
1410 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1411 * requested real, full-featured networking support upon configuration.
1412 * Otherwise module support will break!
1413 */
190683a9 1414 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1415 request_module("net-pf-%d", family);
1da177e4
LT
1416#endif
1417
55737fda
SH
1418 rcu_read_lock();
1419 pf = rcu_dereference(net_families[family]);
1420 err = -EAFNOSUPPORT;
1421 if (!pf)
1422 goto out_release;
1da177e4
LT
1423
1424 /*
1425 * We will call the ->create function, that possibly is in a loadable
1426 * module, so we have to bump that loadable module refcnt first.
1427 */
55737fda 1428 if (!try_module_get(pf->owner))
1da177e4
LT
1429 goto out_release;
1430
55737fda
SH
1431 /* Now protected by module ref count */
1432 rcu_read_unlock();
1433
3f378b68 1434 err = pf->create(net, sock, protocol, kern);
55737fda 1435 if (err < 0)
1da177e4 1436 goto out_module_put;
a79af59e 1437
1da177e4
LT
1438 /*
1439 * Now to bump the refcnt of the [loadable] module that owns this
1440 * socket at sock_release time we decrement its refcnt.
1441 */
55737fda
SH
1442 if (!try_module_get(sock->ops->owner))
1443 goto out_module_busy;
1444
1da177e4
LT
1445 /*
1446 * Now that we're done with the ->create function, the [loadable]
1447 * module can have its refcnt decremented
1448 */
55737fda 1449 module_put(pf->owner);
7420ed23
VY
1450 err = security_socket_post_create(sock, family, type, protocol, kern);
1451 if (err)
3b185525 1452 goto out_sock_release;
55737fda 1453 *res = sock;
1da177e4 1454
55737fda
SH
1455 return 0;
1456
1457out_module_busy:
1458 err = -EAFNOSUPPORT;
1da177e4 1459out_module_put:
55737fda
SH
1460 sock->ops = NULL;
1461 module_put(pf->owner);
1462out_sock_release:
1da177e4 1463 sock_release(sock);
55737fda
SH
1464 return err;
1465
1466out_release:
1467 rcu_read_unlock();
1468 goto out_sock_release;
1da177e4 1469}
721db93a 1470EXPORT_SYMBOL(__sock_create);
1da177e4 1471
8a3c245c
PT
1472/**
1473 * sock_create - creates a socket
1474 * @family: protocol family (AF_INET, ...)
1475 * @type: communication type (SOCK_STREAM, ...)
1476 * @protocol: protocol (0, ...)
1477 * @res: new socket
1478 *
1479 * A wrapper around __sock_create().
1480 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1481 */
1482
1da177e4
LT
1483int sock_create(int family, int type, int protocol, struct socket **res)
1484{
1b8d7ae4 1485 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1486}
c6d409cf 1487EXPORT_SYMBOL(sock_create);
1da177e4 1488
8a3c245c
PT
1489/**
1490 * sock_create_kern - creates a socket (kernel space)
1491 * @net: net namespace
1492 * @family: protocol family (AF_INET, ...)
1493 * @type: communication type (SOCK_STREAM, ...)
1494 * @protocol: protocol (0, ...)
1495 * @res: new socket
1496 *
1497 * A wrapper around __sock_create().
1498 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1499 */
1500
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	/* Same as sock_create() but with an explicit @net and kern set to 1. */
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1506
9d6a15c3 1507int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1508{
1509 int retval;
1510 struct socket *sock;
a677a039
UD
1511 int flags;
1512
e38b36f3
UD
1513 /* Check the SOCK_* constants for consistency. */
1514 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1515 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1516 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1517 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1518
a677a039 1519 flags = type & ~SOCK_TYPE_MASK;
77d27200 1520 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1521 return -EINVAL;
1522 type &= SOCK_TYPE_MASK;
1da177e4 1523
aaca0bdc
UD
1524 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1525 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1526
1da177e4
LT
1527 retval = sock_create(family, type, protocol, &sock);
1528 if (retval < 0)
8e1611e2 1529 return retval;
1da177e4 1530
8e1611e2 1531 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1532}
1533
9d6a15c3
DB
1534SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1535{
1536 return __sys_socket(family, type, protocol);
1537}
1538
1da177e4
LT
1539/*
1540 * Create a pair of connected sockets.
1541 */
1542
6debc8d8 1543int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1544{
1545 struct socket *sock1, *sock2;
1546 int fd1, fd2, err;
db349509 1547 struct file *newfile1, *newfile2;
a677a039
UD
1548 int flags;
1549
1550 flags = type & ~SOCK_TYPE_MASK;
77d27200 1551 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1552 return -EINVAL;
1553 type &= SOCK_TYPE_MASK;
1da177e4 1554
aaca0bdc
UD
1555 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1556 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1557
016a266b
AV
1558 /*
1559 * reserve descriptors and make sure we won't fail
1560 * to return them to userland.
1561 */
1562 fd1 = get_unused_fd_flags(flags);
1563 if (unlikely(fd1 < 0))
1564 return fd1;
1565
1566 fd2 = get_unused_fd_flags(flags);
1567 if (unlikely(fd2 < 0)) {
1568 put_unused_fd(fd1);
1569 return fd2;
1570 }
1571
1572 err = put_user(fd1, &usockvec[0]);
1573 if (err)
1574 goto out;
1575
1576 err = put_user(fd2, &usockvec[1]);
1577 if (err)
1578 goto out;
1579
1da177e4
LT
1580 /*
1581 * Obtain the first socket and check if the underlying protocol
1582 * supports the socketpair call.
1583 */
1584
1585 err = sock_create(family, type, protocol, &sock1);
016a266b 1586 if (unlikely(err < 0))
1da177e4
LT
1587 goto out;
1588
1589 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1590 if (unlikely(err < 0)) {
1591 sock_release(sock1);
1592 goto out;
bf3c23d1 1593 }
d73aa286 1594
d47cd945
DH
1595 err = security_socket_socketpair(sock1, sock2);
1596 if (unlikely(err)) {
1597 sock_release(sock2);
1598 sock_release(sock1);
1599 goto out;
1600 }
1601
016a266b
AV
1602 err = sock1->ops->socketpair(sock1, sock2);
1603 if (unlikely(err < 0)) {
1604 sock_release(sock2);
1605 sock_release(sock1);
1606 goto out;
28407630
AV
1607 }
1608
aab174f0 1609 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1610 if (IS_ERR(newfile1)) {
28407630 1611 err = PTR_ERR(newfile1);
016a266b
AV
1612 sock_release(sock2);
1613 goto out;
28407630
AV
1614 }
1615
aab174f0 1616 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1617 if (IS_ERR(newfile2)) {
1618 err = PTR_ERR(newfile2);
016a266b
AV
1619 fput(newfile1);
1620 goto out;
db349509
AV
1621 }
1622
157cf649 1623 audit_fd_pair(fd1, fd2);
d73aa286 1624
db349509
AV
1625 fd_install(fd1, newfile1);
1626 fd_install(fd2, newfile2);
d73aa286 1627 return 0;
1da177e4 1628
016a266b 1629out:
d73aa286 1630 put_unused_fd(fd2);
d73aa286 1631 put_unused_fd(fd1);
1da177e4
LT
1632 return err;
1633}
1634
6debc8d8
DB
1635SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1636 int __user *, usockvec)
1637{
1638 return __sys_socketpair(family, type, protocol, usockvec);
1639}
1640
1da177e4
LT
1641/*
1642 * Bind a name to a socket. Nothing much to do here since it's
1643 * the protocol's responsibility to handle the local address.
1644 *
1645 * We move the socket address to kernel space before we call
1646 * the protocol layer (having also checked the address is ok).
1647 */
1648
a87d35d8 1649int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1650{
1651 struct socket *sock;
230b1839 1652 struct sockaddr_storage address;
6cb153ca 1653 int err, fput_needed;
1da177e4 1654
89bddce5 1655 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1656 if (sock) {
43db362d 1657 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1658 if (!err) {
89bddce5 1659 err = security_socket_bind(sock,
230b1839 1660 (struct sockaddr *)&address,
89bddce5 1661 addrlen);
6cb153ca
BL
1662 if (!err)
1663 err = sock->ops->bind(sock,
89bddce5 1664 (struct sockaddr *)
230b1839 1665 &address, addrlen);
1da177e4 1666 }
6cb153ca 1667 fput_light(sock->file, fput_needed);
89bddce5 1668 }
1da177e4
LT
1669 return err;
1670}
1671
a87d35d8
DB
1672SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1673{
1674 return __sys_bind(fd, umyaddr, addrlen);
1675}
1676
1da177e4
LT
1677/*
1678 * Perform a listen. Basically, we allow the protocol to do anything
1679 * necessary for a listen, and if that works, we mark the socket as
1680 * ready for listening.
1681 */
1682
25e290ee 1683int __sys_listen(int fd, int backlog)
1da177e4
LT
1684{
1685 struct socket *sock;
6cb153ca 1686 int err, fput_needed;
b8e1f9b5 1687 int somaxconn;
89bddce5
SH
1688
1689 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1690 if (sock) {
8efa6e93 1691 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1692 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1693 backlog = somaxconn;
1da177e4
LT
1694
1695 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1696 if (!err)
1697 err = sock->ops->listen(sock, backlog);
1da177e4 1698
6cb153ca 1699 fput_light(sock->file, fput_needed);
1da177e4
LT
1700 }
1701 return err;
1702}
1703
25e290ee
DB
1704SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1705{
1706 return __sys_listen(fd, backlog);
1707}
1708
1da177e4
LT
1709/*
1710 * For accept, we attempt to create a new socket, set up the link
1711 * with the client, wake up the client, then return the new
1712 * connected fd. We collect the address of the connector in kernel
1713 * space and move it to user at the very end. This is unclean because
1714 * we open the socket then return an error.
1715 *
1716 * 1003.1g adds the ability to recvmsg() to query connection pending
1717 * status to recvmsg. We need to add that support in a way that's
b903036a 1718 * clean when we restructure accept also.
1da177e4
LT
1719 */
1720
4541e805
DB
1721int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1722 int __user *upeer_addrlen, int flags)
1da177e4
LT
1723{
1724 struct socket *sock, *newsock;
39d8c1b6 1725 struct file *newfile;
6cb153ca 1726 int err, len, newfd, fput_needed;
230b1839 1727 struct sockaddr_storage address;
1da177e4 1728
77d27200 1729 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1730 return -EINVAL;
1731
1732 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1733 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1734
6cb153ca 1735 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1736 if (!sock)
1737 goto out;
1738
1739 err = -ENFILE;
c6d409cf
ED
1740 newsock = sock_alloc();
1741 if (!newsock)
1da177e4
LT
1742 goto out_put;
1743
1744 newsock->type = sock->type;
1745 newsock->ops = sock->ops;
1746
1da177e4
LT
1747 /*
1748 * We don't need try_module_get here, as the listening socket (sock)
1749 * has the protocol module (sock->ops->owner) held.
1750 */
1751 __module_get(newsock->ops->owner);
1752
28407630 1753 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1754 if (unlikely(newfd < 0)) {
1755 err = newfd;
9a1875e6
DM
1756 sock_release(newsock);
1757 goto out_put;
39d8c1b6 1758 }
aab174f0 1759 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1760 if (IS_ERR(newfile)) {
28407630
AV
1761 err = PTR_ERR(newfile);
1762 put_unused_fd(newfd);
28407630
AV
1763 goto out_put;
1764 }
39d8c1b6 1765
a79af59e
FF
1766 err = security_socket_accept(sock, newsock);
1767 if (err)
39d8c1b6 1768 goto out_fd;
a79af59e 1769
cdfbabfb 1770 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1771 if (err < 0)
39d8c1b6 1772 goto out_fd;
1da177e4
LT
1773
1774 if (upeer_sockaddr) {
9b2c45d4
DV
1775 len = newsock->ops->getname(newsock,
1776 (struct sockaddr *)&address, 2);
1777 if (len < 0) {
1da177e4 1778 err = -ECONNABORTED;
39d8c1b6 1779 goto out_fd;
1da177e4 1780 }
43db362d 1781 err = move_addr_to_user(&address,
230b1839 1782 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1783 if (err < 0)
39d8c1b6 1784 goto out_fd;
1da177e4
LT
1785 }
1786
1787 /* File flags are not inherited via accept() unlike another OSes. */
1788
39d8c1b6
DM
1789 fd_install(newfd, newfile);
1790 err = newfd;
1da177e4 1791
1da177e4 1792out_put:
6cb153ca 1793 fput_light(sock->file, fput_needed);
1da177e4
LT
1794out:
1795 return err;
39d8c1b6 1796out_fd:
9606a216 1797 fput(newfile);
39d8c1b6 1798 put_unused_fd(newfd);
1da177e4
LT
1799 goto out_put;
1800}
1801
4541e805
DB
1802SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1803 int __user *, upeer_addrlen, int, flags)
1804{
1805 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1806}
1807
20f37034
HC
1808SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1809 int __user *, upeer_addrlen)
aaca0bdc 1810{
4541e805 1811 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1812}
1813
1da177e4
LT
1814/*
1815 * Attempt to connect to a socket with the server address. The address
1816 * is in user space so we verify it is OK and move it to kernel space.
1817 *
1818 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1819 * break bindings
1820 *
1821 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1822 * other SEQPACKET protocols that take time to connect() as it doesn't
1823 * include the -EINPROGRESS status for such sockets.
1824 */
1825
1387c2c2 1826int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1827{
1828 struct socket *sock;
230b1839 1829 struct sockaddr_storage address;
6cb153ca 1830 int err, fput_needed;
1da177e4 1831
6cb153ca 1832 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1833 if (!sock)
1834 goto out;
43db362d 1835 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1836 if (err < 0)
1837 goto out_put;
1838
89bddce5 1839 err =
230b1839 1840 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1841 if (err)
1842 goto out_put;
1843
230b1839 1844 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1845 sock->file->f_flags);
1846out_put:
6cb153ca 1847 fput_light(sock->file, fput_needed);
1da177e4
LT
1848out:
1849 return err;
1850}
1851
1387c2c2
DB
1852SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1853 int, addrlen)
1854{
1855 return __sys_connect(fd, uservaddr, addrlen);
1856}
1857
1da177e4
LT
1858/*
1859 * Get the local address ('name') of a socket object. Move the obtained
1860 * name to user space.
1861 */
1862
8882a107
DB
1863int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1864 int __user *usockaddr_len)
1da177e4
LT
1865{
1866 struct socket *sock;
230b1839 1867 struct sockaddr_storage address;
9b2c45d4 1868 int err, fput_needed;
89bddce5 1869
6cb153ca 1870 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1871 if (!sock)
1872 goto out;
1873
1874 err = security_socket_getsockname(sock);
1875 if (err)
1876 goto out_put;
1877
9b2c45d4
DV
1878 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1879 if (err < 0)
1da177e4 1880 goto out_put;
9b2c45d4
DV
1881 /* "err" is actually length in this case */
1882 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1883
1884out_put:
6cb153ca 1885 fput_light(sock->file, fput_needed);
1da177e4
LT
1886out:
1887 return err;
1888}
1889
8882a107
DB
1890SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1891 int __user *, usockaddr_len)
1892{
1893 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1894}
1895
1da177e4
LT
1896/*
1897 * Get the remote address ('name') of a socket object. Move the obtained
1898 * name to user space.
1899 */
1900
b21c8f83
DB
1901int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1902 int __user *usockaddr_len)
1da177e4
LT
1903{
1904 struct socket *sock;
230b1839 1905 struct sockaddr_storage address;
9b2c45d4 1906 int err, fput_needed;
1da177e4 1907
89bddce5
SH
1908 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1909 if (sock != NULL) {
1da177e4
LT
1910 err = security_socket_getpeername(sock);
1911 if (err) {
6cb153ca 1912 fput_light(sock->file, fput_needed);
1da177e4
LT
1913 return err;
1914 }
1915
9b2c45d4
DV
1916 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1917 if (err >= 0)
1918 /* "err" is actually length in this case */
1919 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1920 usockaddr_len);
6cb153ca 1921 fput_light(sock->file, fput_needed);
1da177e4
LT
1922 }
1923 return err;
1924}
1925
b21c8f83
DB
1926SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1927 int __user *, usockaddr_len)
1928{
1929 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1930}
1931
1da177e4
LT
1932/*
1933 * Send a datagram to a given address. We move the address into kernel
1934 * space and check the user space data area is readable before invoking
1935 * the protocol.
1936 */
211b634b
DB
1937int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1938 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1939{
1940 struct socket *sock;
230b1839 1941 struct sockaddr_storage address;
1da177e4
LT
1942 int err;
1943 struct msghdr msg;
1944 struct iovec iov;
6cb153ca 1945 int fput_needed;
6cb153ca 1946
602bd0e9
AV
1947 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1948 if (unlikely(err))
1949 return err;
de0fa95c
PE
1950 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1951 if (!sock)
4387ff75 1952 goto out;
6cb153ca 1953
89bddce5 1954 msg.msg_name = NULL;
89bddce5
SH
1955 msg.msg_control = NULL;
1956 msg.msg_controllen = 0;
1957 msg.msg_namelen = 0;
6cb153ca 1958 if (addr) {
43db362d 1959 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1960 if (err < 0)
1961 goto out_put;
230b1839 1962 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1963 msg.msg_namelen = addr_len;
1da177e4
LT
1964 }
1965 if (sock->file->f_flags & O_NONBLOCK)
1966 flags |= MSG_DONTWAIT;
1967 msg.msg_flags = flags;
d8725c86 1968 err = sock_sendmsg(sock, &msg);
1da177e4 1969
89bddce5 1970out_put:
de0fa95c 1971 fput_light(sock->file, fput_needed);
4387ff75 1972out:
1da177e4
LT
1973 return err;
1974}
1975
211b634b
DB
1976SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1977 unsigned int, flags, struct sockaddr __user *, addr,
1978 int, addr_len)
1979{
1980 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1981}
1982
1da177e4 1983/*
89bddce5 1984 * Send a datagram down a socket.
1da177e4
LT
1985 */
1986
3e0fa65f 1987SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1988 unsigned int, flags)
1da177e4 1989{
211b634b 1990 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1991}
1992
1993/*
89bddce5 1994 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1995 * sender. We verify the buffers are writable and if needed move the
1996 * sender address from kernel to user space.
1997 */
7a09e1eb
DB
1998int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
1999 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2000{
2001 struct socket *sock;
2002 struct iovec iov;
2003 struct msghdr msg;
230b1839 2004 struct sockaddr_storage address;
89bddce5 2005 int err, err2;
6cb153ca
BL
2006 int fput_needed;
2007
602bd0e9
AV
2008 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2009 if (unlikely(err))
2010 return err;
de0fa95c 2011 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2012 if (!sock)
de0fa95c 2013 goto out;
1da177e4 2014
89bddce5
SH
2015 msg.msg_control = NULL;
2016 msg.msg_controllen = 0;
f3d33426
HFS
2017 /* Save some cycles and don't copy the address if not needed */
2018 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2019 /* We assume all kernel code knows the size of sockaddr_storage */
2020 msg.msg_namelen = 0;
130ed5d1 2021 msg.msg_iocb = NULL;
9f138fa6 2022 msg.msg_flags = 0;
1da177e4
LT
2023 if (sock->file->f_flags & O_NONBLOCK)
2024 flags |= MSG_DONTWAIT;
2da62906 2025 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2026
89bddce5 2027 if (err >= 0 && addr != NULL) {
43db362d 2028 err2 = move_addr_to_user(&address,
230b1839 2029 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2030 if (err2 < 0)
2031 err = err2;
1da177e4 2032 }
de0fa95c
PE
2033
2034 fput_light(sock->file, fput_needed);
4387ff75 2035out:
1da177e4
LT
2036 return err;
2037}
2038
7a09e1eb
DB
2039SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2040 unsigned int, flags, struct sockaddr __user *, addr,
2041 int __user *, addr_len)
2042{
2043 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2044}
2045
1da177e4 2046/*
89bddce5 2047 * Receive a datagram from a socket.
1da177e4
LT
2048 */
2049
b7c0ddf5
JG
2050SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2051 unsigned int, flags)
1da177e4 2052{
7a09e1eb 2053 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2054}
2055
2056/*
2057 * Set a socket option. Because we don't know the option lengths we have
2058 * to pass the user mode parameter for the protocols to sort out.
2059 */
2060
cc36dca0
DB
2061static int __sys_setsockopt(int fd, int level, int optname,
2062 char __user *optval, int optlen)
1da177e4 2063{
6cb153ca 2064 int err, fput_needed;
1da177e4
LT
2065 struct socket *sock;
2066
2067 if (optlen < 0)
2068 return -EINVAL;
89bddce5
SH
2069
2070 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2071 if (sock != NULL) {
2072 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2073 if (err)
2074 goto out_put;
1da177e4
LT
2075
2076 if (level == SOL_SOCKET)
89bddce5
SH
2077 err =
2078 sock_setsockopt(sock, level, optname, optval,
2079 optlen);
1da177e4 2080 else
89bddce5
SH
2081 err =
2082 sock->ops->setsockopt(sock, level, optname, optval,
2083 optlen);
6cb153ca
BL
2084out_put:
2085 fput_light(sock->file, fput_needed);
1da177e4
LT
2086 }
2087 return err;
2088}
2089
cc36dca0
DB
2090SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2091 char __user *, optval, int, optlen)
2092{
2093 return __sys_setsockopt(fd, level, optname, optval, optlen);
2094}
2095
1da177e4
LT
2096/*
2097 * Get a socket option. Because we don't know the option lengths we have
2098 * to pass a user mode parameter for the protocols to sort out.
2099 */
2100
13a2d70e
DB
2101static int __sys_getsockopt(int fd, int level, int optname,
2102 char __user *optval, int __user *optlen)
1da177e4 2103{
6cb153ca 2104 int err, fput_needed;
1da177e4
LT
2105 struct socket *sock;
2106
89bddce5
SH
2107 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2108 if (sock != NULL) {
6cb153ca
BL
2109 err = security_socket_getsockopt(sock, level, optname);
2110 if (err)
2111 goto out_put;
1da177e4
LT
2112
2113 if (level == SOL_SOCKET)
89bddce5
SH
2114 err =
2115 sock_getsockopt(sock, level, optname, optval,
2116 optlen);
1da177e4 2117 else
89bddce5
SH
2118 err =
2119 sock->ops->getsockopt(sock, level, optname, optval,
2120 optlen);
6cb153ca
BL
2121out_put:
2122 fput_light(sock->file, fput_needed);
1da177e4
LT
2123 }
2124 return err;
2125}
2126
13a2d70e
DB
2127SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2128 char __user *, optval, int __user *, optlen)
2129{
2130 return __sys_getsockopt(fd, level, optname, optval, optlen);
2131}
2132
1da177e4
LT
2133/*
2134 * Shutdown a socket.
2135 */
2136
005a1aea 2137int __sys_shutdown(int fd, int how)
1da177e4 2138{
6cb153ca 2139 int err, fput_needed;
1da177e4
LT
2140 struct socket *sock;
2141
89bddce5
SH
2142 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2143 if (sock != NULL) {
1da177e4 2144 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2145 if (!err)
2146 err = sock->ops->shutdown(sock, how);
2147 fput_light(sock->file, fput_needed);
1da177e4
LT
2148 }
2149 return err;
2150}
2151
005a1aea
DB
2152SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2153{
2154 return __sys_shutdown(fd, how);
2155}
2156
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * These macros are not self-contained: the expansion site must have a
 * variable named `flags` in scope, plus a pointer named <msg>_compat next
 * to <msg> itself (e.g. `msg` and `msg_compat`).
 */
#define COMPAT_MSG(msg, member)					\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2163
c71d8ebe
TH
2164struct used_address {
2165 struct sockaddr_storage name;
2166 unsigned int name_len;
2167};
2168
da184284
AV
2169static int copy_msghdr_from_user(struct msghdr *kmsg,
2170 struct user_msghdr __user *umsg,
2171 struct sockaddr __user **save_addr,
2172 struct iovec **iov)
1661bf36 2173{
ffb07550 2174 struct user_msghdr msg;
08adb7da
AV
2175 ssize_t err;
2176
ffb07550 2177 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2178 return -EFAULT;
dbb490b9 2179
864d9664 2180 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2181 kmsg->msg_controllen = msg.msg_controllen;
2182 kmsg->msg_flags = msg.msg_flags;
2183
2184 kmsg->msg_namelen = msg.msg_namelen;
2185 if (!msg.msg_name)
6a2a2b3a
AS
2186 kmsg->msg_namelen = 0;
2187
dbb490b9
ML
2188 if (kmsg->msg_namelen < 0)
2189 return -EINVAL;
2190
1661bf36 2191 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2192 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2193
2194 if (save_addr)
ffb07550 2195 *save_addr = msg.msg_name;
08adb7da 2196
ffb07550 2197 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2198 if (!save_addr) {
864d9664
PA
2199 err = move_addr_to_kernel(msg.msg_name,
2200 kmsg->msg_namelen,
08adb7da
AV
2201 kmsg->msg_name);
2202 if (err < 0)
2203 return err;
2204 }
2205 } else {
2206 kmsg->msg_name = NULL;
2207 kmsg->msg_namelen = 0;
2208 }
2209
ffb07550 2210 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2211 return -EMSGSIZE;
2212
0345f931 2213 kmsg->msg_iocb = NULL;
2214
ffb07550
AV
2215 return import_iovec(save_addr ? READ : WRITE,
2216 msg.msg_iov, msg.msg_iovlen,
da184284 2217 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
2218}
2219
666547ff 2220static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2221 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2222 struct used_address *used_address,
2223 unsigned int allowed_msghdr_flags)
1da177e4 2224{
89bddce5
SH
2225 struct compat_msghdr __user *msg_compat =
2226 (struct compat_msghdr __user *)msg;
230b1839 2227 struct sockaddr_storage address;
1da177e4 2228 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2229 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2230 __aligned(sizeof(__kernel_size_t));
89bddce5 2231 /* 20 is size of ipv6_pktinfo */
1da177e4 2232 unsigned char *ctl_buf = ctl;
d8725c86 2233 int ctl_len;
08adb7da 2234 ssize_t err;
89bddce5 2235
08adb7da 2236 msg_sys->msg_name = &address;
1da177e4 2237
08449320 2238 if (MSG_CMSG_COMPAT & flags)
08adb7da 2239 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2240 else
08adb7da 2241 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2242 if (err < 0)
da184284 2243 return err;
1da177e4
LT
2244
2245 err = -ENOBUFS;
2246
228e548e 2247 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2248 goto out_freeiov;
28a94d8f 2249 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2250 ctl_len = msg_sys->msg_controllen;
1da177e4 2251 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2252 err =
228e548e 2253 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2254 sizeof(ctl));
1da177e4
LT
2255 if (err)
2256 goto out_freeiov;
228e548e
AB
2257 ctl_buf = msg_sys->msg_control;
2258 ctl_len = msg_sys->msg_controllen;
1da177e4 2259 } else if (ctl_len) {
ac4340fc
DM
2260 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2261 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2262 if (ctl_len > sizeof(ctl)) {
1da177e4 2263 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2264 if (ctl_buf == NULL)
1da177e4
LT
2265 goto out_freeiov;
2266 }
2267 err = -EFAULT;
2268 /*
228e548e 2269 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2270 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2271 * checking falls down on this.
2272 */
fb8621bb 2273 if (copy_from_user(ctl_buf,
228e548e 2274 (void __user __force *)msg_sys->msg_control,
89bddce5 2275 ctl_len))
1da177e4 2276 goto out_freectl;
228e548e 2277 msg_sys->msg_control = ctl_buf;
1da177e4 2278 }
228e548e 2279 msg_sys->msg_flags = flags;
1da177e4
LT
2280
2281 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2282 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2283 /*
2284 * If this is sendmmsg() and current destination address is same as
2285 * previously succeeded address, omit asking LSM's decision.
2286 * used_address->name_len is initialized to UINT_MAX so that the first
2287 * destination address never matches.
2288 */
bc909d9d
MD
2289 if (used_address && msg_sys->msg_name &&
2290 used_address->name_len == msg_sys->msg_namelen &&
2291 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2292 used_address->name_len)) {
d8725c86 2293 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2294 goto out_freectl;
2295 }
d8725c86 2296 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2297 /*
2298 * If this is sendmmsg() and sending to current destination address was
2299 * successful, remember it.
2300 */
2301 if (used_address && err >= 0) {
2302 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2303 if (msg_sys->msg_name)
2304 memcpy(&used_address->name, msg_sys->msg_name,
2305 used_address->name_len);
c71d8ebe 2306 }
1da177e4
LT
2307
2308out_freectl:
89bddce5 2309 if (ctl_buf != ctl)
1da177e4
LT
2310 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2311out_freeiov:
da184284 2312 kfree(iov);
228e548e
AB
2313 return err;
2314}
2315
2316/*
2317 * BSD sendmsg interface
2318 */
2319
e1834a32
DB
2320long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2321 bool forbid_cmsg_compat)
228e548e
AB
2322{
2323 int fput_needed, err;
2324 struct msghdr msg_sys;
1be374a0
AL
2325 struct socket *sock;
2326
e1834a32
DB
2327 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2328 return -EINVAL;
2329
1be374a0 2330 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2331 if (!sock)
2332 goto out;
2333
28a94d8f 2334 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2335
6cb153ca 2336 fput_light(sock->file, fput_needed);
89bddce5 2337out:
1da177e4
LT
2338 return err;
2339}
2340
666547ff 2341SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2342{
e1834a32 2343 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2344}
2345
228e548e
AB
2346/*
2347 * Linux sendmmsg interface
2348 */
2349
2350int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2351 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2352{
2353 int fput_needed, err, datagrams;
2354 struct socket *sock;
2355 struct mmsghdr __user *entry;
2356 struct compat_mmsghdr __user *compat_entry;
2357 struct msghdr msg_sys;
c71d8ebe 2358 struct used_address used_address;
f092276d 2359 unsigned int oflags = flags;
228e548e 2360
e1834a32
DB
2361 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2362 return -EINVAL;
2363
98382f41
AB
2364 if (vlen > UIO_MAXIOV)
2365 vlen = UIO_MAXIOV;
228e548e
AB
2366
2367 datagrams = 0;
2368
2369 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2370 if (!sock)
2371 return err;
2372
c71d8ebe 2373 used_address.name_len = UINT_MAX;
228e548e
AB
2374 entry = mmsg;
2375 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2376 err = 0;
f092276d 2377 flags |= MSG_BATCH;
228e548e
AB
2378
2379 while (datagrams < vlen) {
f092276d
TH
2380 if (datagrams == vlen - 1)
2381 flags = oflags;
2382
228e548e 2383 if (MSG_CMSG_COMPAT & flags) {
666547ff 2384 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2385 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2386 if (err < 0)
2387 break;
2388 err = __put_user(err, &compat_entry->msg_len);
2389 ++compat_entry;
2390 } else {
a7526eb5 2391 err = ___sys_sendmsg(sock,
666547ff 2392 (struct user_msghdr __user *)entry,
28a94d8f 2393 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2394 if (err < 0)
2395 break;
2396 err = put_user(err, &entry->msg_len);
2397 ++entry;
2398 }
2399
2400 if (err)
2401 break;
2402 ++datagrams;
3023898b
SHY
2403 if (msg_data_left(&msg_sys))
2404 break;
a78cb84c 2405 cond_resched();
228e548e
AB
2406 }
2407
228e548e
AB
2408 fput_light(sock->file, fput_needed);
2409
728ffb86
AB
2410 /* We only return an error if no datagrams were able to be sent */
2411 if (datagrams != 0)
228e548e
AB
2412 return datagrams;
2413
228e548e
AB
2414 return err;
2415}
2416
2417SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2418 unsigned int, vlen, unsigned int, flags)
2419{
e1834a32 2420 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2421}
2422
666547ff 2423static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2424 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2425{
89bddce5
SH
2426 struct compat_msghdr __user *msg_compat =
2427 (struct compat_msghdr __user *)msg;
1da177e4 2428 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2429 struct iovec *iov = iovstack;
1da177e4 2430 unsigned long cmsg_ptr;
2da62906 2431 int len;
08adb7da 2432 ssize_t err;
1da177e4
LT
2433
2434 /* kernel mode address */
230b1839 2435 struct sockaddr_storage addr;
1da177e4
LT
2436
2437 /* user mode address pointers */
2438 struct sockaddr __user *uaddr;
08adb7da 2439 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2440
08adb7da 2441 msg_sys->msg_name = &addr;
1da177e4 2442
f3d33426 2443 if (MSG_CMSG_COMPAT & flags)
08adb7da 2444 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2445 else
08adb7da 2446 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2447 if (err < 0)
da184284 2448 return err;
1da177e4 2449
a2e27255
ACM
2450 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2451 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2452
f3d33426
HFS
2453 /* We assume all kernel code knows the size of sockaddr_storage */
2454 msg_sys->msg_namelen = 0;
2455
1da177e4
LT
2456 if (sock->file->f_flags & O_NONBLOCK)
2457 flags |= MSG_DONTWAIT;
2da62906 2458 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2459 if (err < 0)
2460 goto out_freeiov;
2461 len = err;
2462
2463 if (uaddr != NULL) {
43db362d 2464 err = move_addr_to_user(&addr,
a2e27255 2465 msg_sys->msg_namelen, uaddr,
89bddce5 2466 uaddr_len);
1da177e4
LT
2467 if (err < 0)
2468 goto out_freeiov;
2469 }
a2e27255 2470 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2471 COMPAT_FLAGS(msg));
1da177e4
LT
2472 if (err)
2473 goto out_freeiov;
2474 if (MSG_CMSG_COMPAT & flags)
a2e27255 2475 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2476 &msg_compat->msg_controllen);
2477 else
a2e27255 2478 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2479 &msg->msg_controllen);
2480 if (err)
2481 goto out_freeiov;
2482 err = len;
2483
2484out_freeiov:
da184284 2485 kfree(iov);
a2e27255
ACM
2486 return err;
2487}
2488
2489/*
2490 * BSD recvmsg interface
2491 */
2492
e1834a32
DB
2493long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2494 bool forbid_cmsg_compat)
a2e27255
ACM
2495{
2496 int fput_needed, err;
2497 struct msghdr msg_sys;
1be374a0
AL
2498 struct socket *sock;
2499
e1834a32
DB
2500 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2501 return -EINVAL;
2502
1be374a0 2503 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2504 if (!sock)
2505 goto out;
2506
a7526eb5 2507 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2508
6cb153ca 2509 fput_light(sock->file, fput_needed);
1da177e4
LT
2510out:
2511 return err;
2512}
2513
666547ff 2514SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2515 unsigned int, flags)
2516{
e1834a32 2517 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2518}
2519
a2e27255
ACM
2520/*
2521 * Linux recvmmsg interface
2522 */
2523
e11d4284
AB
2524static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2525 unsigned int vlen, unsigned int flags,
2526 struct timespec64 *timeout)
a2e27255
ACM
2527{
2528 int fput_needed, err, datagrams;
2529 struct socket *sock;
2530 struct mmsghdr __user *entry;
d7256d0e 2531 struct compat_mmsghdr __user *compat_entry;
a2e27255 2532 struct msghdr msg_sys;
766b9f92
DD
2533 struct timespec64 end_time;
2534 struct timespec64 timeout64;
a2e27255
ACM
2535
2536 if (timeout &&
2537 poll_select_set_timeout(&end_time, timeout->tv_sec,
2538 timeout->tv_nsec))
2539 return -EINVAL;
2540
2541 datagrams = 0;
2542
2543 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2544 if (!sock)
2545 return err;
2546
7797dc41
SHY
2547 if (likely(!(flags & MSG_ERRQUEUE))) {
2548 err = sock_error(sock->sk);
2549 if (err) {
2550 datagrams = err;
2551 goto out_put;
2552 }
e623a9e9 2553 }
a2e27255
ACM
2554
2555 entry = mmsg;
d7256d0e 2556 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2557
2558 while (datagrams < vlen) {
2559 /*
2560 * No need to ask LSM for more than the first datagram.
2561 */
d7256d0e 2562 if (MSG_CMSG_COMPAT & flags) {
666547ff 2563 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2564 &msg_sys, flags & ~MSG_WAITFORONE,
2565 datagrams);
d7256d0e
JMG
2566 if (err < 0)
2567 break;
2568 err = __put_user(err, &compat_entry->msg_len);
2569 ++compat_entry;
2570 } else {
a7526eb5 2571 err = ___sys_recvmsg(sock,
666547ff 2572 (struct user_msghdr __user *)entry,
a7526eb5
AL
2573 &msg_sys, flags & ~MSG_WAITFORONE,
2574 datagrams);
d7256d0e
JMG
2575 if (err < 0)
2576 break;
2577 err = put_user(err, &entry->msg_len);
2578 ++entry;
2579 }
2580
a2e27255
ACM
2581 if (err)
2582 break;
a2e27255
ACM
2583 ++datagrams;
2584
71c5c159
BB
2585 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2586 if (flags & MSG_WAITFORONE)
2587 flags |= MSG_DONTWAIT;
2588
a2e27255 2589 if (timeout) {
766b9f92 2590 ktime_get_ts64(&timeout64);
c2e6c856 2591 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2592 if (timeout->tv_sec < 0) {
2593 timeout->tv_sec = timeout->tv_nsec = 0;
2594 break;
2595 }
2596
2597 /* Timeout, return less than vlen datagrams */
2598 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2599 break;
2600 }
2601
2602 /* Out of band data, return right away */
2603 if (msg_sys.msg_flags & MSG_OOB)
2604 break;
a78cb84c 2605 cond_resched();
a2e27255
ACM
2606 }
2607
a2e27255 2608 if (err == 0)
34b88a68
ACM
2609 goto out_put;
2610
2611 if (datagrams == 0) {
2612 datagrams = err;
2613 goto out_put;
2614 }
a2e27255 2615
34b88a68
ACM
2616 /*
2617 * We may return less entries than requested (vlen) if the
2618 * sock is non block and there aren't enough datagrams...
2619 */
2620 if (err != -EAGAIN) {
a2e27255 2621 /*
34b88a68
ACM
2622 * ... or if recvmsg returns an error after we
2623 * received some datagrams, where we record the
2624 * error to return on the next call or if the
2625 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2626 */
34b88a68 2627 sock->sk->sk_err = -err;
a2e27255 2628 }
34b88a68
ACM
2629out_put:
2630 fput_light(sock->file, fput_needed);
a2e27255 2631
34b88a68 2632 return datagrams;
a2e27255
ACM
2633}
2634
e11d4284
AB
2635int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2636 unsigned int vlen, unsigned int flags,
2637 struct __kernel_timespec __user *timeout,
2638 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2639{
2640 int datagrams;
c2e6c856 2641 struct timespec64 timeout_sys;
a2e27255 2642
e11d4284
AB
2643 if (timeout && get_timespec64(&timeout_sys, timeout))
2644 return -EFAULT;
a2e27255 2645
e11d4284 2646 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2647 return -EFAULT;
2648
e11d4284
AB
2649 if (!timeout && !timeout32)
2650 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2651
2652 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2653
e11d4284
AB
2654 if (datagrams <= 0)
2655 return datagrams;
2656
2657 if (timeout && put_timespec64(&timeout_sys, timeout))
2658 datagrams = -EFAULT;
2659
2660 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2661 datagrams = -EFAULT;
2662
2663 return datagrams;
2664}
2665
1255e269
DB
2666SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2667 unsigned int, vlen, unsigned int, flags,
c2e6c856 2668 struct __kernel_timespec __user *, timeout)
1255e269 2669{
e11d4284
AB
2670 if (flags & MSG_CMSG_COMPAT)
2671 return -EINVAL;
2672
2673 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2674}
2675
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg(2) variant taking an old_timespec32 timeout.
 * MSG_CMSG_COMPAT is rejected with -EINVAL.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2687
#ifdef __ARCH_WANT_SYS_SOCKETCALL
/*
 * Argument list sizes for sys_socketcall: nargs[call] is the number of
 * bytes of user arguments to copy in for sub-call number `call`.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	AL(4), AL(5), AL(4)
};

#undef AL

/*
 *	System call vectors.
 *
 *	Multiplexer for the socket system calls: @call selects the
 *	operation and @args points to its argument array in user space.
 *	Call numbers outside 1..SYS_SENDMMSG are rejected with -EINVAL.
 *
 *	Argument checking cleaned up. Saved 20% in size.
 *	This function doesn't need to set the kernel lock because
 *	it is set by the callees.
 */

SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
{
	unsigned long a[AUDITSC_ARGS];
	unsigned int len;
	int err;

	if (call < 1 || call > SYS_SENDMMSG)
		return -EINVAL;
	/* Clamp the table index under speculation as well. */
	call = array_index_nospec(call, SYS_SENDMMSG + 1);

	len = nargs[call];
	if (len > sizeof(a))
		return -EINVAL;

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(a, args, len))
		return -EFAULT;

	err = audit_socketcall(len / sizeof(unsigned long), a);
	if (err)
		return err;

	switch (call) {
	case SYS_SOCKET:
		err = __sys_socket(a[0], a[1], a[2]);
		break;
	case SYS_BIND:
		err = __sys_bind(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_CONNECT:
		err = __sys_connect(a[0], (struct sockaddr __user *)a[1],
				    a[2]);
		break;
	case SYS_LISTEN:
		err = __sys_listen(a[0], a[1]);
		break;
	case SYS_ACCEPT:
		/* accept() is accept4() with no flags. */
		err = __sys_accept4(a[0], (struct sockaddr __user *)a[1],
				    (int __user *)a[2], 0);
		break;
	case SYS_GETSOCKNAME:
		err = __sys_getsockname(a[0], (struct sockaddr __user *)a[1],
					(int __user *)a[2]);
		break;
	case SYS_GETPEERNAME:
		err = __sys_getpeername(a[0], (struct sockaddr __user *)a[1],
					(int __user *)a[2]);
		break;
	case SYS_SOCKETPAIR:
		err = __sys_socketpair(a[0], a[1], a[2], (int __user *)a[3]);
		break;
	case SYS_SEND:
		err = __sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				   NULL, 0);
		break;
	case SYS_SENDTO:
		err = __sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				   (struct sockaddr __user *)a[4], a[5]);
		break;
	case SYS_RECV:
		err = __sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				     NULL, NULL);
		break;
	case SYS_RECVFROM:
		err = __sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				     (struct sockaddr __user *)a[4],
				     (int __user *)a[5]);
		break;
	case SYS_SHUTDOWN:
		err = __sys_shutdown(a[0], a[1]);
		break;
	case SYS_SETSOCKOPT:
		err = __sys_setsockopt(a[0], a[1], a[2],
				       (char __user *)a[3], a[4]);
		break;
	case SYS_GETSOCKOPT:
		err = __sys_getsockopt(a[0], a[1], a[2],
				       (char __user *)a[3],
				       (int __user *)a[4]);
		break;
	case SYS_SENDMSG:
		err = __sys_sendmsg(a[0], (struct user_msghdr __user *)a[1],
				    a[2], true);
		break;
	case SYS_SENDMMSG:
		err = __sys_sendmmsg(a[0], (struct mmsghdr __user *)a[1],
				     a[2], a[3], true);
		break;
	case SYS_RECVMSG:
		err = __sys_recvmsg(a[0], (struct user_msghdr __user *)a[1],
				    a[2], true);
		break;
	case SYS_RECVMMSG:
		/* The config decides which timespec layout a[4] points to. */
		if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
			err = __sys_recvmmsg(a[0],
					     (struct mmsghdr __user *)a[1],
					     a[2], a[3],
					     (struct __kernel_timespec __user *)a[4],
					     NULL);
		else
			err = __sys_recvmmsg(a[0],
					     (struct mmsghdr __user *)a[1],
					     a[2], a[3], NULL,
					     (struct old_timespec32 __user *)a[4]);
		break;
	case SYS_ACCEPT4:
		err = __sys_accept4(a[0], (struct sockaddr __user *)a[1],
				    (int __user *)a[2], a[3]);
		break;
	default:
		err = -EINVAL;
		break;
	}
	return err;
}

#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2828
55737fda
SH
2829/**
2830 * sock_register - add a socket protocol handler
2831 * @ops: description of protocol
2832 *
1da177e4
LT
2833 * This function is called by a protocol handler that wants to
2834 * advertise its address family, and have it linked into the
e793c0f7 2835 * socket interface. The value ops->family corresponds to the
55737fda 2836 * socket system call protocol family.
1da177e4 2837 */
f0fd27d4 2838int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2839{
2840 int err;
2841
2842 if (ops->family >= NPROTO) {
3410f22e 2843 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2844 return -ENOBUFS;
2845 }
55737fda
SH
2846
2847 spin_lock(&net_family_lock);
190683a9
ED
2848 if (rcu_dereference_protected(net_families[ops->family],
2849 lockdep_is_held(&net_family_lock)))
55737fda
SH
2850 err = -EEXIST;
2851 else {
cf778b00 2852 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2853 err = 0;
2854 }
55737fda
SH
2855 spin_unlock(&net_family_lock);
2856
3410f22e 2857 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2858 return err;
2859}
c6d409cf 2860EXPORT_SYMBOL(sock_register);
1da177e4 2861
55737fda
SH
2862/**
2863 * sock_unregister - remove a protocol handler
2864 * @family: protocol family to remove
2865 *
1da177e4
LT
2866 * This function is called by a protocol handler that wants to
2867 * remove its address family, and have it unlinked from the
55737fda
SH
2868 * new socket creation.
2869 *
2870 * If protocol handler is a module, then it can use module reference
2871 * counts to protect against new references. If protocol handler is not
2872 * a module then it needs to provide its own protection in
2873 * the ops->create routine.
1da177e4 2874 */
f0fd27d4 2875void sock_unregister(int family)
1da177e4 2876{
f0fd27d4 2877 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2878
55737fda 2879 spin_lock(&net_family_lock);
a9b3cd7f 2880 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2881 spin_unlock(&net_family_lock);
2882
2883 synchronize_rcu();
2884
3410f22e 2885 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2886}
c6d409cf 2887EXPORT_SYMBOL(sock_unregister);
1da177e4 2888
bf2ae2e4
XL
2889bool sock_is_registered(int family)
2890{
66b51b0a 2891 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
2892}
2893
77d76ea3 2894static int __init sock_init(void)
1da177e4 2895{
b3e19d92 2896 int err;
2ca794e5
EB
2897 /*
2898 * Initialize the network sysctl infrastructure.
2899 */
2900 err = net_sysctl_init();
2901 if (err)
2902 goto out;
b3e19d92 2903
1da177e4 2904 /*
89bddce5 2905 * Initialize skbuff SLAB cache
1da177e4
LT
2906 */
2907 skb_init();
1da177e4
LT
2908
2909 /*
89bddce5 2910 * Initialize the protocols module.
1da177e4
LT
2911 */
2912
2913 init_inodecache();
b3e19d92
NP
2914
2915 err = register_filesystem(&sock_fs_type);
2916 if (err)
2917 goto out_fs;
1da177e4 2918 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2919 if (IS_ERR(sock_mnt)) {
2920 err = PTR_ERR(sock_mnt);
2921 goto out_mount;
2922 }
77d76ea3
AK
2923
2924 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2925 */
2926
2927#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2928 err = netfilter_init();
2929 if (err)
2930 goto out;
1da177e4 2931#endif
cbeb321a 2932
408eccce 2933 ptp_classifier_init();
c1f19b51 2934
b3e19d92
NP
2935out:
2936 return err;
2937
2938out_mount:
2939 unregister_filesystem(&sock_fs_type);
2940out_fs:
2941 goto out;
1da177e4
LT
2942}
2943
77d76ea3
AK
2944core_initcall(sock_init); /* early initcall */
2945
1da177e4
LT
2946#ifdef CONFIG_PROC_FS
2947void socket_seq_show(struct seq_file *seq)
2948{
648845ab
TZ
2949 seq_printf(seq, "sockets: used %d\n",
2950 sock_inuse_get(seq->private));
1da177e4 2951}
89bddce5 2952#endif /* CONFIG_PROC_FS */
1da177e4 2953
89bbfc95 2954#ifdef CONFIG_COMPAT
36fd633e 2955static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2956{
6b96018b 2957 struct compat_ifconf ifc32;
7a229387 2958 struct ifconf ifc;
7a229387
AB
2959 int err;
2960
6b96018b 2961 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2962 return -EFAULT;
2963
36fd633e
AV
2964 ifc.ifc_len = ifc32.ifc_len;
2965 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 2966
36fd633e
AV
2967 rtnl_lock();
2968 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2969 rtnl_unlock();
7a229387
AB
2970 if (err)
2971 return err;
2972
36fd633e 2973 ifc32.ifc_len = ifc.ifc_len;
6b96018b 2974 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2975 return -EFAULT;
2976
2977 return 0;
2978}
2979
6b96018b 2980static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2981{
3a7da39d
BH
2982 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2983 bool convert_in = false, convert_out = false;
44c02a2c
AV
2984 size_t buf_size = 0;
2985 struct ethtool_rxnfc __user *rxnfc = NULL;
2986 struct ifreq ifr;
3a7da39d
BH
2987 u32 rule_cnt = 0, actual_rule_cnt;
2988 u32 ethcmd;
7a229387 2989 u32 data;
3a7da39d 2990 int ret;
7a229387 2991
3a7da39d
BH
2992 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2993 return -EFAULT;
7a229387 2994
3a7da39d
BH
2995 compat_rxnfc = compat_ptr(data);
2996
2997 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2998 return -EFAULT;
2999
3a7da39d
BH
3000 /* Most ethtool structures are defined without padding.
3001 * Unfortunately struct ethtool_rxnfc is an exception.
3002 */
3003 switch (ethcmd) {
3004 default:
3005 break;
3006 case ETHTOOL_GRXCLSRLALL:
3007 /* Buffer size is variable */
3008 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3009 return -EFAULT;
3010 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3011 return -ENOMEM;
3012 buf_size += rule_cnt * sizeof(u32);
3013 /* fall through */
3014 case ETHTOOL_GRXRINGS:
3015 case ETHTOOL_GRXCLSRLCNT:
3016 case ETHTOOL_GRXCLSRULE:
55664f32 3017 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3018 convert_out = true;
3019 /* fall through */
3020 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3021 buf_size += sizeof(struct ethtool_rxnfc);
3022 convert_in = true;
44c02a2c 3023 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3024 break;
3025 }
3026
44c02a2c 3027 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3028 return -EFAULT;
3029
44c02a2c 3030 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3031
3a7da39d 3032 if (convert_in) {
127fe533 3033 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3034 * fs.ring_cookie and at the end of fs, but nowhere else.
3035 */
127fe533
AD
3036 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3037 sizeof(compat_rxnfc->fs.m_ext) !=
3038 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3039 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3040 BUILD_BUG_ON(
3041 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3042 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3043 offsetof(struct ethtool_rxnfc, fs.location) -
3044 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3045
3046 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3047 (void __user *)(&rxnfc->fs.m_ext + 1) -
3048 (void __user *)rxnfc) ||
3a7da39d
BH
3049 copy_in_user(&rxnfc->fs.ring_cookie,
3050 &compat_rxnfc->fs.ring_cookie,
954b1244 3051 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3052 (void __user *)&rxnfc->fs.ring_cookie))
3053 return -EFAULT;
3054 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3055 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3056 return -EFAULT;
3057 } else if (copy_in_user(&rxnfc->rule_cnt,
3058 &compat_rxnfc->rule_cnt,
3059 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3060 return -EFAULT;
3061 }
3062
44c02a2c 3063 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3064 if (ret)
3065 return ret;
3066
3067 if (convert_out) {
3068 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3069 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3070 (const void __user *)rxnfc) ||
3a7da39d
BH
3071 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3072 &rxnfc->fs.ring_cookie,
954b1244
SH
3073 (const void __user *)(&rxnfc->fs.location + 1) -
3074 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3075 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3076 sizeof(rxnfc->rule_cnt)))
3077 return -EFAULT;
3078
3079 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3080 /* As an optimisation, we only copy the actual
3081 * number of rules that the underlying
3082 * function returned. Since Mallory might
3083 * change the rule count in user memory, we
3084 * check that it is less than the rule count
3085 * originally given (as the user buffer size),
3086 * which has been range-checked.
3087 */
3088 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3089 return -EFAULT;
3090 if (actual_rule_cnt < rule_cnt)
3091 rule_cnt = actual_rule_cnt;
3092 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3093 &rxnfc->rule_locs[0],
3094 rule_cnt * sizeof(u32)))
3095 return -EFAULT;
3096 }
3097 }
3098
3099 return 0;
7a229387
AB
3100}
3101
7a50a240
AB
3102static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3103{
7a50a240 3104 compat_uptr_t uptr32;
44c02a2c
AV
3105 struct ifreq ifr;
3106 void __user *saved;
3107 int err;
7a50a240 3108
44c02a2c 3109 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3110 return -EFAULT;
3111
3112 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3113 return -EFAULT;
3114
44c02a2c
AV
3115 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3116 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3117
44c02a2c
AV
3118 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3119 if (!err) {
3120 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3121 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3122 err = -EFAULT;
ccbd6a5a 3123 }
44c02a2c 3124 return err;
7a229387
AB
3125}
3126
590d4693
BH
3127/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3128static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3129 struct compat_ifreq __user *u_ifreq32)
7a229387 3130{
44c02a2c 3131 struct ifreq ifreq;
7a229387
AB
3132 u32 data32;
3133
44c02a2c 3134 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3135 return -EFAULT;
44c02a2c 3136 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3137 return -EFAULT;
44c02a2c 3138 ifreq.ifr_data = compat_ptr(data32);
7a229387 3139
44c02a2c 3140 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3141}
3142
37ac39bd
JB
3143static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3144 unsigned int cmd,
3145 struct compat_ifreq __user *uifr32)
3146{
3147 struct ifreq __user *uifr;
3148 int err;
3149
3150 /* Handle the fact that while struct ifreq has the same *layout* on
3151 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3152 * which are handled elsewhere, it still has different *size* due to
3153 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3154 * resulting in struct ifreq being 32 and 40 bytes respectively).
3155 * As a result, if the struct happens to be at the end of a page and
3156 * the next page isn't readable/writable, we get a fault. To prevent
3157 * that, copy back and forth to the full size.
3158 */
3159
3160 uifr = compat_alloc_user_space(sizeof(*uifr));
3161 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3162 return -EFAULT;
3163
3164 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3165
3166 if (!err) {
3167 switch (cmd) {
3168 case SIOCGIFFLAGS:
3169 case SIOCGIFMETRIC:
3170 case SIOCGIFMTU:
3171 case SIOCGIFMEM:
3172 case SIOCGIFHWADDR:
3173 case SIOCGIFINDEX:
3174 case SIOCGIFADDR:
3175 case SIOCGIFBRDADDR:
3176 case SIOCGIFDSTADDR:
3177 case SIOCGIFNETMASK:
3178 case SIOCGIFPFLAGS:
3179 case SIOCGIFTXQLEN:
3180 case SIOCGMIIPHY:
3181 case SIOCGMIIREG:
c6c9fee3 3182 case SIOCGIFNAME:
37ac39bd
JB
3183 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3184 err = -EFAULT;
3185 break;
3186 }
3187 }
3188 return err;
3189}
3190
a2116ed2
AB
3191static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3192 struct compat_ifreq __user *uifr32)
3193{
3194 struct ifreq ifr;
3195 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3196 int err;
3197
3198 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3199 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3200 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3201 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3202 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3203 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3204 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3205 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3206 if (err)
3207 return -EFAULT;
3208
44c02a2c 3209 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3210
3211 if (cmd == SIOCGIFMAP && !err) {
3212 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3213 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3214 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3215 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3216 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3217 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3218 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3219 if (err)
3220 err = -EFAULT;
3221 }
3222 return err;
3223}
3224
7a229387 3225struct rtentry32 {
c6d409cf 3226 u32 rt_pad1;
7a229387
AB
3227 struct sockaddr rt_dst; /* target address */
3228 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3229 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3230 unsigned short rt_flags;
3231 short rt_pad2;
3232 u32 rt_pad3;
3233 unsigned char rt_tos;
3234 unsigned char rt_class;
3235 short rt_pad4;
3236 short rt_metric; /* +1 for binary compatibility! */
7a229387 3237 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3238 u32 rt_mtu; /* per route MTU/Window */
3239 u32 rt_window; /* Window clamping */
7a229387
AB
3240 unsigned short rt_irtt; /* Initial RTT */
3241};
3242
3243struct in6_rtmsg32 {
3244 struct in6_addr rtmsg_dst;
3245 struct in6_addr rtmsg_src;
3246 struct in6_addr rtmsg_gateway;
3247 u32 rtmsg_type;
3248 u16 rtmsg_dst_len;
3249 u16 rtmsg_src_len;
3250 u32 rtmsg_metric;
3251 u32 rtmsg_info;
3252 u32 rtmsg_flags;
3253 s32 rtmsg_ifindex;
3254};
3255
6b96018b
AB
3256static int routing_ioctl(struct net *net, struct socket *sock,
3257 unsigned int cmd, void __user *argp)
7a229387
AB
3258{
3259 int ret;
3260 void *r = NULL;
3261 struct in6_rtmsg r6;
3262 struct rtentry r4;
3263 char devname[16];
3264 u32 rtdev;
3265 mm_segment_t old_fs = get_fs();
3266
6b96018b
AB
3267 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3268 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3269 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3270 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3271 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3272 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3273 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3274 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3275 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3276 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3277 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3278
3279 r = (void *) &r6;
3280 } else { /* ipv4 */
6b96018b 3281 struct rtentry32 __user *ur4 = argp;
c6d409cf 3282 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3283 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3284 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3285 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3286 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3287 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3288 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3289 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3290 if (rtdev) {
c6d409cf 3291 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3292 r4.rt_dev = (char __user __force *)devname;
3293 devname[15] = 0;
7a229387
AB
3294 } else
3295 r4.rt_dev = NULL;
3296
3297 r = (void *) &r4;
3298 }
3299
3300 if (ret) {
3301 ret = -EFAULT;
3302 goto out;
3303 }
3304
c6d409cf 3305 set_fs(KERNEL_DS);
63ff03ab 3306 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3307 set_fs(old_fs);
7a229387
AB
3308
3309out:
7a229387
AB
3310 return ret;
3311}
3312
3313/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3314 * for some operations; this forces use of the newer bridge-utils that
25985edc 3315 * use compatible ioctls
7a229387 3316 */
6b96018b 3317static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3318{
6b96018b 3319 compat_ulong_t tmp;
7a229387 3320
6b96018b 3321 if (get_user(tmp, argp))
7a229387
AB
3322 return -EFAULT;
3323 if (tmp == BRCTL_GET_VERSION)
3324 return BRCTL_VERSION + 1;
3325 return -EINVAL;
3326}
3327
6b96018b
AB
3328static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3329 unsigned int cmd, unsigned long arg)
3330{
3331 void __user *argp = compat_ptr(arg);
3332 struct sock *sk = sock->sk;
3333 struct net *net = sock_net(sk);
7a229387 3334
6b96018b 3335 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3336 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3337
3338 switch (cmd) {
3339 case SIOCSIFBR:
3340 case SIOCGIFBR:
3341 return old_bridge_ioctl(argp);
6b96018b 3342 case SIOCGIFCONF:
36fd633e 3343 return compat_dev_ifconf(net, argp);
6b96018b
AB
3344 case SIOCETHTOOL:
3345 return ethtool_ioctl(net, argp);
7a50a240
AB
3346 case SIOCWANDEV:
3347 return compat_siocwandev(net, argp);
a2116ed2
AB
3348 case SIOCGIFMAP:
3349 case SIOCSIFMAP:
3350 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3351 case SIOCADDRT:
3352 case SIOCDELRT:
3353 return routing_ioctl(net, sock, cmd, argp);
0768e170
AB
3354 case SIOCGSTAMP_OLD:
3355 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3356 if (!sock->ops->gettstamp)
3357 return -ENOIOCTLCMD;
0768e170 3358 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3359 !COMPAT_USE_64BIT_TIME);
3360
590d4693
BH
3361 case SIOCBONDSLAVEINFOQUERY:
3362 case SIOCBONDINFOQUERY:
a2116ed2 3363 case SIOCSHWTSTAMP:
fd468c74 3364 case SIOCGHWTSTAMP:
590d4693 3365 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3366
3367 case FIOSETOWN:
3368 case SIOCSPGRP:
3369 case FIOGETOWN:
3370 case SIOCGPGRP:
3371 case SIOCBRADDBR:
3372 case SIOCBRDELBR:
3373 case SIOCGIFVLAN:
3374 case SIOCSIFVLAN:
3375 case SIOCADDDLCI:
3376 case SIOCDELDLCI:
c62cce2c 3377 case SIOCGSKNS:
0768e170
AB
3378 case SIOCGSTAMP_NEW:
3379 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3380 return sock_ioctl(file, cmd, arg);
3381
3382 case SIOCGIFFLAGS:
3383 case SIOCSIFFLAGS:
3384 case SIOCGIFMETRIC:
3385 case SIOCSIFMETRIC:
3386 case SIOCGIFMTU:
3387 case SIOCSIFMTU:
3388 case SIOCGIFMEM:
3389 case SIOCSIFMEM:
3390 case SIOCGIFHWADDR:
3391 case SIOCSIFHWADDR:
3392 case SIOCADDMULTI:
3393 case SIOCDELMULTI:
3394 case SIOCGIFINDEX:
6b96018b
AB
3395 case SIOCGIFADDR:
3396 case SIOCSIFADDR:
3397 case SIOCSIFHWBROADCAST:
6b96018b 3398 case SIOCDIFADDR:
6b96018b
AB
3399 case SIOCGIFBRDADDR:
3400 case SIOCSIFBRDADDR:
3401 case SIOCGIFDSTADDR:
3402 case SIOCSIFDSTADDR:
3403 case SIOCGIFNETMASK:
3404 case SIOCSIFNETMASK:
3405 case SIOCSIFPFLAGS:
3406 case SIOCGIFPFLAGS:
3407 case SIOCGIFTXQLEN:
3408 case SIOCSIFTXQLEN:
3409 case SIOCBRADDIF:
3410 case SIOCBRDELIF:
c6c9fee3 3411 case SIOCGIFNAME:
9177efd3
AB
3412 case SIOCSIFNAME:
3413 case SIOCGMIIPHY:
3414 case SIOCGMIIREG:
3415 case SIOCSMIIREG:
f92d4fc9
AV
3416 case SIOCBONDENSLAVE:
3417 case SIOCBONDRELEASE:
3418 case SIOCBONDSETHWADDR:
3419 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3420 return compat_ifreq_ioctl(net, sock, cmd, argp);
3421
6b96018b
AB
3422 case SIOCSARP:
3423 case SIOCGARP:
3424 case SIOCDARP:
6b96018b 3425 case SIOCATMARK:
63ff03ab 3426 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3427 }
3428
6b96018b
AB
3429 return -ENOIOCTLCMD;
3430}
7a229387 3431
95c96174 3432static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3433 unsigned long arg)
89bbfc95
SP
3434{
3435 struct socket *sock = file->private_data;
3436 int ret = -ENOIOCTLCMD;
87de87d5
DM
3437 struct sock *sk;
3438 struct net *net;
3439
3440 sk = sock->sk;
3441 net = sock_net(sk);
89bbfc95
SP
3442
3443 if (sock->ops->compat_ioctl)
3444 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3445
87de87d5
DM
3446 if (ret == -ENOIOCTLCMD &&
3447 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3448 ret = compat_wext_handle_ioctl(net, cmd, arg);
3449
6b96018b
AB
3450 if (ret == -ENOIOCTLCMD)
3451 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3452
89bbfc95
SP
3453 return ret;
3454}
3455#endif
3456
8a3c245c
PT
3457/**
3458 * kernel_bind - bind an address to a socket (kernel space)
3459 * @sock: socket
3460 * @addr: address
3461 * @addrlen: length of address
3462 *
3463 * Returns 0 or an error.
3464 */
3465
ac5a488e
SS
3466int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3467{
3468 return sock->ops->bind(sock, addr, addrlen);
3469}
c6d409cf 3470EXPORT_SYMBOL(kernel_bind);
ac5a488e 3471
8a3c245c
PT
3472/**
3473 * kernel_listen - move socket to listening state (kernel space)
3474 * @sock: socket
3475 * @backlog: pending connections queue size
3476 *
3477 * Returns 0 or an error.
3478 */
3479
ac5a488e
SS
3480int kernel_listen(struct socket *sock, int backlog)
3481{
3482 return sock->ops->listen(sock, backlog);
3483}
c6d409cf 3484EXPORT_SYMBOL(kernel_listen);
ac5a488e 3485
8a3c245c
PT
3486/**
3487 * kernel_accept - accept a connection (kernel space)
3488 * @sock: listening socket
3489 * @newsock: new connected socket
3490 * @flags: flags
3491 *
3492 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3493 * If it fails, @newsock is guaranteed to be %NULL.
3494 * Returns 0 or an error.
3495 */
3496
ac5a488e
SS
3497int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3498{
3499 struct sock *sk = sock->sk;
3500 int err;
3501
3502 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3503 newsock);
3504 if (err < 0)
3505 goto done;
3506
cdfbabfb 3507 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3508 if (err < 0) {
3509 sock_release(*newsock);
fa8705b0 3510 *newsock = NULL;
ac5a488e
SS
3511 goto done;
3512 }
3513
3514 (*newsock)->ops = sock->ops;
1b08534e 3515 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3516
3517done:
3518 return err;
3519}
c6d409cf 3520EXPORT_SYMBOL(kernel_accept);
ac5a488e 3521
8a3c245c
PT
3522/**
3523 * kernel_connect - connect a socket (kernel space)
3524 * @sock: socket
3525 * @addr: address
3526 * @addrlen: address length
3527 * @flags: flags (O_NONBLOCK, ...)
3528 *
3529 * For datagram sockets, @addr is the addres to which datagrams are sent
3530 * by default, and the only address from which datagrams are received.
3531 * For stream sockets, attempts to connect to @addr.
3532 * Returns 0 or an error code.
3533 */
3534
ac5a488e 3535int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3536 int flags)
ac5a488e
SS
3537{
3538 return sock->ops->connect(sock, addr, addrlen, flags);
3539}
c6d409cf 3540EXPORT_SYMBOL(kernel_connect);
ac5a488e 3541
8a3c245c
PT
3542/**
3543 * kernel_getsockname - get the address which the socket is bound (kernel space)
3544 * @sock: socket
3545 * @addr: address holder
3546 *
3547 * Fills the @addr pointer with the address which the socket is bound.
3548 * Returns 0 or an error code.
3549 */
3550
9b2c45d4 3551int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3552{
9b2c45d4 3553 return sock->ops->getname(sock, addr, 0);
ac5a488e 3554}
c6d409cf 3555EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3556
8a3c245c
PT
3557/**
3558 * kernel_peername - get the address which the socket is connected (kernel space)
3559 * @sock: socket
3560 * @addr: address holder
3561 *
3562 * Fills the @addr pointer with the address which the socket is connected.
3563 * Returns 0 or an error code.
3564 */
3565
9b2c45d4 3566int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3567{
9b2c45d4 3568 return sock->ops->getname(sock, addr, 1);
ac5a488e 3569}
c6d409cf 3570EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3571
8a3c245c
PT
3572/**
3573 * kernel_getsockopt - get a socket option (kernel space)
3574 * @sock: socket
3575 * @level: API level (SOL_SOCKET, ...)
3576 * @optname: option tag
3577 * @optval: option value
3578 * @optlen: option length
3579 *
3580 * Assigns the option length to @optlen.
3581 * Returns 0 or an error.
3582 */
3583
ac5a488e
SS
3584int kernel_getsockopt(struct socket *sock, int level, int optname,
3585 char *optval, int *optlen)
3586{
3587 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3588 char __user *uoptval;
3589 int __user *uoptlen;
ac5a488e
SS
3590 int err;
3591
fb8621bb
NK
3592 uoptval = (char __user __force *) optval;
3593 uoptlen = (int __user __force *) optlen;
3594
ac5a488e
SS
3595 set_fs(KERNEL_DS);
3596 if (level == SOL_SOCKET)
fb8621bb 3597 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3598 else
fb8621bb
NK
3599 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3600 uoptlen);
ac5a488e
SS
3601 set_fs(oldfs);
3602 return err;
3603}
c6d409cf 3604EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3605
8a3c245c
PT
3606/**
3607 * kernel_setsockopt - set a socket option (kernel space)
3608 * @sock: socket
3609 * @level: API level (SOL_SOCKET, ...)
3610 * @optname: option tag
3611 * @optval: option value
3612 * @optlen: option length
3613 *
3614 * Returns 0 or an error.
3615 */
3616
ac5a488e 3617int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3618 char *optval, unsigned int optlen)
ac5a488e
SS
3619{
3620 mm_segment_t oldfs = get_fs();
fb8621bb 3621 char __user *uoptval;
ac5a488e
SS
3622 int err;
3623
fb8621bb
NK
3624 uoptval = (char __user __force *) optval;
3625
ac5a488e
SS
3626 set_fs(KERNEL_DS);
3627 if (level == SOL_SOCKET)
fb8621bb 3628 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3629 else
fb8621bb 3630 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3631 optlen);
3632 set_fs(oldfs);
3633 return err;
3634}
c6d409cf 3635EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3636
8a3c245c
PT
3637/**
3638 * kernel_sendpage - send a &page through a socket (kernel space)
3639 * @sock: socket
3640 * @page: page
3641 * @offset: page offset
3642 * @size: total size in bytes
3643 * @flags: flags (MSG_DONTWAIT, ...)
3644 *
3645 * Returns the total amount sent in bytes or an error.
3646 */
3647
ac5a488e
SS
3648int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3649 size_t size, int flags)
3650{
3651 if (sock->ops->sendpage)
3652 return sock->ops->sendpage(sock, page, offset, size, flags);
3653
3654 return sock_no_sendpage(sock, page, offset, size, flags);
3655}
c6d409cf 3656EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3657
8a3c245c
PT
3658/**
3659 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3660 * @sk: sock
3661 * @page: page
3662 * @offset: page offset
3663 * @size: total size in bytes
3664 * @flags: flags (MSG_DONTWAIT, ...)
3665 *
3666 * Returns the total amount sent in bytes or an error.
3667 * Caller must hold @sk.
3668 */
3669
306b13eb
TH
3670int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3671 size_t size, int flags)
3672{
3673 struct socket *sock = sk->sk_socket;
3674
3675 if (sock->ops->sendpage_locked)
3676 return sock->ops->sendpage_locked(sk, page, offset, size,
3677 flags);
3678
3679 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3680}
3681EXPORT_SYMBOL(kernel_sendpage_locked);
3682
8a3c245c
PT
3683/**
3684 * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3685 * @sock: socket
3686 * @how: connection part
3687 *
3688 * Returns 0 or an error.
3689 */
3690
91cf45f0
TM
3691int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3692{
3693 return sock->ops->shutdown(sock, how);
3694}
91cf45f0 3695EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3696
8a3c245c
PT
3697/**
3698 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3699 * @sk: socket
3700 *
3701 * This routine returns the IP overhead imposed by a socket i.e.
3702 * the length of the underlying IP header, depending on whether
3703 * this is an IPv4 or IPv6 socket and the length from IP options turned
3704 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3705 */
8a3c245c 3706
113c3075
P
3707u32 kernel_sock_ip_overhead(struct sock *sk)
3708{
3709 struct inet_sock *inet;
3710 struct ip_options_rcu *opt;
3711 u32 overhead = 0;
113c3075
P
3712#if IS_ENABLED(CONFIG_IPV6)
3713 struct ipv6_pinfo *np;
3714 struct ipv6_txoptions *optv6 = NULL;
3715#endif /* IS_ENABLED(CONFIG_IPV6) */
3716
3717 if (!sk)
3718 return overhead;
3719
113c3075
P
3720 switch (sk->sk_family) {
3721 case AF_INET:
3722 inet = inet_sk(sk);
3723 overhead += sizeof(struct iphdr);
3724 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3725 sock_owned_by_user(sk));
113c3075
P
3726 if (opt)
3727 overhead += opt->opt.optlen;
3728 return overhead;
3729#if IS_ENABLED(CONFIG_IPV6)
3730 case AF_INET6:
3731 np = inet6_sk(sk);
3732 overhead += sizeof(struct ipv6hdr);
3733 if (np)
3734 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3735 sock_owned_by_user(sk));
113c3075
P
3736 if (optv6)
3737 overhead += (optv6->opt_flen + optv6->opt_nflen);
3738 return overhead;
3739#endif /* IS_ENABLED(CONFIG_IPV6) */
3740 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3741 return overhead;
3742 }
3743}
3744EXPORT_SYMBOL(kernel_sock_ip_overhead);