Merge tag 'fuse-fixes-5.2-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/mszer...
[linux-2.6-block.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
1da177e4 55#include <linux/mm.h>
1da177e4
LT
56#include <linux/socket.h>
57#include <linux/file.h>
58#include <linux/net.h>
59#include <linux/interrupt.h>
aaca0bdc 60#include <linux/thread_info.h>
55737fda 61#include <linux/rcupdate.h>
1da177e4
LT
62#include <linux/netdevice.h>
63#include <linux/proc_fs.h>
64#include <linux/seq_file.h>
4a3e2f71 65#include <linux/mutex.h>
1da177e4 66#include <linux/if_bridge.h>
20380731
ACM
67#include <linux/if_frad.h>
68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4
LT
75#include <linux/mount.h>
76#include <linux/security.h>
77#include <linux/syscalls.h>
78#include <linux/compat.h>
79#include <linux/kmod.h>
3ec3b2fb 80#include <linux/audit.h>
d86b5e0e 81#include <linux/wireless.h>
1b8d7ae4 82#include <linux/nsproxy.h>
1fd7317d 83#include <linux/magic.h>
5a0e3ad6 84#include <linux/slab.h>
600e1779 85#include <linux/xattr.h>
c8e8cd57 86#include <linux/nospec.h>
8c3c447b 87#include <linux/indirect_call_wrapper.h>
1da177e4 88
7c0f6ba6 89#include <linux/uaccess.h>
1da177e4
LT
90#include <asm/unistd.h>
91
92#include <net/compat.h>
87de87d5 93#include <net/wext.h>
f8451725 94#include <net/cls_cgroup.h>
1da177e4
LT
95
96#include <net/sock.h>
97#include <linux/netfilter.h>
98
6b96018b
AB
99#include <linux/if_tun.h>
100#include <linux/ipv6_route.h>
101#include <linux/route.h>
6b96018b 102#include <linux/sockios.h>
076bb0c8 103#include <net/busy_poll.h>
f24b9be5 104#include <linux/errqueue.h>
06021292 105
8c3c447b
PA
106/* proto_ops for ipv4 and ipv6 use the same {recv,send}msg function */
107#if IS_ENABLED(CONFIG_INET)
108#define INDIRECT_CALL_INET4(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
109#else
110#define INDIRECT_CALL_INET4(f, f1, ...) f(__VA_ARGS__)
111#endif
112
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll tunables; only built when CONFIG_NET_RX_BUSY_POLL is set. */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
6b96018b 117
8ae5e030
AV
118static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
119static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 120static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
121
122static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
123static __poll_t sock_poll(struct file *file,
124 struct poll_table_struct *wait);
89bddce5 125static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
126#ifdef CONFIG_COMPAT
127static long compat_sock_ioctl(struct file *file,
89bddce5 128 unsigned int cmd, unsigned long arg);
89bbfc95 129#endif
1da177e4 130static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
131static ssize_t sock_sendpage(struct file *file, struct page *page,
132 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 133static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 134 struct pipe_inode_info *pipe, size_t len,
9c55e01c 135 unsigned int flags);
1da177e4 136
1da177e4
LT
137/*
138 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
139 * in the operation structures but are done directly via the socketcall() multiplexor.
140 */
141
da7071d7 142static const struct file_operations socket_file_ops = {
1da177e4
LT
143 .owner = THIS_MODULE,
144 .llseek = no_llseek,
8ae5e030
AV
145 .read_iter = sock_read_iter,
146 .write_iter = sock_write_iter,
1da177e4
LT
147 .poll = sock_poll,
148 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
149#ifdef CONFIG_COMPAT
150 .compat_ioctl = compat_sock_ioctl,
151#endif
1da177e4 152 .mmap = sock_mmap,
1da177e4
LT
153 .release = sock_close,
154 .fasync = sock_fasync,
5274f052
JA
155 .sendpage = sock_sendpage,
156 .splice_write = generic_splice_sendpage,
9c55e01c 157 .splice_read = sock_splice_read,
1da177e4
LT
158};
159
160/*
161 * The protocol list. Each protocol is registered in here.
162 */
163
1da177e4 164static DEFINE_SPINLOCK(net_family_lock);
190683a9 165static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 166
1da177e4 167/*
89bddce5
SH
168 * Support routines.
169 * Move socket addresses back and forth across the kernel/user
170 * divide and look after the messy bits.
1da177e4
LT
171 */
172
1da177e4
LT
173/**
174 * move_addr_to_kernel - copy a socket address into kernel space
175 * @uaddr: Address in user space
176 * @kaddr: Address in kernel space
177 * @ulen: Length in user space
178 *
179 * The address is copied into kernel space. If the provided address is
180 * too long an error code of -EINVAL is returned. If the copy gives
181 * invalid addresses -EFAULT is returned. On a success 0 is returned.
182 */
183
43db362d 184int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 185{
230b1839 186 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 187 return -EINVAL;
89bddce5 188 if (ulen == 0)
1da177e4 189 return 0;
89bddce5 190 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 191 return -EFAULT;
3ec3b2fb 192 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
193}
194
195/**
196 * move_addr_to_user - copy an address to user space
197 * @kaddr: kernel space address
198 * @klen: length of address in kernel
199 * @uaddr: user space address
200 * @ulen: pointer to user length field
201 *
202 * The value pointed to by ulen on entry is the buffer length available.
203 * This is overwritten with the buffer space used. -EINVAL is returned
204 * if an overlong buffer is specified or a negative buffer size. -EFAULT
205 * is returned if either the buffer or the length field are not
206 * accessible.
207 * After copying the data up to the limit the user specifies, the true
208 * length of the data is written over the length limit the user
209 * specified. Zero is returned for a success.
210 */
89bddce5 211
43db362d 212static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 213 void __user *uaddr, int __user *ulen)
1da177e4
LT
214{
215 int err;
216 int len;
217
68c6beb3 218 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
219 err = get_user(len, ulen);
220 if (err)
1da177e4 221 return err;
89bddce5
SH
222 if (len > klen)
223 len = klen;
68c6beb3 224 if (len < 0)
1da177e4 225 return -EINVAL;
89bddce5 226 if (len) {
d6fe3945
SG
227 if (audit_sockaddr(klen, kaddr))
228 return -ENOMEM;
89bddce5 229 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
230 return -EFAULT;
231 }
232 /*
89bddce5
SH
233 * "fromlen shall refer to the value before truncation.."
234 * 1003.1g
1da177e4
LT
235 */
236 return __put_user(klen, ulen);
237}
238
08009a76 239static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
240
241static struct inode *sock_alloc_inode(struct super_block *sb)
242{
243 struct socket_alloc *ei;
eaefd110 244 struct socket_wq *wq;
89bddce5 245
e94b1766 246 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
247 if (!ei)
248 return NULL;
eaefd110
ED
249 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
250 if (!wq) {
43815482
ED
251 kmem_cache_free(sock_inode_cachep, ei);
252 return NULL;
253 }
eaefd110
ED
254 init_waitqueue_head(&wq->wait);
255 wq->fasync_list = NULL;
574aab1e 256 wq->flags = 0;
e6476c21 257 ei->socket.wq = wq;
89bddce5 258
1da177e4
LT
259 ei->socket.state = SS_UNCONNECTED;
260 ei->socket.flags = 0;
261 ei->socket.ops = NULL;
262 ei->socket.sk = NULL;
263 ei->socket.file = NULL;
1da177e4
LT
264
265 return &ei->vfs_inode;
266}
267
268static void sock_destroy_inode(struct inode *inode)
269{
43815482
ED
270 struct socket_alloc *ei;
271
272 ei = container_of(inode, struct socket_alloc, vfs_inode);
e6476c21 273 kfree_rcu(ei->socket.wq, rcu);
43815482 274 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
275}
276
51cc5068 277static void init_once(void *foo)
1da177e4 278{
89bddce5 279 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 280
a35afb83 281 inode_init_once(&ei->vfs_inode);
1da177e4 282}
89bddce5 283
1e911632 284static void init_inodecache(void)
1da177e4
LT
285{
286 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
287 sizeof(struct socket_alloc),
288 0,
289 (SLAB_HWCACHE_ALIGN |
290 SLAB_RECLAIM_ACCOUNT |
5d097056 291 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 292 init_once);
1e911632 293 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
294}
295
b87221de 296static const struct super_operations sockfs_ops = {
c6d409cf
ED
297 .alloc_inode = sock_alloc_inode,
298 .destroy_inode = sock_destroy_inode,
299 .statfs = simple_statfs,
1da177e4
LT
300};
301
c23fbb6b
ED
302/*
303 * sockfs_dname() is called from d_path().
304 */
305static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
306{
307 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 308 d_inode(dentry)->i_ino);
c23fbb6b
ED
309}
310
3ba13d17 311static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 312 .d_dname = sockfs_dname,
1da177e4
LT
313};
314
bba0bd31
AG
315static int sockfs_xattr_get(const struct xattr_handler *handler,
316 struct dentry *dentry, struct inode *inode,
317 const char *suffix, void *value, size_t size)
318{
319 if (value) {
320 if (dentry->d_name.len + 1 > size)
321 return -ERANGE;
322 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
323 }
324 return dentry->d_name.len + 1;
325}
326
327#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
328#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
329#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
330
331static const struct xattr_handler sockfs_xattr_handler = {
332 .name = XATTR_NAME_SOCKPROTONAME,
333 .get = sockfs_xattr_get,
334};
335
4a590153
AG
336static int sockfs_security_xattr_set(const struct xattr_handler *handler,
337 struct dentry *dentry, struct inode *inode,
338 const char *suffix, const void *value,
339 size_t size, int flags)
340{
341 /* Handled by LSM. */
342 return -EAGAIN;
343}
344
345static const struct xattr_handler sockfs_security_xattr_handler = {
346 .prefix = XATTR_SECURITY_PREFIX,
347 .set = sockfs_security_xattr_set,
348};
349
bba0bd31
AG
350static const struct xattr_handler *sockfs_xattr_handlers[] = {
351 &sockfs_xattr_handler,
4a590153 352 &sockfs_security_xattr_handler,
bba0bd31
AG
353 NULL
354};
355
c74a1cbb
AV
356static struct dentry *sockfs_mount(struct file_system_type *fs_type,
357 int flags, const char *dev_name, void *data)
358{
bba0bd31
AG
359 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
360 sockfs_xattr_handlers,
361 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
362}
363
364static struct vfsmount *sock_mnt __read_mostly;
365
366static struct file_system_type sock_fs_type = {
367 .name = "sockfs",
368 .mount = sockfs_mount,
369 .kill_sb = kill_anon_super,
370};
371
1da177e4
LT
372/*
373 * Obtains the first available file descriptor and sets it up for use.
374 *
39d8c1b6
DM
375 * These functions create file structures and maps them to fd space
376 * of the current process. On success it returns file descriptor
1da177e4
LT
377 * and file struct implicitly stored in sock->file.
378 * Note that another thread may close file descriptor before we return
379 * from this function. We use the fact that now we do not refer
380 * to socket after mapping. If one day we will need it, this
381 * function will increment ref. count on file by 1.
382 *
383 * In any case returned fd MAY BE not valid!
384 * This race condition is unavoidable
385 * with shared fd spaces, we cannot solve it inside kernel,
386 * but we take care of internal coherence yet.
387 */
388
8a3c245c
PT
389/**
390 * sock_alloc_file - Bind a &socket to a &file
391 * @sock: socket
392 * @flags: file status flags
393 * @dname: protocol name
394 *
395 * Returns the &file bound with @sock, implicitly storing it
396 * in sock->file. If dname is %NULL, sets to "".
397 * On failure the return is a ERR pointer (see linux/err.h).
398 * This function uses GFP_KERNEL internally.
399 */
400
aab174f0 401struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 402{
7cbe66b6 403 struct file *file;
1da177e4 404
d93aa9d8
AV
405 if (!dname)
406 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 407
d93aa9d8
AV
408 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
409 O_RDWR | (flags & O_NONBLOCK),
410 &socket_file_ops);
b5ffe634 411 if (IS_ERR(file)) {
8e1611e2 412 sock_release(sock);
39b65252 413 return file;
cc3808f8
AV
414 }
415
416 sock->file = file;
39d8c1b6 417 file->private_data = sock;
28407630 418 return file;
39d8c1b6 419}
56b31d1c 420EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 421
56b31d1c 422static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
423{
424 struct file *newfile;
28407630 425 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
426 if (unlikely(fd < 0)) {
427 sock_release(sock);
28407630 428 return fd;
ce4bb04c 429 }
39d8c1b6 430
aab174f0 431 newfile = sock_alloc_file(sock, flags, NULL);
28407630 432 if (likely(!IS_ERR(newfile))) {
39d8c1b6 433 fd_install(fd, newfile);
28407630
AV
434 return fd;
435 }
7cbe66b6 436
28407630
AV
437 put_unused_fd(fd);
438 return PTR_ERR(newfile);
1da177e4
LT
439}
440
8a3c245c
PT
441/**
442 * sock_from_file - Return the &socket bounded to @file.
443 * @file: file
444 * @err: pointer to an error code return
445 *
446 * On failure returns %NULL and assigns -ENOTSOCK to @err.
447 */
448
406a3c63 449struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 450{
6cb153ca
BL
451 if (file->f_op == &socket_file_ops)
452 return file->private_data; /* set in sock_map_fd */
453
23bb80d2
ED
454 *err = -ENOTSOCK;
455 return NULL;
6cb153ca 456}
406a3c63 457EXPORT_SYMBOL(sock_from_file);
6cb153ca 458
1da177e4 459/**
c6d409cf 460 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
461 * @fd: file handle
462 * @err: pointer to an error code return
463 *
464 * The file handle passed in is locked and the socket it is bound
241c4667 465 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
466 * with a negative errno code and NULL is returned. The function checks
467 * for both invalid handles and passing a handle which is not a socket.
468 *
469 * On a success the socket object pointer is returned.
470 */
471
472struct socket *sockfd_lookup(int fd, int *err)
473{
474 struct file *file;
1da177e4
LT
475 struct socket *sock;
476
89bddce5
SH
477 file = fget(fd);
478 if (!file) {
1da177e4
LT
479 *err = -EBADF;
480 return NULL;
481 }
89bddce5 482
6cb153ca
BL
483 sock = sock_from_file(file, err);
484 if (!sock)
1da177e4 485 fput(file);
6cb153ca
BL
486 return sock;
487}
c6d409cf 488EXPORT_SYMBOL(sockfd_lookup);
1da177e4 489
6cb153ca
BL
490static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
491{
00e188ef 492 struct fd f = fdget(fd);
6cb153ca
BL
493 struct socket *sock;
494
3672558c 495 *err = -EBADF;
00e188ef
AV
496 if (f.file) {
497 sock = sock_from_file(f.file, err);
498 if (likely(sock)) {
499 *fput_needed = f.flags;
6cb153ca 500 return sock;
00e188ef
AV
501 }
502 fdput(f);
1da177e4 503 }
6cb153ca 504 return NULL;
1da177e4
LT
505}
506
600e1779
MY
507static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
508 size_t size)
509{
510 ssize_t len;
511 ssize_t used = 0;
512
c5ef6035 513 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
514 if (len < 0)
515 return len;
516 used += len;
517 if (buffer) {
518 if (size < used)
519 return -ERANGE;
520 buffer += len;
521 }
522
523 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
524 used += len;
525 if (buffer) {
526 if (size < used)
527 return -ERANGE;
528 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
529 buffer += len;
530 }
531
532 return used;
533}
534
dc647ec8 535static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
536{
537 int err = simple_setattr(dentry, iattr);
538
e1a3a60a 539 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
540 struct socket *sock = SOCKET_I(d_inode(dentry));
541
6d8c50dc
CW
542 if (sock->sk)
543 sock->sk->sk_uid = iattr->ia_uid;
544 else
545 err = -ENOENT;
86741ec2
LC
546 }
547
548 return err;
549}
550
600e1779 551static const struct inode_operations sockfs_inode_ops = {
600e1779 552 .listxattr = sockfs_listxattr,
86741ec2 553 .setattr = sockfs_setattr,
600e1779
MY
554};
555
1da177e4 556/**
8a3c245c 557 * sock_alloc - allocate a socket
89bddce5 558 *
1da177e4
LT
559 * Allocate a new inode and socket object. The two are bound together
560 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 561 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
562 */
563
f4a00aac 564struct socket *sock_alloc(void)
1da177e4 565{
89bddce5
SH
566 struct inode *inode;
567 struct socket *sock;
1da177e4 568
a209dfc7 569 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
570 if (!inode)
571 return NULL;
572
573 sock = SOCKET_I(inode);
574
85fe4025 575 inode->i_ino = get_next_ino();
89bddce5 576 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
577 inode->i_uid = current_fsuid();
578 inode->i_gid = current_fsgid();
600e1779 579 inode->i_op = &sockfs_inode_ops;
1da177e4 580
1da177e4
LT
581 return sock;
582}
f4a00aac 583EXPORT_SYMBOL(sock_alloc);
1da177e4 584
1da177e4 585/**
8a3c245c 586 * sock_release - close a socket
1da177e4
LT
587 * @sock: socket to close
588 *
589 * The socket is released from the protocol stack if it has a release
590 * callback, and the inode is then released if the socket is bound to
89bddce5 591 * an inode not a file.
1da177e4 592 */
89bddce5 593
6d8c50dc 594static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
595{
596 if (sock->ops) {
597 struct module *owner = sock->ops->owner;
598
6d8c50dc
CW
599 if (inode)
600 inode_lock(inode);
1da177e4 601 sock->ops->release(sock);
ff7b11aa 602 sock->sk = NULL;
6d8c50dc
CW
603 if (inode)
604 inode_unlock(inode);
1da177e4
LT
605 sock->ops = NULL;
606 module_put(owner);
607 }
608
e6476c21 609 if (sock->wq->fasync_list)
3410f22e 610 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 611
1da177e4
LT
612 if (!sock->file) {
613 iput(SOCK_INODE(sock));
614 return;
615 }
89bddce5 616 sock->file = NULL;
1da177e4 617}
6d8c50dc
CW
618
619void sock_release(struct socket *sock)
620{
621 __sock_release(sock, NULL);
622}
c6d409cf 623EXPORT_SYMBOL(sock_release);
1da177e4 624
c14ac945 625void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 626{
140c55d4
ED
627 u8 flags = *tx_flags;
628
c14ac945 629 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
630 flags |= SKBTX_HW_TSTAMP;
631
c14ac945 632 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
633 flags |= SKBTX_SW_TSTAMP;
634
c14ac945 635 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
636 flags |= SKBTX_SCHED_TSTAMP;
637
140c55d4 638 *tx_flags = flags;
20d49473 639}
67cc0d40 640EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 641
8c3c447b
PA
642INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
643 size_t));
d8725c86 644static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 645{
8c3c447b
PA
646 int ret = INDIRECT_CALL_INET4(sock->ops->sendmsg, inet_sendmsg, sock,
647 msg, msg_data_left(msg));
d8725c86
AV
648 BUG_ON(ret == -EIOCBQUEUED);
649 return ret;
1da177e4
LT
650}
651
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM. If the LSM hook
 *	returns non-zero that value is returned and nothing is sent.
 *	Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err = security_socket_sendmsg(sock, msg,
					  msg_data_left(msg));

	return err ?: sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 668
8a3c245c
PT
669/**
670 * kernel_sendmsg - send a message through @sock (kernel-space)
671 * @sock: socket
672 * @msg: message header
673 * @vec: kernel vec
674 * @num: vec array length
675 * @size: total message data size
676 *
677 * Builds the message data with @vec and sends it through @sock.
678 * Returns the number of bytes sent, or an error code.
679 */
680
1da177e4
LT
681int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
682 struct kvec *vec, size_t num, size_t size)
683{
aa563d7b 684 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 685 return sock_sendmsg(sock, msg);
1da177e4 686}
c6d409cf 687EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 688
8a3c245c
PT
689/**
690 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
691 * @sk: sock
692 * @msg: message header
693 * @vec: output s/g array
694 * @num: output s/g array length
695 * @size: total message data size
696 *
697 * Builds the message data with @vec and sends it through @sock.
698 * Returns the number of bytes sent, or an error code.
699 * Caller must hold @sk.
700 */
701
306b13eb
TH
702int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
703 struct kvec *vec, size_t num, size_t size)
704{
705 struct socket *sock = sk->sk_socket;
706
707 if (!sock->ops->sendmsg_locked)
db5980d8 708 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 709
aa563d7b 710 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
711
712 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
713}
714EXPORT_SYMBOL(kernel_sendmsg_locked);
715
8605330a
SHY
716static bool skb_is_err_queue(const struct sk_buff *skb)
717{
718 /* pkt_type of skbs enqueued on the error queue are set to
719 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
720 * in recvmsg, since skbs received on a local socket will never
721 * have a pkt_type of PACKET_OUTGOING.
722 */
723 return skb->pkt_type == PACKET_OUTGOING;
724}
725
b50a5c70
ML
726/* On transmit, software and hardware timestamps are returned independently.
727 * As the two skb clones share the hardware timestamp, which may be updated
728 * before the software timestamp is received, a hardware TX timestamp may be
729 * returned only if there is no software TX timestamp. Ignore false software
730 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 731 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
732 * hardware timestamp.
733 */
734static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
735{
736 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
737}
738
aad9c8c4
ML
739static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
740{
741 struct scm_ts_pktinfo ts_pktinfo;
742 struct net_device *orig_dev;
743
744 if (!skb_mac_header_was_set(skb))
745 return;
746
747 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
748
749 rcu_read_lock();
750 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
751 if (orig_dev)
752 ts_pktinfo.if_index = orig_dev->ifindex;
753 rcu_read_unlock();
754
755 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
756 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
757 sizeof(ts_pktinfo), &ts_pktinfo);
758}
759
92f37fd2
ED
760/*
761 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
762 */
763void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
764 struct sk_buff *skb)
765{
20d49473 766 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 767 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
768 struct scm_timestamping_internal tss;
769
b50a5c70 770 int empty = 1, false_tstamp = 0;
20d49473
PO
771 struct skb_shared_hwtstamps *shhwtstamps =
772 skb_hwtstamps(skb);
773
774 /* Race occurred between timestamp enabling and packet
775 receiving. Fill in the current time for now. */
b50a5c70 776 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 777 __net_timestamp(skb);
b50a5c70
ML
778 false_tstamp = 1;
779 }
20d49473
PO
780
781 if (need_software_tstamp) {
782 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
783 if (new_tstamp) {
784 struct __kernel_sock_timeval tv;
785
786 skb_get_new_timestamp(skb, &tv);
787 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
788 sizeof(tv), &tv);
789 } else {
790 struct __kernel_old_timeval tv;
791
792 skb_get_timestamp(skb, &tv);
793 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
794 sizeof(tv), &tv);
795 }
20d49473 796 } else {
887feae3
DD
797 if (new_tstamp) {
798 struct __kernel_timespec ts;
799
800 skb_get_new_timestampns(skb, &ts);
801 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
802 sizeof(ts), &ts);
803 } else {
804 struct timespec ts;
805
806 skb_get_timestampns(skb, &ts);
807 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
808 sizeof(ts), &ts);
809 }
20d49473
PO
810 }
811 }
812
f24b9be5 813 memset(&tss, 0, sizeof(tss));
c199105d 814 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 815 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 816 empty = 0;
4d276eb6 817 if (shhwtstamps &&
b9f40e21 818 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 819 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 820 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 821 empty = 0;
aad9c8c4
ML
822 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
823 !skb_is_err_queue(skb))
824 put_ts_pktinfo(msg, skb);
825 }
1c885808 826 if (!empty) {
9718475e
DD
827 if (sock_flag(sk, SOCK_TSTAMP_NEW))
828 put_cmsg_scm_timestamping64(msg, &tss);
829 else
830 put_cmsg_scm_timestamping(msg, &tss);
1c885808 831
8605330a 832 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 833 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
834 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
835 skb->len, skb->data);
836 }
92f37fd2 837}
7c81fd8b
ACM
838EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
839
6e3e939f
JB
840void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
841 struct sk_buff *skb)
842{
843 int ack;
844
845 if (!sock_flag(sk, SOCK_WIFI_STATUS))
846 return;
847 if (!skb->wifi_acked_valid)
848 return;
849
850 ack = skb->wifi_acked;
851
852 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
853}
854EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
855
11165f14 856static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
857 struct sk_buff *skb)
3b885787 858{
744d5a3e 859 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 860 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 861 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
862}
863
/* Attach both the timestamp and the drop-count control messages to @msg. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 871
8c3c447b
PA
872INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
873 size_t , int ));
1b784140 874static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 875 int flags)
1da177e4 876{
8c3c447b
PA
877 return INDIRECT_CALL_INET4(sock->ops->recvmsg, inet_recvmsg, sock, msg,
878 msg_data_left(msg), flags);
1da177e4
LT
879}
880
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM. If the LSM hook
 *	returns non-zero that value is returned and nothing is received.
 *	Returns the total number of bytes received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);

	return err ?: sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 897
c1249c0a 898/**
8a3c245c
PT
899 * kernel_recvmsg - Receive a message from a socket (kernel space)
900 * @sock: The socket to receive the message from
901 * @msg: Received message
902 * @vec: Input s/g array for message data
903 * @num: Size of input s/g array
904 * @size: Number of bytes to read
905 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 906 *
8a3c245c
PT
907 * On return the msg structure contains the scatter/gather array passed in the
908 * vec argument. The array is modified so that it consists of the unfilled
909 * portion of the original array.
c1249c0a 910 *
8a3c245c 911 * The returned value is the total number of bytes received, or an error.
c1249c0a 912 */
8a3c245c 913
89bddce5
SH
914int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
915 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
916{
917 mm_segment_t oldfs = get_fs();
918 int result;
919
aa563d7b 920 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 921 set_fs(KERNEL_DS);
2da62906 922 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
923 set_fs(oldfs);
924 return result;
925}
c6d409cf 926EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 927
ce1d4d3e
CH
928static ssize_t sock_sendpage(struct file *file, struct page *page,
929 int offset, size_t size, loff_t *ppos, int more)
1da177e4 930{
1da177e4
LT
931 struct socket *sock;
932 int flags;
933
ce1d4d3e
CH
934 sock = file->private_data;
935
35f9c09f
ED
936 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
937 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
938 flags |= more;
ce1d4d3e 939
e6949583 940 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 941}
1da177e4 942
9c55e01c 943static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 944 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
945 unsigned int flags)
946{
947 struct socket *sock = file->private_data;
948
997b37da 949 if (unlikely(!sock->ops->splice_read))
95506588 950 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 951
9c55e01c
JA
952 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
953}
954
8ae5e030 955static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 956{
6d652330
AV
957 struct file *file = iocb->ki_filp;
958 struct socket *sock = file->private_data;
0345f931 959 struct msghdr msg = {.msg_iter = *to,
960 .msg_iocb = iocb};
8ae5e030 961 ssize_t res;
ce1d4d3e 962
8ae5e030
AV
963 if (file->f_flags & O_NONBLOCK)
964 msg.msg_flags = MSG_DONTWAIT;
965
966 if (iocb->ki_pos != 0)
1da177e4 967 return -ESPIPE;
027445c3 968
66ee59af 969 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
970 return 0;
971
2da62906 972 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
973 *to = msg.msg_iter;
974 return res;
1da177e4
LT
975}
976
8ae5e030 977static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 978{
6d652330
AV
979 struct file *file = iocb->ki_filp;
980 struct socket *sock = file->private_data;
0345f931 981 struct msghdr msg = {.msg_iter = *from,
982 .msg_iocb = iocb};
8ae5e030 983 ssize_t res;
1da177e4 984
8ae5e030 985 if (iocb->ki_pos != 0)
ce1d4d3e 986 return -ESPIPE;
027445c3 987
8ae5e030
AV
988 if (file->f_flags & O_NONBLOCK)
989 msg.msg_flags = MSG_DONTWAIT;
990
6d652330
AV
991 if (sock->type == SOCK_SEQPACKET)
992 msg.msg_flags |= MSG_EOR;
993
d8725c86 994 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
995 *from = msg.msg_iter;
996 return res;
1da177e4
LT
997}
998
1da177e4
LT
999/*
1000 * Atomic setting of ioctl hooks to avoid race
1001 * with module unload.
1002 */
1003
4a3e2f71 1004static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1005static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1006
881d966b 1007void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1008{
4a3e2f71 1009 mutex_lock(&br_ioctl_mutex);
1da177e4 1010 br_ioctl_hook = hook;
4a3e2f71 1011 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1012}
1013EXPORT_SYMBOL(brioctl_set);
1014
4a3e2f71 1015static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1016static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1017
881d966b 1018void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1019{
4a3e2f71 1020 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1021 vlan_ioctl_hook = hook;
4a3e2f71 1022 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1023}
1024EXPORT_SYMBOL(vlan_ioctl_set);
1025
4a3e2f71 1026static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1027static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1028
89bddce5 1029void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1030{
4a3e2f71 1031 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1032 dlci_ioctl_hook = hook;
4a3e2f71 1033 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1034}
1035EXPORT_SYMBOL(dlci_ioctl_set);
1036
6b96018b 1037static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1038 unsigned int cmd, unsigned long arg)
6b96018b
AB
1039{
1040 int err;
1041 void __user *argp = (void __user *)arg;
1042
1043 err = sock->ops->ioctl(sock, cmd, arg);
1044
1045 /*
1046 * If this ioctl is unknown try to hand it down
1047 * to the NIC driver.
1048 */
36fd633e
AV
1049 if (err != -ENOIOCTLCMD)
1050 return err;
6b96018b 1051
36fd633e
AV
1052 if (cmd == SIOCGIFCONF) {
1053 struct ifconf ifc;
1054 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1055 return -EFAULT;
1056 rtnl_lock();
1057 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1058 rtnl_unlock();
1059 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1060 err = -EFAULT;
44c02a2c
AV
1061 } else {
1062 struct ifreq ifr;
1063 bool need_copyout;
63ff03ab 1064 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1065 return -EFAULT;
1066 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1067 if (!err && need_copyout)
63ff03ab 1068 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1069 return -EFAULT;
36fd633e 1070 }
6b96018b
AB
1071 return err;
1072}
1073
1da177e4
LT
1074/*
1075 * With an ioctl, arg may well be a user mode pointer, but we don't know
1076 * what to do with it - that's up to the protocol still.
1077 */
1078
8a3c245c
PT
1079/**
1080 * get_net_ns - increment the refcount of the network namespace
1081 * @ns: common namespace (net)
1082 *
1083 * Returns the net's common namespace.
1084 */
1085
d8d211a2 1086struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1087{
1088 return &get_net(container_of(ns, struct net, ns))->ns;
1089}
d8d211a2 1090EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1091
1da177e4
LT
1092static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1093{
1094 struct socket *sock;
881d966b 1095 struct sock *sk;
1da177e4
LT
1096 void __user *argp = (void __user *)arg;
1097 int pid, err;
881d966b 1098 struct net *net;
1da177e4 1099
b69aee04 1100 sock = file->private_data;
881d966b 1101 sk = sock->sk;
3b1e0a65 1102 net = sock_net(sk);
44c02a2c
AV
1103 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1104 struct ifreq ifr;
1105 bool need_copyout;
1106 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1107 return -EFAULT;
1108 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1109 if (!err && need_copyout)
1110 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1111 return -EFAULT;
1da177e4 1112 } else
3d23e349 1113#ifdef CONFIG_WEXT_CORE
1da177e4 1114 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1115 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1116 } else
3d23e349 1117#endif
89bddce5 1118 switch (cmd) {
1da177e4
LT
1119 case FIOSETOWN:
1120 case SIOCSPGRP:
1121 err = -EFAULT;
1122 if (get_user(pid, (int __user *)argp))
1123 break;
393cc3f5 1124 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1125 break;
1126 case FIOGETOWN:
1127 case SIOCGPGRP:
609d7fa9 1128 err = put_user(f_getown(sock->file),
89bddce5 1129 (int __user *)argp);
1da177e4
LT
1130 break;
1131 case SIOCGIFBR:
1132 case SIOCSIFBR:
1133 case SIOCBRADDBR:
1134 case SIOCBRDELBR:
1135 err = -ENOPKG;
1136 if (!br_ioctl_hook)
1137 request_module("bridge");
1138
4a3e2f71 1139 mutex_lock(&br_ioctl_mutex);
89bddce5 1140 if (br_ioctl_hook)
881d966b 1141 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1142 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1143 break;
1144 case SIOCGIFVLAN:
1145 case SIOCSIFVLAN:
1146 err = -ENOPKG;
1147 if (!vlan_ioctl_hook)
1148 request_module("8021q");
1149
4a3e2f71 1150 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1151 if (vlan_ioctl_hook)
881d966b 1152 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1153 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1154 break;
1da177e4
LT
1155 case SIOCADDDLCI:
1156 case SIOCDELDLCI:
1157 err = -ENOPKG;
1158 if (!dlci_ioctl_hook)
1159 request_module("dlci");
1160
7512cbf6
PE
1161 mutex_lock(&dlci_ioctl_mutex);
1162 if (dlci_ioctl_hook)
1da177e4 1163 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1164 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1165 break;
c62cce2c
AV
1166 case SIOCGSKNS:
1167 err = -EPERM;
1168 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1169 break;
1170
1171 err = open_related_ns(&net->ns, get_net_ns);
1172 break;
0768e170
AB
1173 case SIOCGSTAMP_OLD:
1174 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1175 if (!sock->ops->gettstamp) {
1176 err = -ENOIOCTLCMD;
1177 break;
1178 }
1179 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1180 cmd == SIOCGSTAMP_OLD,
1181 !IS_ENABLED(CONFIG_64BIT));
60747828 1182 break;
0768e170
AB
1183 case SIOCGSTAMP_NEW:
1184 case SIOCGSTAMPNS_NEW:
1185 if (!sock->ops->gettstamp) {
1186 err = -ENOIOCTLCMD;
1187 break;
1188 }
1189 err = sock->ops->gettstamp(sock, argp,
1190 cmd == SIOCGSTAMP_NEW,
1191 false);
c7cbdbf2 1192 break;
1da177e4 1193 default:
63ff03ab 1194 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1195 break;
89bddce5 1196 }
1da177e4
LT
1197 return err;
1198}
1199
8a3c245c
PT
1200/**
1201 * sock_create_lite - creates a socket
1202 * @family: protocol family (AF_INET, ...)
1203 * @type: communication type (SOCK_STREAM, ...)
1204 * @protocol: protocol (0, ...)
1205 * @res: new socket
1206 *
1207 * Creates a new socket and assigns it to @res, passing through LSM.
1208 * The new socket initialization is not complete, see kernel_accept().
1209 * Returns 0 or an error. On failure @res is set to %NULL.
1210 * This function internally uses GFP_KERNEL.
1211 */
1212
1da177e4
LT
1213int sock_create_lite(int family, int type, int protocol, struct socket **res)
1214{
1215 int err;
1216 struct socket *sock = NULL;
89bddce5 1217
1da177e4
LT
1218 err = security_socket_create(family, type, protocol, 1);
1219 if (err)
1220 goto out;
1221
1222 sock = sock_alloc();
1223 if (!sock) {
1224 err = -ENOMEM;
1225 goto out;
1226 }
1227
1da177e4 1228 sock->type = type;
7420ed23
VY
1229 err = security_socket_post_create(sock, family, type, protocol, 1);
1230 if (err)
1231 goto out_release;
1232
1da177e4
LT
1233out:
1234 *res = sock;
1235 return err;
7420ed23
VY
1236out_release:
1237 sock_release(sock);
1238 sock = NULL;
1239 goto out;
1da177e4 1240}
c6d409cf 1241EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1242
1243/* No kernel lock held - perfect */
ade994f4 1244static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1245{
3cafb376 1246 struct socket *sock = file->private_data;
a331de3b 1247 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1248
e88958e6
CH
1249 if (!sock->ops->poll)
1250 return 0;
f641f13b 1251
a331de3b
CH
1252 if (sk_can_busy_loop(sock->sk)) {
1253 /* poll once if requested by the syscall */
1254 if (events & POLL_BUSY_LOOP)
1255 sk_busy_loop(sock->sk, 1);
1256
1257 /* if this socket can poll_ll, tell the system call */
1258 flag = POLL_BUSY_LOOP;
1259 }
1260
1261 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1262}
1263
89bddce5 1264static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1265{
b69aee04 1266 struct socket *sock = file->private_data;
1da177e4
LT
1267
1268 return sock->ops->mmap(file, sock, vma);
1269}
1270
/*
 * release() entry point for socket files: release the socket that lives in
 * @inode. @filp is not used. Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1276
1277/*
1278 * Update the socket async list
1279 *
1280 * Fasync_list locking strategy.
1281 *
1282 * 1. fasync_list is modified only under process context socket lock
1283 * i.e. under semaphore.
1284 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1285 * or under socket lock
1da177e4
LT
1286 */
1287
1288static int sock_fasync(int fd, struct file *filp, int on)
1289{
989a2979
ED
1290 struct socket *sock = filp->private_data;
1291 struct sock *sk = sock->sk;
eaefd110 1292 struct socket_wq *wq;
1da177e4 1293
989a2979 1294 if (sk == NULL)
1da177e4 1295 return -EINVAL;
1da177e4
LT
1296
1297 lock_sock(sk);
e6476c21 1298 wq = sock->wq;
eaefd110 1299 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1300
eaefd110 1301 if (!wq->fasync_list)
989a2979
ED
1302 sock_reset_flag(sk, SOCK_FASYNC);
1303 else
bcdce719 1304 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1305
989a2979 1306 release_sock(sk);
1da177e4
LT
1307 return 0;
1308}
1309
ceb5d58b 1310/* This function may be called only under rcu_lock */
1da177e4 1311
ceb5d58b 1312int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1313{
ceb5d58b 1314 if (!wq || !wq->fasync_list)
1da177e4 1315 return -1;
ceb5d58b 1316
89bddce5 1317 switch (how) {
8d8ad9d7 1318 case SOCK_WAKE_WAITD:
ceb5d58b 1319 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1320 break;
1321 goto call_kill;
8d8ad9d7 1322 case SOCK_WAKE_SPACE:
ceb5d58b 1323 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1324 break;
1325 /* fall through */
8d8ad9d7 1326 case SOCK_WAKE_IO:
89bddce5 1327call_kill:
43815482 1328 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1329 break;
8d8ad9d7 1330 case SOCK_WAKE_URG:
43815482 1331 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1332 }
ceb5d58b 1333
1da177e4
LT
1334 return 0;
1335}
c6d409cf 1336EXPORT_SYMBOL(sock_wake_async);
1da177e4 1337
8a3c245c
PT
1338/**
1339 * __sock_create - creates a socket
1340 * @net: net namespace
1341 * @family: protocol family (AF_INET, ...)
1342 * @type: communication type (SOCK_STREAM, ...)
1343 * @protocol: protocol (0, ...)
1344 * @res: new socket
1345 * @kern: boolean for kernel space sockets
1346 *
1347 * Creates a new socket and assigns it to @res, passing through LSM.
1348 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1349 * be set to true if the socket resides in kernel space.
1350 * This function internally uses GFP_KERNEL.
1351 */
1352
721db93a 1353int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1354 struct socket **res, int kern)
1da177e4
LT
1355{
1356 int err;
1357 struct socket *sock;
55737fda 1358 const struct net_proto_family *pf;
1da177e4
LT
1359
1360 /*
89bddce5 1361 * Check protocol is in range
1da177e4
LT
1362 */
1363 if (family < 0 || family >= NPROTO)
1364 return -EAFNOSUPPORT;
1365 if (type < 0 || type >= SOCK_MAX)
1366 return -EINVAL;
1367
1368 /* Compatibility.
1369
1370 This uglymoron is moved from INET layer to here to avoid
1371 deadlock in module load.
1372 */
1373 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1374 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1375 current->comm);
1da177e4
LT
1376 family = PF_PACKET;
1377 }
1378
1379 err = security_socket_create(family, type, protocol, kern);
1380 if (err)
1381 return err;
89bddce5 1382
55737fda
SH
1383 /*
1384 * Allocate the socket and allow the family to set things up. if
1385 * the protocol is 0, the family is instructed to select an appropriate
1386 * default.
1387 */
1388 sock = sock_alloc();
1389 if (!sock) {
e87cc472 1390 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1391 return -ENFILE; /* Not exactly a match, but its the
1392 closest posix thing */
1393 }
1394
1395 sock->type = type;
1396
95a5afca 1397#ifdef CONFIG_MODULES
89bddce5
SH
1398 /* Attempt to load a protocol module if the find failed.
1399 *
1400 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1401 * requested real, full-featured networking support upon configuration.
1402 * Otherwise module support will break!
1403 */
190683a9 1404 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1405 request_module("net-pf-%d", family);
1da177e4
LT
1406#endif
1407
55737fda
SH
1408 rcu_read_lock();
1409 pf = rcu_dereference(net_families[family]);
1410 err = -EAFNOSUPPORT;
1411 if (!pf)
1412 goto out_release;
1da177e4
LT
1413
1414 /*
1415 * We will call the ->create function, that possibly is in a loadable
1416 * module, so we have to bump that loadable module refcnt first.
1417 */
55737fda 1418 if (!try_module_get(pf->owner))
1da177e4
LT
1419 goto out_release;
1420
55737fda
SH
1421 /* Now protected by module ref count */
1422 rcu_read_unlock();
1423
3f378b68 1424 err = pf->create(net, sock, protocol, kern);
55737fda 1425 if (err < 0)
1da177e4 1426 goto out_module_put;
a79af59e 1427
1da177e4
LT
1428 /*
1429 * Now to bump the refcnt of the [loadable] module that owns this
1430 * socket at sock_release time we decrement its refcnt.
1431 */
55737fda
SH
1432 if (!try_module_get(sock->ops->owner))
1433 goto out_module_busy;
1434
1da177e4
LT
1435 /*
1436 * Now that we're done with the ->create function, the [loadable]
1437 * module can have its refcnt decremented
1438 */
55737fda 1439 module_put(pf->owner);
7420ed23
VY
1440 err = security_socket_post_create(sock, family, type, protocol, kern);
1441 if (err)
3b185525 1442 goto out_sock_release;
55737fda 1443 *res = sock;
1da177e4 1444
55737fda
SH
1445 return 0;
1446
1447out_module_busy:
1448 err = -EAFNOSUPPORT;
1da177e4 1449out_module_put:
55737fda
SH
1450 sock->ops = NULL;
1451 module_put(pf->owner);
1452out_sock_release:
1da177e4 1453 sock_release(sock);
55737fda
SH
1454 return err;
1455
1456out_release:
1457 rcu_read_unlock();
1458 goto out_sock_release;
1da177e4 1459}
721db93a 1460EXPORT_SYMBOL(__sock_create);
1da177e4 1461
8a3c245c
PT
1462/**
1463 * sock_create - creates a socket
1464 * @family: protocol family (AF_INET, ...)
1465 * @type: communication type (SOCK_STREAM, ...)
1466 * @protocol: protocol (0, ...)
1467 * @res: new socket
1468 *
1469 * A wrapper around __sock_create().
1470 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1471 */
1472
1da177e4
LT
1473int sock_create(int family, int type, int protocol, struct socket **res)
1474{
1b8d7ae4 1475 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1476}
c6d409cf 1477EXPORT_SYMBOL(sock_create);
1da177e4 1478
/**
 * sock_create_kern - creates a socket (kernel space)
 * @net: net namespace
 * @family: protocol family (AF_INET, ...)
 * @type: communication type (SOCK_STREAM, ...)
 * @protocol: protocol (0, ...)
 * @res: new socket
 *
 * A wrapper around __sock_create() that passes 1 for @kern.
 * Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1496
9d6a15c3 1497int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1498{
1499 int retval;
1500 struct socket *sock;
a677a039
UD
1501 int flags;
1502
e38b36f3
UD
1503 /* Check the SOCK_* constants for consistency. */
1504 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1505 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1506 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1507 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1508
a677a039 1509 flags = type & ~SOCK_TYPE_MASK;
77d27200 1510 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1511 return -EINVAL;
1512 type &= SOCK_TYPE_MASK;
1da177e4 1513
aaca0bdc
UD
1514 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1515 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1516
1da177e4
LT
1517 retval = sock_create(family, type, protocol, &sock);
1518 if (retval < 0)
8e1611e2 1519 return retval;
1da177e4 1520
8e1611e2 1521 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1522}
1523
9d6a15c3
DB
1524SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1525{
1526 return __sys_socket(family, type, protocol);
1527}
1528
1da177e4
LT
1529/*
1530 * Create a pair of connected sockets.
1531 */
1532
6debc8d8 1533int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1534{
1535 struct socket *sock1, *sock2;
1536 int fd1, fd2, err;
db349509 1537 struct file *newfile1, *newfile2;
a677a039
UD
1538 int flags;
1539
1540 flags = type & ~SOCK_TYPE_MASK;
77d27200 1541 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1542 return -EINVAL;
1543 type &= SOCK_TYPE_MASK;
1da177e4 1544
aaca0bdc
UD
1545 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1546 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1547
016a266b
AV
1548 /*
1549 * reserve descriptors and make sure we won't fail
1550 * to return them to userland.
1551 */
1552 fd1 = get_unused_fd_flags(flags);
1553 if (unlikely(fd1 < 0))
1554 return fd1;
1555
1556 fd2 = get_unused_fd_flags(flags);
1557 if (unlikely(fd2 < 0)) {
1558 put_unused_fd(fd1);
1559 return fd2;
1560 }
1561
1562 err = put_user(fd1, &usockvec[0]);
1563 if (err)
1564 goto out;
1565
1566 err = put_user(fd2, &usockvec[1]);
1567 if (err)
1568 goto out;
1569
1da177e4
LT
1570 /*
1571 * Obtain the first socket and check if the underlying protocol
1572 * supports the socketpair call.
1573 */
1574
1575 err = sock_create(family, type, protocol, &sock1);
016a266b 1576 if (unlikely(err < 0))
1da177e4
LT
1577 goto out;
1578
1579 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1580 if (unlikely(err < 0)) {
1581 sock_release(sock1);
1582 goto out;
bf3c23d1 1583 }
d73aa286 1584
d47cd945
DH
1585 err = security_socket_socketpair(sock1, sock2);
1586 if (unlikely(err)) {
1587 sock_release(sock2);
1588 sock_release(sock1);
1589 goto out;
1590 }
1591
016a266b
AV
1592 err = sock1->ops->socketpair(sock1, sock2);
1593 if (unlikely(err < 0)) {
1594 sock_release(sock2);
1595 sock_release(sock1);
1596 goto out;
28407630
AV
1597 }
1598
aab174f0 1599 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1600 if (IS_ERR(newfile1)) {
28407630 1601 err = PTR_ERR(newfile1);
016a266b
AV
1602 sock_release(sock2);
1603 goto out;
28407630
AV
1604 }
1605
aab174f0 1606 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1607 if (IS_ERR(newfile2)) {
1608 err = PTR_ERR(newfile2);
016a266b
AV
1609 fput(newfile1);
1610 goto out;
db349509
AV
1611 }
1612
157cf649 1613 audit_fd_pair(fd1, fd2);
d73aa286 1614
db349509
AV
1615 fd_install(fd1, newfile1);
1616 fd_install(fd2, newfile2);
d73aa286 1617 return 0;
1da177e4 1618
016a266b 1619out:
d73aa286 1620 put_unused_fd(fd2);
d73aa286 1621 put_unused_fd(fd1);
1da177e4
LT
1622 return err;
1623}
1624
6debc8d8
DB
1625SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1626 int __user *, usockvec)
1627{
1628 return __sys_socketpair(family, type, protocol, usockvec);
1629}
1630
1da177e4
LT
1631/*
1632 * Bind a name to a socket. Nothing much to do here since it's
1633 * the protocol's responsibility to handle the local address.
1634 *
1635 * We move the socket address to kernel space before we call
1636 * the protocol layer (having also checked the address is ok).
1637 */
1638
a87d35d8 1639int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1640{
1641 struct socket *sock;
230b1839 1642 struct sockaddr_storage address;
6cb153ca 1643 int err, fput_needed;
1da177e4 1644
89bddce5 1645 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1646 if (sock) {
43db362d 1647 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1648 if (!err) {
89bddce5 1649 err = security_socket_bind(sock,
230b1839 1650 (struct sockaddr *)&address,
89bddce5 1651 addrlen);
6cb153ca
BL
1652 if (!err)
1653 err = sock->ops->bind(sock,
89bddce5 1654 (struct sockaddr *)
230b1839 1655 &address, addrlen);
1da177e4 1656 }
6cb153ca 1657 fput_light(sock->file, fput_needed);
89bddce5 1658 }
1da177e4
LT
1659 return err;
1660}
1661
a87d35d8
DB
1662SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1663{
1664 return __sys_bind(fd, umyaddr, addrlen);
1665}
1666
1da177e4
LT
1667/*
1668 * Perform a listen. Basically, we allow the protocol to do anything
1669 * necessary for a listen, and if that works, we mark the socket as
1670 * ready for listening.
1671 */
1672
25e290ee 1673int __sys_listen(int fd, int backlog)
1da177e4
LT
1674{
1675 struct socket *sock;
6cb153ca 1676 int err, fput_needed;
b8e1f9b5 1677 int somaxconn;
89bddce5
SH
1678
1679 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1680 if (sock) {
8efa6e93 1681 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1682 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1683 backlog = somaxconn;
1da177e4
LT
1684
1685 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1686 if (!err)
1687 err = sock->ops->listen(sock, backlog);
1da177e4 1688
6cb153ca 1689 fput_light(sock->file, fput_needed);
1da177e4
LT
1690 }
1691 return err;
1692}
1693
25e290ee
DB
1694SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1695{
1696 return __sys_listen(fd, backlog);
1697}
1698
1da177e4
LT
1699/*
1700 * For accept, we attempt to create a new socket, set up the link
1701 * with the client, wake up the client, then return the new
1702 * connected fd. We collect the address of the connector in kernel
1703 * space and move it to user at the very end. This is unclean because
1704 * we open the socket then return an error.
1705 *
1706 * 1003.1g adds the ability to recvmsg() to query connection pending
1707 * status to recvmsg. We need to add that support in a way thats
b903036a 1708 * clean when we restructure accept also.
1da177e4
LT
1709 */
1710
4541e805
DB
1711int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1712 int __user *upeer_addrlen, int flags)
1da177e4
LT
1713{
1714 struct socket *sock, *newsock;
39d8c1b6 1715 struct file *newfile;
6cb153ca 1716 int err, len, newfd, fput_needed;
230b1839 1717 struct sockaddr_storage address;
1da177e4 1718
77d27200 1719 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1720 return -EINVAL;
1721
1722 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1723 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1724
6cb153ca 1725 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1726 if (!sock)
1727 goto out;
1728
1729 err = -ENFILE;
c6d409cf
ED
1730 newsock = sock_alloc();
1731 if (!newsock)
1da177e4
LT
1732 goto out_put;
1733
1734 newsock->type = sock->type;
1735 newsock->ops = sock->ops;
1736
1da177e4
LT
1737 /*
1738 * We don't need try_module_get here, as the listening socket (sock)
1739 * has the protocol module (sock->ops->owner) held.
1740 */
1741 __module_get(newsock->ops->owner);
1742
28407630 1743 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1744 if (unlikely(newfd < 0)) {
1745 err = newfd;
9a1875e6
DM
1746 sock_release(newsock);
1747 goto out_put;
39d8c1b6 1748 }
aab174f0 1749 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1750 if (IS_ERR(newfile)) {
28407630
AV
1751 err = PTR_ERR(newfile);
1752 put_unused_fd(newfd);
28407630
AV
1753 goto out_put;
1754 }
39d8c1b6 1755
a79af59e
FF
1756 err = security_socket_accept(sock, newsock);
1757 if (err)
39d8c1b6 1758 goto out_fd;
a79af59e 1759
cdfbabfb 1760 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1761 if (err < 0)
39d8c1b6 1762 goto out_fd;
1da177e4
LT
1763
1764 if (upeer_sockaddr) {
9b2c45d4
DV
1765 len = newsock->ops->getname(newsock,
1766 (struct sockaddr *)&address, 2);
1767 if (len < 0) {
1da177e4 1768 err = -ECONNABORTED;
39d8c1b6 1769 goto out_fd;
1da177e4 1770 }
43db362d 1771 err = move_addr_to_user(&address,
230b1839 1772 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1773 if (err < 0)
39d8c1b6 1774 goto out_fd;
1da177e4
LT
1775 }
1776
1777 /* File flags are not inherited via accept() unlike another OSes. */
1778
39d8c1b6
DM
1779 fd_install(newfd, newfile);
1780 err = newfd;
1da177e4 1781
1da177e4 1782out_put:
6cb153ca 1783 fput_light(sock->file, fput_needed);
1da177e4
LT
1784out:
1785 return err;
39d8c1b6 1786out_fd:
9606a216 1787 fput(newfile);
39d8c1b6 1788 put_unused_fd(newfd);
1da177e4
LT
1789 goto out_put;
1790}
1791
4541e805
DB
1792SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1793 int __user *, upeer_addrlen, int, flags)
1794{
1795 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1796}
1797
20f37034
HC
1798SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1799 int __user *, upeer_addrlen)
aaca0bdc 1800{
4541e805 1801 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1802}
1803
1da177e4
LT
1804/*
1805 * Attempt to connect to a socket with the server address. The address
1806 * is in user space so we verify it is OK and move it to kernel space.
1807 *
1808 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1809 * break bindings
1810 *
1811 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1812 * other SEQPACKET protocols that take time to connect() as it doesn't
1813 * include the -EINPROGRESS status for such sockets.
1814 */
1815
1387c2c2 1816int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1817{
1818 struct socket *sock;
230b1839 1819 struct sockaddr_storage address;
6cb153ca 1820 int err, fput_needed;
1da177e4 1821
6cb153ca 1822 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1823 if (!sock)
1824 goto out;
43db362d 1825 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1826 if (err < 0)
1827 goto out_put;
1828
89bddce5 1829 err =
230b1839 1830 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1831 if (err)
1832 goto out_put;
1833
230b1839 1834 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1835 sock->file->f_flags);
1836out_put:
6cb153ca 1837 fput_light(sock->file, fput_needed);
1da177e4
LT
1838out:
1839 return err;
1840}
1841
1387c2c2
DB
1842SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1843 int, addrlen)
1844{
1845 return __sys_connect(fd, uservaddr, addrlen);
1846}
1847
1da177e4
LT
1848/*
1849 * Get the local address ('name') of a socket object. Move the obtained
1850 * name to user space.
1851 */
1852
8882a107
DB
1853int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1854 int __user *usockaddr_len)
1da177e4
LT
1855{
1856 struct socket *sock;
230b1839 1857 struct sockaddr_storage address;
9b2c45d4 1858 int err, fput_needed;
89bddce5 1859
6cb153ca 1860 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1861 if (!sock)
1862 goto out;
1863
1864 err = security_socket_getsockname(sock);
1865 if (err)
1866 goto out_put;
1867
9b2c45d4
DV
1868 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1869 if (err < 0)
1da177e4 1870 goto out_put;
9b2c45d4
DV
1871 /* "err" is actually length in this case */
1872 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1873
1874out_put:
6cb153ca 1875 fput_light(sock->file, fput_needed);
1da177e4
LT
1876out:
1877 return err;
1878}
1879
8882a107
DB
1880SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1881 int __user *, usockaddr_len)
1882{
1883 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1884}
1885
1da177e4
LT
1886/*
1887 * Get the remote address ('name') of a socket object. Move the obtained
1888 * name to user space.
1889 */
1890
b21c8f83
DB
1891int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1892 int __user *usockaddr_len)
1da177e4
LT
1893{
1894 struct socket *sock;
230b1839 1895 struct sockaddr_storage address;
9b2c45d4 1896 int err, fput_needed;
1da177e4 1897
89bddce5
SH
1898 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1899 if (sock != NULL) {
1da177e4
LT
1900 err = security_socket_getpeername(sock);
1901 if (err) {
6cb153ca 1902 fput_light(sock->file, fput_needed);
1da177e4
LT
1903 return err;
1904 }
1905
9b2c45d4
DV
1906 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1907 if (err >= 0)
1908 /* "err" is actually length in this case */
1909 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1910 usockaddr_len);
6cb153ca 1911 fput_light(sock->file, fput_needed);
1da177e4
LT
1912 }
1913 return err;
1914}
1915
b21c8f83
DB
1916SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1917 int __user *, usockaddr_len)
1918{
1919 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1920}
1921
1da177e4
LT
1922/*
1923 * Send a datagram to a given address. We move the address into kernel
1924 * space and check the user space data area is readable before invoking
1925 * the protocol.
1926 */
211b634b
DB
1927int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1928 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1929{
1930 struct socket *sock;
230b1839 1931 struct sockaddr_storage address;
1da177e4
LT
1932 int err;
1933 struct msghdr msg;
1934 struct iovec iov;
6cb153ca 1935 int fput_needed;
6cb153ca 1936
602bd0e9
AV
1937 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1938 if (unlikely(err))
1939 return err;
de0fa95c
PE
1940 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1941 if (!sock)
4387ff75 1942 goto out;
6cb153ca 1943
89bddce5 1944 msg.msg_name = NULL;
89bddce5
SH
1945 msg.msg_control = NULL;
1946 msg.msg_controllen = 0;
1947 msg.msg_namelen = 0;
6cb153ca 1948 if (addr) {
43db362d 1949 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1950 if (err < 0)
1951 goto out_put;
230b1839 1952 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1953 msg.msg_namelen = addr_len;
1da177e4
LT
1954 }
1955 if (sock->file->f_flags & O_NONBLOCK)
1956 flags |= MSG_DONTWAIT;
1957 msg.msg_flags = flags;
d8725c86 1958 err = sock_sendmsg(sock, &msg);
1da177e4 1959
89bddce5 1960out_put:
de0fa95c 1961 fput_light(sock->file, fput_needed);
4387ff75 1962out:
1da177e4
LT
1963 return err;
1964}
1965
211b634b
DB
1966SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1967 unsigned int, flags, struct sockaddr __user *, addr,
1968 int, addr_len)
1969{
1970 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1971}
1972
1da177e4 1973/*
89bddce5 1974 * Send a datagram down a socket.
1da177e4
LT
1975 */
1976
3e0fa65f 1977SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1978 unsigned int, flags)
1da177e4 1979{
211b634b 1980 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1981}
1982
1983/*
89bddce5 1984 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1985 * sender. We verify the buffers are writable and if needed move the
1986 * sender address from kernel to user space.
1987 */
7a09e1eb
DB
1988int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
1989 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
1990{
1991 struct socket *sock;
1992 struct iovec iov;
1993 struct msghdr msg;
230b1839 1994 struct sockaddr_storage address;
89bddce5 1995 int err, err2;
6cb153ca
BL
1996 int fput_needed;
1997
602bd0e9
AV
1998 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1999 if (unlikely(err))
2000 return err;
de0fa95c 2001 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2002 if (!sock)
de0fa95c 2003 goto out;
1da177e4 2004
89bddce5
SH
2005 msg.msg_control = NULL;
2006 msg.msg_controllen = 0;
f3d33426
HFS
2007 /* Save some cycles and don't copy the address if not needed */
2008 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2009 /* We assume all kernel code knows the size of sockaddr_storage */
2010 msg.msg_namelen = 0;
130ed5d1 2011 msg.msg_iocb = NULL;
9f138fa6 2012 msg.msg_flags = 0;
1da177e4
LT
2013 if (sock->file->f_flags & O_NONBLOCK)
2014 flags |= MSG_DONTWAIT;
2da62906 2015 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2016
89bddce5 2017 if (err >= 0 && addr != NULL) {
43db362d 2018 err2 = move_addr_to_user(&address,
230b1839 2019 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2020 if (err2 < 0)
2021 err = err2;
1da177e4 2022 }
de0fa95c
PE
2023
2024 fput_light(sock->file, fput_needed);
4387ff75 2025out:
1da177e4
LT
2026 return err;
2027}
2028
7a09e1eb
DB
2029SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2030 unsigned int, flags, struct sockaddr __user *, addr,
2031 int __user *, addr_len)
2032{
2033 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2034}
2035
1da177e4 2036/*
89bddce5 2037 * Receive a datagram from a socket.
1da177e4
LT
2038 */
2039
b7c0ddf5
JG
2040SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2041 unsigned int, flags)
1da177e4 2042{
7a09e1eb 2043 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2044}
2045
2046/*
2047 * Set a socket option. Because we don't know the option lengths we have
2048 * to pass the user mode parameter for the protocols to sort out.
2049 */
2050
cc36dca0
DB
2051static int __sys_setsockopt(int fd, int level, int optname,
2052 char __user *optval, int optlen)
1da177e4 2053{
6cb153ca 2054 int err, fput_needed;
1da177e4
LT
2055 struct socket *sock;
2056
2057 if (optlen < 0)
2058 return -EINVAL;
89bddce5
SH
2059
2060 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2061 if (sock != NULL) {
2062 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2063 if (err)
2064 goto out_put;
1da177e4
LT
2065
2066 if (level == SOL_SOCKET)
89bddce5
SH
2067 err =
2068 sock_setsockopt(sock, level, optname, optval,
2069 optlen);
1da177e4 2070 else
89bddce5
SH
2071 err =
2072 sock->ops->setsockopt(sock, level, optname, optval,
2073 optlen);
6cb153ca
BL
2074out_put:
2075 fput_light(sock->file, fput_needed);
1da177e4
LT
2076 }
2077 return err;
2078}
2079
cc36dca0
DB
2080SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2081 char __user *, optval, int, optlen)
2082{
2083 return __sys_setsockopt(fd, level, optname, optval, optlen);
2084}
2085
1da177e4
LT
2086/*
2087 * Get a socket option. Because we don't know the option lengths we have
2088 * to pass a user mode parameter for the protocols to sort out.
2089 */
2090
13a2d70e
DB
2091static int __sys_getsockopt(int fd, int level, int optname,
2092 char __user *optval, int __user *optlen)
1da177e4 2093{
6cb153ca 2094 int err, fput_needed;
1da177e4
LT
2095 struct socket *sock;
2096
89bddce5
SH
2097 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2098 if (sock != NULL) {
6cb153ca
BL
2099 err = security_socket_getsockopt(sock, level, optname);
2100 if (err)
2101 goto out_put;
1da177e4
LT
2102
2103 if (level == SOL_SOCKET)
89bddce5
SH
2104 err =
2105 sock_getsockopt(sock, level, optname, optval,
2106 optlen);
1da177e4 2107 else
89bddce5
SH
2108 err =
2109 sock->ops->getsockopt(sock, level, optname, optval,
2110 optlen);
6cb153ca
BL
2111out_put:
2112 fput_light(sock->file, fput_needed);
1da177e4
LT
2113 }
2114 return err;
2115}
2116
13a2d70e
DB
2117SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2118 char __user *, optval, int __user *, optlen)
2119{
2120 return __sys_getsockopt(fd, level, optname, optval, optlen);
2121}
2122
1da177e4
LT
2123/*
2124 * Shutdown a socket.
2125 */
2126
005a1aea 2127int __sys_shutdown(int fd, int how)
1da177e4 2128{
6cb153ca 2129 int err, fput_needed;
1da177e4
LT
2130 struct socket *sock;
2131
89bddce5
SH
2132 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2133 if (sock != NULL) {
1da177e4 2134 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2135 if (!err)
2136 err = sock->ops->shutdown(sock, how);
2137 fput_light(sock->file, fput_needed);
1da177e4
LT
2138 }
2139 return err;
2140}
2141
005a1aea
DB
2142SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2143{
2144 return __sys_shutdown(fd, how);
2145}
2146
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * The expansion is not self-contained: it reads a variable named 'flags'
 * and a pointer named '<msg>_compat' from the scope where it is used.
 */
#define COMPAT_MSG(msg, member)					\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2153
c71d8ebe
TH
2154struct used_address {
2155 struct sockaddr_storage name;
2156 unsigned int name_len;
2157};
2158
da184284
AV
2159static int copy_msghdr_from_user(struct msghdr *kmsg,
2160 struct user_msghdr __user *umsg,
2161 struct sockaddr __user **save_addr,
2162 struct iovec **iov)
1661bf36 2163{
ffb07550 2164 struct user_msghdr msg;
08adb7da
AV
2165 ssize_t err;
2166
ffb07550 2167 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2168 return -EFAULT;
dbb490b9 2169
864d9664 2170 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2171 kmsg->msg_controllen = msg.msg_controllen;
2172 kmsg->msg_flags = msg.msg_flags;
2173
2174 kmsg->msg_namelen = msg.msg_namelen;
2175 if (!msg.msg_name)
6a2a2b3a
AS
2176 kmsg->msg_namelen = 0;
2177
dbb490b9
ML
2178 if (kmsg->msg_namelen < 0)
2179 return -EINVAL;
2180
1661bf36 2181 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2182 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2183
2184 if (save_addr)
ffb07550 2185 *save_addr = msg.msg_name;
08adb7da 2186
ffb07550 2187 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2188 if (!save_addr) {
864d9664
PA
2189 err = move_addr_to_kernel(msg.msg_name,
2190 kmsg->msg_namelen,
08adb7da
AV
2191 kmsg->msg_name);
2192 if (err < 0)
2193 return err;
2194 }
2195 } else {
2196 kmsg->msg_name = NULL;
2197 kmsg->msg_namelen = 0;
2198 }
2199
ffb07550 2200 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2201 return -EMSGSIZE;
2202
0345f931 2203 kmsg->msg_iocb = NULL;
2204
ffb07550
AV
2205 return import_iovec(save_addr ? READ : WRITE,
2206 msg.msg_iov, msg.msg_iovlen,
da184284 2207 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
2208}
2209
666547ff 2210static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2211 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2212 struct used_address *used_address,
2213 unsigned int allowed_msghdr_flags)
1da177e4 2214{
89bddce5
SH
2215 struct compat_msghdr __user *msg_compat =
2216 (struct compat_msghdr __user *)msg;
230b1839 2217 struct sockaddr_storage address;
1da177e4 2218 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2219 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2220 __aligned(sizeof(__kernel_size_t));
89bddce5 2221 /* 20 is size of ipv6_pktinfo */
1da177e4 2222 unsigned char *ctl_buf = ctl;
d8725c86 2223 int ctl_len;
08adb7da 2224 ssize_t err;
89bddce5 2225
08adb7da 2226 msg_sys->msg_name = &address;
1da177e4 2227
08449320 2228 if (MSG_CMSG_COMPAT & flags)
08adb7da 2229 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2230 else
08adb7da 2231 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2232 if (err < 0)
da184284 2233 return err;
1da177e4
LT
2234
2235 err = -ENOBUFS;
2236
228e548e 2237 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2238 goto out_freeiov;
28a94d8f 2239 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2240 ctl_len = msg_sys->msg_controllen;
1da177e4 2241 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2242 err =
228e548e 2243 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2244 sizeof(ctl));
1da177e4
LT
2245 if (err)
2246 goto out_freeiov;
228e548e
AB
2247 ctl_buf = msg_sys->msg_control;
2248 ctl_len = msg_sys->msg_controllen;
1da177e4 2249 } else if (ctl_len) {
ac4340fc
DM
2250 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2251 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2252 if (ctl_len > sizeof(ctl)) {
1da177e4 2253 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2254 if (ctl_buf == NULL)
1da177e4
LT
2255 goto out_freeiov;
2256 }
2257 err = -EFAULT;
2258 /*
228e548e 2259 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2260 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2261 * checking falls down on this.
2262 */
fb8621bb 2263 if (copy_from_user(ctl_buf,
228e548e 2264 (void __user __force *)msg_sys->msg_control,
89bddce5 2265 ctl_len))
1da177e4 2266 goto out_freectl;
228e548e 2267 msg_sys->msg_control = ctl_buf;
1da177e4 2268 }
228e548e 2269 msg_sys->msg_flags = flags;
1da177e4
LT
2270
2271 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2272 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2273 /*
2274 * If this is sendmmsg() and current destination address is same as
2275 * previously succeeded address, omit asking LSM's decision.
2276 * used_address->name_len is initialized to UINT_MAX so that the first
2277 * destination address never matches.
2278 */
bc909d9d
MD
2279 if (used_address && msg_sys->msg_name &&
2280 used_address->name_len == msg_sys->msg_namelen &&
2281 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2282 used_address->name_len)) {
d8725c86 2283 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2284 goto out_freectl;
2285 }
d8725c86 2286 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2287 /*
2288 * If this is sendmmsg() and sending to current destination address was
2289 * successful, remember it.
2290 */
2291 if (used_address && err >= 0) {
2292 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2293 if (msg_sys->msg_name)
2294 memcpy(&used_address->name, msg_sys->msg_name,
2295 used_address->name_len);
c71d8ebe 2296 }
1da177e4
LT
2297
2298out_freectl:
89bddce5 2299 if (ctl_buf != ctl)
1da177e4
LT
2300 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2301out_freeiov:
da184284 2302 kfree(iov);
228e548e
AB
2303 return err;
2304}
2305
2306/*
2307 * BSD sendmsg interface
2308 */
2309
e1834a32
DB
2310long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2311 bool forbid_cmsg_compat)
228e548e
AB
2312{
2313 int fput_needed, err;
2314 struct msghdr msg_sys;
1be374a0
AL
2315 struct socket *sock;
2316
e1834a32
DB
2317 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2318 return -EINVAL;
2319
1be374a0 2320 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2321 if (!sock)
2322 goto out;
2323
28a94d8f 2324 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2325
6cb153ca 2326 fput_light(sock->file, fput_needed);
89bddce5 2327out:
1da177e4
LT
2328 return err;
2329}
2330
666547ff 2331SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2332{
e1834a32 2333 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2334}
2335
228e548e
AB
2336/*
2337 * Linux sendmmsg interface
2338 */
2339
2340int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2341 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2342{
2343 int fput_needed, err, datagrams;
2344 struct socket *sock;
2345 struct mmsghdr __user *entry;
2346 struct compat_mmsghdr __user *compat_entry;
2347 struct msghdr msg_sys;
c71d8ebe 2348 struct used_address used_address;
f092276d 2349 unsigned int oflags = flags;
228e548e 2350
e1834a32
DB
2351 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2352 return -EINVAL;
2353
98382f41
AB
2354 if (vlen > UIO_MAXIOV)
2355 vlen = UIO_MAXIOV;
228e548e
AB
2356
2357 datagrams = 0;
2358
2359 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2360 if (!sock)
2361 return err;
2362
c71d8ebe 2363 used_address.name_len = UINT_MAX;
228e548e
AB
2364 entry = mmsg;
2365 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2366 err = 0;
f092276d 2367 flags |= MSG_BATCH;
228e548e
AB
2368
2369 while (datagrams < vlen) {
f092276d
TH
2370 if (datagrams == vlen - 1)
2371 flags = oflags;
2372
228e548e 2373 if (MSG_CMSG_COMPAT & flags) {
666547ff 2374 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2375 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2376 if (err < 0)
2377 break;
2378 err = __put_user(err, &compat_entry->msg_len);
2379 ++compat_entry;
2380 } else {
a7526eb5 2381 err = ___sys_sendmsg(sock,
666547ff 2382 (struct user_msghdr __user *)entry,
28a94d8f 2383 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2384 if (err < 0)
2385 break;
2386 err = put_user(err, &entry->msg_len);
2387 ++entry;
2388 }
2389
2390 if (err)
2391 break;
2392 ++datagrams;
3023898b
SHY
2393 if (msg_data_left(&msg_sys))
2394 break;
a78cb84c 2395 cond_resched();
228e548e
AB
2396 }
2397
228e548e
AB
2398 fput_light(sock->file, fput_needed);
2399
728ffb86
AB
2400 /* We only return an error if no datagrams were able to be sent */
2401 if (datagrams != 0)
228e548e
AB
2402 return datagrams;
2403
228e548e
AB
2404 return err;
2405}
2406
2407SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2408 unsigned int, vlen, unsigned int, flags)
2409{
e1834a32 2410 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2411}
2412
666547ff 2413static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2414 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2415{
89bddce5
SH
2416 struct compat_msghdr __user *msg_compat =
2417 (struct compat_msghdr __user *)msg;
1da177e4 2418 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2419 struct iovec *iov = iovstack;
1da177e4 2420 unsigned long cmsg_ptr;
2da62906 2421 int len;
08adb7da 2422 ssize_t err;
1da177e4
LT
2423
2424 /* kernel mode address */
230b1839 2425 struct sockaddr_storage addr;
1da177e4
LT
2426
2427 /* user mode address pointers */
2428 struct sockaddr __user *uaddr;
08adb7da 2429 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2430
08adb7da 2431 msg_sys->msg_name = &addr;
1da177e4 2432
f3d33426 2433 if (MSG_CMSG_COMPAT & flags)
08adb7da 2434 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2435 else
08adb7da 2436 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2437 if (err < 0)
da184284 2438 return err;
1da177e4 2439
a2e27255
ACM
2440 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2441 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2442
f3d33426
HFS
2443 /* We assume all kernel code knows the size of sockaddr_storage */
2444 msg_sys->msg_namelen = 0;
2445
1da177e4
LT
2446 if (sock->file->f_flags & O_NONBLOCK)
2447 flags |= MSG_DONTWAIT;
2da62906 2448 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2449 if (err < 0)
2450 goto out_freeiov;
2451 len = err;
2452
2453 if (uaddr != NULL) {
43db362d 2454 err = move_addr_to_user(&addr,
a2e27255 2455 msg_sys->msg_namelen, uaddr,
89bddce5 2456 uaddr_len);
1da177e4
LT
2457 if (err < 0)
2458 goto out_freeiov;
2459 }
a2e27255 2460 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2461 COMPAT_FLAGS(msg));
1da177e4
LT
2462 if (err)
2463 goto out_freeiov;
2464 if (MSG_CMSG_COMPAT & flags)
a2e27255 2465 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2466 &msg_compat->msg_controllen);
2467 else
a2e27255 2468 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2469 &msg->msg_controllen);
2470 if (err)
2471 goto out_freeiov;
2472 err = len;
2473
2474out_freeiov:
da184284 2475 kfree(iov);
a2e27255
ACM
2476 return err;
2477}
2478
2479/*
2480 * BSD recvmsg interface
2481 */
2482
e1834a32
DB
2483long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2484 bool forbid_cmsg_compat)
a2e27255
ACM
2485{
2486 int fput_needed, err;
2487 struct msghdr msg_sys;
1be374a0
AL
2488 struct socket *sock;
2489
e1834a32
DB
2490 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2491 return -EINVAL;
2492
1be374a0 2493 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2494 if (!sock)
2495 goto out;
2496
a7526eb5 2497 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2498
6cb153ca 2499 fput_light(sock->file, fput_needed);
1da177e4
LT
2500out:
2501 return err;
2502}
2503
666547ff 2504SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2505 unsigned int, flags)
2506{
e1834a32 2507 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2508}
2509
a2e27255
ACM
2510/*
2511 * Linux recvmmsg interface
2512 */
2513
e11d4284
AB
2514static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2515 unsigned int vlen, unsigned int flags,
2516 struct timespec64 *timeout)
a2e27255
ACM
2517{
2518 int fput_needed, err, datagrams;
2519 struct socket *sock;
2520 struct mmsghdr __user *entry;
d7256d0e 2521 struct compat_mmsghdr __user *compat_entry;
a2e27255 2522 struct msghdr msg_sys;
766b9f92
DD
2523 struct timespec64 end_time;
2524 struct timespec64 timeout64;
a2e27255
ACM
2525
2526 if (timeout &&
2527 poll_select_set_timeout(&end_time, timeout->tv_sec,
2528 timeout->tv_nsec))
2529 return -EINVAL;
2530
2531 datagrams = 0;
2532
2533 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2534 if (!sock)
2535 return err;
2536
7797dc41
SHY
2537 if (likely(!(flags & MSG_ERRQUEUE))) {
2538 err = sock_error(sock->sk);
2539 if (err) {
2540 datagrams = err;
2541 goto out_put;
2542 }
e623a9e9 2543 }
a2e27255
ACM
2544
2545 entry = mmsg;
d7256d0e 2546 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2547
2548 while (datagrams < vlen) {
2549 /*
2550 * No need to ask LSM for more than the first datagram.
2551 */
d7256d0e 2552 if (MSG_CMSG_COMPAT & flags) {
666547ff 2553 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2554 &msg_sys, flags & ~MSG_WAITFORONE,
2555 datagrams);
d7256d0e
JMG
2556 if (err < 0)
2557 break;
2558 err = __put_user(err, &compat_entry->msg_len);
2559 ++compat_entry;
2560 } else {
a7526eb5 2561 err = ___sys_recvmsg(sock,
666547ff 2562 (struct user_msghdr __user *)entry,
a7526eb5
AL
2563 &msg_sys, flags & ~MSG_WAITFORONE,
2564 datagrams);
d7256d0e
JMG
2565 if (err < 0)
2566 break;
2567 err = put_user(err, &entry->msg_len);
2568 ++entry;
2569 }
2570
a2e27255
ACM
2571 if (err)
2572 break;
a2e27255
ACM
2573 ++datagrams;
2574
71c5c159
BB
2575 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2576 if (flags & MSG_WAITFORONE)
2577 flags |= MSG_DONTWAIT;
2578
a2e27255 2579 if (timeout) {
766b9f92 2580 ktime_get_ts64(&timeout64);
c2e6c856 2581 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2582 if (timeout->tv_sec < 0) {
2583 timeout->tv_sec = timeout->tv_nsec = 0;
2584 break;
2585 }
2586
2587 /* Timeout, return less than vlen datagrams */
2588 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2589 break;
2590 }
2591
2592 /* Out of band data, return right away */
2593 if (msg_sys.msg_flags & MSG_OOB)
2594 break;
a78cb84c 2595 cond_resched();
a2e27255
ACM
2596 }
2597
a2e27255 2598 if (err == 0)
34b88a68
ACM
2599 goto out_put;
2600
2601 if (datagrams == 0) {
2602 datagrams = err;
2603 goto out_put;
2604 }
a2e27255 2605
34b88a68
ACM
2606 /*
2607 * We may return less entries than requested (vlen) if the
2608 * sock is non block and there aren't enough datagrams...
2609 */
2610 if (err != -EAGAIN) {
a2e27255 2611 /*
34b88a68
ACM
2612 * ... or if recvmsg returns an error after we
2613 * received some datagrams, where we record the
2614 * error to return on the next call or if the
2615 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2616 */
34b88a68 2617 sock->sk->sk_err = -err;
a2e27255 2618 }
34b88a68
ACM
2619out_put:
2620 fput_light(sock->file, fput_needed);
a2e27255 2621
34b88a68 2622 return datagrams;
a2e27255
ACM
2623}
2624
e11d4284
AB
2625int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2626 unsigned int vlen, unsigned int flags,
2627 struct __kernel_timespec __user *timeout,
2628 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2629{
2630 int datagrams;
c2e6c856 2631 struct timespec64 timeout_sys;
a2e27255 2632
e11d4284
AB
2633 if (timeout && get_timespec64(&timeout_sys, timeout))
2634 return -EFAULT;
a2e27255 2635
e11d4284 2636 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2637 return -EFAULT;
2638
e11d4284
AB
2639 if (!timeout && !timeout32)
2640 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2641
2642 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2643
e11d4284
AB
2644 if (datagrams <= 0)
2645 return datagrams;
2646
2647 if (timeout && put_timespec64(&timeout_sys, timeout))
2648 datagrams = -EFAULT;
2649
2650 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2651 datagrams = -EFAULT;
2652
2653 return datagrams;
2654}
2655
1255e269
DB
2656SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2657 unsigned int, vlen, unsigned int, flags,
c2e6c856 2658 struct __kernel_timespec __user *, timeout)
1255e269 2659{
e11d4284
AB
2660 if (flags & MSG_CMSG_COMPAT)
2661 return -EINVAL;
2662
2663 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2664}
2665
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg system call taking an old_timespec32 timeout. MSG_CMSG_COMPAT is
 * rejected with -EINVAL; the __kernel_timespec slot is passed as NULL.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (MSG_CMSG_COMPAT & flags)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif /* CONFIG_COMPAT_32BIT_TIME */
1255e269 2677
a2e27255 2678#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes. The table is indexed by
 * the socketcall number; entry 0 is a placeholder because socketcall rejects
 * call numbers below 1.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),
	AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4),
	AL(6), AL(6), AL(2), AL(5), AL(5),
	AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2689
2690/*
89bddce5 2691 * System call vectors.
1da177e4
LT
2692 *
2693 * Argument checking cleaned up. Saved 20% in size.
2694 * This function doesn't need to set the kernel lock because
89bddce5 2695 * it is set by the callees.
1da177e4
LT
2696 */
2697
3e0fa65f 2698SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2699{
2950fa9d 2700 unsigned long a[AUDITSC_ARGS];
89bddce5 2701 unsigned long a0, a1;
1da177e4 2702 int err;
47379052 2703 unsigned int len;
1da177e4 2704
228e548e 2705 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2706 return -EINVAL;
c8e8cd57 2707 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2708
47379052
AV
2709 len = nargs[call];
2710 if (len > sizeof(a))
2711 return -EINVAL;
2712
1da177e4 2713 /* copy_from_user should be SMP safe. */
47379052 2714 if (copy_from_user(a, args, len))
1da177e4 2715 return -EFAULT;
3ec3b2fb 2716
2950fa9d
CG
2717 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2718 if (err)
2719 return err;
3ec3b2fb 2720
89bddce5
SH
2721 a0 = a[0];
2722 a1 = a[1];
2723
2724 switch (call) {
2725 case SYS_SOCKET:
9d6a15c3 2726 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2727 break;
2728 case SYS_BIND:
a87d35d8 2729 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2730 break;
2731 case SYS_CONNECT:
1387c2c2 2732 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2733 break;
2734 case SYS_LISTEN:
25e290ee 2735 err = __sys_listen(a0, a1);
89bddce5
SH
2736 break;
2737 case SYS_ACCEPT:
4541e805
DB
2738 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2739 (int __user *)a[2], 0);
89bddce5
SH
2740 break;
2741 case SYS_GETSOCKNAME:
2742 err =
8882a107
DB
2743 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2744 (int __user *)a[2]);
89bddce5
SH
2745 break;
2746 case SYS_GETPEERNAME:
2747 err =
b21c8f83
DB
2748 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2749 (int __user *)a[2]);
89bddce5
SH
2750 break;
2751 case SYS_SOCKETPAIR:
6debc8d8 2752 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2753 break;
2754 case SYS_SEND:
f3bf896b
DB
2755 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2756 NULL, 0);
89bddce5
SH
2757 break;
2758 case SYS_SENDTO:
211b634b
DB
2759 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2760 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2761 break;
2762 case SYS_RECV:
d27e9afc
DB
2763 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2764 NULL, NULL);
89bddce5
SH
2765 break;
2766 case SYS_RECVFROM:
7a09e1eb
DB
2767 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2768 (struct sockaddr __user *)a[4],
2769 (int __user *)a[5]);
89bddce5
SH
2770 break;
2771 case SYS_SHUTDOWN:
005a1aea 2772 err = __sys_shutdown(a0, a1);
89bddce5
SH
2773 break;
2774 case SYS_SETSOCKOPT:
cc36dca0
DB
2775 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2776 a[4]);
89bddce5
SH
2777 break;
2778 case SYS_GETSOCKOPT:
2779 err =
13a2d70e
DB
2780 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2781 (int __user *)a[4]);
89bddce5
SH
2782 break;
2783 case SYS_SENDMSG:
e1834a32
DB
2784 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2785 a[2], true);
89bddce5 2786 break;
228e548e 2787 case SYS_SENDMMSG:
e1834a32
DB
2788 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2789 a[3], true);
228e548e 2790 break;
89bddce5 2791 case SYS_RECVMSG:
e1834a32
DB
2792 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2793 a[2], true);
89bddce5 2794 break;
a2e27255 2795 case SYS_RECVMMSG:
e11d4284
AB
2796 if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
2797 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2798 a[2], a[3],
2799 (struct __kernel_timespec __user *)a[4],
2800 NULL);
2801 else
2802 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2803 a[2], a[3], NULL,
2804 (struct old_timespec32 __user *)a[4]);
a2e27255 2805 break;
de11defe 2806 case SYS_ACCEPT4:
4541e805
DB
2807 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2808 (int __user *)a[2], a[3]);
aaca0bdc 2809 break;
89bddce5
SH
2810 default:
2811 err = -EINVAL;
2812 break;
1da177e4
LT
2813 }
2814 return err;
2815}
2816
89bddce5 2817#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2818
55737fda
SH
2819/**
2820 * sock_register - add a socket protocol handler
2821 * @ops: description of protocol
2822 *
1da177e4
LT
2823 * This function is called by a protocol handler that wants to
2824 * advertise its address family, and have it linked into the
e793c0f7 2825 * socket interface. The value ops->family corresponds to the
55737fda 2826 * socket system call protocol family.
1da177e4 2827 */
f0fd27d4 2828int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2829{
2830 int err;
2831
2832 if (ops->family >= NPROTO) {
3410f22e 2833 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2834 return -ENOBUFS;
2835 }
55737fda
SH
2836
2837 spin_lock(&net_family_lock);
190683a9
ED
2838 if (rcu_dereference_protected(net_families[ops->family],
2839 lockdep_is_held(&net_family_lock)))
55737fda
SH
2840 err = -EEXIST;
2841 else {
cf778b00 2842 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2843 err = 0;
2844 }
55737fda
SH
2845 spin_unlock(&net_family_lock);
2846
3410f22e 2847 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2848 return err;
2849}
c6d409cf 2850EXPORT_SYMBOL(sock_register);
1da177e4 2851
55737fda
SH
2852/**
2853 * sock_unregister - remove a protocol handler
2854 * @family: protocol family to remove
2855 *
1da177e4
LT
2856 * This function is called by a protocol handler that wants to
2857 * remove its address family, and have it unlinked from the
55737fda
SH
2858 * new socket creation.
2859 *
2860 * If protocol handler is a module, then it can use module reference
2861 * counts to protect against new references. If protocol handler is not
2862 * a module then it needs to provide its own protection in
2863 * the ops->create routine.
1da177e4 2864 */
f0fd27d4 2865void sock_unregister(int family)
1da177e4 2866{
f0fd27d4 2867 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2868
55737fda 2869 spin_lock(&net_family_lock);
a9b3cd7f 2870 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2871 spin_unlock(&net_family_lock);
2872
2873 synchronize_rcu();
2874
3410f22e 2875 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2876}
c6d409cf 2877EXPORT_SYMBOL(sock_unregister);
1da177e4 2878
bf2ae2e4
XL
2879bool sock_is_registered(int family)
2880{
66b51b0a 2881 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
2882}
2883
77d76ea3 2884static int __init sock_init(void)
1da177e4 2885{
b3e19d92 2886 int err;
2ca794e5
EB
2887 /*
2888 * Initialize the network sysctl infrastructure.
2889 */
2890 err = net_sysctl_init();
2891 if (err)
2892 goto out;
b3e19d92 2893
1da177e4 2894 /*
89bddce5 2895 * Initialize skbuff SLAB cache
1da177e4
LT
2896 */
2897 skb_init();
1da177e4
LT
2898
2899 /*
89bddce5 2900 * Initialize the protocols module.
1da177e4
LT
2901 */
2902
2903 init_inodecache();
b3e19d92
NP
2904
2905 err = register_filesystem(&sock_fs_type);
2906 if (err)
2907 goto out_fs;
1da177e4 2908 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2909 if (IS_ERR(sock_mnt)) {
2910 err = PTR_ERR(sock_mnt);
2911 goto out_mount;
2912 }
77d76ea3
AK
2913
2914 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2915 */
2916
2917#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2918 err = netfilter_init();
2919 if (err)
2920 goto out;
1da177e4 2921#endif
cbeb321a 2922
408eccce 2923 ptp_classifier_init();
c1f19b51 2924
b3e19d92
NP
2925out:
2926 return err;
2927
2928out_mount:
2929 unregister_filesystem(&sock_fs_type);
2930out_fs:
2931 goto out;
1da177e4
LT
2932}
2933
77d76ea3
AK
2934core_initcall(sock_init); /* early initcall */
2935
1da177e4
LT
2936#ifdef CONFIG_PROC_FS
2937void socket_seq_show(struct seq_file *seq)
2938{
648845ab
TZ
2939 seq_printf(seq, "sockets: used %d\n",
2940 sock_inuse_get(seq->private));
1da177e4 2941}
89bddce5 2942#endif /* CONFIG_PROC_FS */
1da177e4 2943
89bbfc95 2944#ifdef CONFIG_COMPAT
36fd633e 2945static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2946{
6b96018b 2947 struct compat_ifconf ifc32;
7a229387 2948 struct ifconf ifc;
7a229387
AB
2949 int err;
2950
6b96018b 2951 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2952 return -EFAULT;
2953
36fd633e
AV
2954 ifc.ifc_len = ifc32.ifc_len;
2955 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 2956
36fd633e
AV
2957 rtnl_lock();
2958 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2959 rtnl_unlock();
7a229387
AB
2960 if (err)
2961 return err;
2962
36fd633e 2963 ifc32.ifc_len = ifc.ifc_len;
6b96018b 2964 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2965 return -EFAULT;
2966
2967 return 0;
2968}
2969
6b96018b 2970static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2971{
3a7da39d
BH
2972 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2973 bool convert_in = false, convert_out = false;
44c02a2c
AV
2974 size_t buf_size = 0;
2975 struct ethtool_rxnfc __user *rxnfc = NULL;
2976 struct ifreq ifr;
3a7da39d
BH
2977 u32 rule_cnt = 0, actual_rule_cnt;
2978 u32 ethcmd;
7a229387 2979 u32 data;
3a7da39d 2980 int ret;
7a229387 2981
3a7da39d
BH
2982 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2983 return -EFAULT;
7a229387 2984
3a7da39d
BH
2985 compat_rxnfc = compat_ptr(data);
2986
2987 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2988 return -EFAULT;
2989
3a7da39d
BH
2990 /* Most ethtool structures are defined without padding.
2991 * Unfortunately struct ethtool_rxnfc is an exception.
2992 */
2993 switch (ethcmd) {
2994 default:
2995 break;
2996 case ETHTOOL_GRXCLSRLALL:
2997 /* Buffer size is variable */
2998 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2999 return -EFAULT;
3000 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3001 return -ENOMEM;
3002 buf_size += rule_cnt * sizeof(u32);
3003 /* fall through */
3004 case ETHTOOL_GRXRINGS:
3005 case ETHTOOL_GRXCLSRLCNT:
3006 case ETHTOOL_GRXCLSRULE:
55664f32 3007 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3008 convert_out = true;
3009 /* fall through */
3010 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3011 buf_size += sizeof(struct ethtool_rxnfc);
3012 convert_in = true;
44c02a2c 3013 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3014 break;
3015 }
3016
44c02a2c 3017 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3018 return -EFAULT;
3019
44c02a2c 3020 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3021
3a7da39d 3022 if (convert_in) {
127fe533 3023 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3024 * fs.ring_cookie and at the end of fs, but nowhere else.
3025 */
127fe533
AD
3026 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3027 sizeof(compat_rxnfc->fs.m_ext) !=
3028 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3029 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3030 BUILD_BUG_ON(
3031 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3032 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3033 offsetof(struct ethtool_rxnfc, fs.location) -
3034 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3035
3036 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3037 (void __user *)(&rxnfc->fs.m_ext + 1) -
3038 (void __user *)rxnfc) ||
3a7da39d
BH
3039 copy_in_user(&rxnfc->fs.ring_cookie,
3040 &compat_rxnfc->fs.ring_cookie,
954b1244 3041 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3042 (void __user *)&rxnfc->fs.ring_cookie))
3043 return -EFAULT;
3044 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3045 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3046 return -EFAULT;
3047 } else if (copy_in_user(&rxnfc->rule_cnt,
3048 &compat_rxnfc->rule_cnt,
3049 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3050 return -EFAULT;
3051 }
3052
44c02a2c 3053 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3054 if (ret)
3055 return ret;
3056
3057 if (convert_out) {
3058 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3059 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3060 (const void __user *)rxnfc) ||
3a7da39d
BH
3061 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3062 &rxnfc->fs.ring_cookie,
954b1244
SH
3063 (const void __user *)(&rxnfc->fs.location + 1) -
3064 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3065 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3066 sizeof(rxnfc->rule_cnt)))
3067 return -EFAULT;
3068
3069 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3070 /* As an optimisation, we only copy the actual
3071 * number of rules that the underlying
3072 * function returned. Since Mallory might
3073 * change the rule count in user memory, we
3074 * check that it is less than the rule count
3075 * originally given (as the user buffer size),
3076 * which has been range-checked.
3077 */
3078 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3079 return -EFAULT;
3080 if (actual_rule_cnt < rule_cnt)
3081 rule_cnt = actual_rule_cnt;
3082 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3083 &rxnfc->rule_locs[0],
3084 rule_cnt * sizeof(u32)))
3085 return -EFAULT;
3086 }
3087 }
3088
3089 return 0;
7a229387
AB
3090}
3091
7a50a240
AB
3092static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3093{
7a50a240 3094 compat_uptr_t uptr32;
44c02a2c
AV
3095 struct ifreq ifr;
3096 void __user *saved;
3097 int err;
7a50a240 3098
44c02a2c 3099 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3100 return -EFAULT;
3101
3102 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3103 return -EFAULT;
3104
44c02a2c
AV
3105 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3106 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3107
44c02a2c
AV
3108 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3109 if (!err) {
3110 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3111 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3112 err = -EFAULT;
ccbd6a5a 3113 }
44c02a2c 3114 return err;
7a229387
AB
3115}
3116
590d4693
BH
3117/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3118static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3119 struct compat_ifreq __user *u_ifreq32)
7a229387 3120{
44c02a2c 3121 struct ifreq ifreq;
7a229387
AB
3122 u32 data32;
3123
44c02a2c 3124 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3125 return -EFAULT;
44c02a2c 3126 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3127 return -EFAULT;
44c02a2c 3128 ifreq.ifr_data = compat_ptr(data32);
7a229387 3129
44c02a2c 3130 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3131}
3132
37ac39bd
JB
3133static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3134 unsigned int cmd,
3135 struct compat_ifreq __user *uifr32)
3136{
3137 struct ifreq __user *uifr;
3138 int err;
3139
3140 /* Handle the fact that while struct ifreq has the same *layout* on
3141 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3142 * which are handled elsewhere, it still has different *size* due to
3143 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3144 * resulting in struct ifreq being 32 and 40 bytes respectively).
3145 * As a result, if the struct happens to be at the end of a page and
3146 * the next page isn't readable/writable, we get a fault. To prevent
3147 * that, copy back and forth to the full size.
3148 */
3149
3150 uifr = compat_alloc_user_space(sizeof(*uifr));
3151 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3152 return -EFAULT;
3153
3154 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3155
3156 if (!err) {
3157 switch (cmd) {
3158 case SIOCGIFFLAGS:
3159 case SIOCGIFMETRIC:
3160 case SIOCGIFMTU:
3161 case SIOCGIFMEM:
3162 case SIOCGIFHWADDR:
3163 case SIOCGIFINDEX:
3164 case SIOCGIFADDR:
3165 case SIOCGIFBRDADDR:
3166 case SIOCGIFDSTADDR:
3167 case SIOCGIFNETMASK:
3168 case SIOCGIFPFLAGS:
3169 case SIOCGIFTXQLEN:
3170 case SIOCGMIIPHY:
3171 case SIOCGMIIREG:
c6c9fee3 3172 case SIOCGIFNAME:
37ac39bd
JB
3173 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3174 err = -EFAULT;
3175 break;
3176 }
3177 }
3178 return err;
3179}
3180
a2116ed2
AB
3181static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3182 struct compat_ifreq __user *uifr32)
3183{
3184 struct ifreq ifr;
3185 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3186 int err;
3187
3188 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3189 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3190 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3191 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3192 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3193 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3194 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3195 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3196 if (err)
3197 return -EFAULT;
3198
44c02a2c 3199 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3200
3201 if (cmd == SIOCGIFMAP && !err) {
3202 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3203 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3204 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3205 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3206 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3207 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3208 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3209 if (err)
3210 err = -EFAULT;
3211 }
3212 return err;
3213}
3214
7a229387 3215struct rtentry32 {
c6d409cf 3216 u32 rt_pad1;
7a229387
AB
3217 struct sockaddr rt_dst; /* target address */
3218 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3219 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3220 unsigned short rt_flags;
3221 short rt_pad2;
3222 u32 rt_pad3;
3223 unsigned char rt_tos;
3224 unsigned char rt_class;
3225 short rt_pad4;
3226 short rt_metric; /* +1 for binary compatibility! */
7a229387 3227 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3228 u32 rt_mtu; /* per route MTU/Window */
3229 u32 rt_window; /* Window clamping */
7a229387
AB
3230 unsigned short rt_irtt; /* Initial RTT */
3231};
3232
3233struct in6_rtmsg32 {
3234 struct in6_addr rtmsg_dst;
3235 struct in6_addr rtmsg_src;
3236 struct in6_addr rtmsg_gateway;
3237 u32 rtmsg_type;
3238 u16 rtmsg_dst_len;
3239 u16 rtmsg_src_len;
3240 u32 rtmsg_metric;
3241 u32 rtmsg_info;
3242 u32 rtmsg_flags;
3243 s32 rtmsg_ifindex;
3244};
3245
6b96018b
AB
3246static int routing_ioctl(struct net *net, struct socket *sock,
3247 unsigned int cmd, void __user *argp)
7a229387
AB
3248{
3249 int ret;
3250 void *r = NULL;
3251 struct in6_rtmsg r6;
3252 struct rtentry r4;
3253 char devname[16];
3254 u32 rtdev;
3255 mm_segment_t old_fs = get_fs();
3256
6b96018b
AB
3257 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3258 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3259 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3260 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3261 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3262 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3263 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3264 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3265 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3266 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3267 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3268
3269 r = (void *) &r6;
3270 } else { /* ipv4 */
6b96018b 3271 struct rtentry32 __user *ur4 = argp;
c6d409cf 3272 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3273 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3274 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3275 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3276 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3277 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3278 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3279 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3280 if (rtdev) {
c6d409cf 3281 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3282 r4.rt_dev = (char __user __force *)devname;
3283 devname[15] = 0;
7a229387
AB
3284 } else
3285 r4.rt_dev = NULL;
3286
3287 r = (void *) &r4;
3288 }
3289
3290 if (ret) {
3291 ret = -EFAULT;
3292 goto out;
3293 }
3294
c6d409cf 3295 set_fs(KERNEL_DS);
63ff03ab 3296 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3297 set_fs(old_fs);
7a229387
AB
3298
3299out:
7a229387
AB
3300 return ret;
3301}
3302
3303/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3304 * for some operations; this forces use of the newer bridge-utils that
25985edc 3305 * use compatible ioctls
7a229387 3306 */
6b96018b 3307static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3308{
6b96018b 3309 compat_ulong_t tmp;
7a229387 3310
6b96018b 3311 if (get_user(tmp, argp))
7a229387
AB
3312 return -EFAULT;
3313 if (tmp == BRCTL_GET_VERSION)
3314 return BRCTL_VERSION + 1;
3315 return -EINVAL;
3316}
3317
6b96018b
AB
3318static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3319 unsigned int cmd, unsigned long arg)
3320{
3321 void __user *argp = compat_ptr(arg);
3322 struct sock *sk = sock->sk;
3323 struct net *net = sock_net(sk);
7a229387 3324
6b96018b 3325 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3326 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3327
3328 switch (cmd) {
3329 case SIOCSIFBR:
3330 case SIOCGIFBR:
3331 return old_bridge_ioctl(argp);
6b96018b 3332 case SIOCGIFCONF:
36fd633e 3333 return compat_dev_ifconf(net, argp);
6b96018b
AB
3334 case SIOCETHTOOL:
3335 return ethtool_ioctl(net, argp);
7a50a240
AB
3336 case SIOCWANDEV:
3337 return compat_siocwandev(net, argp);
a2116ed2
AB
3338 case SIOCGIFMAP:
3339 case SIOCSIFMAP:
3340 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3341 case SIOCADDRT:
3342 case SIOCDELRT:
3343 return routing_ioctl(net, sock, cmd, argp);
0768e170
AB
3344 case SIOCGSTAMP_OLD:
3345 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3346 if (!sock->ops->gettstamp)
3347 return -ENOIOCTLCMD;
0768e170 3348 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3349 !COMPAT_USE_64BIT_TIME);
3350
590d4693
BH
3351 case SIOCBONDSLAVEINFOQUERY:
3352 case SIOCBONDINFOQUERY:
a2116ed2 3353 case SIOCSHWTSTAMP:
fd468c74 3354 case SIOCGHWTSTAMP:
590d4693 3355 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3356
3357 case FIOSETOWN:
3358 case SIOCSPGRP:
3359 case FIOGETOWN:
3360 case SIOCGPGRP:
3361 case SIOCBRADDBR:
3362 case SIOCBRDELBR:
3363 case SIOCGIFVLAN:
3364 case SIOCSIFVLAN:
3365 case SIOCADDDLCI:
3366 case SIOCDELDLCI:
c62cce2c 3367 case SIOCGSKNS:
0768e170
AB
3368 case SIOCGSTAMP_NEW:
3369 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3370 return sock_ioctl(file, cmd, arg);
3371
3372 case SIOCGIFFLAGS:
3373 case SIOCSIFFLAGS:
3374 case SIOCGIFMETRIC:
3375 case SIOCSIFMETRIC:
3376 case SIOCGIFMTU:
3377 case SIOCSIFMTU:
3378 case SIOCGIFMEM:
3379 case SIOCSIFMEM:
3380 case SIOCGIFHWADDR:
3381 case SIOCSIFHWADDR:
3382 case SIOCADDMULTI:
3383 case SIOCDELMULTI:
3384 case SIOCGIFINDEX:
6b96018b
AB
3385 case SIOCGIFADDR:
3386 case SIOCSIFADDR:
3387 case SIOCSIFHWBROADCAST:
6b96018b 3388 case SIOCDIFADDR:
6b96018b
AB
3389 case SIOCGIFBRDADDR:
3390 case SIOCSIFBRDADDR:
3391 case SIOCGIFDSTADDR:
3392 case SIOCSIFDSTADDR:
3393 case SIOCGIFNETMASK:
3394 case SIOCSIFNETMASK:
3395 case SIOCSIFPFLAGS:
3396 case SIOCGIFPFLAGS:
3397 case SIOCGIFTXQLEN:
3398 case SIOCSIFTXQLEN:
3399 case SIOCBRADDIF:
3400 case SIOCBRDELIF:
c6c9fee3 3401 case SIOCGIFNAME:
9177efd3
AB
3402 case SIOCSIFNAME:
3403 case SIOCGMIIPHY:
3404 case SIOCGMIIREG:
3405 case SIOCSMIIREG:
f92d4fc9
AV
3406 case SIOCBONDENSLAVE:
3407 case SIOCBONDRELEASE:
3408 case SIOCBONDSETHWADDR:
3409 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3410 return compat_ifreq_ioctl(net, sock, cmd, argp);
3411
6b96018b
AB
3412 case SIOCSARP:
3413 case SIOCGARP:
3414 case SIOCDARP:
6b96018b 3415 case SIOCATMARK:
63ff03ab 3416 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3417 }
3418
6b96018b
AB
3419 return -ENOIOCTLCMD;
3420}
7a229387 3421
95c96174 3422static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3423 unsigned long arg)
89bbfc95
SP
3424{
3425 struct socket *sock = file->private_data;
3426 int ret = -ENOIOCTLCMD;
87de87d5
DM
3427 struct sock *sk;
3428 struct net *net;
3429
3430 sk = sock->sk;
3431 net = sock_net(sk);
89bbfc95
SP
3432
3433 if (sock->ops->compat_ioctl)
3434 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3435
87de87d5
DM
3436 if (ret == -ENOIOCTLCMD &&
3437 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3438 ret = compat_wext_handle_ioctl(net, cmd, arg);
3439
6b96018b
AB
3440 if (ret == -ENOIOCTLCMD)
3441 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3442
89bbfc95
SP
3443 return ret;
3444}
3445#endif
3446
8a3c245c
PT
3447/**
3448 * kernel_bind - bind an address to a socket (kernel space)
3449 * @sock: socket
3450 * @addr: address
3451 * @addrlen: length of address
3452 *
3453 * Returns 0 or an error.
3454 */
3455
ac5a488e
SS
3456int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3457{
3458 return sock->ops->bind(sock, addr, addrlen);
3459}
c6d409cf 3460EXPORT_SYMBOL(kernel_bind);
ac5a488e 3461
8a3c245c
PT
3462/**
3463 * kernel_listen - move socket to listening state (kernel space)
3464 * @sock: socket
3465 * @backlog: pending connections queue size
3466 *
3467 * Returns 0 or an error.
3468 */
3469
ac5a488e
SS
3470int kernel_listen(struct socket *sock, int backlog)
3471{
3472 return sock->ops->listen(sock, backlog);
3473}
c6d409cf 3474EXPORT_SYMBOL(kernel_listen);
ac5a488e 3475
8a3c245c
PT
3476/**
3477 * kernel_accept - accept a connection (kernel space)
3478 * @sock: listening socket
3479 * @newsock: new connected socket
3480 * @flags: flags
3481 *
3482 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3483 * If it fails, @newsock is guaranteed to be %NULL.
3484 * Returns 0 or an error.
3485 */
3486
ac5a488e
SS
3487int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3488{
3489 struct sock *sk = sock->sk;
3490 int err;
3491
3492 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3493 newsock);
3494 if (err < 0)
3495 goto done;
3496
cdfbabfb 3497 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3498 if (err < 0) {
3499 sock_release(*newsock);
fa8705b0 3500 *newsock = NULL;
ac5a488e
SS
3501 goto done;
3502 }
3503
3504 (*newsock)->ops = sock->ops;
1b08534e 3505 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3506
3507done:
3508 return err;
3509}
c6d409cf 3510EXPORT_SYMBOL(kernel_accept);
ac5a488e 3511
8a3c245c
PT
3512/**
3513 * kernel_connect - connect a socket (kernel space)
3514 * @sock: socket
3515 * @addr: address
3516 * @addrlen: address length
3517 * @flags: flags (O_NONBLOCK, ...)
3518 *
3519 * For datagram sockets, @addr is the addres to which datagrams are sent
3520 * by default, and the only address from which datagrams are received.
3521 * For stream sockets, attempts to connect to @addr.
3522 * Returns 0 or an error code.
3523 */
3524
ac5a488e 3525int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3526 int flags)
ac5a488e
SS
3527{
3528 return sock->ops->connect(sock, addr, addrlen, flags);
3529}
c6d409cf 3530EXPORT_SYMBOL(kernel_connect);
ac5a488e 3531
8a3c245c
PT
3532/**
3533 * kernel_getsockname - get the address which the socket is bound (kernel space)
3534 * @sock: socket
3535 * @addr: address holder
3536 *
3537 * Fills the @addr pointer with the address which the socket is bound.
3538 * Returns 0 or an error code.
3539 */
3540
9b2c45d4 3541int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3542{
9b2c45d4 3543 return sock->ops->getname(sock, addr, 0);
ac5a488e 3544}
c6d409cf 3545EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3546
8a3c245c
PT
3547/**
3548 * kernel_peername - get the address which the socket is connected (kernel space)
3549 * @sock: socket
3550 * @addr: address holder
3551 *
3552 * Fills the @addr pointer with the address which the socket is connected.
3553 * Returns 0 or an error code.
3554 */
3555
9b2c45d4 3556int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3557{
9b2c45d4 3558 return sock->ops->getname(sock, addr, 1);
ac5a488e 3559}
c6d409cf 3560EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3561
8a3c245c
PT
3562/**
3563 * kernel_getsockopt - get a socket option (kernel space)
3564 * @sock: socket
3565 * @level: API level (SOL_SOCKET, ...)
3566 * @optname: option tag
3567 * @optval: option value
3568 * @optlen: option length
3569 *
3570 * Assigns the option length to @optlen.
3571 * Returns 0 or an error.
3572 */
3573
ac5a488e
SS
3574int kernel_getsockopt(struct socket *sock, int level, int optname,
3575 char *optval, int *optlen)
3576{
3577 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3578 char __user *uoptval;
3579 int __user *uoptlen;
ac5a488e
SS
3580 int err;
3581
fb8621bb
NK
3582 uoptval = (char __user __force *) optval;
3583 uoptlen = (int __user __force *) optlen;
3584
ac5a488e
SS
3585 set_fs(KERNEL_DS);
3586 if (level == SOL_SOCKET)
fb8621bb 3587 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3588 else
fb8621bb
NK
3589 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3590 uoptlen);
ac5a488e
SS
3591 set_fs(oldfs);
3592 return err;
3593}
c6d409cf 3594EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3595
8a3c245c
PT
3596/**
3597 * kernel_setsockopt - set a socket option (kernel space)
3598 * @sock: socket
3599 * @level: API level (SOL_SOCKET, ...)
3600 * @optname: option tag
3601 * @optval: option value
3602 * @optlen: option length
3603 *
3604 * Returns 0 or an error.
3605 */
3606
ac5a488e 3607int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3608 char *optval, unsigned int optlen)
ac5a488e
SS
3609{
3610 mm_segment_t oldfs = get_fs();
fb8621bb 3611 char __user *uoptval;
ac5a488e
SS
3612 int err;
3613
fb8621bb
NK
3614 uoptval = (char __user __force *) optval;
3615
ac5a488e
SS
3616 set_fs(KERNEL_DS);
3617 if (level == SOL_SOCKET)
fb8621bb 3618 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3619 else
fb8621bb 3620 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3621 optlen);
3622 set_fs(oldfs);
3623 return err;
3624}
c6d409cf 3625EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3626
8a3c245c
PT
3627/**
3628 * kernel_sendpage - send a &page through a socket (kernel space)
3629 * @sock: socket
3630 * @page: page
3631 * @offset: page offset
3632 * @size: total size in bytes
3633 * @flags: flags (MSG_DONTWAIT, ...)
3634 *
3635 * Returns the total amount sent in bytes or an error.
3636 */
3637
ac5a488e
SS
3638int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3639 size_t size, int flags)
3640{
3641 if (sock->ops->sendpage)
3642 return sock->ops->sendpage(sock, page, offset, size, flags);
3643
3644 return sock_no_sendpage(sock, page, offset, size, flags);
3645}
c6d409cf 3646EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3647
8a3c245c
PT
3648/**
3649 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3650 * @sk: sock
3651 * @page: page
3652 * @offset: page offset
3653 * @size: total size in bytes
3654 * @flags: flags (MSG_DONTWAIT, ...)
3655 *
3656 * Returns the total amount sent in bytes or an error.
3657 * Caller must hold @sk.
3658 */
3659
306b13eb
TH
3660int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3661 size_t size, int flags)
3662{
3663 struct socket *sock = sk->sk_socket;
3664
3665 if (sock->ops->sendpage_locked)
3666 return sock->ops->sendpage_locked(sk, page, offset, size,
3667 flags);
3668
3669 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3670}
3671EXPORT_SYMBOL(kernel_sendpage_locked);
3672
8a3c245c
PT
3673/**
3674 * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3675 * @sock: socket
3676 * @how: connection part
3677 *
3678 * Returns 0 or an error.
3679 */
3680
91cf45f0
TM
3681int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3682{
3683 return sock->ops->shutdown(sock, how);
3684}
91cf45f0 3685EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3686
8a3c245c
PT
3687/**
3688 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3689 * @sk: socket
3690 *
3691 * This routine returns the IP overhead imposed by a socket i.e.
3692 * the length of the underlying IP header, depending on whether
3693 * this is an IPv4 or IPv6 socket and the length from IP options turned
3694 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3695 */
8a3c245c 3696
113c3075
P
3697u32 kernel_sock_ip_overhead(struct sock *sk)
3698{
3699 struct inet_sock *inet;
3700 struct ip_options_rcu *opt;
3701 u32 overhead = 0;
113c3075
P
3702#if IS_ENABLED(CONFIG_IPV6)
3703 struct ipv6_pinfo *np;
3704 struct ipv6_txoptions *optv6 = NULL;
3705#endif /* IS_ENABLED(CONFIG_IPV6) */
3706
3707 if (!sk)
3708 return overhead;
3709
113c3075
P
3710 switch (sk->sk_family) {
3711 case AF_INET:
3712 inet = inet_sk(sk);
3713 overhead += sizeof(struct iphdr);
3714 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3715 sock_owned_by_user(sk));
113c3075
P
3716 if (opt)
3717 overhead += opt->opt.optlen;
3718 return overhead;
3719#if IS_ENABLED(CONFIG_IPV6)
3720 case AF_INET6:
3721 np = inet6_sk(sk);
3722 overhead += sizeof(struct ipv6hdr);
3723 if (np)
3724 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3725 sock_owned_by_user(sk));
113c3075
P
3726 if (optv6)
3727 overhead += (optv6->opt_flen + optv6->opt_nflen);
3728 return overhead;
3729#endif /* IS_ENABLED(CONFIG_IPV6) */
3730 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3731 return overhead;
3732 }
3733}
3734EXPORT_SYMBOL(kernel_sock_ip_overhead);