Merge branch 's390-qeth-cleanups'
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
c8e8cd57 92#include <linux/nospec.h>
1da177e4 93
7c0f6ba6 94#include <linux/uaccess.h>
1da177e4
LT
95#include <asm/unistd.h>
96
97#include <net/compat.h>
87de87d5 98#include <net/wext.h>
f8451725 99#include <net/cls_cgroup.h>
1da177e4
LT
100
101#include <net/sock.h>
102#include <linux/netfilter.h>
103
6b96018b
AB
104#include <linux/if_tun.h>
105#include <linux/ipv6_route.h>
106#include <linux/route.h>
6b96018b 107#include <linux/sockios.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
121static __poll_t sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4 165/*
89bddce5
SH
166 * Support routines.
167 * Move socket addresses back and forth across the kernel/user
168 * divide and look after the messy bits.
1da177e4
LT
169 */
170
1da177e4
LT
171/**
172 * move_addr_to_kernel - copy a socket address into kernel space
173 * @uaddr: Address in user space
174 * @kaddr: Address in kernel space
175 * @ulen: Length in user space
176 *
177 * The address is copied into kernel space. If the provided address is
178 * too long an error code of -EINVAL is returned. If the copy gives
179 * invalid addresses -EFAULT is returned. On a success 0 is returned.
180 */
181
43db362d 182int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 183{
230b1839 184 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 185 return -EINVAL;
89bddce5 186 if (ulen == 0)
1da177e4 187 return 0;
89bddce5 188 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 189 return -EFAULT;
3ec3b2fb 190 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
191}
192
193/**
194 * move_addr_to_user - copy an address to user space
195 * @kaddr: kernel space address
196 * @klen: length of address in kernel
197 * @uaddr: user space address
198 * @ulen: pointer to user length field
199 *
200 * The value pointed to by ulen on entry is the buffer length available.
201 * This is overwritten with the buffer space used. -EINVAL is returned
202 * if an overlong buffer is specified or a negative buffer size. -EFAULT
203 * is returned if either the buffer or the length field are not
204 * accessible.
205 * After copying the data up to the limit the user specifies, the true
206 * length of the data is written over the length limit the user
207 * specified. Zero is returned for a success.
208 */
89bddce5 209
43db362d 210static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 211 void __user *uaddr, int __user *ulen)
1da177e4
LT
212{
213 int err;
214 int len;
215
68c6beb3 216 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
217 err = get_user(len, ulen);
218 if (err)
1da177e4 219 return err;
89bddce5
SH
220 if (len > klen)
221 len = klen;
68c6beb3 222 if (len < 0)
1da177e4 223 return -EINVAL;
89bddce5 224 if (len) {
d6fe3945
SG
225 if (audit_sockaddr(klen, kaddr))
226 return -ENOMEM;
89bddce5 227 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
228 return -EFAULT;
229 }
230 /*
89bddce5
SH
231 * "fromlen shall refer to the value before truncation.."
232 * 1003.1g
1da177e4
LT
233 */
234 return __put_user(klen, ulen);
235}
236
08009a76 237static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
238
239static struct inode *sock_alloc_inode(struct super_block *sb)
240{
241 struct socket_alloc *ei;
eaefd110 242 struct socket_wq *wq;
89bddce5 243
e94b1766 244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
245 if (!ei)
246 return NULL;
eaefd110
ED
247 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
248 if (!wq) {
43815482
ED
249 kmem_cache_free(sock_inode_cachep, ei);
250 return NULL;
251 }
eaefd110
ED
252 init_waitqueue_head(&wq->wait);
253 wq->fasync_list = NULL;
574aab1e 254 wq->flags = 0;
e6476c21 255 ei->socket.wq = wq;
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482
ED
268 struct socket_alloc *ei;
269
270 ei = container_of(inode, struct socket_alloc, vfs_inode);
e6476c21 271 kfree_rcu(ei->socket.wq, rcu);
43815482 272 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
273}
274
51cc5068 275static void init_once(void *foo)
1da177e4 276{
89bddce5 277 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 278
a35afb83 279 inode_init_once(&ei->vfs_inode);
1da177e4 280}
89bddce5 281
1e911632 282static void init_inodecache(void)
1da177e4
LT
283{
284 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
285 sizeof(struct socket_alloc),
286 0,
287 (SLAB_HWCACHE_ALIGN |
288 SLAB_RECLAIM_ACCOUNT |
5d097056 289 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 290 init_once);
1e911632 291 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
292}
293
b87221de 294static const struct super_operations sockfs_ops = {
c6d409cf
ED
295 .alloc_inode = sock_alloc_inode,
296 .destroy_inode = sock_destroy_inode,
297 .statfs = simple_statfs,
1da177e4
LT
298};
299
c23fbb6b
ED
300/*
301 * sockfs_dname() is called from d_path().
302 */
303static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
304{
305 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 306 d_inode(dentry)->i_ino);
c23fbb6b
ED
307}
308
3ba13d17 309static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 310 .d_dname = sockfs_dname,
1da177e4
LT
311};
312
bba0bd31
AG
313static int sockfs_xattr_get(const struct xattr_handler *handler,
314 struct dentry *dentry, struct inode *inode,
315 const char *suffix, void *value, size_t size)
316{
317 if (value) {
318 if (dentry->d_name.len + 1 > size)
319 return -ERANGE;
320 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
321 }
322 return dentry->d_name.len + 1;
323}
324
325#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
326#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
327#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
328
329static const struct xattr_handler sockfs_xattr_handler = {
330 .name = XATTR_NAME_SOCKPROTONAME,
331 .get = sockfs_xattr_get,
332};
333
4a590153
AG
334static int sockfs_security_xattr_set(const struct xattr_handler *handler,
335 struct dentry *dentry, struct inode *inode,
336 const char *suffix, const void *value,
337 size_t size, int flags)
338{
339 /* Handled by LSM. */
340 return -EAGAIN;
341}
342
343static const struct xattr_handler sockfs_security_xattr_handler = {
344 .prefix = XATTR_SECURITY_PREFIX,
345 .set = sockfs_security_xattr_set,
346};
347
bba0bd31
AG
348static const struct xattr_handler *sockfs_xattr_handlers[] = {
349 &sockfs_xattr_handler,
4a590153 350 &sockfs_security_xattr_handler,
bba0bd31
AG
351 NULL
352};
353
c74a1cbb
AV
354static struct dentry *sockfs_mount(struct file_system_type *fs_type,
355 int flags, const char *dev_name, void *data)
356{
bba0bd31
AG
357 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
358 sockfs_xattr_handlers,
359 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
360}
361
362static struct vfsmount *sock_mnt __read_mostly;
363
364static struct file_system_type sock_fs_type = {
365 .name = "sockfs",
366 .mount = sockfs_mount,
367 .kill_sb = kill_anon_super,
368};
369
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success a file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we no longer
 *	refer to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY BE not valid!
 *	This race condition is unavoidable
 *	with shared fd spaces, we cannot solve it inside kernel,
 *	but we take care of internal coherence yet.
 */
386
8a3c245c
PT
387/**
388 * sock_alloc_file - Bind a &socket to a &file
389 * @sock: socket
390 * @flags: file status flags
391 * @dname: protocol name
392 *
393 * Returns the &file bound with @sock, implicitly storing it
394 * in sock->file. If dname is %NULL, sets to "".
395 * On failure the return is a ERR pointer (see linux/err.h).
396 * This function uses GFP_KERNEL internally.
397 */
398
aab174f0 399struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 400{
7cbe66b6 401 struct file *file;
1da177e4 402
d93aa9d8
AV
403 if (!dname)
404 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 405
d93aa9d8
AV
406 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
407 O_RDWR | (flags & O_NONBLOCK),
408 &socket_file_ops);
b5ffe634 409 if (IS_ERR(file)) {
8e1611e2 410 sock_release(sock);
39b65252 411 return file;
cc3808f8
AV
412 }
413
414 sock->file = file;
39d8c1b6 415 file->private_data = sock;
28407630 416 return file;
39d8c1b6 417}
56b31d1c 418EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 419
56b31d1c 420static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
421{
422 struct file *newfile;
28407630 423 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
424 if (unlikely(fd < 0)) {
425 sock_release(sock);
28407630 426 return fd;
ce4bb04c 427 }
39d8c1b6 428
aab174f0 429 newfile = sock_alloc_file(sock, flags, NULL);
28407630 430 if (likely(!IS_ERR(newfile))) {
39d8c1b6 431 fd_install(fd, newfile);
28407630
AV
432 return fd;
433 }
7cbe66b6 434
28407630
AV
435 put_unused_fd(fd);
436 return PTR_ERR(newfile);
1da177e4
LT
437}
438
8a3c245c
PT
439/**
440 * sock_from_file - Return the &socket bounded to @file.
441 * @file: file
442 * @err: pointer to an error code return
443 *
444 * On failure returns %NULL and assigns -ENOTSOCK to @err.
445 */
446
406a3c63 447struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 448{
6cb153ca
BL
449 if (file->f_op == &socket_file_ops)
450 return file->private_data; /* set in sock_map_fd */
451
23bb80d2
ED
452 *err = -ENOTSOCK;
453 return NULL;
6cb153ca 454}
406a3c63 455EXPORT_SYMBOL(sock_from_file);
6cb153ca 456
1da177e4 457/**
c6d409cf 458 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
459 * @fd: file handle
460 * @err: pointer to an error code return
461 *
462 * The file handle passed in is locked and the socket it is bound
241c4667 463 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
464 * with a negative errno code and NULL is returned. The function checks
465 * for both invalid handles and passing a handle which is not a socket.
466 *
467 * On a success the socket object pointer is returned.
468 */
469
470struct socket *sockfd_lookup(int fd, int *err)
471{
472 struct file *file;
1da177e4
LT
473 struct socket *sock;
474
89bddce5
SH
475 file = fget(fd);
476 if (!file) {
1da177e4
LT
477 *err = -EBADF;
478 return NULL;
479 }
89bddce5 480
6cb153ca
BL
481 sock = sock_from_file(file, err);
482 if (!sock)
1da177e4 483 fput(file);
6cb153ca
BL
484 return sock;
485}
c6d409cf 486EXPORT_SYMBOL(sockfd_lookup);
1da177e4 487
6cb153ca
BL
488static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
489{
00e188ef 490 struct fd f = fdget(fd);
6cb153ca
BL
491 struct socket *sock;
492
3672558c 493 *err = -EBADF;
00e188ef
AV
494 if (f.file) {
495 sock = sock_from_file(f.file, err);
496 if (likely(sock)) {
497 *fput_needed = f.flags;
6cb153ca 498 return sock;
00e188ef
AV
499 }
500 fdput(f);
1da177e4 501 }
6cb153ca 502 return NULL;
1da177e4
LT
503}
504
600e1779
MY
505static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
506 size_t size)
507{
508 ssize_t len;
509 ssize_t used = 0;
510
c5ef6035 511 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
512 if (len < 0)
513 return len;
514 used += len;
515 if (buffer) {
516 if (size < used)
517 return -ERANGE;
518 buffer += len;
519 }
520
521 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
522 used += len;
523 if (buffer) {
524 if (size < used)
525 return -ERANGE;
526 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
527 buffer += len;
528 }
529
530 return used;
531}
532
dc647ec8 533static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
534{
535 int err = simple_setattr(dentry, iattr);
536
e1a3a60a 537 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
538 struct socket *sock = SOCKET_I(d_inode(dentry));
539
6d8c50dc
CW
540 if (sock->sk)
541 sock->sk->sk_uid = iattr->ia_uid;
542 else
543 err = -ENOENT;
86741ec2
LC
544 }
545
546 return err;
547}
548
600e1779 549static const struct inode_operations sockfs_inode_ops = {
600e1779 550 .listxattr = sockfs_listxattr,
86741ec2 551 .setattr = sockfs_setattr,
600e1779
MY
552};
553
1da177e4 554/**
8a3c245c 555 * sock_alloc - allocate a socket
89bddce5 556 *
1da177e4
LT
557 * Allocate a new inode and socket object. The two are bound together
558 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 559 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
560 */
561
f4a00aac 562struct socket *sock_alloc(void)
1da177e4 563{
89bddce5
SH
564 struct inode *inode;
565 struct socket *sock;
1da177e4 566
a209dfc7 567 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
568 if (!inode)
569 return NULL;
570
571 sock = SOCKET_I(inode);
572
85fe4025 573 inode->i_ino = get_next_ino();
89bddce5 574 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
575 inode->i_uid = current_fsuid();
576 inode->i_gid = current_fsgid();
600e1779 577 inode->i_op = &sockfs_inode_ops;
1da177e4 578
1da177e4
LT
579 return sock;
580}
f4a00aac 581EXPORT_SYMBOL(sock_alloc);
1da177e4 582
1da177e4 583/**
8a3c245c 584 * sock_release - close a socket
1da177e4
LT
585 * @sock: socket to close
586 *
587 * The socket is released from the protocol stack if it has a release
588 * callback, and the inode is then released if the socket is bound to
89bddce5 589 * an inode not a file.
1da177e4 590 */
89bddce5 591
6d8c50dc 592static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
593{
594 if (sock->ops) {
595 struct module *owner = sock->ops->owner;
596
6d8c50dc
CW
597 if (inode)
598 inode_lock(inode);
1da177e4 599 sock->ops->release(sock);
ff7b11aa 600 sock->sk = NULL;
6d8c50dc
CW
601 if (inode)
602 inode_unlock(inode);
1da177e4
LT
603 sock->ops = NULL;
604 module_put(owner);
605 }
606
e6476c21 607 if (sock->wq->fasync_list)
3410f22e 608 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 609
1da177e4
LT
610 if (!sock->file) {
611 iput(SOCK_INODE(sock));
612 return;
613 }
89bddce5 614 sock->file = NULL;
1da177e4 615}
6d8c50dc
CW
616
617void sock_release(struct socket *sock)
618{
619 __sock_release(sock, NULL);
620}
c6d409cf 621EXPORT_SYMBOL(sock_release);
1da177e4 622
c14ac945 623void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 624{
140c55d4
ED
625 u8 flags = *tx_flags;
626
c14ac945 627 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
628 flags |= SKBTX_HW_TSTAMP;
629
c14ac945 630 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
631 flags |= SKBTX_SW_TSTAMP;
632
c14ac945 633 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
634 flags |= SKBTX_SCHED_TSTAMP;
635
140c55d4 636 *tx_flags = flags;
20d49473 637}
67cc0d40 638EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 639
8a3c245c
PT
640/**
641 * sock_sendmsg - send a message through @sock
642 * @sock: socket
643 * @msg: message to send
644 *
645 * Sends @msg through @sock, passing through LSM.
646 * Returns the number of bytes sent, or an error code.
647 */
648
d8725c86 649static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 650{
01e97e65 651 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
652 BUG_ON(ret == -EIOCBQUEUED);
653 return ret;
1da177e4
LT
654}
655
d8725c86 656int sock_sendmsg(struct socket *sock, struct msghdr *msg)
228e548e 657{
d8725c86 658 int err = security_socket_sendmsg(sock, msg,
01e97e65 659 msg_data_left(msg));
228e548e 660
d8725c86 661 return err ?: sock_sendmsg_nosec(sock, msg);
0cf00c6f 662}
c6d409cf 663EXPORT_SYMBOL(sock_sendmsg);
1da177e4 664
8a3c245c
PT
665/**
666 * kernel_sendmsg - send a message through @sock (kernel-space)
667 * @sock: socket
668 * @msg: message header
669 * @vec: kernel vec
670 * @num: vec array length
671 * @size: total message data size
672 *
673 * Builds the message data with @vec and sends it through @sock.
674 * Returns the number of bytes sent, or an error code.
675 */
676
1da177e4
LT
677int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
678 struct kvec *vec, size_t num, size_t size)
679{
aa563d7b 680 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 681 return sock_sendmsg(sock, msg);
1da177e4 682}
c6d409cf 683EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 684
8a3c245c
PT
685/**
686 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
687 * @sk: sock
688 * @msg: message header
689 * @vec: output s/g array
690 * @num: output s/g array length
691 * @size: total message data size
692 *
693 * Builds the message data with @vec and sends it through @sock.
694 * Returns the number of bytes sent, or an error code.
695 * Caller must hold @sk.
696 */
697
306b13eb
TH
698int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
699 struct kvec *vec, size_t num, size_t size)
700{
701 struct socket *sock = sk->sk_socket;
702
703 if (!sock->ops->sendmsg_locked)
db5980d8 704 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 705
aa563d7b 706 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
707
708 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
709}
710EXPORT_SYMBOL(kernel_sendmsg_locked);
711
8605330a
SHY
712static bool skb_is_err_queue(const struct sk_buff *skb)
713{
714 /* pkt_type of skbs enqueued on the error queue are set to
715 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
716 * in recvmsg, since skbs received on a local socket will never
717 * have a pkt_type of PACKET_OUTGOING.
718 */
719 return skb->pkt_type == PACKET_OUTGOING;
720}
721
b50a5c70
ML
722/* On transmit, software and hardware timestamps are returned independently.
723 * As the two skb clones share the hardware timestamp, which may be updated
724 * before the software timestamp is received, a hardware TX timestamp may be
725 * returned only if there is no software TX timestamp. Ignore false software
726 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 727 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
728 * hardware timestamp.
729 */
730static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
731{
732 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
733}
734
aad9c8c4
ML
735static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
736{
737 struct scm_ts_pktinfo ts_pktinfo;
738 struct net_device *orig_dev;
739
740 if (!skb_mac_header_was_set(skb))
741 return;
742
743 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
744
745 rcu_read_lock();
746 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
747 if (orig_dev)
748 ts_pktinfo.if_index = orig_dev->ifindex;
749 rcu_read_unlock();
750
751 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
752 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
753 sizeof(ts_pktinfo), &ts_pktinfo);
754}
755
92f37fd2
ED
756/*
757 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
758 */
759void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
760 struct sk_buff *skb)
761{
20d49473 762 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 763 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
764 struct scm_timestamping_internal tss;
765
b50a5c70 766 int empty = 1, false_tstamp = 0;
20d49473
PO
767 struct skb_shared_hwtstamps *shhwtstamps =
768 skb_hwtstamps(skb);
769
770 /* Race occurred between timestamp enabling and packet
771 receiving. Fill in the current time for now. */
b50a5c70 772 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 773 __net_timestamp(skb);
b50a5c70
ML
774 false_tstamp = 1;
775 }
20d49473
PO
776
777 if (need_software_tstamp) {
778 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
779 if (new_tstamp) {
780 struct __kernel_sock_timeval tv;
781
782 skb_get_new_timestamp(skb, &tv);
783 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
784 sizeof(tv), &tv);
785 } else {
786 struct __kernel_old_timeval tv;
787
788 skb_get_timestamp(skb, &tv);
789 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
790 sizeof(tv), &tv);
791 }
20d49473 792 } else {
887feae3
DD
793 if (new_tstamp) {
794 struct __kernel_timespec ts;
795
796 skb_get_new_timestampns(skb, &ts);
797 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
798 sizeof(ts), &ts);
799 } else {
800 struct timespec ts;
801
802 skb_get_timestampns(skb, &ts);
803 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
804 sizeof(ts), &ts);
805 }
20d49473
PO
806 }
807 }
808
f24b9be5 809 memset(&tss, 0, sizeof(tss));
c199105d 810 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 811 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 812 empty = 0;
4d276eb6 813 if (shhwtstamps &&
b9f40e21 814 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 815 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 816 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 817 empty = 0;
aad9c8c4
ML
818 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
819 !skb_is_err_queue(skb))
820 put_ts_pktinfo(msg, skb);
821 }
1c885808 822 if (!empty) {
9718475e
DD
823 if (sock_flag(sk, SOCK_TSTAMP_NEW))
824 put_cmsg_scm_timestamping64(msg, &tss);
825 else
826 put_cmsg_scm_timestamping(msg, &tss);
1c885808 827
8605330a 828 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 829 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
830 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
831 skb->len, skb->data);
832 }
92f37fd2 833}
7c81fd8b
ACM
834EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
835
6e3e939f
JB
836void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
837 struct sk_buff *skb)
838{
839 int ack;
840
841 if (!sock_flag(sk, SOCK_WIFI_STATUS))
842 return;
843 if (!skb->wifi_acked_valid)
844 return;
845
846 ack = skb->wifi_acked;
847
848 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
849}
850EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
851
11165f14 852static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
853 struct sk_buff *skb)
3b885787 854{
744d5a3e 855 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 856 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 857 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
858}
859
/* Attach the timestamp and then the drop-count control messages to @msg. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 867
8a3c245c
PT
868/**
869 * sock_recvmsg - receive a message from @sock
870 * @sock: socket
871 * @msg: message to receive
872 * @flags: message flags
873 *
874 * Receives @msg from @sock, passing through LSM. Returns the total number
875 * of bytes received, or an error.
876 */
877
1b784140 878static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 879 int flags)
1da177e4 880{
2da62906 881 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
882}
883
2da62906 884int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
a2e27255 885{
2da62906 886 int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
a2e27255 887
2da62906 888 return err ?: sock_recvmsg_nosec(sock, msg, flags);
1da177e4 889}
c6d409cf 890EXPORT_SYMBOL(sock_recvmsg);
1da177e4 891
c1249c0a 892/**
8a3c245c
PT
893 * kernel_recvmsg - Receive a message from a socket (kernel space)
894 * @sock: The socket to receive the message from
895 * @msg: Received message
896 * @vec: Input s/g array for message data
897 * @num: Size of input s/g array
898 * @size: Number of bytes to read
899 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 900 *
8a3c245c
PT
901 * On return the msg structure contains the scatter/gather array passed in the
902 * vec argument. The array is modified so that it consists of the unfilled
903 * portion of the original array.
c1249c0a 904 *
8a3c245c 905 * The returned value is the total number of bytes received, or an error.
c1249c0a 906 */
8a3c245c 907
89bddce5
SH
908int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
909 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
910{
911 mm_segment_t oldfs = get_fs();
912 int result;
913
aa563d7b 914 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 915 set_fs(KERNEL_DS);
2da62906 916 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
917 set_fs(oldfs);
918 return result;
919}
c6d409cf 920EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 921
ce1d4d3e
CH
922static ssize_t sock_sendpage(struct file *file, struct page *page,
923 int offset, size_t size, loff_t *ppos, int more)
1da177e4 924{
1da177e4
LT
925 struct socket *sock;
926 int flags;
927
ce1d4d3e
CH
928 sock = file->private_data;
929
35f9c09f
ED
930 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
931 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
932 flags |= more;
ce1d4d3e 933
e6949583 934 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 935}
1da177e4 936
9c55e01c 937static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 938 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
939 unsigned int flags)
940{
941 struct socket *sock = file->private_data;
942
997b37da 943 if (unlikely(!sock->ops->splice_read))
95506588 944 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 945
9c55e01c
JA
946 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
947}
948
8ae5e030 949static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 950{
6d652330
AV
951 struct file *file = iocb->ki_filp;
952 struct socket *sock = file->private_data;
0345f931 953 struct msghdr msg = {.msg_iter = *to,
954 .msg_iocb = iocb};
8ae5e030 955 ssize_t res;
ce1d4d3e 956
8ae5e030
AV
957 if (file->f_flags & O_NONBLOCK)
958 msg.msg_flags = MSG_DONTWAIT;
959
960 if (iocb->ki_pos != 0)
1da177e4 961 return -ESPIPE;
027445c3 962
66ee59af 963 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
964 return 0;
965
2da62906 966 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
967 *to = msg.msg_iter;
968 return res;
1da177e4
LT
969}
970
8ae5e030 971static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 972{
6d652330
AV
973 struct file *file = iocb->ki_filp;
974 struct socket *sock = file->private_data;
0345f931 975 struct msghdr msg = {.msg_iter = *from,
976 .msg_iocb = iocb};
8ae5e030 977 ssize_t res;
1da177e4 978
8ae5e030 979 if (iocb->ki_pos != 0)
ce1d4d3e 980 return -ESPIPE;
027445c3 981
8ae5e030
AV
982 if (file->f_flags & O_NONBLOCK)
983 msg.msg_flags = MSG_DONTWAIT;
984
6d652330
AV
985 if (sock->type == SOCK_SEQPACKET)
986 msg.msg_flags |= MSG_EOR;
987
d8725c86 988 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
989 *from = msg.msg_iter;
990 return res;
1da177e4
LT
991}
992
1da177e4
LT
993/*
994 * Atomic setting of ioctl hooks to avoid race
995 * with module unload.
996 */
997
/* br_ioctl_mutex guards both updates to and calls through br_ioctl_hook. */
static DEFINE_MUTEX(br_ioctl_mutex);
static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);

/*
 * brioctl_set - record the handler used for the bridge ioctls
 * @hook: function to store in br_ioctl_hook
 *
 * The store is done with br_ioctl_mutex held, the same mutex sock_ioctl()
 * holds while it calls the hook.
 */
void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
{
	mutex_lock(&br_ioctl_mutex);
	br_ioctl_hook = hook;
	mutex_unlock(&br_ioctl_mutex);
}
EXPORT_SYMBOL(brioctl_set);
1008
/* vlan_ioctl_mutex guards both updates to and calls through vlan_ioctl_hook. */
static DEFINE_MUTEX(vlan_ioctl_mutex);
static int (*vlan_ioctl_hook) (struct net *, void __user *arg);

/*
 * vlan_ioctl_set - record the handler used for the VLAN ioctls
 * @hook: function to store in vlan_ioctl_hook
 *
 * The store is done with vlan_ioctl_mutex held, the same mutex sock_ioctl()
 * holds while it calls the hook.
 */
void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
{
	mutex_lock(&vlan_ioctl_mutex);
	vlan_ioctl_hook = hook;
	mutex_unlock(&vlan_ioctl_mutex);
}
EXPORT_SYMBOL(vlan_ioctl_set);
1019
/* dlci_ioctl_mutex guards both updates to and calls through dlci_ioctl_hook. */
static DEFINE_MUTEX(dlci_ioctl_mutex);
static int (*dlci_ioctl_hook) (unsigned int, void __user *);

/*
 * dlci_ioctl_set - record the handler used for the DLCI ioctls
 * @hook: function to store in dlci_ioctl_hook
 *
 * The store is done with dlci_ioctl_mutex held, the same mutex sock_ioctl()
 * holds while it calls the hook.
 */
void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
{
	mutex_lock(&dlci_ioctl_mutex);
	dlci_ioctl_hook = hook;
	mutex_unlock(&dlci_ioctl_mutex);
}
EXPORT_SYMBOL(dlci_ioctl_set);
1030
6b96018b 1031static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1032 unsigned int cmd, unsigned long arg)
6b96018b
AB
1033{
1034 int err;
1035 void __user *argp = (void __user *)arg;
1036
1037 err = sock->ops->ioctl(sock, cmd, arg);
1038
1039 /*
1040 * If this ioctl is unknown try to hand it down
1041 * to the NIC driver.
1042 */
36fd633e
AV
1043 if (err != -ENOIOCTLCMD)
1044 return err;
6b96018b 1045
36fd633e
AV
1046 if (cmd == SIOCGIFCONF) {
1047 struct ifconf ifc;
1048 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1049 return -EFAULT;
1050 rtnl_lock();
1051 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1052 rtnl_unlock();
1053 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1054 err = -EFAULT;
44c02a2c
AV
1055 } else {
1056 struct ifreq ifr;
1057 bool need_copyout;
63ff03ab 1058 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1059 return -EFAULT;
1060 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1061 if (!err && need_copyout)
63ff03ab 1062 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1063 return -EFAULT;
36fd633e 1064 }
6b96018b
AB
1065 return err;
1066}
1067
1da177e4
LT
1068/*
1069 * With an ioctl, arg may well be a user mode pointer, but we don't know
1070 * what to do with it - that's up to the protocol still.
1071 */
1072
8a3c245c
PT
1073/**
1074 * get_net_ns - increment the refcount of the network namespace
1075 * @ns: common namespace (net)
1076 *
1077 * Returns the net's common namespace.
1078 */
1079
d8d211a2 1080struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1081{
1082 return &get_net(container_of(ns, struct net, ns))->ns;
1083}
d8d211a2 1084EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1085
1da177e4
LT
1086static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1087{
1088 struct socket *sock;
881d966b 1089 struct sock *sk;
1da177e4
LT
1090 void __user *argp = (void __user *)arg;
1091 int pid, err;
881d966b 1092 struct net *net;
1da177e4 1093
b69aee04 1094 sock = file->private_data;
881d966b 1095 sk = sock->sk;
3b1e0a65 1096 net = sock_net(sk);
44c02a2c
AV
1097 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1098 struct ifreq ifr;
1099 bool need_copyout;
1100 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1101 return -EFAULT;
1102 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1103 if (!err && need_copyout)
1104 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1105 return -EFAULT;
1da177e4 1106 } else
3d23e349 1107#ifdef CONFIG_WEXT_CORE
1da177e4 1108 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1109 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1110 } else
3d23e349 1111#endif
89bddce5 1112 switch (cmd) {
1da177e4
LT
1113 case FIOSETOWN:
1114 case SIOCSPGRP:
1115 err = -EFAULT;
1116 if (get_user(pid, (int __user *)argp))
1117 break;
393cc3f5 1118 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1119 break;
1120 case FIOGETOWN:
1121 case SIOCGPGRP:
609d7fa9 1122 err = put_user(f_getown(sock->file),
89bddce5 1123 (int __user *)argp);
1da177e4
LT
1124 break;
1125 case SIOCGIFBR:
1126 case SIOCSIFBR:
1127 case SIOCBRADDBR:
1128 case SIOCBRDELBR:
1129 err = -ENOPKG;
1130 if (!br_ioctl_hook)
1131 request_module("bridge");
1132
4a3e2f71 1133 mutex_lock(&br_ioctl_mutex);
89bddce5 1134 if (br_ioctl_hook)
881d966b 1135 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1136 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1137 break;
1138 case SIOCGIFVLAN:
1139 case SIOCSIFVLAN:
1140 err = -ENOPKG;
1141 if (!vlan_ioctl_hook)
1142 request_module("8021q");
1143
4a3e2f71 1144 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1145 if (vlan_ioctl_hook)
881d966b 1146 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1147 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1148 break;
1da177e4
LT
1149 case SIOCADDDLCI:
1150 case SIOCDELDLCI:
1151 err = -ENOPKG;
1152 if (!dlci_ioctl_hook)
1153 request_module("dlci");
1154
7512cbf6
PE
1155 mutex_lock(&dlci_ioctl_mutex);
1156 if (dlci_ioctl_hook)
1da177e4 1157 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1158 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1159 break;
c62cce2c
AV
1160 case SIOCGSKNS:
1161 err = -EPERM;
1162 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1163 break;
1164
1165 err = open_related_ns(&net->ns, get_net_ns);
1166 break;
0768e170
AB
1167 case SIOCGSTAMP_OLD:
1168 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1169 if (!sock->ops->gettstamp) {
1170 err = -ENOIOCTLCMD;
1171 break;
1172 }
1173 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1174 cmd == SIOCGSTAMP_OLD,
1175 !IS_ENABLED(CONFIG_64BIT));
1176 case SIOCGSTAMP_NEW:
1177 case SIOCGSTAMPNS_NEW:
1178 if (!sock->ops->gettstamp) {
1179 err = -ENOIOCTLCMD;
1180 break;
1181 }
1182 err = sock->ops->gettstamp(sock, argp,
1183 cmd == SIOCGSTAMP_NEW,
1184 false);
c7cbdbf2 1185 break;
1da177e4 1186 default:
63ff03ab 1187 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1188 break;
89bddce5 1189 }
1da177e4
LT
1190 return err;
1191}
1192
8a3c245c
PT
1193/**
1194 * sock_create_lite - creates a socket
1195 * @family: protocol family (AF_INET, ...)
1196 * @type: communication type (SOCK_STREAM, ...)
1197 * @protocol: protocol (0, ...)
1198 * @res: new socket
1199 *
1200 * Creates a new socket and assigns it to @res, passing through LSM.
1201 * The new socket initialization is not complete, see kernel_accept().
1202 * Returns 0 or an error. On failure @res is set to %NULL.
1203 * This function internally uses GFP_KERNEL.
1204 */
1205
1da177e4
LT
1206int sock_create_lite(int family, int type, int protocol, struct socket **res)
1207{
1208 int err;
1209 struct socket *sock = NULL;
89bddce5 1210
1da177e4
LT
1211 err = security_socket_create(family, type, protocol, 1);
1212 if (err)
1213 goto out;
1214
1215 sock = sock_alloc();
1216 if (!sock) {
1217 err = -ENOMEM;
1218 goto out;
1219 }
1220
1da177e4 1221 sock->type = type;
7420ed23
VY
1222 err = security_socket_post_create(sock, family, type, protocol, 1);
1223 if (err)
1224 goto out_release;
1225
1da177e4
LT
1226out:
1227 *res = sock;
1228 return err;
7420ed23
VY
1229out_release:
1230 sock_release(sock);
1231 sock = NULL;
1232 goto out;
1da177e4 1233}
c6d409cf 1234EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1235
1236/* No kernel lock held - perfect */
ade994f4 1237static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1238{
3cafb376 1239 struct socket *sock = file->private_data;
a331de3b 1240 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1241
e88958e6
CH
1242 if (!sock->ops->poll)
1243 return 0;
f641f13b 1244
a331de3b
CH
1245 if (sk_can_busy_loop(sock->sk)) {
1246 /* poll once if requested by the syscall */
1247 if (events & POLL_BUSY_LOOP)
1248 sk_busy_loop(sock->sk, 1);
1249
1250 /* if this socket can poll_ll, tell the system call */
1251 flag = POLL_BUSY_LOOP;
1252 }
1253
1254 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1255}
1256
89bddce5 1257static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1258{
b69aee04 1259 struct socket *sock = file->private_data;
1da177e4
LT
1260
1261 return sock->ops->mmap(file, sock, vma);
1262}
1263
/*
 * release file operation for sockets: release the socket that lives in
 * @inode.  @filp is not used.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);

	return 0;
}
1269
1270/*
1271 * Update the socket async list
1272 *
1273 * Fasync_list locking strategy.
1274 *
1275 * 1. fasync_list is modified only under process context socket lock
1276 * i.e. under semaphore.
1277 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1278 * or under socket lock
1da177e4
LT
1279 */
1280
1281static int sock_fasync(int fd, struct file *filp, int on)
1282{
989a2979
ED
1283 struct socket *sock = filp->private_data;
1284 struct sock *sk = sock->sk;
eaefd110 1285 struct socket_wq *wq;
1da177e4 1286
989a2979 1287 if (sk == NULL)
1da177e4 1288 return -EINVAL;
1da177e4
LT
1289
1290 lock_sock(sk);
e6476c21 1291 wq = sock->wq;
eaefd110 1292 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1293
eaefd110 1294 if (!wq->fasync_list)
989a2979
ED
1295 sock_reset_flag(sk, SOCK_FASYNC);
1296 else
bcdce719 1297 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1298
989a2979 1299 release_sock(sk);
1da177e4
LT
1300 return 0;
1301}
1302
ceb5d58b 1303/* This function may be called only under rcu_lock */
1da177e4 1304
ceb5d58b 1305int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1306{
ceb5d58b 1307 if (!wq || !wq->fasync_list)
1da177e4 1308 return -1;
ceb5d58b 1309
89bddce5 1310 switch (how) {
8d8ad9d7 1311 case SOCK_WAKE_WAITD:
ceb5d58b 1312 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1313 break;
1314 goto call_kill;
8d8ad9d7 1315 case SOCK_WAKE_SPACE:
ceb5d58b 1316 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1317 break;
1318 /* fall through */
8d8ad9d7 1319 case SOCK_WAKE_IO:
89bddce5 1320call_kill:
43815482 1321 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1322 break;
8d8ad9d7 1323 case SOCK_WAKE_URG:
43815482 1324 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1325 }
ceb5d58b 1326
1da177e4
LT
1327 return 0;
1328}
c6d409cf 1329EXPORT_SYMBOL(sock_wake_async);
1da177e4 1330
8a3c245c
PT
1331/**
1332 * __sock_create - creates a socket
1333 * @net: net namespace
1334 * @family: protocol family (AF_INET, ...)
1335 * @type: communication type (SOCK_STREAM, ...)
1336 * @protocol: protocol (0, ...)
1337 * @res: new socket
1338 * @kern: boolean for kernel space sockets
1339 *
1340 * Creates a new socket and assigns it to @res, passing through LSM.
1341 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1342 * be set to true if the socket resides in kernel space.
1343 * This function internally uses GFP_KERNEL.
1344 */
1345
721db93a 1346int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1347 struct socket **res, int kern)
1da177e4
LT
1348{
1349 int err;
1350 struct socket *sock;
55737fda 1351 const struct net_proto_family *pf;
1da177e4
LT
1352
1353 /*
89bddce5 1354 * Check protocol is in range
1da177e4
LT
1355 */
1356 if (family < 0 || family >= NPROTO)
1357 return -EAFNOSUPPORT;
1358 if (type < 0 || type >= SOCK_MAX)
1359 return -EINVAL;
1360
1361 /* Compatibility.
1362
1363 This uglymoron is moved from INET layer to here to avoid
1364 deadlock in module load.
1365 */
1366 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1367 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1368 current->comm);
1da177e4
LT
1369 family = PF_PACKET;
1370 }
1371
1372 err = security_socket_create(family, type, protocol, kern);
1373 if (err)
1374 return err;
89bddce5 1375
55737fda
SH
1376 /*
1377 * Allocate the socket and allow the family to set things up. if
1378 * the protocol is 0, the family is instructed to select an appropriate
1379 * default.
1380 */
1381 sock = sock_alloc();
1382 if (!sock) {
e87cc472 1383 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1384 return -ENFILE; /* Not exactly a match, but its the
1385 closest posix thing */
1386 }
1387
1388 sock->type = type;
1389
95a5afca 1390#ifdef CONFIG_MODULES
89bddce5
SH
1391 /* Attempt to load a protocol module if the find failed.
1392 *
1393 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1394 * requested real, full-featured networking support upon configuration.
1395 * Otherwise module support will break!
1396 */
190683a9 1397 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1398 request_module("net-pf-%d", family);
1da177e4
LT
1399#endif
1400
55737fda
SH
1401 rcu_read_lock();
1402 pf = rcu_dereference(net_families[family]);
1403 err = -EAFNOSUPPORT;
1404 if (!pf)
1405 goto out_release;
1da177e4
LT
1406
1407 /*
1408 * We will call the ->create function, that possibly is in a loadable
1409 * module, so we have to bump that loadable module refcnt first.
1410 */
55737fda 1411 if (!try_module_get(pf->owner))
1da177e4
LT
1412 goto out_release;
1413
55737fda
SH
1414 /* Now protected by module ref count */
1415 rcu_read_unlock();
1416
3f378b68 1417 err = pf->create(net, sock, protocol, kern);
55737fda 1418 if (err < 0)
1da177e4 1419 goto out_module_put;
a79af59e 1420
1da177e4
LT
1421 /*
1422 * Now to bump the refcnt of the [loadable] module that owns this
1423 * socket at sock_release time we decrement its refcnt.
1424 */
55737fda
SH
1425 if (!try_module_get(sock->ops->owner))
1426 goto out_module_busy;
1427
1da177e4
LT
1428 /*
1429 * Now that we're done with the ->create function, the [loadable]
1430 * module can have its refcnt decremented
1431 */
55737fda 1432 module_put(pf->owner);
7420ed23
VY
1433 err = security_socket_post_create(sock, family, type, protocol, kern);
1434 if (err)
3b185525 1435 goto out_sock_release;
55737fda 1436 *res = sock;
1da177e4 1437
55737fda
SH
1438 return 0;
1439
1440out_module_busy:
1441 err = -EAFNOSUPPORT;
1da177e4 1442out_module_put:
55737fda
SH
1443 sock->ops = NULL;
1444 module_put(pf->owner);
1445out_sock_release:
1da177e4 1446 sock_release(sock);
55737fda
SH
1447 return err;
1448
1449out_release:
1450 rcu_read_unlock();
1451 goto out_sock_release;
1da177e4 1452}
721db93a 1453EXPORT_SYMBOL(__sock_create);
1da177e4 1454
8a3c245c
PT
1455/**
1456 * sock_create - creates a socket
1457 * @family: protocol family (AF_INET, ...)
1458 * @type: communication type (SOCK_STREAM, ...)
1459 * @protocol: protocol (0, ...)
1460 * @res: new socket
1461 *
1462 * A wrapper around __sock_create().
1463 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1464 */
1465
1da177e4
LT
1466int sock_create(int family, int type, int protocol, struct socket **res)
1467{
1b8d7ae4 1468 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1469}
c6d409cf 1470EXPORT_SYMBOL(sock_create);
1da177e4 1471
8a3c245c
PT
1472/**
1473 * sock_create_kern - creates a socket (kernel space)
1474 * @net: net namespace
1475 * @family: protocol family (AF_INET, ...)
1476 * @type: communication type (SOCK_STREAM, ...)
1477 * @protocol: protocol (0, ...)
1478 * @res: new socket
1479 *
1480 * A wrapper around __sock_create().
1481 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1482 */
1483
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	/* Same as sock_create() but in the caller's @net and with kern == 1. */
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1489
9d6a15c3 1490int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1491{
1492 int retval;
1493 struct socket *sock;
a677a039
UD
1494 int flags;
1495
e38b36f3
UD
1496 /* Check the SOCK_* constants for consistency. */
1497 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1498 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1499 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1500 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1501
a677a039 1502 flags = type & ~SOCK_TYPE_MASK;
77d27200 1503 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1504 return -EINVAL;
1505 type &= SOCK_TYPE_MASK;
1da177e4 1506
aaca0bdc
UD
1507 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1508 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1509
1da177e4
LT
1510 retval = sock_create(family, type, protocol, &sock);
1511 if (retval < 0)
8e1611e2 1512 return retval;
1da177e4 1513
8e1611e2 1514 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1515}
1516
9d6a15c3
DB
/* socket system call entry point: forwards its arguments unchanged to __sys_socket(). */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
	return __sys_socket(family, type, protocol);
}
1521
1da177e4
LT
1522/*
1523 * Create a pair of connected sockets.
1524 */
1525
6debc8d8 1526int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1527{
1528 struct socket *sock1, *sock2;
1529 int fd1, fd2, err;
db349509 1530 struct file *newfile1, *newfile2;
a677a039
UD
1531 int flags;
1532
1533 flags = type & ~SOCK_TYPE_MASK;
77d27200 1534 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1535 return -EINVAL;
1536 type &= SOCK_TYPE_MASK;
1da177e4 1537
aaca0bdc
UD
1538 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1539 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1540
016a266b
AV
1541 /*
1542 * reserve descriptors and make sure we won't fail
1543 * to return them to userland.
1544 */
1545 fd1 = get_unused_fd_flags(flags);
1546 if (unlikely(fd1 < 0))
1547 return fd1;
1548
1549 fd2 = get_unused_fd_flags(flags);
1550 if (unlikely(fd2 < 0)) {
1551 put_unused_fd(fd1);
1552 return fd2;
1553 }
1554
1555 err = put_user(fd1, &usockvec[0]);
1556 if (err)
1557 goto out;
1558
1559 err = put_user(fd2, &usockvec[1]);
1560 if (err)
1561 goto out;
1562
1da177e4
LT
1563 /*
1564 * Obtain the first socket and check if the underlying protocol
1565 * supports the socketpair call.
1566 */
1567
1568 err = sock_create(family, type, protocol, &sock1);
016a266b 1569 if (unlikely(err < 0))
1da177e4
LT
1570 goto out;
1571
1572 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1573 if (unlikely(err < 0)) {
1574 sock_release(sock1);
1575 goto out;
bf3c23d1 1576 }
d73aa286 1577
d47cd945
DH
1578 err = security_socket_socketpair(sock1, sock2);
1579 if (unlikely(err)) {
1580 sock_release(sock2);
1581 sock_release(sock1);
1582 goto out;
1583 }
1584
016a266b
AV
1585 err = sock1->ops->socketpair(sock1, sock2);
1586 if (unlikely(err < 0)) {
1587 sock_release(sock2);
1588 sock_release(sock1);
1589 goto out;
28407630
AV
1590 }
1591
aab174f0 1592 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1593 if (IS_ERR(newfile1)) {
28407630 1594 err = PTR_ERR(newfile1);
016a266b
AV
1595 sock_release(sock2);
1596 goto out;
28407630
AV
1597 }
1598
aab174f0 1599 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1600 if (IS_ERR(newfile2)) {
1601 err = PTR_ERR(newfile2);
016a266b
AV
1602 fput(newfile1);
1603 goto out;
db349509
AV
1604 }
1605
157cf649 1606 audit_fd_pair(fd1, fd2);
d73aa286 1607
db349509
AV
1608 fd_install(fd1, newfile1);
1609 fd_install(fd2, newfile2);
d73aa286 1610 return 0;
1da177e4 1611
016a266b 1612out:
d73aa286 1613 put_unused_fd(fd2);
d73aa286 1614 put_unused_fd(fd1);
1da177e4
LT
1615 return err;
1616}
1617
6debc8d8
DB
/* socketpair system call entry point: forwards its arguments unchanged to __sys_socketpair(). */
SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
		int __user *, usockvec)
{
	return __sys_socketpair(family, type, protocol, usockvec);
}
1623
1da177e4
LT
1624/*
1625 * Bind a name to a socket. Nothing much to do here since it's
1626 * the protocol's responsibility to handle the local address.
1627 *
1628 * We move the socket address to kernel space before we call
1629 * the protocol layer (having also checked the address is ok).
1630 */
1631
a87d35d8 1632int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1633{
1634 struct socket *sock;
230b1839 1635 struct sockaddr_storage address;
6cb153ca 1636 int err, fput_needed;
1da177e4 1637
89bddce5 1638 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1639 if (sock) {
43db362d 1640 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1641 if (!err) {
89bddce5 1642 err = security_socket_bind(sock,
230b1839 1643 (struct sockaddr *)&address,
89bddce5 1644 addrlen);
6cb153ca
BL
1645 if (!err)
1646 err = sock->ops->bind(sock,
89bddce5 1647 (struct sockaddr *)
230b1839 1648 &address, addrlen);
1da177e4 1649 }
6cb153ca 1650 fput_light(sock->file, fput_needed);
89bddce5 1651 }
1da177e4
LT
1652 return err;
1653}
1654
a87d35d8
DB
/* bind system call entry point: forwards its arguments unchanged to __sys_bind(). */
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
{
	return __sys_bind(fd, umyaddr, addrlen);
}
1659
1da177e4
LT
1660/*
1661 * Perform a listen. Basically, we allow the protocol to do anything
1662 * necessary for a listen, and if that works, we mark the socket as
1663 * ready for listening.
1664 */
1665
25e290ee 1666int __sys_listen(int fd, int backlog)
1da177e4
LT
1667{
1668 struct socket *sock;
6cb153ca 1669 int err, fput_needed;
b8e1f9b5 1670 int somaxconn;
89bddce5
SH
1671
1672 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1673 if (sock) {
8efa6e93 1674 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1675 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1676 backlog = somaxconn;
1da177e4
LT
1677
1678 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1679 if (!err)
1680 err = sock->ops->listen(sock, backlog);
1da177e4 1681
6cb153ca 1682 fput_light(sock->file, fput_needed);
1da177e4
LT
1683 }
1684 return err;
1685}
1686
25e290ee
DB
/* listen system call entry point: forwards its arguments unchanged to __sys_listen(). */
SYSCALL_DEFINE2(listen, int, fd, int, backlog)
{
	return __sys_listen(fd, backlog);
}
1691
1da177e4
LT
1692/*
1693 * For accept, we attempt to create a new socket, set up the link
1694 * with the client, wake up the client, then return the new
1695 * connected fd. We collect the address of the connector in kernel
1696 * space and move it to user at the very end. This is unclean because
1697 * we open the socket then return an error.
1698 *
1699 * 1003.1g adds the ability to recvmsg() to query connection pending
1700 * status to recvmsg. We need to add that support in a way thats
b903036a 1701 * clean when we restructure accept also.
1da177e4
LT
1702 */
1703
4541e805
DB
1704int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1705 int __user *upeer_addrlen, int flags)
1da177e4
LT
1706{
1707 struct socket *sock, *newsock;
39d8c1b6 1708 struct file *newfile;
6cb153ca 1709 int err, len, newfd, fput_needed;
230b1839 1710 struct sockaddr_storage address;
1da177e4 1711
77d27200 1712 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1713 return -EINVAL;
1714
1715 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1716 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1717
6cb153ca 1718 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1719 if (!sock)
1720 goto out;
1721
1722 err = -ENFILE;
c6d409cf
ED
1723 newsock = sock_alloc();
1724 if (!newsock)
1da177e4
LT
1725 goto out_put;
1726
1727 newsock->type = sock->type;
1728 newsock->ops = sock->ops;
1729
1da177e4
LT
1730 /*
1731 * We don't need try_module_get here, as the listening socket (sock)
1732 * has the protocol module (sock->ops->owner) held.
1733 */
1734 __module_get(newsock->ops->owner);
1735
28407630 1736 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1737 if (unlikely(newfd < 0)) {
1738 err = newfd;
9a1875e6
DM
1739 sock_release(newsock);
1740 goto out_put;
39d8c1b6 1741 }
aab174f0 1742 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1743 if (IS_ERR(newfile)) {
28407630
AV
1744 err = PTR_ERR(newfile);
1745 put_unused_fd(newfd);
28407630
AV
1746 goto out_put;
1747 }
39d8c1b6 1748
a79af59e
FF
1749 err = security_socket_accept(sock, newsock);
1750 if (err)
39d8c1b6 1751 goto out_fd;
a79af59e 1752
cdfbabfb 1753 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1754 if (err < 0)
39d8c1b6 1755 goto out_fd;
1da177e4
LT
1756
1757 if (upeer_sockaddr) {
9b2c45d4
DV
1758 len = newsock->ops->getname(newsock,
1759 (struct sockaddr *)&address, 2);
1760 if (len < 0) {
1da177e4 1761 err = -ECONNABORTED;
39d8c1b6 1762 goto out_fd;
1da177e4 1763 }
43db362d 1764 err = move_addr_to_user(&address,
230b1839 1765 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1766 if (err < 0)
39d8c1b6 1767 goto out_fd;
1da177e4
LT
1768 }
1769
1770 /* File flags are not inherited via accept() unlike another OSes. */
1771
39d8c1b6
DM
1772 fd_install(newfd, newfile);
1773 err = newfd;
1da177e4 1774
1da177e4 1775out_put:
6cb153ca 1776 fput_light(sock->file, fput_needed);
1da177e4
LT
1777out:
1778 return err;
39d8c1b6 1779out_fd:
9606a216 1780 fput(newfile);
39d8c1b6 1781 put_unused_fd(newfd);
1da177e4
LT
1782 goto out_put;
1783}
1784
4541e805
DB
/* accept4 system call entry point: forwards its arguments unchanged to __sys_accept4(). */
SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
		int __user *, upeer_addrlen, int, flags)
{
	return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
}
1790
20f37034
HC
/* accept system call entry point: same as accept4 with flags fixed to 0. */
SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
		int __user *, upeer_addrlen)
{
	return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
}
1796
1da177e4
LT
1797/*
1798 * Attempt to connect to a socket with the server address. The address
1799 * is in user space so we verify it is OK and move it to kernel space.
1800 *
1801 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1802 * break bindings
1803 *
1804 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1805 * other SEQPACKET protocols that take time to connect() as it doesn't
1806 * include the -EINPROGRESS status for such sockets.
1807 */
1808
1387c2c2 1809int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1810{
1811 struct socket *sock;
230b1839 1812 struct sockaddr_storage address;
6cb153ca 1813 int err, fput_needed;
1da177e4 1814
6cb153ca 1815 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1816 if (!sock)
1817 goto out;
43db362d 1818 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1819 if (err < 0)
1820 goto out_put;
1821
89bddce5 1822 err =
230b1839 1823 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1824 if (err)
1825 goto out_put;
1826
230b1839 1827 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1828 sock->file->f_flags);
1829out_put:
6cb153ca 1830 fput_light(sock->file, fput_needed);
1da177e4
LT
1831out:
1832 return err;
1833}
1834
1387c2c2
DB
/* connect system call entry point: forwards its arguments unchanged to __sys_connect(). */
SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
		int, addrlen)
{
	return __sys_connect(fd, uservaddr, addrlen);
}
1840
1da177e4
LT
1841/*
1842 * Get the local address ('name') of a socket object. Move the obtained
1843 * name to user space.
1844 */
1845
8882a107
DB
1846int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1847 int __user *usockaddr_len)
1da177e4
LT
1848{
1849 struct socket *sock;
230b1839 1850 struct sockaddr_storage address;
9b2c45d4 1851 int err, fput_needed;
89bddce5 1852
6cb153ca 1853 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1854 if (!sock)
1855 goto out;
1856
1857 err = security_socket_getsockname(sock);
1858 if (err)
1859 goto out_put;
1860
9b2c45d4
DV
1861 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1862 if (err < 0)
1da177e4 1863 goto out_put;
9b2c45d4
DV
1864 /* "err" is actually length in this case */
1865 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1866
1867out_put:
6cb153ca 1868 fput_light(sock->file, fput_needed);
1da177e4
LT
1869out:
1870 return err;
1871}
1872
8882a107
DB
/* getsockname system call entry point: forwards its arguments unchanged to __sys_getsockname(). */
SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
		int __user *, usockaddr_len)
{
	return __sys_getsockname(fd, usockaddr, usockaddr_len);
}
1878
1da177e4
LT
1879/*
1880 * Get the remote address ('name') of a socket object. Move the obtained
1881 * name to user space.
1882 */
1883
b21c8f83
DB
1884int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1885 int __user *usockaddr_len)
1da177e4
LT
1886{
1887 struct socket *sock;
230b1839 1888 struct sockaddr_storage address;
9b2c45d4 1889 int err, fput_needed;
1da177e4 1890
89bddce5
SH
1891 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1892 if (sock != NULL) {
1da177e4
LT
1893 err = security_socket_getpeername(sock);
1894 if (err) {
6cb153ca 1895 fput_light(sock->file, fput_needed);
1da177e4
LT
1896 return err;
1897 }
1898
9b2c45d4
DV
1899 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1900 if (err >= 0)
1901 /* "err" is actually length in this case */
1902 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1903 usockaddr_len);
6cb153ca 1904 fput_light(sock->file, fput_needed);
1da177e4
LT
1905 }
1906 return err;
1907}
1908
b21c8f83
DB
/* getpeername system call entry point: forwards its arguments unchanged to __sys_getpeername(). */
SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
		int __user *, usockaddr_len)
{
	return __sys_getpeername(fd, usockaddr, usockaddr_len);
}
1914
1da177e4
LT
1915/*
1916 * Send a datagram to a given address. We move the address into kernel
1917 * space and check the user space data area is readable before invoking
1918 * the protocol.
1919 */
211b634b
DB
1920int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1921 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1922{
1923 struct socket *sock;
230b1839 1924 struct sockaddr_storage address;
1da177e4
LT
1925 int err;
1926 struct msghdr msg;
1927 struct iovec iov;
6cb153ca 1928 int fput_needed;
6cb153ca 1929
602bd0e9
AV
1930 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1931 if (unlikely(err))
1932 return err;
de0fa95c
PE
1933 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1934 if (!sock)
4387ff75 1935 goto out;
6cb153ca 1936
89bddce5 1937 msg.msg_name = NULL;
89bddce5
SH
1938 msg.msg_control = NULL;
1939 msg.msg_controllen = 0;
1940 msg.msg_namelen = 0;
6cb153ca 1941 if (addr) {
43db362d 1942 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1943 if (err < 0)
1944 goto out_put;
230b1839 1945 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1946 msg.msg_namelen = addr_len;
1da177e4
LT
1947 }
1948 if (sock->file->f_flags & O_NONBLOCK)
1949 flags |= MSG_DONTWAIT;
1950 msg.msg_flags = flags;
d8725c86 1951 err = sock_sendmsg(sock, &msg);
1da177e4 1952
89bddce5 1953out_put:
de0fa95c 1954 fput_light(sock->file, fput_needed);
4387ff75 1955out:
1da177e4
LT
1956 return err;
1957}
1958
211b634b
DB
1959SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1960 unsigned int, flags, struct sockaddr __user *, addr,
1961 int, addr_len)
1962{
1963 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1964}
1965
1da177e4 1966/*
89bddce5 1967 * Send a datagram down a socket.
1da177e4
LT
1968 */
1969
3e0fa65f 1970SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1971 unsigned int, flags)
1da177e4 1972{
211b634b 1973 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1974}
1975
1976/*
89bddce5 1977 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1978 * sender. We verify the buffers are writable and if needed move the
1979 * sender address from kernel to user space.
1980 */
7a09e1eb
DB
1981int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
1982 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
1983{
1984 struct socket *sock;
1985 struct iovec iov;
1986 struct msghdr msg;
230b1839 1987 struct sockaddr_storage address;
89bddce5 1988 int err, err2;
6cb153ca
BL
1989 int fput_needed;
1990
602bd0e9
AV
1991 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1992 if (unlikely(err))
1993 return err;
de0fa95c 1994 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1995 if (!sock)
de0fa95c 1996 goto out;
1da177e4 1997
89bddce5
SH
1998 msg.msg_control = NULL;
1999 msg.msg_controllen = 0;
f3d33426
HFS
2000 /* Save some cycles and don't copy the address if not needed */
2001 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2002 /* We assume all kernel code knows the size of sockaddr_storage */
2003 msg.msg_namelen = 0;
130ed5d1 2004 msg.msg_iocb = NULL;
9f138fa6 2005 msg.msg_flags = 0;
1da177e4
LT
2006 if (sock->file->f_flags & O_NONBLOCK)
2007 flags |= MSG_DONTWAIT;
2da62906 2008 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2009
89bddce5 2010 if (err >= 0 && addr != NULL) {
43db362d 2011 err2 = move_addr_to_user(&address,
230b1839 2012 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2013 if (err2 < 0)
2014 err = err2;
1da177e4 2015 }
de0fa95c
PE
2016
2017 fput_light(sock->file, fput_needed);
4387ff75 2018out:
1da177e4
LT
2019 return err;
2020}
2021
7a09e1eb
DB
2022SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2023 unsigned int, flags, struct sockaddr __user *, addr,
2024 int __user *, addr_len)
2025{
2026 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2027}
2028
1da177e4 2029/*
89bddce5 2030 * Receive a datagram from a socket.
1da177e4
LT
2031 */
2032
b7c0ddf5
JG
2033SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2034 unsigned int, flags)
1da177e4 2035{
7a09e1eb 2036 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2037}
2038
2039/*
2040 * Set a socket option. Because we don't know the option lengths we have
2041 * to pass the user mode parameter for the protocols to sort out.
2042 */
2043
cc36dca0
DB
2044static int __sys_setsockopt(int fd, int level, int optname,
2045 char __user *optval, int optlen)
1da177e4 2046{
6cb153ca 2047 int err, fput_needed;
1da177e4
LT
2048 struct socket *sock;
2049
2050 if (optlen < 0)
2051 return -EINVAL;
89bddce5
SH
2052
2053 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2054 if (sock != NULL) {
2055 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2056 if (err)
2057 goto out_put;
1da177e4
LT
2058
2059 if (level == SOL_SOCKET)
89bddce5
SH
2060 err =
2061 sock_setsockopt(sock, level, optname, optval,
2062 optlen);
1da177e4 2063 else
89bddce5
SH
2064 err =
2065 sock->ops->setsockopt(sock, level, optname, optval,
2066 optlen);
6cb153ca
BL
2067out_put:
2068 fput_light(sock->file, fput_needed);
1da177e4
LT
2069 }
2070 return err;
2071}
2072
cc36dca0
DB
2073SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2074 char __user *, optval, int, optlen)
2075{
2076 return __sys_setsockopt(fd, level, optname, optval, optlen);
2077}
2078
1da177e4
LT
2079/*
2080 * Get a socket option. Because we don't know the option lengths we have
2081 * to pass a user mode parameter for the protocols to sort out.
2082 */
2083
13a2d70e
DB
2084static int __sys_getsockopt(int fd, int level, int optname,
2085 char __user *optval, int __user *optlen)
1da177e4 2086{
6cb153ca 2087 int err, fput_needed;
1da177e4
LT
2088 struct socket *sock;
2089
89bddce5
SH
2090 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2091 if (sock != NULL) {
6cb153ca
BL
2092 err = security_socket_getsockopt(sock, level, optname);
2093 if (err)
2094 goto out_put;
1da177e4
LT
2095
2096 if (level == SOL_SOCKET)
89bddce5
SH
2097 err =
2098 sock_getsockopt(sock, level, optname, optval,
2099 optlen);
1da177e4 2100 else
89bddce5
SH
2101 err =
2102 sock->ops->getsockopt(sock, level, optname, optval,
2103 optlen);
6cb153ca
BL
2104out_put:
2105 fput_light(sock->file, fput_needed);
1da177e4
LT
2106 }
2107 return err;
2108}
2109
13a2d70e
DB
2110SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2111 char __user *, optval, int __user *, optlen)
2112{
2113 return __sys_getsockopt(fd, level, optname, optval, optlen);
2114}
2115
1da177e4
LT
2116/*
2117 * Shutdown a socket.
2118 */
2119
005a1aea 2120int __sys_shutdown(int fd, int how)
1da177e4 2121{
6cb153ca 2122 int err, fput_needed;
1da177e4
LT
2123 struct socket *sock;
2124
89bddce5
SH
2125 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2126 if (sock != NULL) {
1da177e4 2127 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2128 if (!err)
2129 err = sock->ops->shutdown(sock, how);
2130 fput_light(sock->file, fput_needed);
1da177e4
LT
2131 }
2132 return err;
2133}
2134
005a1aea
DB
2135SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2136{
2137 return __sys_shutdown(fd, how);
2138}
2139
/*
 * Helper macros yielding the address of a msghdr field that has the same
 * type (int / unsigned) in the 32 and 64 bit layouts on our platforms.
 *
 * They expand to an expression that reads a variable named "flags" and a
 * pointer named "<msg>_compat", so both must be in scope at the use site.
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2146
c71d8ebe
TH
2147struct used_address {
2148 struct sockaddr_storage name;
2149 unsigned int name_len;
2150};
2151
da184284
AV
2152static int copy_msghdr_from_user(struct msghdr *kmsg,
2153 struct user_msghdr __user *umsg,
2154 struct sockaddr __user **save_addr,
2155 struct iovec **iov)
1661bf36 2156{
ffb07550 2157 struct user_msghdr msg;
08adb7da
AV
2158 ssize_t err;
2159
ffb07550 2160 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2161 return -EFAULT;
dbb490b9 2162
864d9664 2163 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2164 kmsg->msg_controllen = msg.msg_controllen;
2165 kmsg->msg_flags = msg.msg_flags;
2166
2167 kmsg->msg_namelen = msg.msg_namelen;
2168 if (!msg.msg_name)
6a2a2b3a
AS
2169 kmsg->msg_namelen = 0;
2170
dbb490b9
ML
2171 if (kmsg->msg_namelen < 0)
2172 return -EINVAL;
2173
1661bf36 2174 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2175 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2176
2177 if (save_addr)
ffb07550 2178 *save_addr = msg.msg_name;
08adb7da 2179
ffb07550 2180 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2181 if (!save_addr) {
864d9664
PA
2182 err = move_addr_to_kernel(msg.msg_name,
2183 kmsg->msg_namelen,
08adb7da
AV
2184 kmsg->msg_name);
2185 if (err < 0)
2186 return err;
2187 }
2188 } else {
2189 kmsg->msg_name = NULL;
2190 kmsg->msg_namelen = 0;
2191 }
2192
ffb07550 2193 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2194 return -EMSGSIZE;
2195
0345f931 2196 kmsg->msg_iocb = NULL;
2197
ffb07550
AV
2198 return import_iovec(save_addr ? READ : WRITE,
2199 msg.msg_iov, msg.msg_iovlen,
da184284 2200 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
2201}
2202
666547ff 2203static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2204 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2205 struct used_address *used_address,
2206 unsigned int allowed_msghdr_flags)
1da177e4 2207{
89bddce5
SH
2208 struct compat_msghdr __user *msg_compat =
2209 (struct compat_msghdr __user *)msg;
230b1839 2210 struct sockaddr_storage address;
1da177e4 2211 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2212 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2213 __aligned(sizeof(__kernel_size_t));
89bddce5 2214 /* 20 is size of ipv6_pktinfo */
1da177e4 2215 unsigned char *ctl_buf = ctl;
d8725c86 2216 int ctl_len;
08adb7da 2217 ssize_t err;
89bddce5 2218
08adb7da 2219 msg_sys->msg_name = &address;
1da177e4 2220
08449320 2221 if (MSG_CMSG_COMPAT & flags)
08adb7da 2222 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2223 else
08adb7da 2224 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2225 if (err < 0)
da184284 2226 return err;
1da177e4
LT
2227
2228 err = -ENOBUFS;
2229
228e548e 2230 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2231 goto out_freeiov;
28a94d8f 2232 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2233 ctl_len = msg_sys->msg_controllen;
1da177e4 2234 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2235 err =
228e548e 2236 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2237 sizeof(ctl));
1da177e4
LT
2238 if (err)
2239 goto out_freeiov;
228e548e
AB
2240 ctl_buf = msg_sys->msg_control;
2241 ctl_len = msg_sys->msg_controllen;
1da177e4 2242 } else if (ctl_len) {
ac4340fc
DM
2243 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2244 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2245 if (ctl_len > sizeof(ctl)) {
1da177e4 2246 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2247 if (ctl_buf == NULL)
1da177e4
LT
2248 goto out_freeiov;
2249 }
2250 err = -EFAULT;
2251 /*
228e548e 2252 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2253 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2254 * checking falls down on this.
2255 */
fb8621bb 2256 if (copy_from_user(ctl_buf,
228e548e 2257 (void __user __force *)msg_sys->msg_control,
89bddce5 2258 ctl_len))
1da177e4 2259 goto out_freectl;
228e548e 2260 msg_sys->msg_control = ctl_buf;
1da177e4 2261 }
228e548e 2262 msg_sys->msg_flags = flags;
1da177e4
LT
2263
2264 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2265 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2266 /*
2267 * If this is sendmmsg() and current destination address is same as
2268 * previously succeeded address, omit asking LSM's decision.
2269 * used_address->name_len is initialized to UINT_MAX so that the first
2270 * destination address never matches.
2271 */
bc909d9d
MD
2272 if (used_address && msg_sys->msg_name &&
2273 used_address->name_len == msg_sys->msg_namelen &&
2274 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2275 used_address->name_len)) {
d8725c86 2276 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2277 goto out_freectl;
2278 }
d8725c86 2279 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2280 /*
2281 * If this is sendmmsg() and sending to current destination address was
2282 * successful, remember it.
2283 */
2284 if (used_address && err >= 0) {
2285 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2286 if (msg_sys->msg_name)
2287 memcpy(&used_address->name, msg_sys->msg_name,
2288 used_address->name_len);
c71d8ebe 2289 }
1da177e4
LT
2290
2291out_freectl:
89bddce5 2292 if (ctl_buf != ctl)
1da177e4
LT
2293 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2294out_freeiov:
da184284 2295 kfree(iov);
228e548e
AB
2296 return err;
2297}
2298
2299/*
2300 * BSD sendmsg interface
2301 */
2302
e1834a32
DB
2303long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2304 bool forbid_cmsg_compat)
228e548e
AB
2305{
2306 int fput_needed, err;
2307 struct msghdr msg_sys;
1be374a0
AL
2308 struct socket *sock;
2309
e1834a32
DB
2310 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2311 return -EINVAL;
2312
1be374a0 2313 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2314 if (!sock)
2315 goto out;
2316
28a94d8f 2317 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2318
6cb153ca 2319 fput_light(sock->file, fput_needed);
89bddce5 2320out:
1da177e4
LT
2321 return err;
2322}
2323
666547ff 2324SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2325{
e1834a32 2326 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2327}
2328
228e548e
AB
2329/*
2330 * Linux sendmmsg interface
2331 */
2332
2333int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2334 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2335{
2336 int fput_needed, err, datagrams;
2337 struct socket *sock;
2338 struct mmsghdr __user *entry;
2339 struct compat_mmsghdr __user *compat_entry;
2340 struct msghdr msg_sys;
c71d8ebe 2341 struct used_address used_address;
f092276d 2342 unsigned int oflags = flags;
228e548e 2343
e1834a32
DB
2344 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2345 return -EINVAL;
2346
98382f41
AB
2347 if (vlen > UIO_MAXIOV)
2348 vlen = UIO_MAXIOV;
228e548e
AB
2349
2350 datagrams = 0;
2351
2352 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2353 if (!sock)
2354 return err;
2355
c71d8ebe 2356 used_address.name_len = UINT_MAX;
228e548e
AB
2357 entry = mmsg;
2358 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2359 err = 0;
f092276d 2360 flags |= MSG_BATCH;
228e548e
AB
2361
2362 while (datagrams < vlen) {
f092276d
TH
2363 if (datagrams == vlen - 1)
2364 flags = oflags;
2365
228e548e 2366 if (MSG_CMSG_COMPAT & flags) {
666547ff 2367 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2368 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2369 if (err < 0)
2370 break;
2371 err = __put_user(err, &compat_entry->msg_len);
2372 ++compat_entry;
2373 } else {
a7526eb5 2374 err = ___sys_sendmsg(sock,
666547ff 2375 (struct user_msghdr __user *)entry,
28a94d8f 2376 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2377 if (err < 0)
2378 break;
2379 err = put_user(err, &entry->msg_len);
2380 ++entry;
2381 }
2382
2383 if (err)
2384 break;
2385 ++datagrams;
3023898b
SHY
2386 if (msg_data_left(&msg_sys))
2387 break;
a78cb84c 2388 cond_resched();
228e548e
AB
2389 }
2390
228e548e
AB
2391 fput_light(sock->file, fput_needed);
2392
728ffb86
AB
2393 /* We only return an error if no datagrams were able to be sent */
2394 if (datagrams != 0)
228e548e
AB
2395 return datagrams;
2396
228e548e
AB
2397 return err;
2398}
2399
2400SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2401 unsigned int, vlen, unsigned int, flags)
2402{
e1834a32 2403 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2404}
2405
666547ff 2406static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2407 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2408{
89bddce5
SH
2409 struct compat_msghdr __user *msg_compat =
2410 (struct compat_msghdr __user *)msg;
1da177e4 2411 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2412 struct iovec *iov = iovstack;
1da177e4 2413 unsigned long cmsg_ptr;
2da62906 2414 int len;
08adb7da 2415 ssize_t err;
1da177e4
LT
2416
2417 /* kernel mode address */
230b1839 2418 struct sockaddr_storage addr;
1da177e4
LT
2419
2420 /* user mode address pointers */
2421 struct sockaddr __user *uaddr;
08adb7da 2422 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2423
08adb7da 2424 msg_sys->msg_name = &addr;
1da177e4 2425
f3d33426 2426 if (MSG_CMSG_COMPAT & flags)
08adb7da 2427 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2428 else
08adb7da 2429 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2430 if (err < 0)
da184284 2431 return err;
1da177e4 2432
a2e27255
ACM
2433 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2434 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2435
f3d33426
HFS
2436 /* We assume all kernel code knows the size of sockaddr_storage */
2437 msg_sys->msg_namelen = 0;
2438
1da177e4
LT
2439 if (sock->file->f_flags & O_NONBLOCK)
2440 flags |= MSG_DONTWAIT;
2da62906 2441 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2442 if (err < 0)
2443 goto out_freeiov;
2444 len = err;
2445
2446 if (uaddr != NULL) {
43db362d 2447 err = move_addr_to_user(&addr,
a2e27255 2448 msg_sys->msg_namelen, uaddr,
89bddce5 2449 uaddr_len);
1da177e4
LT
2450 if (err < 0)
2451 goto out_freeiov;
2452 }
a2e27255 2453 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2454 COMPAT_FLAGS(msg));
1da177e4
LT
2455 if (err)
2456 goto out_freeiov;
2457 if (MSG_CMSG_COMPAT & flags)
a2e27255 2458 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2459 &msg_compat->msg_controllen);
2460 else
a2e27255 2461 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2462 &msg->msg_controllen);
2463 if (err)
2464 goto out_freeiov;
2465 err = len;
2466
2467out_freeiov:
da184284 2468 kfree(iov);
a2e27255
ACM
2469 return err;
2470}
2471
2472/*
2473 * BSD recvmsg interface
2474 */
2475
e1834a32
DB
2476long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2477 bool forbid_cmsg_compat)
a2e27255
ACM
2478{
2479 int fput_needed, err;
2480 struct msghdr msg_sys;
1be374a0
AL
2481 struct socket *sock;
2482
e1834a32
DB
2483 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2484 return -EINVAL;
2485
1be374a0 2486 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2487 if (!sock)
2488 goto out;
2489
a7526eb5 2490 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2491
6cb153ca 2492 fput_light(sock->file, fput_needed);
1da177e4
LT
2493out:
2494 return err;
2495}
2496
666547ff 2497SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2498 unsigned int, flags)
2499{
e1834a32 2500 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2501}
2502
a2e27255
ACM
2503/*
2504 * Linux recvmmsg interface
2505 */
2506
e11d4284
AB
2507static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2508 unsigned int vlen, unsigned int flags,
2509 struct timespec64 *timeout)
a2e27255
ACM
2510{
2511 int fput_needed, err, datagrams;
2512 struct socket *sock;
2513 struct mmsghdr __user *entry;
d7256d0e 2514 struct compat_mmsghdr __user *compat_entry;
a2e27255 2515 struct msghdr msg_sys;
766b9f92
DD
2516 struct timespec64 end_time;
2517 struct timespec64 timeout64;
a2e27255
ACM
2518
2519 if (timeout &&
2520 poll_select_set_timeout(&end_time, timeout->tv_sec,
2521 timeout->tv_nsec))
2522 return -EINVAL;
2523
2524 datagrams = 0;
2525
2526 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2527 if (!sock)
2528 return err;
2529
7797dc41
SHY
2530 if (likely(!(flags & MSG_ERRQUEUE))) {
2531 err = sock_error(sock->sk);
2532 if (err) {
2533 datagrams = err;
2534 goto out_put;
2535 }
e623a9e9 2536 }
a2e27255
ACM
2537
2538 entry = mmsg;
d7256d0e 2539 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2540
2541 while (datagrams < vlen) {
2542 /*
2543 * No need to ask LSM for more than the first datagram.
2544 */
d7256d0e 2545 if (MSG_CMSG_COMPAT & flags) {
666547ff 2546 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2547 &msg_sys, flags & ~MSG_WAITFORONE,
2548 datagrams);
d7256d0e
JMG
2549 if (err < 0)
2550 break;
2551 err = __put_user(err, &compat_entry->msg_len);
2552 ++compat_entry;
2553 } else {
a7526eb5 2554 err = ___sys_recvmsg(sock,
666547ff 2555 (struct user_msghdr __user *)entry,
a7526eb5
AL
2556 &msg_sys, flags & ~MSG_WAITFORONE,
2557 datagrams);
d7256d0e
JMG
2558 if (err < 0)
2559 break;
2560 err = put_user(err, &entry->msg_len);
2561 ++entry;
2562 }
2563
a2e27255
ACM
2564 if (err)
2565 break;
a2e27255
ACM
2566 ++datagrams;
2567
71c5c159
BB
2568 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2569 if (flags & MSG_WAITFORONE)
2570 flags |= MSG_DONTWAIT;
2571
a2e27255 2572 if (timeout) {
766b9f92 2573 ktime_get_ts64(&timeout64);
c2e6c856 2574 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2575 if (timeout->tv_sec < 0) {
2576 timeout->tv_sec = timeout->tv_nsec = 0;
2577 break;
2578 }
2579
2580 /* Timeout, return less than vlen datagrams */
2581 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2582 break;
2583 }
2584
2585 /* Out of band data, return right away */
2586 if (msg_sys.msg_flags & MSG_OOB)
2587 break;
a78cb84c 2588 cond_resched();
a2e27255
ACM
2589 }
2590
a2e27255 2591 if (err == 0)
34b88a68
ACM
2592 goto out_put;
2593
2594 if (datagrams == 0) {
2595 datagrams = err;
2596 goto out_put;
2597 }
a2e27255 2598
34b88a68
ACM
2599 /*
2600 * We may return less entries than requested (vlen) if the
2601 * sock is non block and there aren't enough datagrams...
2602 */
2603 if (err != -EAGAIN) {
a2e27255 2604 /*
34b88a68
ACM
2605 * ... or if recvmsg returns an error after we
2606 * received some datagrams, where we record the
2607 * error to return on the next call or if the
2608 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2609 */
34b88a68 2610 sock->sk->sk_err = -err;
a2e27255 2611 }
34b88a68
ACM
2612out_put:
2613 fput_light(sock->file, fput_needed);
a2e27255 2614
34b88a68 2615 return datagrams;
a2e27255
ACM
2616}
2617
e11d4284
AB
2618int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2619 unsigned int vlen, unsigned int flags,
2620 struct __kernel_timespec __user *timeout,
2621 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2622{
2623 int datagrams;
c2e6c856 2624 struct timespec64 timeout_sys;
a2e27255 2625
e11d4284
AB
2626 if (timeout && get_timespec64(&timeout_sys, timeout))
2627 return -EFAULT;
a2e27255 2628
e11d4284 2629 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2630 return -EFAULT;
2631
e11d4284
AB
2632 if (!timeout && !timeout32)
2633 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2634
2635 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2636
e11d4284
AB
2637 if (datagrams <= 0)
2638 return datagrams;
2639
2640 if (timeout && put_timespec64(&timeout_sys, timeout))
2641 datagrams = -EFAULT;
2642
2643 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2644 datagrams = -EFAULT;
2645
2646 return datagrams;
2647}
2648
1255e269
DB
2649SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2650 unsigned int, vlen, unsigned int, flags,
c2e6c856 2651 struct __kernel_timespec __user *, timeout)
1255e269 2652{
e11d4284
AB
2653 if (flags & MSG_CMSG_COMPAT)
2654 return -EINVAL;
2655
2656 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2657}
2658
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg(2) entry point taking an old_timespec32 timeout.
 * MSG_CMSG_COMPAT is not accepted from callers.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2670
a2e27255 2671#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call number.
 * Entry 0 is unused; the call names below follow the SYS_* numbering.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0: unused */
	AL(3),	/*  1: socket */
	AL(3),	/*  2: bind */
	AL(3),	/*  3: connect */
	AL(2),	/*  4: listen */
	AL(3),	/*  5: accept */
	AL(3),	/*  6: getsockname */
	AL(3),	/*  7: getpeername */
	AL(4),	/*  8: socketpair */
	AL(4),	/*  9: send */
	AL(4),	/* 10: recv */
	AL(6),	/* 11: sendto */
	AL(6),	/* 12: recvfrom */
	AL(2),	/* 13: shutdown */
	AL(5),	/* 14: setsockopt */
	AL(5),	/* 15: getsockopt */
	AL(3),	/* 16: sendmsg */
	AL(3),	/* 17: recvmsg */
	AL(4),	/* 18: accept4 */
	AL(5),	/* 19: recvmmsg */
	AL(4),	/* 20: sendmmsg */
};

#undef AL
2682
2683/*
89bddce5 2684 * System call vectors.
1da177e4
LT
2685 *
2686 * Argument checking cleaned up. Saved 20% in size.
2687 * This function doesn't need to set the kernel lock because
89bddce5 2688 * it is set by the callees.
1da177e4
LT
2689 */
2690
3e0fa65f 2691SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2692{
2950fa9d 2693 unsigned long a[AUDITSC_ARGS];
89bddce5 2694 unsigned long a0, a1;
1da177e4 2695 int err;
47379052 2696 unsigned int len;
1da177e4 2697
228e548e 2698 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2699 return -EINVAL;
c8e8cd57 2700 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2701
47379052
AV
2702 len = nargs[call];
2703 if (len > sizeof(a))
2704 return -EINVAL;
2705
1da177e4 2706 /* copy_from_user should be SMP safe. */
47379052 2707 if (copy_from_user(a, args, len))
1da177e4 2708 return -EFAULT;
3ec3b2fb 2709
2950fa9d
CG
2710 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2711 if (err)
2712 return err;
3ec3b2fb 2713
89bddce5
SH
2714 a0 = a[0];
2715 a1 = a[1];
2716
2717 switch (call) {
2718 case SYS_SOCKET:
9d6a15c3 2719 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2720 break;
2721 case SYS_BIND:
a87d35d8 2722 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2723 break;
2724 case SYS_CONNECT:
1387c2c2 2725 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2726 break;
2727 case SYS_LISTEN:
25e290ee 2728 err = __sys_listen(a0, a1);
89bddce5
SH
2729 break;
2730 case SYS_ACCEPT:
4541e805
DB
2731 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2732 (int __user *)a[2], 0);
89bddce5
SH
2733 break;
2734 case SYS_GETSOCKNAME:
2735 err =
8882a107
DB
2736 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2737 (int __user *)a[2]);
89bddce5
SH
2738 break;
2739 case SYS_GETPEERNAME:
2740 err =
b21c8f83
DB
2741 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2742 (int __user *)a[2]);
89bddce5
SH
2743 break;
2744 case SYS_SOCKETPAIR:
6debc8d8 2745 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2746 break;
2747 case SYS_SEND:
f3bf896b
DB
2748 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2749 NULL, 0);
89bddce5
SH
2750 break;
2751 case SYS_SENDTO:
211b634b
DB
2752 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2753 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2754 break;
2755 case SYS_RECV:
d27e9afc
DB
2756 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2757 NULL, NULL);
89bddce5
SH
2758 break;
2759 case SYS_RECVFROM:
7a09e1eb
DB
2760 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2761 (struct sockaddr __user *)a[4],
2762 (int __user *)a[5]);
89bddce5
SH
2763 break;
2764 case SYS_SHUTDOWN:
005a1aea 2765 err = __sys_shutdown(a0, a1);
89bddce5
SH
2766 break;
2767 case SYS_SETSOCKOPT:
cc36dca0
DB
2768 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2769 a[4]);
89bddce5
SH
2770 break;
2771 case SYS_GETSOCKOPT:
2772 err =
13a2d70e
DB
2773 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2774 (int __user *)a[4]);
89bddce5
SH
2775 break;
2776 case SYS_SENDMSG:
e1834a32
DB
2777 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2778 a[2], true);
89bddce5 2779 break;
228e548e 2780 case SYS_SENDMMSG:
e1834a32
DB
2781 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2782 a[3], true);
228e548e 2783 break;
89bddce5 2784 case SYS_RECVMSG:
e1834a32
DB
2785 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2786 a[2], true);
89bddce5 2787 break;
a2e27255 2788 case SYS_RECVMMSG:
e11d4284
AB
2789 if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
2790 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2791 a[2], a[3],
2792 (struct __kernel_timespec __user *)a[4],
2793 NULL);
2794 else
2795 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2796 a[2], a[3], NULL,
2797 (struct old_timespec32 __user *)a[4]);
a2e27255 2798 break;
de11defe 2799 case SYS_ACCEPT4:
4541e805
DB
2800 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2801 (int __user *)a[2], a[3]);
aaca0bdc 2802 break;
89bddce5
SH
2803 default:
2804 err = -EINVAL;
2805 break;
1da177e4
LT
2806 }
2807 return err;
2808}
2809
89bddce5 2810#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2811
55737fda
SH
2812/**
2813 * sock_register - add a socket protocol handler
2814 * @ops: description of protocol
2815 *
1da177e4
LT
2816 * This function is called by a protocol handler that wants to
2817 * advertise its address family, and have it linked into the
e793c0f7 2818 * socket interface. The value ops->family corresponds to the
55737fda 2819 * socket system call protocol family.
1da177e4 2820 */
f0fd27d4 2821int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2822{
2823 int err;
2824
2825 if (ops->family >= NPROTO) {
3410f22e 2826 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2827 return -ENOBUFS;
2828 }
55737fda
SH
2829
2830 spin_lock(&net_family_lock);
190683a9
ED
2831 if (rcu_dereference_protected(net_families[ops->family],
2832 lockdep_is_held(&net_family_lock)))
55737fda
SH
2833 err = -EEXIST;
2834 else {
cf778b00 2835 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2836 err = 0;
2837 }
55737fda
SH
2838 spin_unlock(&net_family_lock);
2839
3410f22e 2840 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2841 return err;
2842}
c6d409cf 2843EXPORT_SYMBOL(sock_register);
1da177e4 2844
55737fda
SH
2845/**
2846 * sock_unregister - remove a protocol handler
2847 * @family: protocol family to remove
2848 *
1da177e4
LT
2849 * This function is called by a protocol handler that wants to
2850 * remove its address family, and have it unlinked from the
55737fda
SH
2851 * new socket creation.
2852 *
2853 * If protocol handler is a module, then it can use module reference
2854 * counts to protect against new references. If protocol handler is not
2855 * a module then it needs to provide its own protection in
2856 * the ops->create routine.
1da177e4 2857 */
f0fd27d4 2858void sock_unregister(int family)
1da177e4 2859{
f0fd27d4 2860 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2861
55737fda 2862 spin_lock(&net_family_lock);
a9b3cd7f 2863 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2864 spin_unlock(&net_family_lock);
2865
2866 synchronize_rcu();
2867
3410f22e 2868 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2869}
c6d409cf 2870EXPORT_SYMBOL(sock_unregister);
1da177e4 2871
bf2ae2e4
XL
2872bool sock_is_registered(int family)
2873{
66b51b0a 2874 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
2875}
2876
77d76ea3 2877static int __init sock_init(void)
1da177e4 2878{
b3e19d92 2879 int err;
2ca794e5
EB
2880 /*
2881 * Initialize the network sysctl infrastructure.
2882 */
2883 err = net_sysctl_init();
2884 if (err)
2885 goto out;
b3e19d92 2886
1da177e4 2887 /*
89bddce5 2888 * Initialize skbuff SLAB cache
1da177e4
LT
2889 */
2890 skb_init();
1da177e4
LT
2891
2892 /*
89bddce5 2893 * Initialize the protocols module.
1da177e4
LT
2894 */
2895
2896 init_inodecache();
b3e19d92
NP
2897
2898 err = register_filesystem(&sock_fs_type);
2899 if (err)
2900 goto out_fs;
1da177e4 2901 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2902 if (IS_ERR(sock_mnt)) {
2903 err = PTR_ERR(sock_mnt);
2904 goto out_mount;
2905 }
77d76ea3
AK
2906
2907 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2908 */
2909
2910#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2911 err = netfilter_init();
2912 if (err)
2913 goto out;
1da177e4 2914#endif
cbeb321a 2915
408eccce 2916 ptp_classifier_init();
c1f19b51 2917
b3e19d92
NP
2918out:
2919 return err;
2920
2921out_mount:
2922 unregister_filesystem(&sock_fs_type);
2923out_fs:
2924 goto out;
1da177e4
LT
2925}
2926
77d76ea3
AK
2927core_initcall(sock_init); /* early initcall */
2928
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Print the "sockets: used N" line into @seq, where N is what
 * sock_inuse_get() returns for seq->private.
 */
void socket_seq_show(struct seq_file *seq)
{
	seq_printf(seq, "sockets: used %d\n", sock_inuse_get(seq->private));
}
#endif				/* CONFIG_PROC_FS */
1da177e4 2936
89bbfc95 2937#ifdef CONFIG_COMPAT
36fd633e 2938static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2939{
6b96018b 2940 struct compat_ifconf ifc32;
7a229387 2941 struct ifconf ifc;
7a229387
AB
2942 int err;
2943
6b96018b 2944 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2945 return -EFAULT;
2946
36fd633e
AV
2947 ifc.ifc_len = ifc32.ifc_len;
2948 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 2949
36fd633e
AV
2950 rtnl_lock();
2951 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2952 rtnl_unlock();
7a229387
AB
2953 if (err)
2954 return err;
2955
36fd633e 2956 ifc32.ifc_len = ifc.ifc_len;
6b96018b 2957 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2958 return -EFAULT;
2959
2960 return 0;
2961}
2962
6b96018b 2963static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2964{
3a7da39d
BH
2965 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2966 bool convert_in = false, convert_out = false;
44c02a2c
AV
2967 size_t buf_size = 0;
2968 struct ethtool_rxnfc __user *rxnfc = NULL;
2969 struct ifreq ifr;
3a7da39d
BH
2970 u32 rule_cnt = 0, actual_rule_cnt;
2971 u32 ethcmd;
7a229387 2972 u32 data;
3a7da39d 2973 int ret;
7a229387 2974
3a7da39d
BH
2975 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2976 return -EFAULT;
7a229387 2977
3a7da39d
BH
2978 compat_rxnfc = compat_ptr(data);
2979
2980 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2981 return -EFAULT;
2982
3a7da39d
BH
2983 /* Most ethtool structures are defined without padding.
2984 * Unfortunately struct ethtool_rxnfc is an exception.
2985 */
2986 switch (ethcmd) {
2987 default:
2988 break;
2989 case ETHTOOL_GRXCLSRLALL:
2990 /* Buffer size is variable */
2991 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2992 return -EFAULT;
2993 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2994 return -ENOMEM;
2995 buf_size += rule_cnt * sizeof(u32);
2996 /* fall through */
2997 case ETHTOOL_GRXRINGS:
2998 case ETHTOOL_GRXCLSRLCNT:
2999 case ETHTOOL_GRXCLSRULE:
55664f32 3000 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3001 convert_out = true;
3002 /* fall through */
3003 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3004 buf_size += sizeof(struct ethtool_rxnfc);
3005 convert_in = true;
44c02a2c 3006 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3007 break;
3008 }
3009
44c02a2c 3010 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3011 return -EFAULT;
3012
44c02a2c 3013 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3014
3a7da39d 3015 if (convert_in) {
127fe533 3016 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3017 * fs.ring_cookie and at the end of fs, but nowhere else.
3018 */
127fe533
AD
3019 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3020 sizeof(compat_rxnfc->fs.m_ext) !=
3021 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3022 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3023 BUILD_BUG_ON(
3024 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3025 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3026 offsetof(struct ethtool_rxnfc, fs.location) -
3027 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3028
3029 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3030 (void __user *)(&rxnfc->fs.m_ext + 1) -
3031 (void __user *)rxnfc) ||
3a7da39d
BH
3032 copy_in_user(&rxnfc->fs.ring_cookie,
3033 &compat_rxnfc->fs.ring_cookie,
954b1244 3034 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3035 (void __user *)&rxnfc->fs.ring_cookie))
3036 return -EFAULT;
3037 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3038 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3039 return -EFAULT;
3040 } else if (copy_in_user(&rxnfc->rule_cnt,
3041 &compat_rxnfc->rule_cnt,
3042 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3043 return -EFAULT;
3044 }
3045
44c02a2c 3046 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3047 if (ret)
3048 return ret;
3049
3050 if (convert_out) {
3051 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3052 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3053 (const void __user *)rxnfc) ||
3a7da39d
BH
3054 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3055 &rxnfc->fs.ring_cookie,
954b1244
SH
3056 (const void __user *)(&rxnfc->fs.location + 1) -
3057 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3058 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3059 sizeof(rxnfc->rule_cnt)))
3060 return -EFAULT;
3061
3062 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3063 /* As an optimisation, we only copy the actual
3064 * number of rules that the underlying
3065 * function returned. Since Mallory might
3066 * change the rule count in user memory, we
3067 * check that it is less than the rule count
3068 * originally given (as the user buffer size),
3069 * which has been range-checked.
3070 */
3071 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3072 return -EFAULT;
3073 if (actual_rule_cnt < rule_cnt)
3074 rule_cnt = actual_rule_cnt;
3075 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3076 &rxnfc->rule_locs[0],
3077 rule_cnt * sizeof(u32)))
3078 return -EFAULT;
3079 }
3080 }
3081
3082 return 0;
7a229387
AB
3083}
3084
7a50a240
AB
3085static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3086{
7a50a240 3087 compat_uptr_t uptr32;
44c02a2c
AV
3088 struct ifreq ifr;
3089 void __user *saved;
3090 int err;
7a50a240 3091
44c02a2c 3092 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3093 return -EFAULT;
3094
3095 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3096 return -EFAULT;
3097
44c02a2c
AV
3098 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3099 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3100
44c02a2c
AV
3101 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3102 if (!err) {
3103 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3104 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3105 err = -EFAULT;
ccbd6a5a 3106 }
44c02a2c 3107 return err;
7a229387
AB
3108}
3109
590d4693
BH
3110/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3111static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3112 struct compat_ifreq __user *u_ifreq32)
7a229387 3113{
44c02a2c 3114 struct ifreq ifreq;
7a229387
AB
3115 u32 data32;
3116
44c02a2c 3117 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3118 return -EFAULT;
44c02a2c 3119 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3120 return -EFAULT;
44c02a2c 3121 ifreq.ifr_data = compat_ptr(data32);
7a229387 3122
44c02a2c 3123 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3124}
3125
37ac39bd
JB
3126static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3127 unsigned int cmd,
3128 struct compat_ifreq __user *uifr32)
3129{
3130 struct ifreq __user *uifr;
3131 int err;
3132
3133 /* Handle the fact that while struct ifreq has the same *layout* on
3134 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3135 * which are handled elsewhere, it still has different *size* due to
3136 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3137 * resulting in struct ifreq being 32 and 40 bytes respectively).
3138 * As a result, if the struct happens to be at the end of a page and
3139 * the next page isn't readable/writable, we get a fault. To prevent
3140 * that, copy back and forth to the full size.
3141 */
3142
3143 uifr = compat_alloc_user_space(sizeof(*uifr));
3144 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3145 return -EFAULT;
3146
3147 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3148
3149 if (!err) {
3150 switch (cmd) {
3151 case SIOCGIFFLAGS:
3152 case SIOCGIFMETRIC:
3153 case SIOCGIFMTU:
3154 case SIOCGIFMEM:
3155 case SIOCGIFHWADDR:
3156 case SIOCGIFINDEX:
3157 case SIOCGIFADDR:
3158 case SIOCGIFBRDADDR:
3159 case SIOCGIFDSTADDR:
3160 case SIOCGIFNETMASK:
3161 case SIOCGIFPFLAGS:
3162 case SIOCGIFTXQLEN:
3163 case SIOCGMIIPHY:
3164 case SIOCGMIIREG:
c6c9fee3 3165 case SIOCGIFNAME:
37ac39bd
JB
3166 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3167 err = -EFAULT;
3168 break;
3169 }
3170 }
3171 return err;
3172}
3173
a2116ed2
AB
3174static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3175 struct compat_ifreq __user *uifr32)
3176{
3177 struct ifreq ifr;
3178 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3179 int err;
3180
3181 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3182 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3183 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3184 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3185 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3186 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3187 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3188 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3189 if (err)
3190 return -EFAULT;
3191
44c02a2c 3192 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3193
3194 if (cmd == SIOCGIFMAP && !err) {
3195 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3196 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3197 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3198 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3199 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3200 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3201 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3202 if (err)
3203 err = -EFAULT;
3204 }
3205 return err;
3206}
3207
7a229387 3208struct rtentry32 {
c6d409cf 3209 u32 rt_pad1;
7a229387
AB
3210 struct sockaddr rt_dst; /* target address */
3211 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3212 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3213 unsigned short rt_flags;
3214 short rt_pad2;
3215 u32 rt_pad3;
3216 unsigned char rt_tos;
3217 unsigned char rt_class;
3218 short rt_pad4;
3219 short rt_metric; /* +1 for binary compatibility! */
7a229387 3220 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3221 u32 rt_mtu; /* per route MTU/Window */
3222 u32 rt_window; /* Window clamping */
7a229387
AB
3223 unsigned short rt_irtt; /* Initial RTT */
3224};
3225
3226struct in6_rtmsg32 {
3227 struct in6_addr rtmsg_dst;
3228 struct in6_addr rtmsg_src;
3229 struct in6_addr rtmsg_gateway;
3230 u32 rtmsg_type;
3231 u16 rtmsg_dst_len;
3232 u16 rtmsg_src_len;
3233 u32 rtmsg_metric;
3234 u32 rtmsg_info;
3235 u32 rtmsg_flags;
3236 s32 rtmsg_ifindex;
3237};
3238
6b96018b
AB
3239static int routing_ioctl(struct net *net, struct socket *sock,
3240 unsigned int cmd, void __user *argp)
7a229387
AB
3241{
3242 int ret;
3243 void *r = NULL;
3244 struct in6_rtmsg r6;
3245 struct rtentry r4;
3246 char devname[16];
3247 u32 rtdev;
3248 mm_segment_t old_fs = get_fs();
3249
6b96018b
AB
3250 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3251 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3252 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3253 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3254 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3255 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3256 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3257 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3258 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3259 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3260 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3261
3262 r = (void *) &r6;
3263 } else { /* ipv4 */
6b96018b 3264 struct rtentry32 __user *ur4 = argp;
c6d409cf 3265 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3266 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3267 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3268 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3269 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3270 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3271 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3272 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3273 if (rtdev) {
c6d409cf 3274 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3275 r4.rt_dev = (char __user __force *)devname;
3276 devname[15] = 0;
7a229387
AB
3277 } else
3278 r4.rt_dev = NULL;
3279
3280 r = (void *) &r4;
3281 }
3282
3283 if (ret) {
3284 ret = -EFAULT;
3285 goto out;
3286 }
3287
c6d409cf 3288 set_fs(KERNEL_DS);
63ff03ab 3289 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3290 set_fs(old_fs);
7a229387
AB
3291
3292out:
7a229387
AB
3293 return ret;
3294}
3295
3296/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3297 * for some operations; this forces use of the newer bridge-utils that
25985edc 3298 * use compatible ioctls
7a229387 3299 */
6b96018b 3300static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3301{
6b96018b 3302 compat_ulong_t tmp;
7a229387 3303
6b96018b 3304 if (get_user(tmp, argp))
7a229387
AB
3305 return -EFAULT;
3306 if (tmp == BRCTL_GET_VERSION)
3307 return BRCTL_VERSION + 1;
3308 return -EINVAL;
3309}
3310
6b96018b
AB
3311static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3312 unsigned int cmd, unsigned long arg)
3313{
3314 void __user *argp = compat_ptr(arg);
3315 struct sock *sk = sock->sk;
3316 struct net *net = sock_net(sk);
7a229387 3317
6b96018b 3318 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3319 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3320
3321 switch (cmd) {
3322 case SIOCSIFBR:
3323 case SIOCGIFBR:
3324 return old_bridge_ioctl(argp);
6b96018b 3325 case SIOCGIFCONF:
36fd633e 3326 return compat_dev_ifconf(net, argp);
6b96018b
AB
3327 case SIOCETHTOOL:
3328 return ethtool_ioctl(net, argp);
7a50a240
AB
3329 case SIOCWANDEV:
3330 return compat_siocwandev(net, argp);
a2116ed2
AB
3331 case SIOCGIFMAP:
3332 case SIOCSIFMAP:
3333 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3334 case SIOCADDRT:
3335 case SIOCDELRT:
3336 return routing_ioctl(net, sock, cmd, argp);
0768e170
AB
3337 case SIOCGSTAMP_OLD:
3338 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3339 if (!sock->ops->gettstamp)
3340 return -ENOIOCTLCMD;
0768e170 3341 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3342 !COMPAT_USE_64BIT_TIME);
3343
590d4693
BH
3344 case SIOCBONDSLAVEINFOQUERY:
3345 case SIOCBONDINFOQUERY:
a2116ed2 3346 case SIOCSHWTSTAMP:
fd468c74 3347 case SIOCGHWTSTAMP:
590d4693 3348 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3349
3350 case FIOSETOWN:
3351 case SIOCSPGRP:
3352 case FIOGETOWN:
3353 case SIOCGPGRP:
3354 case SIOCBRADDBR:
3355 case SIOCBRDELBR:
3356 case SIOCGIFVLAN:
3357 case SIOCSIFVLAN:
3358 case SIOCADDDLCI:
3359 case SIOCDELDLCI:
c62cce2c 3360 case SIOCGSKNS:
0768e170
AB
3361 case SIOCGSTAMP_NEW:
3362 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3363 return sock_ioctl(file, cmd, arg);
3364
3365 case SIOCGIFFLAGS:
3366 case SIOCSIFFLAGS:
3367 case SIOCGIFMETRIC:
3368 case SIOCSIFMETRIC:
3369 case SIOCGIFMTU:
3370 case SIOCSIFMTU:
3371 case SIOCGIFMEM:
3372 case SIOCSIFMEM:
3373 case SIOCGIFHWADDR:
3374 case SIOCSIFHWADDR:
3375 case SIOCADDMULTI:
3376 case SIOCDELMULTI:
3377 case SIOCGIFINDEX:
6b96018b
AB
3378 case SIOCGIFADDR:
3379 case SIOCSIFADDR:
3380 case SIOCSIFHWBROADCAST:
6b96018b 3381 case SIOCDIFADDR:
6b96018b
AB
3382 case SIOCGIFBRDADDR:
3383 case SIOCSIFBRDADDR:
3384 case SIOCGIFDSTADDR:
3385 case SIOCSIFDSTADDR:
3386 case SIOCGIFNETMASK:
3387 case SIOCSIFNETMASK:
3388 case SIOCSIFPFLAGS:
3389 case SIOCGIFPFLAGS:
3390 case SIOCGIFTXQLEN:
3391 case SIOCSIFTXQLEN:
3392 case SIOCBRADDIF:
3393 case SIOCBRDELIF:
c6c9fee3 3394 case SIOCGIFNAME:
9177efd3
AB
3395 case SIOCSIFNAME:
3396 case SIOCGMIIPHY:
3397 case SIOCGMIIREG:
3398 case SIOCSMIIREG:
f92d4fc9
AV
3399 case SIOCBONDENSLAVE:
3400 case SIOCBONDRELEASE:
3401 case SIOCBONDSETHWADDR:
3402 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3403 return compat_ifreq_ioctl(net, sock, cmd, argp);
3404
6b96018b
AB
3405 case SIOCSARP:
3406 case SIOCGARP:
3407 case SIOCDARP:
6b96018b 3408 case SIOCATMARK:
63ff03ab 3409 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3410 }
3411
6b96018b
AB
3412 return -ENOIOCTLCMD;
3413}
7a229387 3414
95c96174 3415static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3416 unsigned long arg)
89bbfc95
SP
3417{
3418 struct socket *sock = file->private_data;
3419 int ret = -ENOIOCTLCMD;
87de87d5
DM
3420 struct sock *sk;
3421 struct net *net;
3422
3423 sk = sock->sk;
3424 net = sock_net(sk);
89bbfc95
SP
3425
3426 if (sock->ops->compat_ioctl)
3427 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3428
87de87d5
DM
3429 if (ret == -ENOIOCTLCMD &&
3430 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3431 ret = compat_wext_handle_ioctl(net, cmd, arg);
3432
6b96018b
AB
3433 if (ret == -ENOIOCTLCMD)
3434 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3435
89bbfc95
SP
3436 return ret;
3437}
3438#endif
3439
8a3c245c
PT
3440/**
3441 * kernel_bind - bind an address to a socket (kernel space)
3442 * @sock: socket
3443 * @addr: address
3444 * @addrlen: length of address
3445 *
3446 * Returns 0 or an error.
3447 */
3448
ac5a488e
SS
3449int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3450{
3451 return sock->ops->bind(sock, addr, addrlen);
3452}
c6d409cf 3453EXPORT_SYMBOL(kernel_bind);
ac5a488e 3454
8a3c245c
PT
3455/**
3456 * kernel_listen - move socket to listening state (kernel space)
3457 * @sock: socket
3458 * @backlog: pending connections queue size
3459 *
3460 * Returns 0 or an error.
3461 */
3462
ac5a488e
SS
3463int kernel_listen(struct socket *sock, int backlog)
3464{
3465 return sock->ops->listen(sock, backlog);
3466}
c6d409cf 3467EXPORT_SYMBOL(kernel_listen);
ac5a488e 3468
8a3c245c
PT
3469/**
3470 * kernel_accept - accept a connection (kernel space)
3471 * @sock: listening socket
3472 * @newsock: new connected socket
3473 * @flags: flags
3474 *
3475 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3476 * If it fails, @newsock is guaranteed to be %NULL.
3477 * Returns 0 or an error.
3478 */
3479
ac5a488e
SS
3480int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3481{
3482 struct sock *sk = sock->sk;
3483 int err;
3484
3485 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3486 newsock);
3487 if (err < 0)
3488 goto done;
3489
cdfbabfb 3490 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3491 if (err < 0) {
3492 sock_release(*newsock);
fa8705b0 3493 *newsock = NULL;
ac5a488e
SS
3494 goto done;
3495 }
3496
3497 (*newsock)->ops = sock->ops;
1b08534e 3498 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3499
3500done:
3501 return err;
3502}
c6d409cf 3503EXPORT_SYMBOL(kernel_accept);
ac5a488e 3504
8a3c245c
PT
3505/**
3506 * kernel_connect - connect a socket (kernel space)
3507 * @sock: socket
3508 * @addr: address
3509 * @addrlen: address length
3510 * @flags: flags (O_NONBLOCK, ...)
3511 *
3512 * For datagram sockets, @addr is the addres to which datagrams are sent
3513 * by default, and the only address from which datagrams are received.
3514 * For stream sockets, attempts to connect to @addr.
3515 * Returns 0 or an error code.
3516 */
3517
ac5a488e 3518int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3519 int flags)
ac5a488e
SS
3520{
3521 return sock->ops->connect(sock, addr, addrlen, flags);
3522}
c6d409cf 3523EXPORT_SYMBOL(kernel_connect);
ac5a488e 3524
8a3c245c
PT
3525/**
3526 * kernel_getsockname - get the address which the socket is bound (kernel space)
3527 * @sock: socket
3528 * @addr: address holder
3529 *
3530 * Fills the @addr pointer with the address which the socket is bound.
3531 * Returns 0 or an error code.
3532 */
3533
9b2c45d4 3534int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3535{
9b2c45d4 3536 return sock->ops->getname(sock, addr, 0);
ac5a488e 3537}
c6d409cf 3538EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3539
8a3c245c
PT
3540/**
3541 * kernel_peername - get the address which the socket is connected (kernel space)
3542 * @sock: socket
3543 * @addr: address holder
3544 *
3545 * Fills the @addr pointer with the address which the socket is connected.
3546 * Returns 0 or an error code.
3547 */
3548
9b2c45d4 3549int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3550{
9b2c45d4 3551 return sock->ops->getname(sock, addr, 1);
ac5a488e 3552}
c6d409cf 3553EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3554
8a3c245c
PT
3555/**
3556 * kernel_getsockopt - get a socket option (kernel space)
3557 * @sock: socket
3558 * @level: API level (SOL_SOCKET, ...)
3559 * @optname: option tag
3560 * @optval: option value
3561 * @optlen: option length
3562 *
3563 * Assigns the option length to @optlen.
3564 * Returns 0 or an error.
3565 */
3566
ac5a488e
SS
3567int kernel_getsockopt(struct socket *sock, int level, int optname,
3568 char *optval, int *optlen)
3569{
3570 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3571 char __user *uoptval;
3572 int __user *uoptlen;
ac5a488e
SS
3573 int err;
3574
fb8621bb
NK
3575 uoptval = (char __user __force *) optval;
3576 uoptlen = (int __user __force *) optlen;
3577
ac5a488e
SS
3578 set_fs(KERNEL_DS);
3579 if (level == SOL_SOCKET)
fb8621bb 3580 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3581 else
fb8621bb
NK
3582 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3583 uoptlen);
ac5a488e
SS
3584 set_fs(oldfs);
3585 return err;
3586}
c6d409cf 3587EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3588
8a3c245c
PT
3589/**
3590 * kernel_setsockopt - set a socket option (kernel space)
3591 * @sock: socket
3592 * @level: API level (SOL_SOCKET, ...)
3593 * @optname: option tag
3594 * @optval: option value
3595 * @optlen: option length
3596 *
3597 * Returns 0 or an error.
3598 */
3599
ac5a488e 3600int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3601 char *optval, unsigned int optlen)
ac5a488e
SS
3602{
3603 mm_segment_t oldfs = get_fs();
fb8621bb 3604 char __user *uoptval;
ac5a488e
SS
3605 int err;
3606
fb8621bb
NK
3607 uoptval = (char __user __force *) optval;
3608
ac5a488e
SS
3609 set_fs(KERNEL_DS);
3610 if (level == SOL_SOCKET)
fb8621bb 3611 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3612 else
fb8621bb 3613 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3614 optlen);
3615 set_fs(oldfs);
3616 return err;
3617}
c6d409cf 3618EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3619
8a3c245c
PT
3620/**
3621 * kernel_sendpage - send a &page through a socket (kernel space)
3622 * @sock: socket
3623 * @page: page
3624 * @offset: page offset
3625 * @size: total size in bytes
3626 * @flags: flags (MSG_DONTWAIT, ...)
3627 *
3628 * Returns the total amount sent in bytes or an error.
3629 */
3630
ac5a488e
SS
3631int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3632 size_t size, int flags)
3633{
3634 if (sock->ops->sendpage)
3635 return sock->ops->sendpage(sock, page, offset, size, flags);
3636
3637 return sock_no_sendpage(sock, page, offset, size, flags);
3638}
c6d409cf 3639EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3640
8a3c245c
PT
3641/**
3642 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3643 * @sk: sock
3644 * @page: page
3645 * @offset: page offset
3646 * @size: total size in bytes
3647 * @flags: flags (MSG_DONTWAIT, ...)
3648 *
3649 * Returns the total amount sent in bytes or an error.
3650 * Caller must hold @sk.
3651 */
3652
306b13eb
TH
3653int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3654 size_t size, int flags)
3655{
3656 struct socket *sock = sk->sk_socket;
3657
3658 if (sock->ops->sendpage_locked)
3659 return sock->ops->sendpage_locked(sk, page, offset, size,
3660 flags);
3661
3662 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3663}
3664EXPORT_SYMBOL(kernel_sendpage_locked);
3665
8a3c245c
PT
3666/**
3667 * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3668 * @sock: socket
3669 * @how: connection part
3670 *
3671 * Returns 0 or an error.
3672 */
3673
91cf45f0
TM
3674int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3675{
3676 return sock->ops->shutdown(sock, how);
3677}
91cf45f0 3678EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3679
8a3c245c
PT
3680/**
3681 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3682 * @sk: socket
3683 *
3684 * This routine returns the IP overhead imposed by a socket i.e.
3685 * the length of the underlying IP header, depending on whether
3686 * this is an IPv4 or IPv6 socket and the length from IP options turned
3687 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3688 */
8a3c245c 3689
113c3075
P
3690u32 kernel_sock_ip_overhead(struct sock *sk)
3691{
3692 struct inet_sock *inet;
3693 struct ip_options_rcu *opt;
3694 u32 overhead = 0;
113c3075
P
3695#if IS_ENABLED(CONFIG_IPV6)
3696 struct ipv6_pinfo *np;
3697 struct ipv6_txoptions *optv6 = NULL;
3698#endif /* IS_ENABLED(CONFIG_IPV6) */
3699
3700 if (!sk)
3701 return overhead;
3702
113c3075
P
3703 switch (sk->sk_family) {
3704 case AF_INET:
3705 inet = inet_sk(sk);
3706 overhead += sizeof(struct iphdr);
3707 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3708 sock_owned_by_user(sk));
113c3075
P
3709 if (opt)
3710 overhead += opt->opt.optlen;
3711 return overhead;
3712#if IS_ENABLED(CONFIG_IPV6)
3713 case AF_INET6:
3714 np = inet6_sk(sk);
3715 overhead += sizeof(struct ipv6hdr);
3716 if (np)
3717 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3718 sock_owned_by_user(sk));
113c3075
P
3719 if (optv6)
3720 overhead += (optv6->opt_flen + optv6->opt_nflen);
3721 return overhead;
3722#endif /* IS_ENABLED(CONFIG_IPV6) */
3723 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3724 return overhead;
3725 }
3726}
3727EXPORT_SYMBOL(kernel_sock_ip_overhead);