cpufreq: Fix kobject memleak
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
c8e8cd57 92#include <linux/nospec.h>
1da177e4 93
7c0f6ba6 94#include <linux/uaccess.h>
1da177e4
LT
95#include <asm/unistd.h>
96
97#include <net/compat.h>
87de87d5 98#include <net/wext.h>
f8451725 99#include <net/cls_cgroup.h>
1da177e4
LT
100
101#include <net/sock.h>
102#include <linux/netfilter.h>
103
6b96018b
AB
104#include <linux/if_tun.h>
105#include <linux/ipv6_route.h>
106#include <linux/route.h>
6b96018b 107#include <linux/sockios.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
121static __poll_t sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4 165/*
89bddce5
SH
166 * Support routines.
167 * Move socket addresses back and forth across the kernel/user
168 * divide and look after the messy bits.
1da177e4
LT
169 */
170
1da177e4
LT
171/**
172 * move_addr_to_kernel - copy a socket address into kernel space
173 * @uaddr: Address in user space
174 * @kaddr: Address in kernel space
175 * @ulen: Length in user space
176 *
177 * The address is copied into kernel space. If the provided address is
178 * too long an error code of -EINVAL is returned. If the copy gives
179 * invalid addresses -EFAULT is returned. On a success 0 is returned.
180 */
181
43db362d 182int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 183{
230b1839 184 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 185 return -EINVAL;
89bddce5 186 if (ulen == 0)
1da177e4 187 return 0;
89bddce5 188 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 189 return -EFAULT;
3ec3b2fb 190 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
191}
192
193/**
194 * move_addr_to_user - copy an address to user space
195 * @kaddr: kernel space address
196 * @klen: length of address in kernel
197 * @uaddr: user space address
198 * @ulen: pointer to user length field
199 *
200 * The value pointed to by ulen on entry is the buffer length available.
201 * This is overwritten with the buffer space used. -EINVAL is returned
202 * if an overlong buffer is specified or a negative buffer size. -EFAULT
203 * is returned if either the buffer or the length field are not
204 * accessible.
205 * After copying the data up to the limit the user specifies, the true
206 * length of the data is written over the length limit the user
207 * specified. Zero is returned for a success.
208 */
89bddce5 209
43db362d 210static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 211 void __user *uaddr, int __user *ulen)
1da177e4
LT
212{
213 int err;
214 int len;
215
68c6beb3 216 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
217 err = get_user(len, ulen);
218 if (err)
1da177e4 219 return err;
89bddce5
SH
220 if (len > klen)
221 len = klen;
68c6beb3 222 if (len < 0)
1da177e4 223 return -EINVAL;
89bddce5 224 if (len) {
d6fe3945
SG
225 if (audit_sockaddr(klen, kaddr))
226 return -ENOMEM;
89bddce5 227 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
228 return -EFAULT;
229 }
230 /*
89bddce5
SH
231 * "fromlen shall refer to the value before truncation.."
232 * 1003.1g
1da177e4
LT
233 */
234 return __put_user(klen, ulen);
235}
236
08009a76 237static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
238
239static struct inode *sock_alloc_inode(struct super_block *sb)
240{
241 struct socket_alloc *ei;
eaefd110 242 struct socket_wq *wq;
89bddce5 243
e94b1766 244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
245 if (!ei)
246 return NULL;
eaefd110
ED
247 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
248 if (!wq) {
43815482
ED
249 kmem_cache_free(sock_inode_cachep, ei);
250 return NULL;
251 }
eaefd110
ED
252 init_waitqueue_head(&wq->wait);
253 wq->fasync_list = NULL;
574aab1e 254 wq->flags = 0;
e6476c21 255 ei->socket.wq = wq;
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482
ED
268 struct socket_alloc *ei;
269
270 ei = container_of(inode, struct socket_alloc, vfs_inode);
e6476c21 271 kfree_rcu(ei->socket.wq, rcu);
43815482 272 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
273}
274
51cc5068 275static void init_once(void *foo)
1da177e4 276{
89bddce5 277 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 278
a35afb83 279 inode_init_once(&ei->vfs_inode);
1da177e4 280}
89bddce5 281
1e911632 282static void init_inodecache(void)
1da177e4
LT
283{
284 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
285 sizeof(struct socket_alloc),
286 0,
287 (SLAB_HWCACHE_ALIGN |
288 SLAB_RECLAIM_ACCOUNT |
5d097056 289 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 290 init_once);
1e911632 291 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
292}
293
b87221de 294static const struct super_operations sockfs_ops = {
c6d409cf
ED
295 .alloc_inode = sock_alloc_inode,
296 .destroy_inode = sock_destroy_inode,
297 .statfs = simple_statfs,
1da177e4
LT
298};
299
c23fbb6b
ED
300/*
301 * sockfs_dname() is called from d_path().
302 */
303static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
304{
305 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 306 d_inode(dentry)->i_ino);
c23fbb6b
ED
307}
308
3ba13d17 309static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 310 .d_dname = sockfs_dname,
1da177e4
LT
311};
312
bba0bd31
AG
313static int sockfs_xattr_get(const struct xattr_handler *handler,
314 struct dentry *dentry, struct inode *inode,
315 const char *suffix, void *value, size_t size)
316{
317 if (value) {
318 if (dentry->d_name.len + 1 > size)
319 return -ERANGE;
320 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
321 }
322 return dentry->d_name.len + 1;
323}
324
325#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
326#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
327#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
328
329static const struct xattr_handler sockfs_xattr_handler = {
330 .name = XATTR_NAME_SOCKPROTONAME,
331 .get = sockfs_xattr_get,
332};
333
4a590153
AG
334static int sockfs_security_xattr_set(const struct xattr_handler *handler,
335 struct dentry *dentry, struct inode *inode,
336 const char *suffix, const void *value,
337 size_t size, int flags)
338{
339 /* Handled by LSM. */
340 return -EAGAIN;
341}
342
343static const struct xattr_handler sockfs_security_xattr_handler = {
344 .prefix = XATTR_SECURITY_PREFIX,
345 .set = sockfs_security_xattr_set,
346};
347
bba0bd31
AG
348static const struct xattr_handler *sockfs_xattr_handlers[] = {
349 &sockfs_xattr_handler,
4a590153 350 &sockfs_security_xattr_handler,
bba0bd31
AG
351 NULL
352};
353
c74a1cbb
AV
354static struct dentry *sockfs_mount(struct file_system_type *fs_type,
355 int flags, const char *dev_name, void *data)
356{
bba0bd31
AG
357 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
358 sockfs_xattr_handlers,
359 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
360}
361
362static struct vfsmount *sock_mnt __read_mostly;
363
364static struct file_system_type sock_fs_type = {
365 .name = "sockfs",
366 .mount = sockfs_mount,
367 .kill_sb = kill_anon_super,
368};
369
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success they return the file descriptor,
 *	with the file struct implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we no longer
 *	refer to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable with shared fd spaces; we
 *	cannot solve it inside the kernel, but we still take care of
 *	internal coherence.
 */
386
8a3c245c
PT
387/**
388 * sock_alloc_file - Bind a &socket to a &file
389 * @sock: socket
390 * @flags: file status flags
391 * @dname: protocol name
392 *
393 * Returns the &file bound with @sock, implicitly storing it
394 * in sock->file. If dname is %NULL, sets to "".
395 * On failure the return is a ERR pointer (see linux/err.h).
396 * This function uses GFP_KERNEL internally.
397 */
398
aab174f0 399struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 400{
7cbe66b6 401 struct file *file;
1da177e4 402
d93aa9d8
AV
403 if (!dname)
404 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 405
d93aa9d8
AV
406 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
407 O_RDWR | (flags & O_NONBLOCK),
408 &socket_file_ops);
b5ffe634 409 if (IS_ERR(file)) {
8e1611e2 410 sock_release(sock);
39b65252 411 return file;
cc3808f8
AV
412 }
413
414 sock->file = file;
39d8c1b6 415 file->private_data = sock;
28407630 416 return file;
39d8c1b6 417}
56b31d1c 418EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 419
56b31d1c 420static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
421{
422 struct file *newfile;
28407630 423 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
424 if (unlikely(fd < 0)) {
425 sock_release(sock);
28407630 426 return fd;
ce4bb04c 427 }
39d8c1b6 428
aab174f0 429 newfile = sock_alloc_file(sock, flags, NULL);
28407630 430 if (likely(!IS_ERR(newfile))) {
39d8c1b6 431 fd_install(fd, newfile);
28407630
AV
432 return fd;
433 }
7cbe66b6 434
28407630
AV
435 put_unused_fd(fd);
436 return PTR_ERR(newfile);
1da177e4
LT
437}
438
8a3c245c
PT
439/**
440 * sock_from_file - Return the &socket bounded to @file.
441 * @file: file
442 * @err: pointer to an error code return
443 *
444 * On failure returns %NULL and assigns -ENOTSOCK to @err.
445 */
446
406a3c63 447struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 448{
6cb153ca
BL
449 if (file->f_op == &socket_file_ops)
450 return file->private_data; /* set in sock_map_fd */
451
23bb80d2
ED
452 *err = -ENOTSOCK;
453 return NULL;
6cb153ca 454}
406a3c63 455EXPORT_SYMBOL(sock_from_file);
6cb153ca 456
1da177e4 457/**
c6d409cf 458 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
459 * @fd: file handle
460 * @err: pointer to an error code return
461 *
462 * The file handle passed in is locked and the socket it is bound
241c4667 463 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
464 * with a negative errno code and NULL is returned. The function checks
465 * for both invalid handles and passing a handle which is not a socket.
466 *
467 * On a success the socket object pointer is returned.
468 */
469
470struct socket *sockfd_lookup(int fd, int *err)
471{
472 struct file *file;
1da177e4
LT
473 struct socket *sock;
474
89bddce5
SH
475 file = fget(fd);
476 if (!file) {
1da177e4
LT
477 *err = -EBADF;
478 return NULL;
479 }
89bddce5 480
6cb153ca
BL
481 sock = sock_from_file(file, err);
482 if (!sock)
1da177e4 483 fput(file);
6cb153ca
BL
484 return sock;
485}
c6d409cf 486EXPORT_SYMBOL(sockfd_lookup);
1da177e4 487
6cb153ca
BL
488static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
489{
00e188ef 490 struct fd f = fdget(fd);
6cb153ca
BL
491 struct socket *sock;
492
3672558c 493 *err = -EBADF;
00e188ef
AV
494 if (f.file) {
495 sock = sock_from_file(f.file, err);
496 if (likely(sock)) {
497 *fput_needed = f.flags;
6cb153ca 498 return sock;
00e188ef
AV
499 }
500 fdput(f);
1da177e4 501 }
6cb153ca 502 return NULL;
1da177e4
LT
503}
504
600e1779
MY
505static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
506 size_t size)
507{
508 ssize_t len;
509 ssize_t used = 0;
510
c5ef6035 511 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
512 if (len < 0)
513 return len;
514 used += len;
515 if (buffer) {
516 if (size < used)
517 return -ERANGE;
518 buffer += len;
519 }
520
521 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
522 used += len;
523 if (buffer) {
524 if (size < used)
525 return -ERANGE;
526 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
527 buffer += len;
528 }
529
530 return used;
531}
532
dc647ec8 533static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
534{
535 int err = simple_setattr(dentry, iattr);
536
e1a3a60a 537 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
538 struct socket *sock = SOCKET_I(d_inode(dentry));
539
6d8c50dc
CW
540 if (sock->sk)
541 sock->sk->sk_uid = iattr->ia_uid;
542 else
543 err = -ENOENT;
86741ec2
LC
544 }
545
546 return err;
547}
548
600e1779 549static const struct inode_operations sockfs_inode_ops = {
600e1779 550 .listxattr = sockfs_listxattr,
86741ec2 551 .setattr = sockfs_setattr,
600e1779
MY
552};
553
1da177e4 554/**
8a3c245c 555 * sock_alloc - allocate a socket
89bddce5 556 *
1da177e4
LT
557 * Allocate a new inode and socket object. The two are bound together
558 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 559 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
560 */
561
f4a00aac 562struct socket *sock_alloc(void)
1da177e4 563{
89bddce5
SH
564 struct inode *inode;
565 struct socket *sock;
1da177e4 566
a209dfc7 567 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
568 if (!inode)
569 return NULL;
570
571 sock = SOCKET_I(inode);
572
85fe4025 573 inode->i_ino = get_next_ino();
89bddce5 574 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
575 inode->i_uid = current_fsuid();
576 inode->i_gid = current_fsgid();
600e1779 577 inode->i_op = &sockfs_inode_ops;
1da177e4 578
1da177e4
LT
579 return sock;
580}
f4a00aac 581EXPORT_SYMBOL(sock_alloc);
1da177e4 582
1da177e4 583/**
8a3c245c 584 * sock_release - close a socket
1da177e4
LT
585 * @sock: socket to close
586 *
587 * The socket is released from the protocol stack if it has a release
588 * callback, and the inode is then released if the socket is bound to
89bddce5 589 * an inode not a file.
1da177e4 590 */
89bddce5 591
6d8c50dc 592static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
593{
594 if (sock->ops) {
595 struct module *owner = sock->ops->owner;
596
6d8c50dc
CW
597 if (inode)
598 inode_lock(inode);
1da177e4 599 sock->ops->release(sock);
ff7b11aa 600 sock->sk = NULL;
6d8c50dc
CW
601 if (inode)
602 inode_unlock(inode);
1da177e4
LT
603 sock->ops = NULL;
604 module_put(owner);
605 }
606
e6476c21 607 if (sock->wq->fasync_list)
3410f22e 608 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 609
1da177e4
LT
610 if (!sock->file) {
611 iput(SOCK_INODE(sock));
612 return;
613 }
89bddce5 614 sock->file = NULL;
1da177e4 615}
6d8c50dc
CW
616
617void sock_release(struct socket *sock)
618{
619 __sock_release(sock, NULL);
620}
c6d409cf 621EXPORT_SYMBOL(sock_release);
1da177e4 622
c14ac945 623void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 624{
140c55d4
ED
625 u8 flags = *tx_flags;
626
c14ac945 627 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
628 flags |= SKBTX_HW_TSTAMP;
629
c14ac945 630 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
631 flags |= SKBTX_SW_TSTAMP;
632
c14ac945 633 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
634 flags |= SKBTX_SCHED_TSTAMP;
635
140c55d4 636 *tx_flags = flags;
20d49473 637}
67cc0d40 638EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 639
8a3c245c
PT
640/**
641 * sock_sendmsg - send a message through @sock
642 * @sock: socket
643 * @msg: message to send
644 *
645 * Sends @msg through @sock, passing through LSM.
646 * Returns the number of bytes sent, or an error code.
647 */
648
d8725c86 649static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 650{
01e97e65 651 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
652 BUG_ON(ret == -EIOCBQUEUED);
653 return ret;
1da177e4
LT
654}
655
d8725c86 656int sock_sendmsg(struct socket *sock, struct msghdr *msg)
228e548e 657{
d8725c86 658 int err = security_socket_sendmsg(sock, msg,
01e97e65 659 msg_data_left(msg));
228e548e 660
d8725c86 661 return err ?: sock_sendmsg_nosec(sock, msg);
0cf00c6f 662}
c6d409cf 663EXPORT_SYMBOL(sock_sendmsg);
1da177e4 664
8a3c245c
PT
665/**
666 * kernel_sendmsg - send a message through @sock (kernel-space)
667 * @sock: socket
668 * @msg: message header
669 * @vec: kernel vec
670 * @num: vec array length
671 * @size: total message data size
672 *
673 * Builds the message data with @vec and sends it through @sock.
674 * Returns the number of bytes sent, or an error code.
675 */
676
1da177e4
LT
677int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
678 struct kvec *vec, size_t num, size_t size)
679{
aa563d7b 680 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 681 return sock_sendmsg(sock, msg);
1da177e4 682}
c6d409cf 683EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 684
8a3c245c
PT
685/**
686 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
687 * @sk: sock
688 * @msg: message header
689 * @vec: output s/g array
690 * @num: output s/g array length
691 * @size: total message data size
692 *
693 * Builds the message data with @vec and sends it through @sock.
694 * Returns the number of bytes sent, or an error code.
695 * Caller must hold @sk.
696 */
697
306b13eb
TH
698int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
699 struct kvec *vec, size_t num, size_t size)
700{
701 struct socket *sock = sk->sk_socket;
702
703 if (!sock->ops->sendmsg_locked)
db5980d8 704 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 705
aa563d7b 706 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
707
708 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
709}
710EXPORT_SYMBOL(kernel_sendmsg_locked);
711
8605330a
SHY
712static bool skb_is_err_queue(const struct sk_buff *skb)
713{
714 /* pkt_type of skbs enqueued on the error queue are set to
715 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
716 * in recvmsg, since skbs received on a local socket will never
717 * have a pkt_type of PACKET_OUTGOING.
718 */
719 return skb->pkt_type == PACKET_OUTGOING;
720}
721
b50a5c70
ML
722/* On transmit, software and hardware timestamps are returned independently.
723 * As the two skb clones share the hardware timestamp, which may be updated
724 * before the software timestamp is received, a hardware TX timestamp may be
725 * returned only if there is no software TX timestamp. Ignore false software
726 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 727 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
728 * hardware timestamp.
729 */
730static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
731{
732 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
733}
734
aad9c8c4
ML
735static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
736{
737 struct scm_ts_pktinfo ts_pktinfo;
738 struct net_device *orig_dev;
739
740 if (!skb_mac_header_was_set(skb))
741 return;
742
743 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
744
745 rcu_read_lock();
746 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
747 if (orig_dev)
748 ts_pktinfo.if_index = orig_dev->ifindex;
749 rcu_read_unlock();
750
751 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
752 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
753 sizeof(ts_pktinfo), &ts_pktinfo);
754}
755
92f37fd2
ED
756/*
757 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
758 */
759void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
760 struct sk_buff *skb)
761{
20d49473 762 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 763 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
764 struct scm_timestamping_internal tss;
765
b50a5c70 766 int empty = 1, false_tstamp = 0;
20d49473
PO
767 struct skb_shared_hwtstamps *shhwtstamps =
768 skb_hwtstamps(skb);
769
770 /* Race occurred between timestamp enabling and packet
771 receiving. Fill in the current time for now. */
b50a5c70 772 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 773 __net_timestamp(skb);
b50a5c70
ML
774 false_tstamp = 1;
775 }
20d49473
PO
776
777 if (need_software_tstamp) {
778 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
779 if (new_tstamp) {
780 struct __kernel_sock_timeval tv;
781
782 skb_get_new_timestamp(skb, &tv);
783 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
784 sizeof(tv), &tv);
785 } else {
786 struct __kernel_old_timeval tv;
787
788 skb_get_timestamp(skb, &tv);
789 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
790 sizeof(tv), &tv);
791 }
20d49473 792 } else {
887feae3
DD
793 if (new_tstamp) {
794 struct __kernel_timespec ts;
795
796 skb_get_new_timestampns(skb, &ts);
797 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
798 sizeof(ts), &ts);
799 } else {
800 struct timespec ts;
801
802 skb_get_timestampns(skb, &ts);
803 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
804 sizeof(ts), &ts);
805 }
20d49473
PO
806 }
807 }
808
f24b9be5 809 memset(&tss, 0, sizeof(tss));
c199105d 810 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 811 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 812 empty = 0;
4d276eb6 813 if (shhwtstamps &&
b9f40e21 814 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 815 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 816 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 817 empty = 0;
aad9c8c4
ML
818 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
819 !skb_is_err_queue(skb))
820 put_ts_pktinfo(msg, skb);
821 }
1c885808 822 if (!empty) {
9718475e
DD
823 if (sock_flag(sk, SOCK_TSTAMP_NEW))
824 put_cmsg_scm_timestamping64(msg, &tss);
825 else
826 put_cmsg_scm_timestamping(msg, &tss);
1c885808 827
8605330a 828 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 829 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
830 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
831 skb->len, skb->data);
832 }
92f37fd2 833}
7c81fd8b
ACM
834EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
835
6e3e939f
JB
836void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
837 struct sk_buff *skb)
838{
839 int ack;
840
841 if (!sock_flag(sk, SOCK_WIFI_STATUS))
842 return;
843 if (!skb->wifi_acked_valid)
844 return;
845
846 ack = skb->wifi_acked;
847
848 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
849}
850EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
851
11165f14 852static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
853 struct sk_buff *skb)
3b885787 854{
744d5a3e 855 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 856 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 857 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
858}
859
/*
 * Emit both kinds of receive metadata for @skb on @msg: first the
 * timestamp control messages, then the drop counter.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
767dd033 866EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 867
8a3c245c
PT
868/**
869 * sock_recvmsg - receive a message from @sock
870 * @sock: socket
871 * @msg: message to receive
872 * @flags: message flags
873 *
874 * Receives @msg from @sock, passing through LSM. Returns the total number
875 * of bytes received, or an error.
876 */
877
1b784140 878static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 879 int flags)
1da177e4 880{
2da62906 881 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
882}
883
2da62906 884int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
a2e27255 885{
2da62906 886 int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
a2e27255 887
2da62906 888 return err ?: sock_recvmsg_nosec(sock, msg, flags);
1da177e4 889}
c6d409cf 890EXPORT_SYMBOL(sock_recvmsg);
1da177e4 891
c1249c0a 892/**
8a3c245c
PT
893 * kernel_recvmsg - Receive a message from a socket (kernel space)
894 * @sock: The socket to receive the message from
895 * @msg: Received message
896 * @vec: Input s/g array for message data
897 * @num: Size of input s/g array
898 * @size: Number of bytes to read
899 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 900 *
8a3c245c
PT
901 * On return the msg structure contains the scatter/gather array passed in the
902 * vec argument. The array is modified so that it consists of the unfilled
903 * portion of the original array.
c1249c0a 904 *
8a3c245c 905 * The returned value is the total number of bytes received, or an error.
c1249c0a 906 */
8a3c245c 907
89bddce5
SH
908int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
909 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
910{
911 mm_segment_t oldfs = get_fs();
912 int result;
913
aa563d7b 914 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 915 set_fs(KERNEL_DS);
2da62906 916 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
917 set_fs(oldfs);
918 return result;
919}
c6d409cf 920EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 921
ce1d4d3e
CH
922static ssize_t sock_sendpage(struct file *file, struct page *page,
923 int offset, size_t size, loff_t *ppos, int more)
1da177e4 924{
1da177e4
LT
925 struct socket *sock;
926 int flags;
927
ce1d4d3e
CH
928 sock = file->private_data;
929
35f9c09f
ED
930 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
931 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
932 flags |= more;
ce1d4d3e 933
e6949583 934 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 935}
1da177e4 936
9c55e01c 937static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 938 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
939 unsigned int flags)
940{
941 struct socket *sock = file->private_data;
942
997b37da 943 if (unlikely(!sock->ops->splice_read))
95506588 944 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 945
9c55e01c
JA
946 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
947}
948
8ae5e030 949static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 950{
6d652330
AV
951 struct file *file = iocb->ki_filp;
952 struct socket *sock = file->private_data;
0345f931 953 struct msghdr msg = {.msg_iter = *to,
954 .msg_iocb = iocb};
8ae5e030 955 ssize_t res;
ce1d4d3e 956
8ae5e030
AV
957 if (file->f_flags & O_NONBLOCK)
958 msg.msg_flags = MSG_DONTWAIT;
959
960 if (iocb->ki_pos != 0)
1da177e4 961 return -ESPIPE;
027445c3 962
66ee59af 963 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
964 return 0;
965
2da62906 966 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
967 *to = msg.msg_iter;
968 return res;
1da177e4
LT
969}
970
8ae5e030 971static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 972{
6d652330
AV
973 struct file *file = iocb->ki_filp;
974 struct socket *sock = file->private_data;
0345f931 975 struct msghdr msg = {.msg_iter = *from,
976 .msg_iocb = iocb};
8ae5e030 977 ssize_t res;
1da177e4 978
8ae5e030 979 if (iocb->ki_pos != 0)
ce1d4d3e 980 return -ESPIPE;
027445c3 981
8ae5e030
AV
982 if (file->f_flags & O_NONBLOCK)
983 msg.msg_flags = MSG_DONTWAIT;
984
6d652330
AV
985 if (sock->type == SOCK_SEQPACKET)
986 msg.msg_flags |= MSG_EOR;
987
d8725c86 988 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
989 *from = msg.msg_iter;
990 return res;
1da177e4
LT
991}
992
1da177e4
LT
993/*
994 * Atomic setting of ioctl hooks to avoid race
995 * with module unload.
996 */
997
4a3e2f71 998static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 999static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1000
881d966b 1001void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1002{
4a3e2f71 1003 mutex_lock(&br_ioctl_mutex);
1da177e4 1004 br_ioctl_hook = hook;
4a3e2f71 1005 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1006}
1007EXPORT_SYMBOL(brioctl_set);
1008
4a3e2f71 1009static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1010static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1011
881d966b 1012void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1013{
4a3e2f71 1014 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1015 vlan_ioctl_hook = hook;
4a3e2f71 1016 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1017}
1018EXPORT_SYMBOL(vlan_ioctl_set);
1019
4a3e2f71 1020static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1021static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1022
89bddce5 1023void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1024{
4a3e2f71 1025 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1026 dlci_ioctl_hook = hook;
4a3e2f71 1027 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1028}
1029EXPORT_SYMBOL(dlci_ioctl_set);
1030
6b96018b 1031static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1032 unsigned int cmd, unsigned long arg)
6b96018b
AB
1033{
1034 int err;
1035 void __user *argp = (void __user *)arg;
1036
1037 err = sock->ops->ioctl(sock, cmd, arg);
1038
1039 /*
1040 * If this ioctl is unknown try to hand it down
1041 * to the NIC driver.
1042 */
36fd633e
AV
1043 if (err != -ENOIOCTLCMD)
1044 return err;
6b96018b 1045
36fd633e
AV
1046 if (cmd == SIOCGIFCONF) {
1047 struct ifconf ifc;
1048 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1049 return -EFAULT;
1050 rtnl_lock();
1051 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1052 rtnl_unlock();
1053 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1054 err = -EFAULT;
44c02a2c
AV
1055 } else {
1056 struct ifreq ifr;
1057 bool need_copyout;
63ff03ab 1058 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1059 return -EFAULT;
1060 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1061 if (!err && need_copyout)
63ff03ab 1062 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1063 return -EFAULT;
36fd633e 1064 }
6b96018b
AB
1065 return err;
1066}
1067
1da177e4
LT
1068/*
1069 * With an ioctl, arg may well be a user mode pointer, but we don't know
1070 * what to do with it - that's up to the protocol still.
1071 */
1072
8a3c245c
PT
1073/**
1074 * get_net_ns - increment the refcount of the network namespace
1075 * @ns: common namespace (net)
1076 *
1077 * Returns the net's common namespace.
1078 */
1079
d8d211a2 1080struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1081{
1082 return &get_net(container_of(ns, struct net, ns))->ns;
1083}
d8d211a2 1084EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1085
1da177e4
LT
1086static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1087{
1088 struct socket *sock;
881d966b 1089 struct sock *sk;
1da177e4
LT
1090 void __user *argp = (void __user *)arg;
1091 int pid, err;
881d966b 1092 struct net *net;
1da177e4 1093
b69aee04 1094 sock = file->private_data;
881d966b 1095 sk = sock->sk;
3b1e0a65 1096 net = sock_net(sk);
44c02a2c
AV
1097 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1098 struct ifreq ifr;
1099 bool need_copyout;
1100 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1101 return -EFAULT;
1102 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1103 if (!err && need_copyout)
1104 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1105 return -EFAULT;
1da177e4 1106 } else
3d23e349 1107#ifdef CONFIG_WEXT_CORE
1da177e4 1108 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1109 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1110 } else
3d23e349 1111#endif
89bddce5 1112 switch (cmd) {
1da177e4
LT
1113 case FIOSETOWN:
1114 case SIOCSPGRP:
1115 err = -EFAULT;
1116 if (get_user(pid, (int __user *)argp))
1117 break;
393cc3f5 1118 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1119 break;
1120 case FIOGETOWN:
1121 case SIOCGPGRP:
609d7fa9 1122 err = put_user(f_getown(sock->file),
89bddce5 1123 (int __user *)argp);
1da177e4
LT
1124 break;
1125 case SIOCGIFBR:
1126 case SIOCSIFBR:
1127 case SIOCBRADDBR:
1128 case SIOCBRDELBR:
1129 err = -ENOPKG;
1130 if (!br_ioctl_hook)
1131 request_module("bridge");
1132
4a3e2f71 1133 mutex_lock(&br_ioctl_mutex);
89bddce5 1134 if (br_ioctl_hook)
881d966b 1135 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1136 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1137 break;
1138 case SIOCGIFVLAN:
1139 case SIOCSIFVLAN:
1140 err = -ENOPKG;
1141 if (!vlan_ioctl_hook)
1142 request_module("8021q");
1143
4a3e2f71 1144 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1145 if (vlan_ioctl_hook)
881d966b 1146 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1147 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1148 break;
1da177e4
LT
1149 case SIOCADDDLCI:
1150 case SIOCDELDLCI:
1151 err = -ENOPKG;
1152 if (!dlci_ioctl_hook)
1153 request_module("dlci");
1154
7512cbf6
PE
1155 mutex_lock(&dlci_ioctl_mutex);
1156 if (dlci_ioctl_hook)
1da177e4 1157 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1158 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1159 break;
c62cce2c
AV
1160 case SIOCGSKNS:
1161 err = -EPERM;
1162 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1163 break;
1164
1165 err = open_related_ns(&net->ns, get_net_ns);
1166 break;
1da177e4 1167 default:
63ff03ab 1168 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1169 break;
89bddce5 1170 }
1da177e4
LT
1171 return err;
1172}
1173
8a3c245c
PT
1174/**
1175 * sock_create_lite - creates a socket
1176 * @family: protocol family (AF_INET, ...)
1177 * @type: communication type (SOCK_STREAM, ...)
1178 * @protocol: protocol (0, ...)
1179 * @res: new socket
1180 *
1181 * Creates a new socket and assigns it to @res, passing through LSM.
1182 * The new socket initialization is not complete, see kernel_accept().
1183 * Returns 0 or an error. On failure @res is set to %NULL.
1184 * This function internally uses GFP_KERNEL.
1185 */
1186
1da177e4
LT
1187int sock_create_lite(int family, int type, int protocol, struct socket **res)
1188{
1189 int err;
1190 struct socket *sock = NULL;
89bddce5 1191
1da177e4
LT
1192 err = security_socket_create(family, type, protocol, 1);
1193 if (err)
1194 goto out;
1195
1196 sock = sock_alloc();
1197 if (!sock) {
1198 err = -ENOMEM;
1199 goto out;
1200 }
1201
1da177e4 1202 sock->type = type;
7420ed23
VY
1203 err = security_socket_post_create(sock, family, type, protocol, 1);
1204 if (err)
1205 goto out_release;
1206
1da177e4
LT
1207out:
1208 *res = sock;
1209 return err;
7420ed23
VY
1210out_release:
1211 sock_release(sock);
1212 sock = NULL;
1213 goto out;
1da177e4 1214}
c6d409cf 1215EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1216
1217/* No kernel lock held - perfect */
ade994f4 1218static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1219{
3cafb376 1220 struct socket *sock = file->private_data;
a331de3b 1221 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1222
e88958e6
CH
1223 if (!sock->ops->poll)
1224 return 0;
f641f13b 1225
a331de3b
CH
1226 if (sk_can_busy_loop(sock->sk)) {
1227 /* poll once if requested by the syscall */
1228 if (events & POLL_BUSY_LOOP)
1229 sk_busy_loop(sock->sk, 1);
1230
1231 /* if this socket can poll_ll, tell the system call */
1232 flag = POLL_BUSY_LOOP;
1233 }
1234
1235 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1236}
1237
89bddce5 1238static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1239{
b69aee04 1240 struct socket *sock = file->private_data;
1da177e4
LT
1241
1242 return sock->ops->mmap(file, sock, vma);
1243}
1244
/* Release the socket embedded in @inode; @filp is unused. Always returns 0. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1250
1251/*
1252 * Update the socket async list
1253 *
1254 * Fasync_list locking strategy.
1255 *
1256 * 1. fasync_list is modified only under process context socket lock
1257 * i.e. under semaphore.
1258 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1259 * or under socket lock
1da177e4
LT
1260 */
1261
1262static int sock_fasync(int fd, struct file *filp, int on)
1263{
989a2979
ED
1264 struct socket *sock = filp->private_data;
1265 struct sock *sk = sock->sk;
eaefd110 1266 struct socket_wq *wq;
1da177e4 1267
989a2979 1268 if (sk == NULL)
1da177e4 1269 return -EINVAL;
1da177e4
LT
1270
1271 lock_sock(sk);
e6476c21 1272 wq = sock->wq;
eaefd110 1273 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1274
eaefd110 1275 if (!wq->fasync_list)
989a2979
ED
1276 sock_reset_flag(sk, SOCK_FASYNC);
1277 else
bcdce719 1278 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1279
989a2979 1280 release_sock(sk);
1da177e4
LT
1281 return 0;
1282}
1283
ceb5d58b 1284/* This function may be called only under rcu_lock */
1da177e4 1285
ceb5d58b 1286int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1287{
ceb5d58b 1288 if (!wq || !wq->fasync_list)
1da177e4 1289 return -1;
ceb5d58b 1290
89bddce5 1291 switch (how) {
8d8ad9d7 1292 case SOCK_WAKE_WAITD:
ceb5d58b 1293 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1294 break;
1295 goto call_kill;
8d8ad9d7 1296 case SOCK_WAKE_SPACE:
ceb5d58b 1297 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1298 break;
1299 /* fall through */
8d8ad9d7 1300 case SOCK_WAKE_IO:
89bddce5 1301call_kill:
43815482 1302 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1303 break;
8d8ad9d7 1304 case SOCK_WAKE_URG:
43815482 1305 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1306 }
ceb5d58b 1307
1da177e4
LT
1308 return 0;
1309}
c6d409cf 1310EXPORT_SYMBOL(sock_wake_async);
1da177e4 1311
8a3c245c
PT
1312/**
1313 * __sock_create - creates a socket
1314 * @net: net namespace
1315 * @family: protocol family (AF_INET, ...)
1316 * @type: communication type (SOCK_STREAM, ...)
1317 * @protocol: protocol (0, ...)
1318 * @res: new socket
1319 * @kern: boolean for kernel space sockets
1320 *
1321 * Creates a new socket and assigns it to @res, passing through LSM.
1322 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1323 * be set to true if the socket resides in kernel space.
1324 * This function internally uses GFP_KERNEL.
1325 */
1326
721db93a 1327int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1328 struct socket **res, int kern)
1da177e4
LT
1329{
1330 int err;
1331 struct socket *sock;
55737fda 1332 const struct net_proto_family *pf;
1da177e4
LT
1333
1334 /*
89bddce5 1335 * Check protocol is in range
1da177e4
LT
1336 */
1337 if (family < 0 || family >= NPROTO)
1338 return -EAFNOSUPPORT;
1339 if (type < 0 || type >= SOCK_MAX)
1340 return -EINVAL;
1341
1342 /* Compatibility.
1343
1344 This uglymoron is moved from INET layer to here to avoid
1345 deadlock in module load.
1346 */
1347 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1348 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1349 current->comm);
1da177e4
LT
1350 family = PF_PACKET;
1351 }
1352
1353 err = security_socket_create(family, type, protocol, kern);
1354 if (err)
1355 return err;
89bddce5 1356
55737fda
SH
1357 /*
1358 * Allocate the socket and allow the family to set things up. if
1359 * the protocol is 0, the family is instructed to select an appropriate
1360 * default.
1361 */
1362 sock = sock_alloc();
1363 if (!sock) {
e87cc472 1364 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1365 return -ENFILE; /* Not exactly a match, but its the
1366 closest posix thing */
1367 }
1368
1369 sock->type = type;
1370
95a5afca 1371#ifdef CONFIG_MODULES
89bddce5
SH
1372 /* Attempt to load a protocol module if the find failed.
1373 *
1374 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1375 * requested real, full-featured networking support upon configuration.
1376 * Otherwise module support will break!
1377 */
190683a9 1378 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1379 request_module("net-pf-%d", family);
1da177e4
LT
1380#endif
1381
55737fda
SH
1382 rcu_read_lock();
1383 pf = rcu_dereference(net_families[family]);
1384 err = -EAFNOSUPPORT;
1385 if (!pf)
1386 goto out_release;
1da177e4
LT
1387
1388 /*
1389 * We will call the ->create function, that possibly is in a loadable
1390 * module, so we have to bump that loadable module refcnt first.
1391 */
55737fda 1392 if (!try_module_get(pf->owner))
1da177e4
LT
1393 goto out_release;
1394
55737fda
SH
1395 /* Now protected by module ref count */
1396 rcu_read_unlock();
1397
3f378b68 1398 err = pf->create(net, sock, protocol, kern);
55737fda 1399 if (err < 0)
1da177e4 1400 goto out_module_put;
a79af59e 1401
1da177e4
LT
1402 /*
1403 * Now to bump the refcnt of the [loadable] module that owns this
1404 * socket at sock_release time we decrement its refcnt.
1405 */
55737fda
SH
1406 if (!try_module_get(sock->ops->owner))
1407 goto out_module_busy;
1408
1da177e4
LT
1409 /*
1410 * Now that we're done with the ->create function, the [loadable]
1411 * module can have its refcnt decremented
1412 */
55737fda 1413 module_put(pf->owner);
7420ed23
VY
1414 err = security_socket_post_create(sock, family, type, protocol, kern);
1415 if (err)
3b185525 1416 goto out_sock_release;
55737fda 1417 *res = sock;
1da177e4 1418
55737fda
SH
1419 return 0;
1420
1421out_module_busy:
1422 err = -EAFNOSUPPORT;
1da177e4 1423out_module_put:
55737fda
SH
1424 sock->ops = NULL;
1425 module_put(pf->owner);
1426out_sock_release:
1da177e4 1427 sock_release(sock);
55737fda
SH
1428 return err;
1429
1430out_release:
1431 rcu_read_unlock();
1432 goto out_sock_release;
1da177e4 1433}
721db93a 1434EXPORT_SYMBOL(__sock_create);
1da177e4 1435
8a3c245c
PT
1436/**
1437 * sock_create - creates a socket
1438 * @family: protocol family (AF_INET, ...)
1439 * @type: communication type (SOCK_STREAM, ...)
1440 * @protocol: protocol (0, ...)
1441 * @res: new socket
1442 *
1443 * A wrapper around __sock_create().
1444 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1445 */
1446
1da177e4
LT
1447int sock_create(int family, int type, int protocol, struct socket **res)
1448{
1b8d7ae4 1449 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1450}
c6d409cf 1451EXPORT_SYMBOL(sock_create);
1da177e4 1452
8a3c245c
PT
1453/**
1454 * sock_create_kern - creates a socket (kernel space)
1455 * @net: net namespace
1456 * @family: protocol family (AF_INET, ...)
1457 * @type: communication type (SOCK_STREAM, ...)
1458 * @protocol: protocol (0, ...)
1459 * @res: new socket
1460 *
1461 * A wrapper around __sock_create().
1462 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1463 */
1464
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	/* Same as sock_create() but in the given namespace and with kern == 1. */
	return __sock_create(net, family, type, protocol, res, 1);
}
c6d409cf 1469EXPORT_SYMBOL(sock_create_kern);
1da177e4 1470
9d6a15c3 1471int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1472{
1473 int retval;
1474 struct socket *sock;
a677a039
UD
1475 int flags;
1476
e38b36f3
UD
1477 /* Check the SOCK_* constants for consistency. */
1478 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1479 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1480 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1481 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1482
a677a039 1483 flags = type & ~SOCK_TYPE_MASK;
77d27200 1484 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1485 return -EINVAL;
1486 type &= SOCK_TYPE_MASK;
1da177e4 1487
aaca0bdc
UD
1488 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1489 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1490
1da177e4
LT
1491 retval = sock_create(family, type, protocol, &sock);
1492 if (retval < 0)
8e1611e2 1493 return retval;
1da177e4 1494
8e1611e2 1495 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1496}
1497
9d6a15c3
DB
1498SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1499{
1500 return __sys_socket(family, type, protocol);
1501}
1502
1da177e4
LT
1503/*
1504 * Create a pair of connected sockets.
1505 */
1506
6debc8d8 1507int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1508{
1509 struct socket *sock1, *sock2;
1510 int fd1, fd2, err;
db349509 1511 struct file *newfile1, *newfile2;
a677a039
UD
1512 int flags;
1513
1514 flags = type & ~SOCK_TYPE_MASK;
77d27200 1515 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1516 return -EINVAL;
1517 type &= SOCK_TYPE_MASK;
1da177e4 1518
aaca0bdc
UD
1519 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1520 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1521
016a266b
AV
1522 /*
1523 * reserve descriptors and make sure we won't fail
1524 * to return them to userland.
1525 */
1526 fd1 = get_unused_fd_flags(flags);
1527 if (unlikely(fd1 < 0))
1528 return fd1;
1529
1530 fd2 = get_unused_fd_flags(flags);
1531 if (unlikely(fd2 < 0)) {
1532 put_unused_fd(fd1);
1533 return fd2;
1534 }
1535
1536 err = put_user(fd1, &usockvec[0]);
1537 if (err)
1538 goto out;
1539
1540 err = put_user(fd2, &usockvec[1]);
1541 if (err)
1542 goto out;
1543
1da177e4
LT
1544 /*
1545 * Obtain the first socket and check if the underlying protocol
1546 * supports the socketpair call.
1547 */
1548
1549 err = sock_create(family, type, protocol, &sock1);
016a266b 1550 if (unlikely(err < 0))
1da177e4
LT
1551 goto out;
1552
1553 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1554 if (unlikely(err < 0)) {
1555 sock_release(sock1);
1556 goto out;
bf3c23d1 1557 }
d73aa286 1558
d47cd945
DH
1559 err = security_socket_socketpair(sock1, sock2);
1560 if (unlikely(err)) {
1561 sock_release(sock2);
1562 sock_release(sock1);
1563 goto out;
1564 }
1565
016a266b
AV
1566 err = sock1->ops->socketpair(sock1, sock2);
1567 if (unlikely(err < 0)) {
1568 sock_release(sock2);
1569 sock_release(sock1);
1570 goto out;
28407630
AV
1571 }
1572
aab174f0 1573 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1574 if (IS_ERR(newfile1)) {
28407630 1575 err = PTR_ERR(newfile1);
016a266b
AV
1576 sock_release(sock2);
1577 goto out;
28407630
AV
1578 }
1579
aab174f0 1580 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1581 if (IS_ERR(newfile2)) {
1582 err = PTR_ERR(newfile2);
016a266b
AV
1583 fput(newfile1);
1584 goto out;
db349509
AV
1585 }
1586
157cf649 1587 audit_fd_pair(fd1, fd2);
d73aa286 1588
db349509
AV
1589 fd_install(fd1, newfile1);
1590 fd_install(fd2, newfile2);
d73aa286 1591 return 0;
1da177e4 1592
016a266b 1593out:
d73aa286 1594 put_unused_fd(fd2);
d73aa286 1595 put_unused_fd(fd1);
1da177e4
LT
1596 return err;
1597}
1598
6debc8d8
DB
1599SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1600 int __user *, usockvec)
1601{
1602 return __sys_socketpair(family, type, protocol, usockvec);
1603}
1604
1da177e4
LT
1605/*
1606 * Bind a name to a socket. Nothing much to do here since it's
1607 * the protocol's responsibility to handle the local address.
1608 *
1609 * We move the socket address to kernel space before we call
1610 * the protocol layer (having also checked the address is ok).
1611 */
1612
a87d35d8 1613int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1614{
1615 struct socket *sock;
230b1839 1616 struct sockaddr_storage address;
6cb153ca 1617 int err, fput_needed;
1da177e4 1618
89bddce5 1619 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1620 if (sock) {
43db362d 1621 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1622 if (!err) {
89bddce5 1623 err = security_socket_bind(sock,
230b1839 1624 (struct sockaddr *)&address,
89bddce5 1625 addrlen);
6cb153ca
BL
1626 if (!err)
1627 err = sock->ops->bind(sock,
89bddce5 1628 (struct sockaddr *)
230b1839 1629 &address, addrlen);
1da177e4 1630 }
6cb153ca 1631 fput_light(sock->file, fput_needed);
89bddce5 1632 }
1da177e4
LT
1633 return err;
1634}
1635
a87d35d8
DB
1636SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1637{
1638 return __sys_bind(fd, umyaddr, addrlen);
1639}
1640
1da177e4
LT
1641/*
1642 * Perform a listen. Basically, we allow the protocol to do anything
1643 * necessary for a listen, and if that works, we mark the socket as
1644 * ready for listening.
1645 */
1646
25e290ee 1647int __sys_listen(int fd, int backlog)
1da177e4
LT
1648{
1649 struct socket *sock;
6cb153ca 1650 int err, fput_needed;
b8e1f9b5 1651 int somaxconn;
89bddce5
SH
1652
1653 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1654 if (sock) {
8efa6e93 1655 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1656 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1657 backlog = somaxconn;
1da177e4
LT
1658
1659 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1660 if (!err)
1661 err = sock->ops->listen(sock, backlog);
1da177e4 1662
6cb153ca 1663 fput_light(sock->file, fput_needed);
1da177e4
LT
1664 }
1665 return err;
1666}
1667
25e290ee
DB
1668SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1669{
1670 return __sys_listen(fd, backlog);
1671}
1672
1da177e4
LT
1673/*
1674 * For accept, we attempt to create a new socket, set up the link
1675 * with the client, wake up the client, then return the new
1676 * connected fd. We collect the address of the connector in kernel
1677 * space and move it to user at the very end. This is unclean because
1678 * we open the socket then return an error.
1679 *
1680 * 1003.1g adds the ability to recvmsg() to query connection pending
1681 * status to recvmsg. We need to add that support in a way that's
b903036a 1682 * clean when we restructure accept also.
1da177e4
LT
1683 */
1684
4541e805
DB
1685int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1686 int __user *upeer_addrlen, int flags)
1da177e4
LT
1687{
1688 struct socket *sock, *newsock;
39d8c1b6 1689 struct file *newfile;
6cb153ca 1690 int err, len, newfd, fput_needed;
230b1839 1691 struct sockaddr_storage address;
1da177e4 1692
77d27200 1693 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1694 return -EINVAL;
1695
1696 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1697 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1698
6cb153ca 1699 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1700 if (!sock)
1701 goto out;
1702
1703 err = -ENFILE;
c6d409cf
ED
1704 newsock = sock_alloc();
1705 if (!newsock)
1da177e4
LT
1706 goto out_put;
1707
1708 newsock->type = sock->type;
1709 newsock->ops = sock->ops;
1710
1da177e4
LT
1711 /*
1712 * We don't need try_module_get here, as the listening socket (sock)
1713 * has the protocol module (sock->ops->owner) held.
1714 */
1715 __module_get(newsock->ops->owner);
1716
28407630 1717 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1718 if (unlikely(newfd < 0)) {
1719 err = newfd;
9a1875e6
DM
1720 sock_release(newsock);
1721 goto out_put;
39d8c1b6 1722 }
aab174f0 1723 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1724 if (IS_ERR(newfile)) {
28407630
AV
1725 err = PTR_ERR(newfile);
1726 put_unused_fd(newfd);
28407630
AV
1727 goto out_put;
1728 }
39d8c1b6 1729
a79af59e
FF
1730 err = security_socket_accept(sock, newsock);
1731 if (err)
39d8c1b6 1732 goto out_fd;
a79af59e 1733
cdfbabfb 1734 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1735 if (err < 0)
39d8c1b6 1736 goto out_fd;
1da177e4
LT
1737
1738 if (upeer_sockaddr) {
9b2c45d4
DV
1739 len = newsock->ops->getname(newsock,
1740 (struct sockaddr *)&address, 2);
1741 if (len < 0) {
1da177e4 1742 err = -ECONNABORTED;
39d8c1b6 1743 goto out_fd;
1da177e4 1744 }
43db362d 1745 err = move_addr_to_user(&address,
230b1839 1746 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1747 if (err < 0)
39d8c1b6 1748 goto out_fd;
1da177e4
LT
1749 }
1750
1751 /* File flags are not inherited via accept() unlike another OSes. */
1752
39d8c1b6
DM
1753 fd_install(newfd, newfile);
1754 err = newfd;
1da177e4 1755
1da177e4 1756out_put:
6cb153ca 1757 fput_light(sock->file, fput_needed);
1da177e4
LT
1758out:
1759 return err;
39d8c1b6 1760out_fd:
9606a216 1761 fput(newfile);
39d8c1b6 1762 put_unused_fd(newfd);
1da177e4
LT
1763 goto out_put;
1764}
1765
4541e805
DB
1766SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1767 int __user *, upeer_addrlen, int, flags)
1768{
1769 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1770}
1771
20f37034
HC
1772SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1773 int __user *, upeer_addrlen)
aaca0bdc 1774{
4541e805 1775 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1776}
1777
1da177e4
LT
1778/*
1779 * Attempt to connect to a socket with the server address. The address
1780 * is in user space so we verify it is OK and move it to kernel space.
1781 *
1782 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1783 * break bindings
1784 *
1785 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1786 * other SEQPACKET protocols that take time to connect() as it doesn't
1787 * include the -EINPROGRESS status for such sockets.
1788 */
1789
1387c2c2 1790int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1791{
1792 struct socket *sock;
230b1839 1793 struct sockaddr_storage address;
6cb153ca 1794 int err, fput_needed;
1da177e4 1795
6cb153ca 1796 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1797 if (!sock)
1798 goto out;
43db362d 1799 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1800 if (err < 0)
1801 goto out_put;
1802
89bddce5 1803 err =
230b1839 1804 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1805 if (err)
1806 goto out_put;
1807
230b1839 1808 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1809 sock->file->f_flags);
1810out_put:
6cb153ca 1811 fput_light(sock->file, fput_needed);
1da177e4
LT
1812out:
1813 return err;
1814}
1815
1387c2c2
DB
1816SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1817 int, addrlen)
1818{
1819 return __sys_connect(fd, uservaddr, addrlen);
1820}
1821
1da177e4
LT
1822/*
1823 * Get the local address ('name') of a socket object. Move the obtained
1824 * name to user space.
1825 */
1826
8882a107
DB
1827int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1828 int __user *usockaddr_len)
1da177e4
LT
1829{
1830 struct socket *sock;
230b1839 1831 struct sockaddr_storage address;
9b2c45d4 1832 int err, fput_needed;
89bddce5 1833
6cb153ca 1834 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1835 if (!sock)
1836 goto out;
1837
1838 err = security_socket_getsockname(sock);
1839 if (err)
1840 goto out_put;
1841
9b2c45d4
DV
1842 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1843 if (err < 0)
1da177e4 1844 goto out_put;
9b2c45d4
DV
1845 /* "err" is actually length in this case */
1846 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1847
1848out_put:
6cb153ca 1849 fput_light(sock->file, fput_needed);
1da177e4
LT
1850out:
1851 return err;
1852}
1853
8882a107
DB
1854SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1855 int __user *, usockaddr_len)
1856{
1857 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1858}
1859
1da177e4
LT
1860/*
1861 * Get the remote address ('name') of a socket object. Move the obtained
1862 * name to user space.
1863 */
1864
b21c8f83
DB
1865int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1866 int __user *usockaddr_len)
1da177e4
LT
1867{
1868 struct socket *sock;
230b1839 1869 struct sockaddr_storage address;
9b2c45d4 1870 int err, fput_needed;
1da177e4 1871
89bddce5
SH
1872 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1873 if (sock != NULL) {
1da177e4
LT
1874 err = security_socket_getpeername(sock);
1875 if (err) {
6cb153ca 1876 fput_light(sock->file, fput_needed);
1da177e4
LT
1877 return err;
1878 }
1879
9b2c45d4
DV
1880 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1881 if (err >= 0)
1882 /* "err" is actually length in this case */
1883 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1884 usockaddr_len);
6cb153ca 1885 fput_light(sock->file, fput_needed);
1da177e4
LT
1886 }
1887 return err;
1888}
1889
b21c8f83
DB
1890SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1891 int __user *, usockaddr_len)
1892{
1893 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1894}
1895
1da177e4
LT
1896/*
1897 * Send a datagram to a given address. We move the address into kernel
1898 * space and check the user space data area is readable before invoking
1899 * the protocol.
1900 */
211b634b
DB
1901int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1902 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1903{
1904 struct socket *sock;
230b1839 1905 struct sockaddr_storage address;
1da177e4
LT
1906 int err;
1907 struct msghdr msg;
1908 struct iovec iov;
6cb153ca 1909 int fput_needed;
6cb153ca 1910
602bd0e9
AV
1911 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1912 if (unlikely(err))
1913 return err;
de0fa95c
PE
1914 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1915 if (!sock)
4387ff75 1916 goto out;
6cb153ca 1917
89bddce5 1918 msg.msg_name = NULL;
89bddce5
SH
1919 msg.msg_control = NULL;
1920 msg.msg_controllen = 0;
1921 msg.msg_namelen = 0;
6cb153ca 1922 if (addr) {
43db362d 1923 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1924 if (err < 0)
1925 goto out_put;
230b1839 1926 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1927 msg.msg_namelen = addr_len;
1da177e4
LT
1928 }
1929 if (sock->file->f_flags & O_NONBLOCK)
1930 flags |= MSG_DONTWAIT;
1931 msg.msg_flags = flags;
d8725c86 1932 err = sock_sendmsg(sock, &msg);
1da177e4 1933
89bddce5 1934out_put:
de0fa95c 1935 fput_light(sock->file, fput_needed);
4387ff75 1936out:
1da177e4
LT
1937 return err;
1938}
1939
211b634b
DB
1940SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1941 unsigned int, flags, struct sockaddr __user *, addr,
1942 int, addr_len)
1943{
1944 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1945}
1946
1da177e4 1947/*
89bddce5 1948 * Send a datagram down a socket.
1da177e4
LT
1949 */
1950
3e0fa65f 1951SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1952 unsigned int, flags)
1da177e4 1953{
211b634b 1954 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1955}
1956
1957/*
89bddce5 1958 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1959 * sender. We verify the buffers are writable and if needed move the
1960 * sender address from kernel to user space.
1961 */
7a09e1eb
DB
1962int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
1963 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
1964{
1965 struct socket *sock;
1966 struct iovec iov;
1967 struct msghdr msg;
230b1839 1968 struct sockaddr_storage address;
89bddce5 1969 int err, err2;
6cb153ca
BL
1970 int fput_needed;
1971
602bd0e9
AV
1972 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1973 if (unlikely(err))
1974 return err;
de0fa95c 1975 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1976 if (!sock)
de0fa95c 1977 goto out;
1da177e4 1978
89bddce5
SH
1979 msg.msg_control = NULL;
1980 msg.msg_controllen = 0;
f3d33426
HFS
1981 /* Save some cycles and don't copy the address if not needed */
1982 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1983 /* We assume all kernel code knows the size of sockaddr_storage */
1984 msg.msg_namelen = 0;
130ed5d1 1985 msg.msg_iocb = NULL;
9f138fa6 1986 msg.msg_flags = 0;
1da177e4
LT
1987 if (sock->file->f_flags & O_NONBLOCK)
1988 flags |= MSG_DONTWAIT;
2da62906 1989 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1990
89bddce5 1991 if (err >= 0 && addr != NULL) {
43db362d 1992 err2 = move_addr_to_user(&address,
230b1839 1993 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1994 if (err2 < 0)
1995 err = err2;
1da177e4 1996 }
de0fa95c
PE
1997
1998 fput_light(sock->file, fput_needed);
4387ff75 1999out:
1da177e4
LT
2000 return err;
2001}
2002
7a09e1eb
DB
2003SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2004 unsigned int, flags, struct sockaddr __user *, addr,
2005 int __user *, addr_len)
2006{
2007 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2008}
2009
1da177e4 2010/*
89bddce5 2011 * Receive a datagram from a socket.
1da177e4
LT
2012 */
2013
b7c0ddf5
JG
2014SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2015 unsigned int, flags)
1da177e4 2016{
7a09e1eb 2017 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2018}
2019
2020/*
2021 * Set a socket option. Because we don't know the option lengths we have
2022 * to pass the user mode parameter for the protocols to sort out.
2023 */
2024
cc36dca0
DB
2025static int __sys_setsockopt(int fd, int level, int optname,
2026 char __user *optval, int optlen)
1da177e4 2027{
6cb153ca 2028 int err, fput_needed;
1da177e4
LT
2029 struct socket *sock;
2030
2031 if (optlen < 0)
2032 return -EINVAL;
89bddce5
SH
2033
2034 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2035 if (sock != NULL) {
2036 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2037 if (err)
2038 goto out_put;
1da177e4
LT
2039
2040 if (level == SOL_SOCKET)
89bddce5
SH
2041 err =
2042 sock_setsockopt(sock, level, optname, optval,
2043 optlen);
1da177e4 2044 else
89bddce5
SH
2045 err =
2046 sock->ops->setsockopt(sock, level, optname, optval,
2047 optlen);
6cb153ca
BL
2048out_put:
2049 fput_light(sock->file, fput_needed);
1da177e4
LT
2050 }
2051 return err;
2052}
2053
cc36dca0
DB
2054SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2055 char __user *, optval, int, optlen)
2056{
2057 return __sys_setsockopt(fd, level, optname, optval, optlen);
2058}
2059
1da177e4
LT
2060/*
2061 * Get a socket option. Because we don't know the option lengths we have
2062 * to pass a user mode parameter for the protocols to sort out.
2063 */
2064
13a2d70e
DB
2065static int __sys_getsockopt(int fd, int level, int optname,
2066 char __user *optval, int __user *optlen)
1da177e4 2067{
6cb153ca 2068 int err, fput_needed;
1da177e4
LT
2069 struct socket *sock;
2070
89bddce5
SH
2071 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2072 if (sock != NULL) {
6cb153ca
BL
2073 err = security_socket_getsockopt(sock, level, optname);
2074 if (err)
2075 goto out_put;
1da177e4
LT
2076
2077 if (level == SOL_SOCKET)
89bddce5
SH
2078 err =
2079 sock_getsockopt(sock, level, optname, optval,
2080 optlen);
1da177e4 2081 else
89bddce5
SH
2082 err =
2083 sock->ops->getsockopt(sock, level, optname, optval,
2084 optlen);
6cb153ca
BL
2085out_put:
2086 fput_light(sock->file, fput_needed);
1da177e4
LT
2087 }
2088 return err;
2089}
2090
13a2d70e
DB
2091SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2092 char __user *, optval, int __user *, optlen)
2093{
2094 return __sys_getsockopt(fd, level, optname, optval, optlen);
2095}
2096
1da177e4
LT
2097/*
2098 * Shutdown a socket.
2099 */
2100
005a1aea 2101int __sys_shutdown(int fd, int how)
1da177e4 2102{
6cb153ca 2103 int err, fput_needed;
1da177e4
LT
2104 struct socket *sock;
2105
89bddce5
SH
2106 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2107 if (sock != NULL) {
1da177e4 2108 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2109 if (!err)
2110 err = sock->ops->shutdown(sock, how);
2111 fput_light(sock->file, fput_needed);
1da177e4
LT
2112 }
2113 return err;
2114}
2115
005a1aea
DB
2116SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2117{
2118 return __sys_shutdown(fd, how);
2119}
2120
/*
 * Helper macros yielding the address of a header field that has the same
 * type (int / unsigned) in the 32 and 64 bit message header layouts.
 *
 * They expand in the caller's scope and rely on three names being
 * visible there: "flags", the native header pointer <msg>, and its compat
 * twin named <msg>_compat. When MSG_CMSG_COMPAT is set in flags the field
 * of the compat header is selected, otherwise the native one.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2127
c71d8ebe
TH
2128struct used_address {
2129 struct sockaddr_storage name;
2130 unsigned int name_len;
2131};
2132
da184284
AV
2133static int copy_msghdr_from_user(struct msghdr *kmsg,
2134 struct user_msghdr __user *umsg,
2135 struct sockaddr __user **save_addr,
2136 struct iovec **iov)
1661bf36 2137{
ffb07550 2138 struct user_msghdr msg;
08adb7da
AV
2139 ssize_t err;
2140
ffb07550 2141 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2142 return -EFAULT;
dbb490b9 2143
864d9664 2144 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2145 kmsg->msg_controllen = msg.msg_controllen;
2146 kmsg->msg_flags = msg.msg_flags;
2147
2148 kmsg->msg_namelen = msg.msg_namelen;
2149 if (!msg.msg_name)
6a2a2b3a
AS
2150 kmsg->msg_namelen = 0;
2151
dbb490b9
ML
2152 if (kmsg->msg_namelen < 0)
2153 return -EINVAL;
2154
1661bf36 2155 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2156 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2157
2158 if (save_addr)
ffb07550 2159 *save_addr = msg.msg_name;
08adb7da 2160
ffb07550 2161 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2162 if (!save_addr) {
864d9664
PA
2163 err = move_addr_to_kernel(msg.msg_name,
2164 kmsg->msg_namelen,
08adb7da
AV
2165 kmsg->msg_name);
2166 if (err < 0)
2167 return err;
2168 }
2169 } else {
2170 kmsg->msg_name = NULL;
2171 kmsg->msg_namelen = 0;
2172 }
2173
ffb07550 2174 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2175 return -EMSGSIZE;
2176
0345f931 2177 kmsg->msg_iocb = NULL;
2178
ffb07550
AV
2179 return import_iovec(save_addr ? READ : WRITE,
2180 msg.msg_iov, msg.msg_iovlen,
da184284 2181 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
2182}
2183
666547ff 2184static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2185 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2186 struct used_address *used_address,
2187 unsigned int allowed_msghdr_flags)
1da177e4 2188{
89bddce5
SH
2189 struct compat_msghdr __user *msg_compat =
2190 (struct compat_msghdr __user *)msg;
230b1839 2191 struct sockaddr_storage address;
1da177e4 2192 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2193 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2194 __aligned(sizeof(__kernel_size_t));
89bddce5 2195 /* 20 is size of ipv6_pktinfo */
1da177e4 2196 unsigned char *ctl_buf = ctl;
d8725c86 2197 int ctl_len;
08adb7da 2198 ssize_t err;
89bddce5 2199
08adb7da 2200 msg_sys->msg_name = &address;
1da177e4 2201
08449320 2202 if (MSG_CMSG_COMPAT & flags)
08adb7da 2203 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2204 else
08adb7da 2205 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2206 if (err < 0)
da184284 2207 return err;
1da177e4
LT
2208
2209 err = -ENOBUFS;
2210
228e548e 2211 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2212 goto out_freeiov;
28a94d8f 2213 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2214 ctl_len = msg_sys->msg_controllen;
1da177e4 2215 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2216 err =
228e548e 2217 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2218 sizeof(ctl));
1da177e4
LT
2219 if (err)
2220 goto out_freeiov;
228e548e
AB
2221 ctl_buf = msg_sys->msg_control;
2222 ctl_len = msg_sys->msg_controllen;
1da177e4 2223 } else if (ctl_len) {
ac4340fc
DM
2224 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2225 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2226 if (ctl_len > sizeof(ctl)) {
1da177e4 2227 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2228 if (ctl_buf == NULL)
1da177e4
LT
2229 goto out_freeiov;
2230 }
2231 err = -EFAULT;
2232 /*
228e548e 2233 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2234 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2235 * checking falls down on this.
2236 */
fb8621bb 2237 if (copy_from_user(ctl_buf,
228e548e 2238 (void __user __force *)msg_sys->msg_control,
89bddce5 2239 ctl_len))
1da177e4 2240 goto out_freectl;
228e548e 2241 msg_sys->msg_control = ctl_buf;
1da177e4 2242 }
228e548e 2243 msg_sys->msg_flags = flags;
1da177e4
LT
2244
2245 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2246 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2247 /*
2248 * If this is sendmmsg() and current destination address is same as
2249 * previously succeeded address, omit asking LSM's decision.
2250 * used_address->name_len is initialized to UINT_MAX so that the first
2251 * destination address never matches.
2252 */
bc909d9d
MD
2253 if (used_address && msg_sys->msg_name &&
2254 used_address->name_len == msg_sys->msg_namelen &&
2255 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2256 used_address->name_len)) {
d8725c86 2257 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2258 goto out_freectl;
2259 }
d8725c86 2260 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2261 /*
2262 * If this is sendmmsg() and sending to current destination address was
2263 * successful, remember it.
2264 */
2265 if (used_address && err >= 0) {
2266 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2267 if (msg_sys->msg_name)
2268 memcpy(&used_address->name, msg_sys->msg_name,
2269 used_address->name_len);
c71d8ebe 2270 }
1da177e4
LT
2271
2272out_freectl:
89bddce5 2273 if (ctl_buf != ctl)
1da177e4
LT
2274 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2275out_freeiov:
da184284 2276 kfree(iov);
228e548e
AB
2277 return err;
2278}
2279
2280/*
2281 * BSD sendmsg interface
2282 */
2283
e1834a32
DB
2284long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2285 bool forbid_cmsg_compat)
228e548e
AB
2286{
2287 int fput_needed, err;
2288 struct msghdr msg_sys;
1be374a0
AL
2289 struct socket *sock;
2290
e1834a32
DB
2291 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2292 return -EINVAL;
2293
1be374a0 2294 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2295 if (!sock)
2296 goto out;
2297
28a94d8f 2298 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2299
6cb153ca 2300 fput_light(sock->file, fput_needed);
89bddce5 2301out:
1da177e4
LT
2302 return err;
2303}
2304
666547ff 2305SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2306{
e1834a32 2307 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2308}
2309
228e548e
AB
2310/*
2311 * Linux sendmmsg interface
2312 */
2313
2314int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2315 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2316{
2317 int fput_needed, err, datagrams;
2318 struct socket *sock;
2319 struct mmsghdr __user *entry;
2320 struct compat_mmsghdr __user *compat_entry;
2321 struct msghdr msg_sys;
c71d8ebe 2322 struct used_address used_address;
f092276d 2323 unsigned int oflags = flags;
228e548e 2324
e1834a32
DB
2325 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2326 return -EINVAL;
2327
98382f41
AB
2328 if (vlen > UIO_MAXIOV)
2329 vlen = UIO_MAXIOV;
228e548e
AB
2330
2331 datagrams = 0;
2332
2333 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2334 if (!sock)
2335 return err;
2336
c71d8ebe 2337 used_address.name_len = UINT_MAX;
228e548e
AB
2338 entry = mmsg;
2339 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2340 err = 0;
f092276d 2341 flags |= MSG_BATCH;
228e548e
AB
2342
2343 while (datagrams < vlen) {
f092276d
TH
2344 if (datagrams == vlen - 1)
2345 flags = oflags;
2346
228e548e 2347 if (MSG_CMSG_COMPAT & flags) {
666547ff 2348 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2349 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2350 if (err < 0)
2351 break;
2352 err = __put_user(err, &compat_entry->msg_len);
2353 ++compat_entry;
2354 } else {
a7526eb5 2355 err = ___sys_sendmsg(sock,
666547ff 2356 (struct user_msghdr __user *)entry,
28a94d8f 2357 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2358 if (err < 0)
2359 break;
2360 err = put_user(err, &entry->msg_len);
2361 ++entry;
2362 }
2363
2364 if (err)
2365 break;
2366 ++datagrams;
3023898b
SHY
2367 if (msg_data_left(&msg_sys))
2368 break;
a78cb84c 2369 cond_resched();
228e548e
AB
2370 }
2371
228e548e
AB
2372 fput_light(sock->file, fput_needed);
2373
728ffb86
AB
2374 /* We only return an error if no datagrams were able to be sent */
2375 if (datagrams != 0)
228e548e
AB
2376 return datagrams;
2377
228e548e
AB
2378 return err;
2379}
2380
2381SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2382 unsigned int, vlen, unsigned int, flags)
2383{
e1834a32 2384 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2385}
2386
666547ff 2387static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2388 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2389{
89bddce5
SH
2390 struct compat_msghdr __user *msg_compat =
2391 (struct compat_msghdr __user *)msg;
1da177e4 2392 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2393 struct iovec *iov = iovstack;
1da177e4 2394 unsigned long cmsg_ptr;
2da62906 2395 int len;
08adb7da 2396 ssize_t err;
1da177e4
LT
2397
2398 /* kernel mode address */
230b1839 2399 struct sockaddr_storage addr;
1da177e4
LT
2400
2401 /* user mode address pointers */
2402 struct sockaddr __user *uaddr;
08adb7da 2403 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2404
08adb7da 2405 msg_sys->msg_name = &addr;
1da177e4 2406
f3d33426 2407 if (MSG_CMSG_COMPAT & flags)
08adb7da 2408 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2409 else
08adb7da 2410 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2411 if (err < 0)
da184284 2412 return err;
1da177e4 2413
a2e27255
ACM
2414 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2415 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2416
f3d33426
HFS
2417 /* We assume all kernel code knows the size of sockaddr_storage */
2418 msg_sys->msg_namelen = 0;
2419
1da177e4
LT
2420 if (sock->file->f_flags & O_NONBLOCK)
2421 flags |= MSG_DONTWAIT;
2da62906 2422 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2423 if (err < 0)
2424 goto out_freeiov;
2425 len = err;
2426
2427 if (uaddr != NULL) {
43db362d 2428 err = move_addr_to_user(&addr,
a2e27255 2429 msg_sys->msg_namelen, uaddr,
89bddce5 2430 uaddr_len);
1da177e4
LT
2431 if (err < 0)
2432 goto out_freeiov;
2433 }
a2e27255 2434 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2435 COMPAT_FLAGS(msg));
1da177e4
LT
2436 if (err)
2437 goto out_freeiov;
2438 if (MSG_CMSG_COMPAT & flags)
a2e27255 2439 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2440 &msg_compat->msg_controllen);
2441 else
a2e27255 2442 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2443 &msg->msg_controllen);
2444 if (err)
2445 goto out_freeiov;
2446 err = len;
2447
2448out_freeiov:
da184284 2449 kfree(iov);
a2e27255
ACM
2450 return err;
2451}
2452
2453/*
2454 * BSD recvmsg interface
2455 */
2456
e1834a32
DB
2457long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2458 bool forbid_cmsg_compat)
a2e27255
ACM
2459{
2460 int fput_needed, err;
2461 struct msghdr msg_sys;
1be374a0
AL
2462 struct socket *sock;
2463
e1834a32
DB
2464 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2465 return -EINVAL;
2466
1be374a0 2467 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2468 if (!sock)
2469 goto out;
2470
a7526eb5 2471 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2472
6cb153ca 2473 fput_light(sock->file, fput_needed);
1da177e4
LT
2474out:
2475 return err;
2476}
2477
666547ff 2478SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2479 unsigned int, flags)
2480{
e1834a32 2481 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2482}
2483
a2e27255
ACM
2484/*
2485 * Linux recvmmsg interface
2486 */
2487
e11d4284
AB
2488static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2489 unsigned int vlen, unsigned int flags,
2490 struct timespec64 *timeout)
a2e27255
ACM
2491{
2492 int fput_needed, err, datagrams;
2493 struct socket *sock;
2494 struct mmsghdr __user *entry;
d7256d0e 2495 struct compat_mmsghdr __user *compat_entry;
a2e27255 2496 struct msghdr msg_sys;
766b9f92
DD
2497 struct timespec64 end_time;
2498 struct timespec64 timeout64;
a2e27255
ACM
2499
2500 if (timeout &&
2501 poll_select_set_timeout(&end_time, timeout->tv_sec,
2502 timeout->tv_nsec))
2503 return -EINVAL;
2504
2505 datagrams = 0;
2506
2507 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2508 if (!sock)
2509 return err;
2510
7797dc41
SHY
2511 if (likely(!(flags & MSG_ERRQUEUE))) {
2512 err = sock_error(sock->sk);
2513 if (err) {
2514 datagrams = err;
2515 goto out_put;
2516 }
e623a9e9 2517 }
a2e27255
ACM
2518
2519 entry = mmsg;
d7256d0e 2520 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2521
2522 while (datagrams < vlen) {
2523 /*
2524 * No need to ask LSM for more than the first datagram.
2525 */
d7256d0e 2526 if (MSG_CMSG_COMPAT & flags) {
666547ff 2527 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2528 &msg_sys, flags & ~MSG_WAITFORONE,
2529 datagrams);
d7256d0e
JMG
2530 if (err < 0)
2531 break;
2532 err = __put_user(err, &compat_entry->msg_len);
2533 ++compat_entry;
2534 } else {
a7526eb5 2535 err = ___sys_recvmsg(sock,
666547ff 2536 (struct user_msghdr __user *)entry,
a7526eb5
AL
2537 &msg_sys, flags & ~MSG_WAITFORONE,
2538 datagrams);
d7256d0e
JMG
2539 if (err < 0)
2540 break;
2541 err = put_user(err, &entry->msg_len);
2542 ++entry;
2543 }
2544
a2e27255
ACM
2545 if (err)
2546 break;
a2e27255
ACM
2547 ++datagrams;
2548
71c5c159
BB
2549 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2550 if (flags & MSG_WAITFORONE)
2551 flags |= MSG_DONTWAIT;
2552
a2e27255 2553 if (timeout) {
766b9f92 2554 ktime_get_ts64(&timeout64);
c2e6c856 2555 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2556 if (timeout->tv_sec < 0) {
2557 timeout->tv_sec = timeout->tv_nsec = 0;
2558 break;
2559 }
2560
2561 /* Timeout, return less than vlen datagrams */
2562 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2563 break;
2564 }
2565
2566 /* Out of band data, return right away */
2567 if (msg_sys.msg_flags & MSG_OOB)
2568 break;
a78cb84c 2569 cond_resched();
a2e27255
ACM
2570 }
2571
a2e27255 2572 if (err == 0)
34b88a68
ACM
2573 goto out_put;
2574
2575 if (datagrams == 0) {
2576 datagrams = err;
2577 goto out_put;
2578 }
a2e27255 2579
34b88a68
ACM
2580 /*
2581 * We may return less entries than requested (vlen) if the
2582 * sock is non block and there aren't enough datagrams...
2583 */
2584 if (err != -EAGAIN) {
a2e27255 2585 /*
34b88a68
ACM
2586 * ... or if recvmsg returns an error after we
2587 * received some datagrams, where we record the
2588 * error to return on the next call or if the
2589 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2590 */
34b88a68 2591 sock->sk->sk_err = -err;
a2e27255 2592 }
34b88a68
ACM
2593out_put:
2594 fput_light(sock->file, fput_needed);
a2e27255 2595
34b88a68 2596 return datagrams;
a2e27255
ACM
2597}
2598
e11d4284
AB
2599int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2600 unsigned int vlen, unsigned int flags,
2601 struct __kernel_timespec __user *timeout,
2602 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2603{
2604 int datagrams;
c2e6c856 2605 struct timespec64 timeout_sys;
a2e27255 2606
e11d4284
AB
2607 if (timeout && get_timespec64(&timeout_sys, timeout))
2608 return -EFAULT;
a2e27255 2609
e11d4284 2610 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2611 return -EFAULT;
2612
e11d4284
AB
2613 if (!timeout && !timeout32)
2614 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2615
2616 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2617
e11d4284
AB
2618 if (datagrams <= 0)
2619 return datagrams;
2620
2621 if (timeout && put_timespec64(&timeout_sys, timeout))
2622 datagrams = -EFAULT;
2623
2624 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2625 datagrams = -EFAULT;
2626
2627 return datagrams;
2628}
2629
1255e269
DB
2630SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2631 unsigned int, vlen, unsigned int, flags,
c2e6c856 2632 struct __kernel_timespec __user *, timeout)
1255e269 2633{
e11d4284
AB
2634 if (flags & MSG_CMSG_COMPAT)
2635 return -EINVAL;
2636
2637 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2638}
2639
2640#ifdef CONFIG_COMPAT_32BIT_TIME
2641SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
2642 unsigned int, vlen, unsigned int, flags,
2643 struct old_timespec32 __user *, timeout)
2644{
2645 if (flags & MSG_CMSG_COMPAT)
2646 return -EINVAL;
2647
2648 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
1255e269 2649}
e11d4284 2650#endif
1255e269 2651
a2e27255 2652#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call
 * number. AL(n) is the size of n unsigned long arguments; index 0 is an
 * unused placeholder.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	/*  0 -  5 */ AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	/*  6 - 11 */ AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	/* 12 - 17 */ AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	/* 18 - 20 */ AL(4), AL(5), AL(4)
};

#undef AL
2663
2664/*
89bddce5 2665 * System call vectors.
1da177e4
LT
2666 *
2667 * Argument checking cleaned up. Saved 20% in size.
2668 * This function doesn't need to set the kernel lock because
89bddce5 2669 * it is set by the callees.
1da177e4
LT
2670 */
2671
3e0fa65f 2672SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2673{
2950fa9d 2674 unsigned long a[AUDITSC_ARGS];
89bddce5 2675 unsigned long a0, a1;
1da177e4 2676 int err;
47379052 2677 unsigned int len;
1da177e4 2678
228e548e 2679 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2680 return -EINVAL;
c8e8cd57 2681 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2682
47379052
AV
2683 len = nargs[call];
2684 if (len > sizeof(a))
2685 return -EINVAL;
2686
1da177e4 2687 /* copy_from_user should be SMP safe. */
47379052 2688 if (copy_from_user(a, args, len))
1da177e4 2689 return -EFAULT;
3ec3b2fb 2690
2950fa9d
CG
2691 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2692 if (err)
2693 return err;
3ec3b2fb 2694
89bddce5
SH
2695 a0 = a[0];
2696 a1 = a[1];
2697
2698 switch (call) {
2699 case SYS_SOCKET:
9d6a15c3 2700 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2701 break;
2702 case SYS_BIND:
a87d35d8 2703 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2704 break;
2705 case SYS_CONNECT:
1387c2c2 2706 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2707 break;
2708 case SYS_LISTEN:
25e290ee 2709 err = __sys_listen(a0, a1);
89bddce5
SH
2710 break;
2711 case SYS_ACCEPT:
4541e805
DB
2712 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2713 (int __user *)a[2], 0);
89bddce5
SH
2714 break;
2715 case SYS_GETSOCKNAME:
2716 err =
8882a107
DB
2717 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2718 (int __user *)a[2]);
89bddce5
SH
2719 break;
2720 case SYS_GETPEERNAME:
2721 err =
b21c8f83
DB
2722 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2723 (int __user *)a[2]);
89bddce5
SH
2724 break;
2725 case SYS_SOCKETPAIR:
6debc8d8 2726 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2727 break;
2728 case SYS_SEND:
f3bf896b
DB
2729 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2730 NULL, 0);
89bddce5
SH
2731 break;
2732 case SYS_SENDTO:
211b634b
DB
2733 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2734 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2735 break;
2736 case SYS_RECV:
d27e9afc
DB
2737 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2738 NULL, NULL);
89bddce5
SH
2739 break;
2740 case SYS_RECVFROM:
7a09e1eb
DB
2741 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2742 (struct sockaddr __user *)a[4],
2743 (int __user *)a[5]);
89bddce5
SH
2744 break;
2745 case SYS_SHUTDOWN:
005a1aea 2746 err = __sys_shutdown(a0, a1);
89bddce5
SH
2747 break;
2748 case SYS_SETSOCKOPT:
cc36dca0
DB
2749 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2750 a[4]);
89bddce5
SH
2751 break;
2752 case SYS_GETSOCKOPT:
2753 err =
13a2d70e
DB
2754 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2755 (int __user *)a[4]);
89bddce5
SH
2756 break;
2757 case SYS_SENDMSG:
e1834a32
DB
2758 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2759 a[2], true);
89bddce5 2760 break;
228e548e 2761 case SYS_SENDMMSG:
e1834a32
DB
2762 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2763 a[3], true);
228e548e 2764 break;
89bddce5 2765 case SYS_RECVMSG:
e1834a32
DB
2766 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2767 a[2], true);
89bddce5 2768 break;
a2e27255 2769 case SYS_RECVMMSG:
e11d4284
AB
2770 if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
2771 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2772 a[2], a[3],
2773 (struct __kernel_timespec __user *)a[4],
2774 NULL);
2775 else
2776 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2777 a[2], a[3], NULL,
2778 (struct old_timespec32 __user *)a[4]);
a2e27255 2779 break;
de11defe 2780 case SYS_ACCEPT4:
4541e805
DB
2781 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2782 (int __user *)a[2], a[3]);
aaca0bdc 2783 break;
89bddce5
SH
2784 default:
2785 err = -EINVAL;
2786 break;
1da177e4
LT
2787 }
2788 return err;
2789}
2790
89bddce5 2791#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2792
55737fda
SH
2793/**
2794 * sock_register - add a socket protocol handler
2795 * @ops: description of protocol
2796 *
1da177e4
LT
2797 * This function is called by a protocol handler that wants to
2798 * advertise its address family, and have it linked into the
e793c0f7 2799 * socket interface. The value ops->family corresponds to the
55737fda 2800 * socket system call protocol family.
1da177e4 2801 */
f0fd27d4 2802int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2803{
2804 int err;
2805
2806 if (ops->family >= NPROTO) {
3410f22e 2807 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2808 return -ENOBUFS;
2809 }
55737fda
SH
2810
2811 spin_lock(&net_family_lock);
190683a9
ED
2812 if (rcu_dereference_protected(net_families[ops->family],
2813 lockdep_is_held(&net_family_lock)))
55737fda
SH
2814 err = -EEXIST;
2815 else {
cf778b00 2816 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2817 err = 0;
2818 }
55737fda
SH
2819 spin_unlock(&net_family_lock);
2820
3410f22e 2821 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2822 return err;
2823}
c6d409cf 2824EXPORT_SYMBOL(sock_register);
1da177e4 2825
55737fda
SH
2826/**
2827 * sock_unregister - remove a protocol handler
2828 * @family: protocol family to remove
2829 *
1da177e4
LT
2830 * This function is called by a protocol handler that wants to
2831 * remove its address family, and have it unlinked from the
55737fda
SH
2832 * new socket creation.
2833 *
2834 * If protocol handler is a module, then it can use module reference
2835 * counts to protect against new references. If protocol handler is not
2836 * a module then it needs to provide its own protection in
2837 * the ops->create routine.
1da177e4 2838 */
f0fd27d4 2839void sock_unregister(int family)
1da177e4 2840{
f0fd27d4 2841 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2842
55737fda 2843 spin_lock(&net_family_lock);
a9b3cd7f 2844 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2845 spin_unlock(&net_family_lock);
2846
2847 synchronize_rcu();
2848
3410f22e 2849 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2850}
c6d409cf 2851EXPORT_SYMBOL(sock_unregister);
1da177e4 2852
bf2ae2e4
XL
2853bool sock_is_registered(int family)
2854{
66b51b0a 2855 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
2856}
2857
77d76ea3 2858static int __init sock_init(void)
1da177e4 2859{
b3e19d92 2860 int err;
2ca794e5
EB
2861 /*
2862 * Initialize the network sysctl infrastructure.
2863 */
2864 err = net_sysctl_init();
2865 if (err)
2866 goto out;
b3e19d92 2867
1da177e4 2868 /*
89bddce5 2869 * Initialize skbuff SLAB cache
1da177e4
LT
2870 */
2871 skb_init();
1da177e4
LT
2872
2873 /*
89bddce5 2874 * Initialize the protocols module.
1da177e4
LT
2875 */
2876
2877 init_inodecache();
b3e19d92
NP
2878
2879 err = register_filesystem(&sock_fs_type);
2880 if (err)
2881 goto out_fs;
1da177e4 2882 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2883 if (IS_ERR(sock_mnt)) {
2884 err = PTR_ERR(sock_mnt);
2885 goto out_mount;
2886 }
77d76ea3
AK
2887
2888 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2889 */
2890
2891#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2892 err = netfilter_init();
2893 if (err)
2894 goto out;
1da177e4 2895#endif
cbeb321a 2896
408eccce 2897 ptp_classifier_init();
c1f19b51 2898
b3e19d92
NP
2899out:
2900 return err;
2901
2902out_mount:
2903 unregister_filesystem(&sock_fs_type);
2904out_fs:
2905 goto out;
1da177e4
LT
2906}
2907
77d76ea3
AK
2908core_initcall(sock_init); /* early initcall */
2909
1da177e4
LT
2910#ifdef CONFIG_PROC_FS
2911void socket_seq_show(struct seq_file *seq)
2912{
648845ab
TZ
2913 seq_printf(seq, "sockets: used %d\n",
2914 sock_inuse_get(seq->private));
1da177e4 2915}
89bddce5 2916#endif /* CONFIG_PROC_FS */
1da177e4 2917
89bbfc95 2918#ifdef CONFIG_COMPAT
6b96018b 2919static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2920 unsigned int cmd, void __user *up)
7a229387 2921{
7a229387
AB
2922 mm_segment_t old_fs = get_fs();
2923 struct timeval ktv;
2924 int err;
2925
2926 set_fs(KERNEL_DS);
63ff03ab 2927 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2928 set_fs(old_fs);
644595f8 2929 if (!err)
ed6fe9d6 2930 err = compat_put_timeval(&ktv, up);
644595f8 2931
7a229387
AB
2932 return err;
2933}
2934
6b96018b 2935static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2936 unsigned int cmd, void __user *up)
7a229387 2937{
7a229387
AB
2938 mm_segment_t old_fs = get_fs();
2939 struct timespec kts;
2940 int err;
2941
2942 set_fs(KERNEL_DS);
63ff03ab 2943 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2944 set_fs(old_fs);
644595f8 2945 if (!err)
ed6fe9d6 2946 err = compat_put_timespec(&kts, up);
644595f8 2947
7a229387
AB
2948 return err;
2949}
2950
36fd633e 2951static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2952{
6b96018b 2953 struct compat_ifconf ifc32;
7a229387 2954 struct ifconf ifc;
7a229387
AB
2955 int err;
2956
6b96018b 2957 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2958 return -EFAULT;
2959
36fd633e
AV
2960 ifc.ifc_len = ifc32.ifc_len;
2961 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 2962
36fd633e
AV
2963 rtnl_lock();
2964 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2965 rtnl_unlock();
7a229387
AB
2966 if (err)
2967 return err;
2968
36fd633e 2969 ifc32.ifc_len = ifc.ifc_len;
6b96018b 2970 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2971 return -EFAULT;
2972
2973 return 0;
2974}
2975
6b96018b 2976static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2977{
3a7da39d
BH
2978 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2979 bool convert_in = false, convert_out = false;
44c02a2c
AV
2980 size_t buf_size = 0;
2981 struct ethtool_rxnfc __user *rxnfc = NULL;
2982 struct ifreq ifr;
3a7da39d
BH
2983 u32 rule_cnt = 0, actual_rule_cnt;
2984 u32 ethcmd;
7a229387 2985 u32 data;
3a7da39d 2986 int ret;
7a229387 2987
3a7da39d
BH
2988 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2989 return -EFAULT;
7a229387 2990
3a7da39d
BH
2991 compat_rxnfc = compat_ptr(data);
2992
2993 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2994 return -EFAULT;
2995
3a7da39d
BH
2996 /* Most ethtool structures are defined without padding.
2997 * Unfortunately struct ethtool_rxnfc is an exception.
2998 */
2999 switch (ethcmd) {
3000 default:
3001 break;
3002 case ETHTOOL_GRXCLSRLALL:
3003 /* Buffer size is variable */
3004 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3005 return -EFAULT;
3006 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3007 return -ENOMEM;
3008 buf_size += rule_cnt * sizeof(u32);
3009 /* fall through */
3010 case ETHTOOL_GRXRINGS:
3011 case ETHTOOL_GRXCLSRLCNT:
3012 case ETHTOOL_GRXCLSRULE:
55664f32 3013 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3014 convert_out = true;
3015 /* fall through */
3016 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3017 buf_size += sizeof(struct ethtool_rxnfc);
3018 convert_in = true;
44c02a2c 3019 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3020 break;
3021 }
3022
44c02a2c 3023 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3024 return -EFAULT;
3025
44c02a2c 3026 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3027
3a7da39d 3028 if (convert_in) {
127fe533 3029 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3030 * fs.ring_cookie and at the end of fs, but nowhere else.
3031 */
127fe533
AD
3032 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3033 sizeof(compat_rxnfc->fs.m_ext) !=
3034 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3035 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3036 BUILD_BUG_ON(
3037 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3038 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3039 offsetof(struct ethtool_rxnfc, fs.location) -
3040 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3041
3042 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3043 (void __user *)(&rxnfc->fs.m_ext + 1) -
3044 (void __user *)rxnfc) ||
3a7da39d
BH
3045 copy_in_user(&rxnfc->fs.ring_cookie,
3046 &compat_rxnfc->fs.ring_cookie,
954b1244 3047 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3048 (void __user *)&rxnfc->fs.ring_cookie))
3049 return -EFAULT;
3050 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3051 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3052 return -EFAULT;
3053 } else if (copy_in_user(&rxnfc->rule_cnt,
3054 &compat_rxnfc->rule_cnt,
3055 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3056 return -EFAULT;
3057 }
3058
44c02a2c 3059 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3060 if (ret)
3061 return ret;
3062
3063 if (convert_out) {
3064 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3065 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3066 (const void __user *)rxnfc) ||
3a7da39d
BH
3067 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3068 &rxnfc->fs.ring_cookie,
954b1244
SH
3069 (const void __user *)(&rxnfc->fs.location + 1) -
3070 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3071 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3072 sizeof(rxnfc->rule_cnt)))
3073 return -EFAULT;
3074
3075 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3076 /* As an optimisation, we only copy the actual
3077 * number of rules that the underlying
3078 * function returned. Since Mallory might
3079 * change the rule count in user memory, we
3080 * check that it is less than the rule count
3081 * originally given (as the user buffer size),
3082 * which has been range-checked.
3083 */
3084 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3085 return -EFAULT;
3086 if (actual_rule_cnt < rule_cnt)
3087 rule_cnt = actual_rule_cnt;
3088 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3089 &rxnfc->rule_locs[0],
3090 rule_cnt * sizeof(u32)))
3091 return -EFAULT;
3092 }
3093 }
3094
3095 return 0;
7a229387
AB
3096}
3097
7a50a240
AB
3098static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3099{
7a50a240 3100 compat_uptr_t uptr32;
44c02a2c
AV
3101 struct ifreq ifr;
3102 void __user *saved;
3103 int err;
7a50a240 3104
44c02a2c 3105 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3106 return -EFAULT;
3107
3108 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3109 return -EFAULT;
3110
44c02a2c
AV
3111 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3112 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3113
44c02a2c
AV
3114 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3115 if (!err) {
3116 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3117 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3118 err = -EFAULT;
ccbd6a5a 3119 }
44c02a2c 3120 return err;
7a229387
AB
3121}
3122
590d4693
BH
3123/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3124static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3125 struct compat_ifreq __user *u_ifreq32)
7a229387 3126{
44c02a2c 3127 struct ifreq ifreq;
7a229387
AB
3128 u32 data32;
3129
44c02a2c 3130 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3131 return -EFAULT;
44c02a2c 3132 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3133 return -EFAULT;
44c02a2c 3134 ifreq.ifr_data = compat_ptr(data32);
7a229387 3135
44c02a2c 3136 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3137}
3138
37ac39bd
JB
3139static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3140 unsigned int cmd,
3141 struct compat_ifreq __user *uifr32)
3142{
3143 struct ifreq __user *uifr;
3144 int err;
3145
3146 /* Handle the fact that while struct ifreq has the same *layout* on
3147 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3148 * which are handled elsewhere, it still has different *size* due to
3149 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3150 * resulting in struct ifreq being 32 and 40 bytes respectively).
3151 * As a result, if the struct happens to be at the end of a page and
3152 * the next page isn't readable/writable, we get a fault. To prevent
3153 * that, copy back and forth to the full size.
3154 */
3155
3156 uifr = compat_alloc_user_space(sizeof(*uifr));
3157 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3158 return -EFAULT;
3159
3160 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3161
3162 if (!err) {
3163 switch (cmd) {
3164 case SIOCGIFFLAGS:
3165 case SIOCGIFMETRIC:
3166 case SIOCGIFMTU:
3167 case SIOCGIFMEM:
3168 case SIOCGIFHWADDR:
3169 case SIOCGIFINDEX:
3170 case SIOCGIFADDR:
3171 case SIOCGIFBRDADDR:
3172 case SIOCGIFDSTADDR:
3173 case SIOCGIFNETMASK:
3174 case SIOCGIFPFLAGS:
3175 case SIOCGIFTXQLEN:
3176 case SIOCGMIIPHY:
3177 case SIOCGMIIREG:
c6c9fee3 3178 case SIOCGIFNAME:
37ac39bd
JB
3179 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3180 err = -EFAULT;
3181 break;
3182 }
3183 }
3184 return err;
3185}
3186
a2116ed2
AB
3187static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3188 struct compat_ifreq __user *uifr32)
3189{
3190 struct ifreq ifr;
3191 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3192 int err;
3193
3194 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3195 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3196 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3197 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3198 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3199 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3200 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3201 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3202 if (err)
3203 return -EFAULT;
3204
44c02a2c 3205 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3206
3207 if (cmd == SIOCGIFMAP && !err) {
3208 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3209 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3210 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3211 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3212 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3213 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3214 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3215 if (err)
3216 err = -EFAULT;
3217 }
3218 return err;
3219}
3220
7a229387 3221struct rtentry32 {
c6d409cf 3222 u32 rt_pad1;
7a229387
AB
3223 struct sockaddr rt_dst; /* target address */
3224 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3225 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3226 unsigned short rt_flags;
3227 short rt_pad2;
3228 u32 rt_pad3;
3229 unsigned char rt_tos;
3230 unsigned char rt_class;
3231 short rt_pad4;
3232 short rt_metric; /* +1 for binary compatibility! */
7a229387 3233 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3234 u32 rt_mtu; /* per route MTU/Window */
3235 u32 rt_window; /* Window clamping */
7a229387
AB
3236 unsigned short rt_irtt; /* Initial RTT */
3237};
3238
3239struct in6_rtmsg32 {
3240 struct in6_addr rtmsg_dst;
3241 struct in6_addr rtmsg_src;
3242 struct in6_addr rtmsg_gateway;
3243 u32 rtmsg_type;
3244 u16 rtmsg_dst_len;
3245 u16 rtmsg_src_len;
3246 u32 rtmsg_metric;
3247 u32 rtmsg_info;
3248 u32 rtmsg_flags;
3249 s32 rtmsg_ifindex;
3250};
3251
6b96018b
AB
3252static int routing_ioctl(struct net *net, struct socket *sock,
3253 unsigned int cmd, void __user *argp)
7a229387
AB
3254{
3255 int ret;
3256 void *r = NULL;
3257 struct in6_rtmsg r6;
3258 struct rtentry r4;
3259 char devname[16];
3260 u32 rtdev;
3261 mm_segment_t old_fs = get_fs();
3262
6b96018b
AB
3263 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3264 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3265 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3266 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3267 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3268 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3269 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3270 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3271 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3272 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3273 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3274
3275 r = (void *) &r6;
3276 } else { /* ipv4 */
6b96018b 3277 struct rtentry32 __user *ur4 = argp;
c6d409cf 3278 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3279 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3280 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3281 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3282 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3283 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3284 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3285 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3286 if (rtdev) {
c6d409cf 3287 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3288 r4.rt_dev = (char __user __force *)devname;
3289 devname[15] = 0;
7a229387
AB
3290 } else
3291 r4.rt_dev = NULL;
3292
3293 r = (void *) &r4;
3294 }
3295
3296 if (ret) {
3297 ret = -EFAULT;
3298 goto out;
3299 }
3300
c6d409cf 3301 set_fs(KERNEL_DS);
63ff03ab 3302 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3303 set_fs(old_fs);
7a229387
AB
3304
3305out:
7a229387
AB
3306 return ret;
3307}
3308
3309/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3310 * for some operations; this forces use of the newer bridge-utils that
25985edc 3311 * use compatible ioctls
7a229387 3312 */
6b96018b 3313static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3314{
6b96018b 3315 compat_ulong_t tmp;
7a229387 3316
6b96018b 3317 if (get_user(tmp, argp))
7a229387
AB
3318 return -EFAULT;
3319 if (tmp == BRCTL_GET_VERSION)
3320 return BRCTL_VERSION + 1;
3321 return -EINVAL;
3322}
3323
6b96018b
AB
3324static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3325 unsigned int cmd, unsigned long arg)
3326{
3327 void __user *argp = compat_ptr(arg);
3328 struct sock *sk = sock->sk;
3329 struct net *net = sock_net(sk);
7a229387 3330
6b96018b 3331 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3332 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3333
3334 switch (cmd) {
3335 case SIOCSIFBR:
3336 case SIOCGIFBR:
3337 return old_bridge_ioctl(argp);
6b96018b 3338 case SIOCGIFCONF:
36fd633e 3339 return compat_dev_ifconf(net, argp);
6b96018b
AB
3340 case SIOCETHTOOL:
3341 return ethtool_ioctl(net, argp);
7a50a240
AB
3342 case SIOCWANDEV:
3343 return compat_siocwandev(net, argp);
a2116ed2
AB
3344 case SIOCGIFMAP:
3345 case SIOCSIFMAP:
3346 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3347 case SIOCADDRT:
3348 case SIOCDELRT:
3349 return routing_ioctl(net, sock, cmd, argp);
3350 case SIOCGSTAMP:
3351 return do_siocgstamp(net, sock, cmd, argp);
3352 case SIOCGSTAMPNS:
3353 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3354 case SIOCBONDSLAVEINFOQUERY:
3355 case SIOCBONDINFOQUERY:
a2116ed2 3356 case SIOCSHWTSTAMP:
fd468c74 3357 case SIOCGHWTSTAMP:
590d4693 3358 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3359
3360 case FIOSETOWN:
3361 case SIOCSPGRP:
3362 case FIOGETOWN:
3363 case SIOCGPGRP:
3364 case SIOCBRADDBR:
3365 case SIOCBRDELBR:
3366 case SIOCGIFVLAN:
3367 case SIOCSIFVLAN:
3368 case SIOCADDDLCI:
3369 case SIOCDELDLCI:
c62cce2c 3370 case SIOCGSKNS:
6b96018b
AB
3371 return sock_ioctl(file, cmd, arg);
3372
3373 case SIOCGIFFLAGS:
3374 case SIOCSIFFLAGS:
3375 case SIOCGIFMETRIC:
3376 case SIOCSIFMETRIC:
3377 case SIOCGIFMTU:
3378 case SIOCSIFMTU:
3379 case SIOCGIFMEM:
3380 case SIOCSIFMEM:
3381 case SIOCGIFHWADDR:
3382 case SIOCSIFHWADDR:
3383 case SIOCADDMULTI:
3384 case SIOCDELMULTI:
3385 case SIOCGIFINDEX:
6b96018b
AB
3386 case SIOCGIFADDR:
3387 case SIOCSIFADDR:
3388 case SIOCSIFHWBROADCAST:
6b96018b 3389 case SIOCDIFADDR:
6b96018b
AB
3390 case SIOCGIFBRDADDR:
3391 case SIOCSIFBRDADDR:
3392 case SIOCGIFDSTADDR:
3393 case SIOCSIFDSTADDR:
3394 case SIOCGIFNETMASK:
3395 case SIOCSIFNETMASK:
3396 case SIOCSIFPFLAGS:
3397 case SIOCGIFPFLAGS:
3398 case SIOCGIFTXQLEN:
3399 case SIOCSIFTXQLEN:
3400 case SIOCBRADDIF:
3401 case SIOCBRDELIF:
c6c9fee3 3402 case SIOCGIFNAME:
9177efd3
AB
3403 case SIOCSIFNAME:
3404 case SIOCGMIIPHY:
3405 case SIOCGMIIREG:
3406 case SIOCSMIIREG:
f92d4fc9
AV
3407 case SIOCBONDENSLAVE:
3408 case SIOCBONDRELEASE:
3409 case SIOCBONDSETHWADDR:
3410 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3411 return compat_ifreq_ioctl(net, sock, cmd, argp);
3412
6b96018b
AB
3413 case SIOCSARP:
3414 case SIOCGARP:
3415 case SIOCDARP:
6b96018b 3416 case SIOCATMARK:
63ff03ab 3417 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3418 }
3419
6b96018b
AB
3420 return -ENOIOCTLCMD;
3421}
7a229387 3422
95c96174 3423static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3424 unsigned long arg)
89bbfc95
SP
3425{
3426 struct socket *sock = file->private_data;
3427 int ret = -ENOIOCTLCMD;
87de87d5
DM
3428 struct sock *sk;
3429 struct net *net;
3430
3431 sk = sock->sk;
3432 net = sock_net(sk);
89bbfc95
SP
3433
3434 if (sock->ops->compat_ioctl)
3435 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3436
87de87d5
DM
3437 if (ret == -ENOIOCTLCMD &&
3438 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3439 ret = compat_wext_handle_ioctl(net, cmd, arg);
3440
6b96018b
AB
3441 if (ret == -ENOIOCTLCMD)
3442 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3443
89bbfc95
SP
3444 return ret;
3445}
3446#endif
3447
8a3c245c
PT
3448/**
3449 * kernel_bind - bind an address to a socket (kernel space)
3450 * @sock: socket
3451 * @addr: address
3452 * @addrlen: length of address
3453 *
3454 * Returns 0 or an error.
3455 */
3456
ac5a488e
SS
3457int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3458{
3459 return sock->ops->bind(sock, addr, addrlen);
3460}
c6d409cf 3461EXPORT_SYMBOL(kernel_bind);
ac5a488e 3462
8a3c245c
PT
3463/**
3464 * kernel_listen - move socket to listening state (kernel space)
3465 * @sock: socket
3466 * @backlog: pending connections queue size
3467 *
3468 * Returns 0 or an error.
3469 */
3470
ac5a488e
SS
3471int kernel_listen(struct socket *sock, int backlog)
3472{
3473 return sock->ops->listen(sock, backlog);
3474}
c6d409cf 3475EXPORT_SYMBOL(kernel_listen);
ac5a488e 3476
8a3c245c
PT
3477/**
3478 * kernel_accept - accept a connection (kernel space)
3479 * @sock: listening socket
3480 * @newsock: new connected socket
3481 * @flags: flags
3482 *
3483 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3484 * If it fails, @newsock is guaranteed to be %NULL.
3485 * Returns 0 or an error.
3486 */
3487
ac5a488e
SS
3488int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3489{
3490 struct sock *sk = sock->sk;
3491 int err;
3492
3493 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3494 newsock);
3495 if (err < 0)
3496 goto done;
3497
cdfbabfb 3498 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3499 if (err < 0) {
3500 sock_release(*newsock);
fa8705b0 3501 *newsock = NULL;
ac5a488e
SS
3502 goto done;
3503 }
3504
3505 (*newsock)->ops = sock->ops;
1b08534e 3506 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3507
3508done:
3509 return err;
3510}
c6d409cf 3511EXPORT_SYMBOL(kernel_accept);
ac5a488e 3512
8a3c245c
PT
3513/**
3514 * kernel_connect - connect a socket (kernel space)
3515 * @sock: socket
3516 * @addr: address
3517 * @addrlen: address length
3518 * @flags: flags (O_NONBLOCK, ...)
3519 *
3520 * For datagram sockets, @addr is the addres to which datagrams are sent
3521 * by default, and the only address from which datagrams are received.
3522 * For stream sockets, attempts to connect to @addr.
3523 * Returns 0 or an error code.
3524 */
3525
ac5a488e 3526int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3527 int flags)
ac5a488e
SS
3528{
3529 return sock->ops->connect(sock, addr, addrlen, flags);
3530}
c6d409cf 3531EXPORT_SYMBOL(kernel_connect);
ac5a488e 3532
8a3c245c
PT
3533/**
3534 * kernel_getsockname - get the address which the socket is bound (kernel space)
3535 * @sock: socket
3536 * @addr: address holder
3537 *
3538 * Fills the @addr pointer with the address which the socket is bound.
3539 * Returns 0 or an error code.
3540 */
3541
9b2c45d4 3542int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3543{
9b2c45d4 3544 return sock->ops->getname(sock, addr, 0);
ac5a488e 3545}
c6d409cf 3546EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3547
8a3c245c
PT
3548/**
3549 * kernel_peername - get the address which the socket is connected (kernel space)
3550 * @sock: socket
3551 * @addr: address holder
3552 *
3553 * Fills the @addr pointer with the address which the socket is connected.
3554 * Returns 0 or an error code.
3555 */
3556
9b2c45d4 3557int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3558{
9b2c45d4 3559 return sock->ops->getname(sock, addr, 1);
ac5a488e 3560}
c6d409cf 3561EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3562
8a3c245c
PT
3563/**
3564 * kernel_getsockopt - get a socket option (kernel space)
3565 * @sock: socket
3566 * @level: API level (SOL_SOCKET, ...)
3567 * @optname: option tag
3568 * @optval: option value
3569 * @optlen: option length
3570 *
3571 * Assigns the option length to @optlen.
3572 * Returns 0 or an error.
3573 */
3574
ac5a488e
SS
3575int kernel_getsockopt(struct socket *sock, int level, int optname,
3576 char *optval, int *optlen)
3577{
3578 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3579 char __user *uoptval;
3580 int __user *uoptlen;
ac5a488e
SS
3581 int err;
3582
fb8621bb
NK
3583 uoptval = (char __user __force *) optval;
3584 uoptlen = (int __user __force *) optlen;
3585
ac5a488e
SS
3586 set_fs(KERNEL_DS);
3587 if (level == SOL_SOCKET)
fb8621bb 3588 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3589 else
fb8621bb
NK
3590 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3591 uoptlen);
ac5a488e
SS
3592 set_fs(oldfs);
3593 return err;
3594}
c6d409cf 3595EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3596
8a3c245c
PT
3597/**
3598 * kernel_setsockopt - set a socket option (kernel space)
3599 * @sock: socket
3600 * @level: API level (SOL_SOCKET, ...)
3601 * @optname: option tag
3602 * @optval: option value
3603 * @optlen: option length
3604 *
3605 * Returns 0 or an error.
3606 */
3607
ac5a488e 3608int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3609 char *optval, unsigned int optlen)
ac5a488e
SS
3610{
3611 mm_segment_t oldfs = get_fs();
fb8621bb 3612 char __user *uoptval;
ac5a488e
SS
3613 int err;
3614
fb8621bb
NK
3615 uoptval = (char __user __force *) optval;
3616
ac5a488e
SS
3617 set_fs(KERNEL_DS);
3618 if (level == SOL_SOCKET)
fb8621bb 3619 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3620 else
fb8621bb 3621 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3622 optlen);
3623 set_fs(oldfs);
3624 return err;
3625}
c6d409cf 3626EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3627
8a3c245c
PT
3628/**
3629 * kernel_sendpage - send a &page through a socket (kernel space)
3630 * @sock: socket
3631 * @page: page
3632 * @offset: page offset
3633 * @size: total size in bytes
3634 * @flags: flags (MSG_DONTWAIT, ...)
3635 *
3636 * Returns the total amount sent in bytes or an error.
3637 */
3638
ac5a488e
SS
3639int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3640 size_t size, int flags)
3641{
3642 if (sock->ops->sendpage)
3643 return sock->ops->sendpage(sock, page, offset, size, flags);
3644
3645 return sock_no_sendpage(sock, page, offset, size, flags);
3646}
c6d409cf 3647EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3648
8a3c245c
PT
3649/**
3650 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3651 * @sk: sock
3652 * @page: page
3653 * @offset: page offset
3654 * @size: total size in bytes
3655 * @flags: flags (MSG_DONTWAIT, ...)
3656 *
3657 * Returns the total amount sent in bytes or an error.
3658 * Caller must hold @sk.
3659 */
3660
306b13eb
TH
3661int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3662 size_t size, int flags)
3663{
3664 struct socket *sock = sk->sk_socket;
3665
3666 if (sock->ops->sendpage_locked)
3667 return sock->ops->sendpage_locked(sk, page, offset, size,
3668 flags);
3669
3670 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3671}
3672EXPORT_SYMBOL(kernel_sendpage_locked);
3673
8a3c245c
PT
3674/**
3675 * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3676 * @sock: socket
3677 * @how: connection part
3678 *
3679 * Returns 0 or an error.
3680 */
3681
91cf45f0
TM
3682int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3683{
3684 return sock->ops->shutdown(sock, how);
3685}
91cf45f0 3686EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3687
8a3c245c
PT
3688/**
3689 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3690 * @sk: socket
3691 *
3692 * This routine returns the IP overhead imposed by a socket i.e.
3693 * the length of the underlying IP header, depending on whether
3694 * this is an IPv4 or IPv6 socket and the length from IP options turned
3695 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3696 */
8a3c245c 3697
113c3075
P
3698u32 kernel_sock_ip_overhead(struct sock *sk)
3699{
3700 struct inet_sock *inet;
3701 struct ip_options_rcu *opt;
3702 u32 overhead = 0;
113c3075
P
3703#if IS_ENABLED(CONFIG_IPV6)
3704 struct ipv6_pinfo *np;
3705 struct ipv6_txoptions *optv6 = NULL;
3706#endif /* IS_ENABLED(CONFIG_IPV6) */
3707
3708 if (!sk)
3709 return overhead;
3710
113c3075
P
3711 switch (sk->sk_family) {
3712 case AF_INET:
3713 inet = inet_sk(sk);
3714 overhead += sizeof(struct iphdr);
3715 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3716 sock_owned_by_user(sk));
113c3075
P
3717 if (opt)
3718 overhead += opt->opt.optlen;
3719 return overhead;
3720#if IS_ENABLED(CONFIG_IPV6)
3721 case AF_INET6:
3722 np = inet6_sk(sk);
3723 overhead += sizeof(struct ipv6hdr);
3724 if (np)
3725 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3726 sock_owned_by_user(sk));
113c3075
P
3727 if (optv6)
3728 overhead += (optv6->opt_flen + optv6->opt_nflen);
3729 return overhead;
3730#endif /* IS_ENABLED(CONFIG_IPV6) */
3731 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3732 return overhead;
3733 }
3734}
3735EXPORT_SYMBOL(kernel_sock_ip_overhead);