Merge branch 'pie-next'
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
c8e8cd57 92#include <linux/nospec.h>
1da177e4 93
7c0f6ba6 94#include <linux/uaccess.h>
1da177e4
LT
95#include <asm/unistd.h>
96
97#include <net/compat.h>
87de87d5 98#include <net/wext.h>
f8451725 99#include <net/cls_cgroup.h>
1da177e4
LT
100
101#include <net/sock.h>
102#include <linux/netfilter.h>
103
6b96018b
AB
104#include <linux/if_tun.h>
105#include <linux/ipv6_route.h>
106#include <linux/route.h>
6b96018b 107#include <linux/sockios.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
/* Busy-polling tunables, built only when CONFIG_NET_RX_BUSY_POLL is set. */
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_poll __read_mostly;
unsigned int sysctl_net_busy_read __read_mostly;
#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
121static __poll_t sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4 165/*
89bddce5
SH
166 * Support routines.
167 * Move socket addresses back and forth across the kernel/user
168 * divide and look after the messy bits.
1da177e4
LT
169 */
170
1da177e4
LT
171/**
172 * move_addr_to_kernel - copy a socket address into kernel space
173 * @uaddr: Address in user space
174 * @kaddr: Address in kernel space
175 * @ulen: Length in user space
176 *
177 * The address is copied into kernel space. If the provided address is
178 * too long an error code of -EINVAL is returned. If the copy gives
179 * invalid addresses -EFAULT is returned. On a success 0 is returned.
180 */
181
43db362d 182int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 183{
230b1839 184 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 185 return -EINVAL;
89bddce5 186 if (ulen == 0)
1da177e4 187 return 0;
89bddce5 188 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 189 return -EFAULT;
3ec3b2fb 190 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
191}
192
193/**
194 * move_addr_to_user - copy an address to user space
195 * @kaddr: kernel space address
196 * @klen: length of address in kernel
197 * @uaddr: user space address
198 * @ulen: pointer to user length field
199 *
200 * The value pointed to by ulen on entry is the buffer length available.
201 * This is overwritten with the buffer space used. -EINVAL is returned
202 * if an overlong buffer is specified or a negative buffer size. -EFAULT
203 * is returned if either the buffer or the length field are not
204 * accessible.
205 * After copying the data up to the limit the user specifies, the true
206 * length of the data is written over the length limit the user
207 * specified. Zero is returned for a success.
208 */
89bddce5 209
43db362d 210static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 211 void __user *uaddr, int __user *ulen)
1da177e4
LT
212{
213 int err;
214 int len;
215
68c6beb3 216 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
217 err = get_user(len, ulen);
218 if (err)
1da177e4 219 return err;
89bddce5
SH
220 if (len > klen)
221 len = klen;
68c6beb3 222 if (len < 0)
1da177e4 223 return -EINVAL;
89bddce5 224 if (len) {
d6fe3945
SG
225 if (audit_sockaddr(klen, kaddr))
226 return -ENOMEM;
89bddce5 227 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
228 return -EFAULT;
229 }
230 /*
89bddce5
SH
231 * "fromlen shall refer to the value before truncation.."
232 * 1003.1g
1da177e4
LT
233 */
234 return __put_user(klen, ulen);
235}
236
08009a76 237static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
238
239static struct inode *sock_alloc_inode(struct super_block *sb)
240{
241 struct socket_alloc *ei;
eaefd110 242 struct socket_wq *wq;
89bddce5 243
e94b1766 244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
245 if (!ei)
246 return NULL;
eaefd110
ED
247 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
248 if (!wq) {
43815482
ED
249 kmem_cache_free(sock_inode_cachep, ei);
250 return NULL;
251 }
eaefd110
ED
252 init_waitqueue_head(&wq->wait);
253 wq->fasync_list = NULL;
574aab1e 254 wq->flags = 0;
e6476c21 255 ei->socket.wq = wq;
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482
ED
268 struct socket_alloc *ei;
269
270 ei = container_of(inode, struct socket_alloc, vfs_inode);
e6476c21 271 kfree_rcu(ei->socket.wq, rcu);
43815482 272 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
273}
274
51cc5068 275static void init_once(void *foo)
1da177e4 276{
89bddce5 277 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 278
a35afb83 279 inode_init_once(&ei->vfs_inode);
1da177e4 280}
89bddce5 281
1e911632 282static void init_inodecache(void)
1da177e4
LT
283{
284 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
285 sizeof(struct socket_alloc),
286 0,
287 (SLAB_HWCACHE_ALIGN |
288 SLAB_RECLAIM_ACCOUNT |
5d097056 289 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 290 init_once);
1e911632 291 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
292}
293
b87221de 294static const struct super_operations sockfs_ops = {
c6d409cf
ED
295 .alloc_inode = sock_alloc_inode,
296 .destroy_inode = sock_destroy_inode,
297 .statfs = simple_statfs,
1da177e4
LT
298};
299
c23fbb6b
ED
300/*
301 * sockfs_dname() is called from d_path().
302 */
303static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
304{
305 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 306 d_inode(dentry)->i_ino);
c23fbb6b
ED
307}
308
3ba13d17 309static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 310 .d_dname = sockfs_dname,
1da177e4
LT
311};
312
bba0bd31
AG
313static int sockfs_xattr_get(const struct xattr_handler *handler,
314 struct dentry *dentry, struct inode *inode,
315 const char *suffix, void *value, size_t size)
316{
317 if (value) {
318 if (dentry->d_name.len + 1 > size)
319 return -ERANGE;
320 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
321 }
322 return dentry->d_name.len + 1;
323}
324
325#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
326#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
327#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
328
329static const struct xattr_handler sockfs_xattr_handler = {
330 .name = XATTR_NAME_SOCKPROTONAME,
331 .get = sockfs_xattr_get,
332};
333
4a590153
AG
334static int sockfs_security_xattr_set(const struct xattr_handler *handler,
335 struct dentry *dentry, struct inode *inode,
336 const char *suffix, const void *value,
337 size_t size, int flags)
338{
339 /* Handled by LSM. */
340 return -EAGAIN;
341}
342
343static const struct xattr_handler sockfs_security_xattr_handler = {
344 .prefix = XATTR_SECURITY_PREFIX,
345 .set = sockfs_security_xattr_set,
346};
347
bba0bd31
AG
348static const struct xattr_handler *sockfs_xattr_handlers[] = {
349 &sockfs_xattr_handler,
4a590153 350 &sockfs_security_xattr_handler,
bba0bd31
AG
351 NULL
352};
353
c74a1cbb
AV
354static struct dentry *sockfs_mount(struct file_system_type *fs_type,
355 int flags, const char *dev_name, void *data)
356{
bba0bd31
AG
357 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
358 sockfs_xattr_handlers,
359 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
360}
361
362static struct vfsmount *sock_mnt __read_mostly;
363
364static struct file_system_type sock_fs_type = {
365 .name = "sockfs",
366 .mount = sockfs_mount,
367 .kill_sb = kill_anon_super,
368};
369
1da177e4
LT
370/*
371 * Obtains the first available file descriptor and sets it up for use.
372 *
39d8c1b6
DM
373 * These functions create file structures and maps them to fd space
374 * of the current process. On success it returns file descriptor
1da177e4
LT
375 * and file struct implicitly stored in sock->file.
376 * Note that another thread may close file descriptor before we return
377 * from this function. We use the fact that now we do not refer
378 * to socket after mapping. If one day we will need it, this
379 * function will increment ref. count on file by 1.
380 *
381 * In any case returned fd MAY BE not valid!
382 * This race condition is unavoidable
383 * with shared fd spaces, we cannot solve it inside kernel,
384 * but we take care of internal coherence yet.
385 */
386
aab174f0 387struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 388{
7cbe66b6 389 struct file *file;
1da177e4 390
d93aa9d8
AV
391 if (!dname)
392 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 393
d93aa9d8
AV
394 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
395 O_RDWR | (flags & O_NONBLOCK),
396 &socket_file_ops);
b5ffe634 397 if (IS_ERR(file)) {
8e1611e2 398 sock_release(sock);
39b65252 399 return file;
cc3808f8
AV
400 }
401
402 sock->file = file;
39d8c1b6 403 file->private_data = sock;
28407630 404 return file;
39d8c1b6 405}
56b31d1c 406EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 407
56b31d1c 408static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
409{
410 struct file *newfile;
28407630 411 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
412 if (unlikely(fd < 0)) {
413 sock_release(sock);
28407630 414 return fd;
ce4bb04c 415 }
39d8c1b6 416
aab174f0 417 newfile = sock_alloc_file(sock, flags, NULL);
28407630 418 if (likely(!IS_ERR(newfile))) {
39d8c1b6 419 fd_install(fd, newfile);
28407630
AV
420 return fd;
421 }
7cbe66b6 422
28407630
AV
423 put_unused_fd(fd);
424 return PTR_ERR(newfile);
1da177e4
LT
425}
426
406a3c63 427struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 428{
6cb153ca
BL
429 if (file->f_op == &socket_file_ops)
430 return file->private_data; /* set in sock_map_fd */
431
23bb80d2
ED
432 *err = -ENOTSOCK;
433 return NULL;
6cb153ca 434}
406a3c63 435EXPORT_SYMBOL(sock_from_file);
6cb153ca 436
1da177e4 437/**
c6d409cf 438 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
439 * @fd: file handle
440 * @err: pointer to an error code return
441 *
442 * The file handle passed in is locked and the socket it is bound
241c4667 443 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
444 * with a negative errno code and NULL is returned. The function checks
445 * for both invalid handles and passing a handle which is not a socket.
446 *
447 * On a success the socket object pointer is returned.
448 */
449
450struct socket *sockfd_lookup(int fd, int *err)
451{
452 struct file *file;
1da177e4
LT
453 struct socket *sock;
454
89bddce5
SH
455 file = fget(fd);
456 if (!file) {
1da177e4
LT
457 *err = -EBADF;
458 return NULL;
459 }
89bddce5 460
6cb153ca
BL
461 sock = sock_from_file(file, err);
462 if (!sock)
1da177e4 463 fput(file);
6cb153ca
BL
464 return sock;
465}
c6d409cf 466EXPORT_SYMBOL(sockfd_lookup);
1da177e4 467
6cb153ca
BL
468static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
469{
00e188ef 470 struct fd f = fdget(fd);
6cb153ca
BL
471 struct socket *sock;
472
3672558c 473 *err = -EBADF;
00e188ef
AV
474 if (f.file) {
475 sock = sock_from_file(f.file, err);
476 if (likely(sock)) {
477 *fput_needed = f.flags;
6cb153ca 478 return sock;
00e188ef
AV
479 }
480 fdput(f);
1da177e4 481 }
6cb153ca 482 return NULL;
1da177e4
LT
483}
484
600e1779
MY
485static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
486 size_t size)
487{
488 ssize_t len;
489 ssize_t used = 0;
490
c5ef6035 491 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
492 if (len < 0)
493 return len;
494 used += len;
495 if (buffer) {
496 if (size < used)
497 return -ERANGE;
498 buffer += len;
499 }
500
501 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
502 used += len;
503 if (buffer) {
504 if (size < used)
505 return -ERANGE;
506 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
507 buffer += len;
508 }
509
510 return used;
511}
512
dc647ec8 513static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
514{
515 int err = simple_setattr(dentry, iattr);
516
e1a3a60a 517 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
518 struct socket *sock = SOCKET_I(d_inode(dentry));
519
6d8c50dc
CW
520 if (sock->sk)
521 sock->sk->sk_uid = iattr->ia_uid;
522 else
523 err = -ENOENT;
86741ec2
LC
524 }
525
526 return err;
527}
528
600e1779 529static const struct inode_operations sockfs_inode_ops = {
600e1779 530 .listxattr = sockfs_listxattr,
86741ec2 531 .setattr = sockfs_setattr,
600e1779
MY
532};
533
1da177e4
LT
534/**
535 * sock_alloc - allocate a socket
89bddce5 536 *
1da177e4
LT
537 * Allocate a new inode and socket object. The two are bound together
538 * and initialised. The socket is then returned. If we are out of inodes
539 * NULL is returned.
540 */
541
f4a00aac 542struct socket *sock_alloc(void)
1da177e4 543{
89bddce5
SH
544 struct inode *inode;
545 struct socket *sock;
1da177e4 546
a209dfc7 547 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
548 if (!inode)
549 return NULL;
550
551 sock = SOCKET_I(inode);
552
85fe4025 553 inode->i_ino = get_next_ino();
89bddce5 554 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
555 inode->i_uid = current_fsuid();
556 inode->i_gid = current_fsgid();
600e1779 557 inode->i_op = &sockfs_inode_ops;
1da177e4 558
1da177e4
LT
559 return sock;
560}
f4a00aac 561EXPORT_SYMBOL(sock_alloc);
1da177e4 562
1da177e4
LT
563/**
564 * sock_release - close a socket
565 * @sock: socket to close
566 *
567 * The socket is released from the protocol stack if it has a release
568 * callback, and the inode is then released if the socket is bound to
89bddce5 569 * an inode not a file.
1da177e4 570 */
89bddce5 571
6d8c50dc 572static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
573{
574 if (sock->ops) {
575 struct module *owner = sock->ops->owner;
576
6d8c50dc
CW
577 if (inode)
578 inode_lock(inode);
1da177e4 579 sock->ops->release(sock);
6d8c50dc
CW
580 if (inode)
581 inode_unlock(inode);
1da177e4
LT
582 sock->ops = NULL;
583 module_put(owner);
584 }
585
e6476c21 586 if (sock->wq->fasync_list)
3410f22e 587 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 588
1da177e4
LT
589 if (!sock->file) {
590 iput(SOCK_INODE(sock));
591 return;
592 }
89bddce5 593 sock->file = NULL;
1da177e4 594}
6d8c50dc
CW
595
596void sock_release(struct socket *sock)
597{
598 __sock_release(sock, NULL);
599}
c6d409cf 600EXPORT_SYMBOL(sock_release);
1da177e4 601
c14ac945 602void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 603{
140c55d4
ED
604 u8 flags = *tx_flags;
605
c14ac945 606 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
607 flags |= SKBTX_HW_TSTAMP;
608
c14ac945 609 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
610 flags |= SKBTX_SW_TSTAMP;
611
c14ac945 612 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
613 flags |= SKBTX_SCHED_TSTAMP;
614
140c55d4 615 *tx_flags = flags;
20d49473 616}
67cc0d40 617EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 618
d8725c86 619static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 620{
01e97e65 621 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
622 BUG_ON(ret == -EIOCBQUEUED);
623 return ret;
1da177e4
LT
624}
625
d8725c86 626int sock_sendmsg(struct socket *sock, struct msghdr *msg)
228e548e 627{
d8725c86 628 int err = security_socket_sendmsg(sock, msg,
01e97e65 629 msg_data_left(msg));
228e548e 630
d8725c86 631 return err ?: sock_sendmsg_nosec(sock, msg);
0cf00c6f 632}
c6d409cf 633EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
634
635int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
636 struct kvec *vec, size_t num, size_t size)
637{
aa563d7b 638 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 639 return sock_sendmsg(sock, msg);
1da177e4 640}
c6d409cf 641EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 642
306b13eb
TH
643int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
644 struct kvec *vec, size_t num, size_t size)
645{
646 struct socket *sock = sk->sk_socket;
647
648 if (!sock->ops->sendmsg_locked)
db5980d8 649 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 650
aa563d7b 651 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
652
653 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
654}
655EXPORT_SYMBOL(kernel_sendmsg_locked);
656
8605330a
SHY
657static bool skb_is_err_queue(const struct sk_buff *skb)
658{
659 /* pkt_type of skbs enqueued on the error queue are set to
660 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
661 * in recvmsg, since skbs received on a local socket will never
662 * have a pkt_type of PACKET_OUTGOING.
663 */
664 return skb->pkt_type == PACKET_OUTGOING;
665}
666
b50a5c70
ML
667/* On transmit, software and hardware timestamps are returned independently.
668 * As the two skb clones share the hardware timestamp, which may be updated
669 * before the software timestamp is received, a hardware TX timestamp may be
670 * returned only if there is no software TX timestamp. Ignore false software
671 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 672 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
673 * hardware timestamp.
674 */
675static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
676{
677 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
678}
679
aad9c8c4
ML
680static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
681{
682 struct scm_ts_pktinfo ts_pktinfo;
683 struct net_device *orig_dev;
684
685 if (!skb_mac_header_was_set(skb))
686 return;
687
688 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
689
690 rcu_read_lock();
691 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
692 if (orig_dev)
693 ts_pktinfo.if_index = orig_dev->ifindex;
694 rcu_read_unlock();
695
696 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
697 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
698 sizeof(ts_pktinfo), &ts_pktinfo);
699}
700
92f37fd2
ED
701/*
702 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
703 */
704void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
705 struct sk_buff *skb)
706{
20d49473 707 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 708 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
709 struct scm_timestamping_internal tss;
710
b50a5c70 711 int empty = 1, false_tstamp = 0;
20d49473
PO
712 struct skb_shared_hwtstamps *shhwtstamps =
713 skb_hwtstamps(skb);
714
715 /* Race occurred between timestamp enabling and packet
716 receiving. Fill in the current time for now. */
b50a5c70 717 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 718 __net_timestamp(skb);
b50a5c70
ML
719 false_tstamp = 1;
720 }
20d49473
PO
721
722 if (need_software_tstamp) {
723 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
724 if (new_tstamp) {
725 struct __kernel_sock_timeval tv;
726
727 skb_get_new_timestamp(skb, &tv);
728 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
729 sizeof(tv), &tv);
730 } else {
731 struct __kernel_old_timeval tv;
732
733 skb_get_timestamp(skb, &tv);
734 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
735 sizeof(tv), &tv);
736 }
20d49473 737 } else {
887feae3
DD
738 if (new_tstamp) {
739 struct __kernel_timespec ts;
740
741 skb_get_new_timestampns(skb, &ts);
742 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
743 sizeof(ts), &ts);
744 } else {
745 struct timespec ts;
746
747 skb_get_timestampns(skb, &ts);
748 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
749 sizeof(ts), &ts);
750 }
20d49473
PO
751 }
752 }
753
f24b9be5 754 memset(&tss, 0, sizeof(tss));
c199105d 755 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 756 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 757 empty = 0;
4d276eb6 758 if (shhwtstamps &&
b9f40e21 759 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 760 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 761 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 762 empty = 0;
aad9c8c4
ML
763 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
764 !skb_is_err_queue(skb))
765 put_ts_pktinfo(msg, skb);
766 }
1c885808 767 if (!empty) {
9718475e
DD
768 if (sock_flag(sk, SOCK_TSTAMP_NEW))
769 put_cmsg_scm_timestamping64(msg, &tss);
770 else
771 put_cmsg_scm_timestamping(msg, &tss);
1c885808 772
8605330a 773 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 774 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
775 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
776 skb->len, skb->data);
777 }
92f37fd2 778}
7c81fd8b
ACM
779EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
780
6e3e939f
JB
781void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
782 struct sk_buff *skb)
783{
784 int ack;
785
786 if (!sock_flag(sk, SOCK_WIFI_STATUS))
787 return;
788 if (!skb->wifi_acked_valid)
789 return;
790
791 ack = skb->wifi_acked;
792
793 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
794}
795EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
796
11165f14 797static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
798 struct sk_buff *skb)
3b885787 799{
744d5a3e 800 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 801 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 802 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
803}
804
767dd033 805void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
3b885787
NH
806 struct sk_buff *skb)
807{
808 sock_recv_timestamp(msg, sk, skb);
809 sock_recv_drops(msg, sk, skb);
810}
767dd033 811EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 812
1b784140 813static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 814 int flags)
1da177e4 815{
2da62906 816 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
817}
818
2da62906 819int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
a2e27255 820{
2da62906 821 int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
a2e27255 822
2da62906 823 return err ?: sock_recvmsg_nosec(sock, msg, flags);
1da177e4 824}
c6d409cf 825EXPORT_SYMBOL(sock_recvmsg);
1da177e4 826
c1249c0a
ML
827/**
828 * kernel_recvmsg - Receive a message from a socket (kernel space)
829 * @sock: The socket to receive the message from
830 * @msg: Received message
831 * @vec: Input s/g array for message data
832 * @num: Size of input s/g array
833 * @size: Number of bytes to read
834 * @flags: Message flags (MSG_DONTWAIT, etc...)
835 *
836 * On return the msg structure contains the scatter/gather array passed in the
837 * vec argument. The array is modified so that it consists of the unfilled
838 * portion of the original array.
839 *
840 * The returned value is the total number of bytes received, or an error.
841 */
89bddce5
SH
842int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
843 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
844{
845 mm_segment_t oldfs = get_fs();
846 int result;
847
aa563d7b 848 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 849 set_fs(KERNEL_DS);
2da62906 850 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
851 set_fs(oldfs);
852 return result;
853}
c6d409cf 854EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 855
ce1d4d3e
CH
856static ssize_t sock_sendpage(struct file *file, struct page *page,
857 int offset, size_t size, loff_t *ppos, int more)
1da177e4 858{
1da177e4
LT
859 struct socket *sock;
860 int flags;
861
ce1d4d3e
CH
862 sock = file->private_data;
863
35f9c09f
ED
864 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
865 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
866 flags |= more;
ce1d4d3e 867
e6949583 868 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 869}
1da177e4 870
9c55e01c 871static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 872 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
873 unsigned int flags)
874{
875 struct socket *sock = file->private_data;
876
997b37da 877 if (unlikely(!sock->ops->splice_read))
95506588 878 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 879
9c55e01c
JA
880 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
881}
882
8ae5e030 883static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 884{
6d652330
AV
885 struct file *file = iocb->ki_filp;
886 struct socket *sock = file->private_data;
0345f931 887 struct msghdr msg = {.msg_iter = *to,
888 .msg_iocb = iocb};
8ae5e030 889 ssize_t res;
ce1d4d3e 890
8ae5e030
AV
891 if (file->f_flags & O_NONBLOCK)
892 msg.msg_flags = MSG_DONTWAIT;
893
894 if (iocb->ki_pos != 0)
1da177e4 895 return -ESPIPE;
027445c3 896
66ee59af 897 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
898 return 0;
899
2da62906 900 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
901 *to = msg.msg_iter;
902 return res;
1da177e4
LT
903}
904
8ae5e030 905static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 906{
6d652330
AV
907 struct file *file = iocb->ki_filp;
908 struct socket *sock = file->private_data;
0345f931 909 struct msghdr msg = {.msg_iter = *from,
910 .msg_iocb = iocb};
8ae5e030 911 ssize_t res;
1da177e4 912
8ae5e030 913 if (iocb->ki_pos != 0)
ce1d4d3e 914 return -ESPIPE;
027445c3 915
8ae5e030
AV
916 if (file->f_flags & O_NONBLOCK)
917 msg.msg_flags = MSG_DONTWAIT;
918
6d652330
AV
919 if (sock->type == SOCK_SEQPACKET)
920 msg.msg_flags |= MSG_EOR;
921
d8725c86 922 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
923 *from = msg.msg_iter;
924 return res;
1da177e4
LT
925}
926
1da177e4
LT
927/*
928 * Atomic setting of ioctl hooks to avoid race
929 * with module unload.
930 */
931
4a3e2f71 932static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 933static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 934
881d966b 935void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 936{
4a3e2f71 937 mutex_lock(&br_ioctl_mutex);
1da177e4 938 br_ioctl_hook = hook;
4a3e2f71 939 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
940}
941EXPORT_SYMBOL(brioctl_set);
942
4a3e2f71 943static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 944static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 945
881d966b 946void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 947{
4a3e2f71 948 mutex_lock(&vlan_ioctl_mutex);
1da177e4 949 vlan_ioctl_hook = hook;
4a3e2f71 950 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
951}
952EXPORT_SYMBOL(vlan_ioctl_set);
953
4a3e2f71 954static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 955static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 956
89bddce5 957void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 958{
4a3e2f71 959 mutex_lock(&dlci_ioctl_mutex);
1da177e4 960 dlci_ioctl_hook = hook;
4a3e2f71 961 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
962}
963EXPORT_SYMBOL(dlci_ioctl_set);
964
6b96018b 965static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 966 unsigned int cmd, unsigned long arg)
6b96018b
AB
967{
968 int err;
969 void __user *argp = (void __user *)arg;
970
971 err = sock->ops->ioctl(sock, cmd, arg);
972
973 /*
974 * If this ioctl is unknown try to hand it down
975 * to the NIC driver.
976 */
36fd633e
AV
977 if (err != -ENOIOCTLCMD)
978 return err;
6b96018b 979
36fd633e
AV
980 if (cmd == SIOCGIFCONF) {
981 struct ifconf ifc;
982 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
983 return -EFAULT;
984 rtnl_lock();
985 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
986 rtnl_unlock();
987 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
988 err = -EFAULT;
44c02a2c
AV
989 } else {
990 struct ifreq ifr;
991 bool need_copyout;
63ff03ab 992 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
993 return -EFAULT;
994 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
995 if (!err && need_copyout)
63ff03ab 996 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 997 return -EFAULT;
36fd633e 998 }
6b96018b
AB
999 return err;
1000}
1001
1da177e4
LT
1002/*
1003 * With an ioctl, arg may well be a user mode pointer, but we don't know
1004 * what to do with it - that's up to the protocol still.
1005 */
1006
d8d211a2 1007struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1008{
1009 return &get_net(container_of(ns, struct net, ns))->ns;
1010}
d8d211a2 1011EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1012
1da177e4
LT
1013static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1014{
1015 struct socket *sock;
881d966b 1016 struct sock *sk;
1da177e4
LT
1017 void __user *argp = (void __user *)arg;
1018 int pid, err;
881d966b 1019 struct net *net;
1da177e4 1020
b69aee04 1021 sock = file->private_data;
881d966b 1022 sk = sock->sk;
3b1e0a65 1023 net = sock_net(sk);
44c02a2c
AV
1024 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1025 struct ifreq ifr;
1026 bool need_copyout;
1027 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1028 return -EFAULT;
1029 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1030 if (!err && need_copyout)
1031 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1032 return -EFAULT;
1da177e4 1033 } else
3d23e349 1034#ifdef CONFIG_WEXT_CORE
1da177e4 1035 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1036 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1037 } else
3d23e349 1038#endif
89bddce5 1039 switch (cmd) {
1da177e4
LT
1040 case FIOSETOWN:
1041 case SIOCSPGRP:
1042 err = -EFAULT;
1043 if (get_user(pid, (int __user *)argp))
1044 break;
393cc3f5 1045 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1046 break;
1047 case FIOGETOWN:
1048 case SIOCGPGRP:
609d7fa9 1049 err = put_user(f_getown(sock->file),
89bddce5 1050 (int __user *)argp);
1da177e4
LT
1051 break;
1052 case SIOCGIFBR:
1053 case SIOCSIFBR:
1054 case SIOCBRADDBR:
1055 case SIOCBRDELBR:
1056 err = -ENOPKG;
1057 if (!br_ioctl_hook)
1058 request_module("bridge");
1059
4a3e2f71 1060 mutex_lock(&br_ioctl_mutex);
89bddce5 1061 if (br_ioctl_hook)
881d966b 1062 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1063 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1064 break;
1065 case SIOCGIFVLAN:
1066 case SIOCSIFVLAN:
1067 err = -ENOPKG;
1068 if (!vlan_ioctl_hook)
1069 request_module("8021q");
1070
4a3e2f71 1071 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1072 if (vlan_ioctl_hook)
881d966b 1073 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1074 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1075 break;
1da177e4
LT
1076 case SIOCADDDLCI:
1077 case SIOCDELDLCI:
1078 err = -ENOPKG;
1079 if (!dlci_ioctl_hook)
1080 request_module("dlci");
1081
7512cbf6
PE
1082 mutex_lock(&dlci_ioctl_mutex);
1083 if (dlci_ioctl_hook)
1da177e4 1084 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1085 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1086 break;
c62cce2c
AV
1087 case SIOCGSKNS:
1088 err = -EPERM;
1089 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1090 break;
1091
1092 err = open_related_ns(&net->ns, get_net_ns);
1093 break;
1da177e4 1094 default:
63ff03ab 1095 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1096 break;
89bddce5 1097 }
1da177e4
LT
1098 return err;
1099}
1100
1101int sock_create_lite(int family, int type, int protocol, struct socket **res)
1102{
1103 int err;
1104 struct socket *sock = NULL;
89bddce5 1105
1da177e4
LT
1106 err = security_socket_create(family, type, protocol, 1);
1107 if (err)
1108 goto out;
1109
1110 sock = sock_alloc();
1111 if (!sock) {
1112 err = -ENOMEM;
1113 goto out;
1114 }
1115
1da177e4 1116 sock->type = type;
7420ed23
VY
1117 err = security_socket_post_create(sock, family, type, protocol, 1);
1118 if (err)
1119 goto out_release;
1120
1da177e4
LT
1121out:
1122 *res = sock;
1123 return err;
7420ed23
VY
1124out_release:
1125 sock_release(sock);
1126 sock = NULL;
1127 goto out;
1da177e4 1128}
c6d409cf 1129EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1130
1131/* No kernel lock held - perfect */
ade994f4 1132static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1133{
3cafb376 1134 struct socket *sock = file->private_data;
a331de3b 1135 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1136
e88958e6
CH
1137 if (!sock->ops->poll)
1138 return 0;
f641f13b 1139
a331de3b
CH
1140 if (sk_can_busy_loop(sock->sk)) {
1141 /* poll once if requested by the syscall */
1142 if (events & POLL_BUSY_LOOP)
1143 sk_busy_loop(sock->sk, 1);
1144
1145 /* if this socket can poll_ll, tell the system call */
1146 flag = POLL_BUSY_LOOP;
1147 }
1148
1149 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1150}
1151
89bddce5 1152static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1153{
b69aee04 1154 struct socket *sock = file->private_data;
1da177e4
LT
1155
1156 return sock->ops->mmap(file, sock, vma);
1157}
1158
/*
 * release handler for socket files: releases the socket embedded in
 * @inode via __sock_release().  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1164
1165/*
1166 * Update the socket async list
1167 *
1168 * Fasync_list locking strategy.
1169 *
1170 * 1. fasync_list is modified only under process context socket lock
1171 * i.e. under semaphore.
1172 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1173 * or under socket lock
1da177e4
LT
1174 */
1175
1176static int sock_fasync(int fd, struct file *filp, int on)
1177{
989a2979
ED
1178 struct socket *sock = filp->private_data;
1179 struct sock *sk = sock->sk;
eaefd110 1180 struct socket_wq *wq;
1da177e4 1181
989a2979 1182 if (sk == NULL)
1da177e4 1183 return -EINVAL;
1da177e4
LT
1184
1185 lock_sock(sk);
e6476c21 1186 wq = sock->wq;
eaefd110 1187 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1188
eaefd110 1189 if (!wq->fasync_list)
989a2979
ED
1190 sock_reset_flag(sk, SOCK_FASYNC);
1191 else
bcdce719 1192 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1193
989a2979 1194 release_sock(sk);
1da177e4
LT
1195 return 0;
1196}
1197
ceb5d58b 1198/* This function may be called only under rcu_lock */
1da177e4 1199
ceb5d58b 1200int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1201{
ceb5d58b 1202 if (!wq || !wq->fasync_list)
1da177e4 1203 return -1;
ceb5d58b 1204
89bddce5 1205 switch (how) {
8d8ad9d7 1206 case SOCK_WAKE_WAITD:
ceb5d58b 1207 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1208 break;
1209 goto call_kill;
8d8ad9d7 1210 case SOCK_WAKE_SPACE:
ceb5d58b 1211 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1212 break;
1213 /* fall through */
8d8ad9d7 1214 case SOCK_WAKE_IO:
89bddce5 1215call_kill:
43815482 1216 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1217 break;
8d8ad9d7 1218 case SOCK_WAKE_URG:
43815482 1219 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1220 }
ceb5d58b 1221
1da177e4
LT
1222 return 0;
1223}
c6d409cf 1224EXPORT_SYMBOL(sock_wake_async);
1da177e4 1225
721db93a 1226int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1227 struct socket **res, int kern)
1da177e4
LT
1228{
1229 int err;
1230 struct socket *sock;
55737fda 1231 const struct net_proto_family *pf;
1da177e4
LT
1232
1233 /*
89bddce5 1234 * Check protocol is in range
1da177e4
LT
1235 */
1236 if (family < 0 || family >= NPROTO)
1237 return -EAFNOSUPPORT;
1238 if (type < 0 || type >= SOCK_MAX)
1239 return -EINVAL;
1240
1241 /* Compatibility.
1242
1243 This uglymoron is moved from INET layer to here to avoid
1244 deadlock in module load.
1245 */
1246 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1247 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1248 current->comm);
1da177e4
LT
1249 family = PF_PACKET;
1250 }
1251
1252 err = security_socket_create(family, type, protocol, kern);
1253 if (err)
1254 return err;
89bddce5 1255
55737fda
SH
1256 /*
1257 * Allocate the socket and allow the family to set things up. if
1258 * the protocol is 0, the family is instructed to select an appropriate
1259 * default.
1260 */
1261 sock = sock_alloc();
1262 if (!sock) {
e87cc472 1263 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1264 return -ENFILE; /* Not exactly a match, but its the
1265 closest posix thing */
1266 }
1267
1268 sock->type = type;
1269
95a5afca 1270#ifdef CONFIG_MODULES
89bddce5
SH
1271 /* Attempt to load a protocol module if the find failed.
1272 *
1273 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1274 * requested real, full-featured networking support upon configuration.
1275 * Otherwise module support will break!
1276 */
190683a9 1277 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1278 request_module("net-pf-%d", family);
1da177e4
LT
1279#endif
1280
55737fda
SH
1281 rcu_read_lock();
1282 pf = rcu_dereference(net_families[family]);
1283 err = -EAFNOSUPPORT;
1284 if (!pf)
1285 goto out_release;
1da177e4
LT
1286
1287 /*
1288 * We will call the ->create function, that possibly is in a loadable
1289 * module, so we have to bump that loadable module refcnt first.
1290 */
55737fda 1291 if (!try_module_get(pf->owner))
1da177e4
LT
1292 goto out_release;
1293
55737fda
SH
1294 /* Now protected by module ref count */
1295 rcu_read_unlock();
1296
3f378b68 1297 err = pf->create(net, sock, protocol, kern);
55737fda 1298 if (err < 0)
1da177e4 1299 goto out_module_put;
a79af59e 1300
1da177e4
LT
1301 /*
1302 * Now to bump the refcnt of the [loadable] module that owns this
1303 * socket at sock_release time we decrement its refcnt.
1304 */
55737fda
SH
1305 if (!try_module_get(sock->ops->owner))
1306 goto out_module_busy;
1307
1da177e4
LT
1308 /*
1309 * Now that we're done with the ->create function, the [loadable]
1310 * module can have its refcnt decremented
1311 */
55737fda 1312 module_put(pf->owner);
7420ed23
VY
1313 err = security_socket_post_create(sock, family, type, protocol, kern);
1314 if (err)
3b185525 1315 goto out_sock_release;
55737fda 1316 *res = sock;
1da177e4 1317
55737fda
SH
1318 return 0;
1319
1320out_module_busy:
1321 err = -EAFNOSUPPORT;
1da177e4 1322out_module_put:
55737fda
SH
1323 sock->ops = NULL;
1324 module_put(pf->owner);
1325out_sock_release:
1da177e4 1326 sock_release(sock);
55737fda
SH
1327 return err;
1328
1329out_release:
1330 rcu_read_unlock();
1331 goto out_sock_release;
1da177e4 1332}
721db93a 1333EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1334
1335int sock_create(int family, int type, int protocol, struct socket **res)
1336{
1b8d7ae4 1337 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1338}
c6d409cf 1339EXPORT_SYMBOL(sock_create);
1da177e4 1340
/*
 * Create a socket in @net with kern == 1.  See __sock_create() for the
 * return values.
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
c6d409cf 1345EXPORT_SYMBOL(sock_create_kern);
1da177e4 1346
9d6a15c3 1347int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1348{
1349 int retval;
1350 struct socket *sock;
a677a039
UD
1351 int flags;
1352
e38b36f3
UD
1353 /* Check the SOCK_* constants for consistency. */
1354 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1355 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1356 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1357 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1358
a677a039 1359 flags = type & ~SOCK_TYPE_MASK;
77d27200 1360 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1361 return -EINVAL;
1362 type &= SOCK_TYPE_MASK;
1da177e4 1363
aaca0bdc
UD
1364 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1365 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1366
1da177e4
LT
1367 retval = sock_create(family, type, protocol, &sock);
1368 if (retval < 0)
8e1611e2 1369 return retval;
1da177e4 1370
8e1611e2 1371 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1372}
1373
9d6a15c3
DB
1374SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1375{
1376 return __sys_socket(family, type, protocol);
1377}
1378
1da177e4
LT
1379/*
1380 * Create a pair of connected sockets.
1381 */
1382
6debc8d8 1383int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1384{
1385 struct socket *sock1, *sock2;
1386 int fd1, fd2, err;
db349509 1387 struct file *newfile1, *newfile2;
a677a039
UD
1388 int flags;
1389
1390 flags = type & ~SOCK_TYPE_MASK;
77d27200 1391 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1392 return -EINVAL;
1393 type &= SOCK_TYPE_MASK;
1da177e4 1394
aaca0bdc
UD
1395 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1396 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1397
016a266b
AV
1398 /*
1399 * reserve descriptors and make sure we won't fail
1400 * to return them to userland.
1401 */
1402 fd1 = get_unused_fd_flags(flags);
1403 if (unlikely(fd1 < 0))
1404 return fd1;
1405
1406 fd2 = get_unused_fd_flags(flags);
1407 if (unlikely(fd2 < 0)) {
1408 put_unused_fd(fd1);
1409 return fd2;
1410 }
1411
1412 err = put_user(fd1, &usockvec[0]);
1413 if (err)
1414 goto out;
1415
1416 err = put_user(fd2, &usockvec[1]);
1417 if (err)
1418 goto out;
1419
1da177e4
LT
1420 /*
1421 * Obtain the first socket and check if the underlying protocol
1422 * supports the socketpair call.
1423 */
1424
1425 err = sock_create(family, type, protocol, &sock1);
016a266b 1426 if (unlikely(err < 0))
1da177e4
LT
1427 goto out;
1428
1429 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1430 if (unlikely(err < 0)) {
1431 sock_release(sock1);
1432 goto out;
bf3c23d1 1433 }
d73aa286 1434
d47cd945
DH
1435 err = security_socket_socketpair(sock1, sock2);
1436 if (unlikely(err)) {
1437 sock_release(sock2);
1438 sock_release(sock1);
1439 goto out;
1440 }
1441
016a266b
AV
1442 err = sock1->ops->socketpair(sock1, sock2);
1443 if (unlikely(err < 0)) {
1444 sock_release(sock2);
1445 sock_release(sock1);
1446 goto out;
28407630
AV
1447 }
1448
aab174f0 1449 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1450 if (IS_ERR(newfile1)) {
28407630 1451 err = PTR_ERR(newfile1);
016a266b
AV
1452 sock_release(sock2);
1453 goto out;
28407630
AV
1454 }
1455
aab174f0 1456 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1457 if (IS_ERR(newfile2)) {
1458 err = PTR_ERR(newfile2);
016a266b
AV
1459 fput(newfile1);
1460 goto out;
db349509
AV
1461 }
1462
157cf649 1463 audit_fd_pair(fd1, fd2);
d73aa286 1464
db349509
AV
1465 fd_install(fd1, newfile1);
1466 fd_install(fd2, newfile2);
d73aa286 1467 return 0;
1da177e4 1468
016a266b 1469out:
d73aa286 1470 put_unused_fd(fd2);
d73aa286 1471 put_unused_fd(fd1);
1da177e4
LT
1472 return err;
1473}
1474
6debc8d8
DB
1475SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1476 int __user *, usockvec)
1477{
1478 return __sys_socketpair(family, type, protocol, usockvec);
1479}
1480
1da177e4
LT
1481/*
1482 * Bind a name to a socket. Nothing much to do here since it's
1483 * the protocol's responsibility to handle the local address.
1484 *
1485 * We move the socket address to kernel space before we call
1486 * the protocol layer (having also checked the address is ok).
1487 */
1488
a87d35d8 1489int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1490{
1491 struct socket *sock;
230b1839 1492 struct sockaddr_storage address;
6cb153ca 1493 int err, fput_needed;
1da177e4 1494
89bddce5 1495 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1496 if (sock) {
43db362d 1497 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1498 if (!err) {
89bddce5 1499 err = security_socket_bind(sock,
230b1839 1500 (struct sockaddr *)&address,
89bddce5 1501 addrlen);
6cb153ca
BL
1502 if (!err)
1503 err = sock->ops->bind(sock,
89bddce5 1504 (struct sockaddr *)
230b1839 1505 &address, addrlen);
1da177e4 1506 }
6cb153ca 1507 fput_light(sock->file, fput_needed);
89bddce5 1508 }
1da177e4
LT
1509 return err;
1510}
1511
a87d35d8
DB
1512SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1513{
1514 return __sys_bind(fd, umyaddr, addrlen);
1515}
1516
1da177e4
LT
1517/*
1518 * Perform a listen. Basically, we allow the protocol to do anything
1519 * necessary for a listen, and if that works, we mark the socket as
1520 * ready for listening.
1521 */
1522
25e290ee 1523int __sys_listen(int fd, int backlog)
1da177e4
LT
1524{
1525 struct socket *sock;
6cb153ca 1526 int err, fput_needed;
b8e1f9b5 1527 int somaxconn;
89bddce5
SH
1528
1529 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1530 if (sock) {
8efa6e93 1531 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1532 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1533 backlog = somaxconn;
1da177e4
LT
1534
1535 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1536 if (!err)
1537 err = sock->ops->listen(sock, backlog);
1da177e4 1538
6cb153ca 1539 fput_light(sock->file, fput_needed);
1da177e4
LT
1540 }
1541 return err;
1542}
1543
25e290ee
DB
1544SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1545{
1546 return __sys_listen(fd, backlog);
1547}
1548
1da177e4
LT
1549/*
1550 * For accept, we attempt to create a new socket, set up the link
1551 * with the client, wake up the client, then return the new
1552 * connected fd. We collect the address of the connector in kernel
1553 * space and move it to user at the very end. This is unclean because
1554 * we open the socket then return an error.
1555 *
1556 * 1003.1g adds the ability to recvmsg() to query connection pending
1557 * status to recvmsg. We need to add that support in a way that's
b903036a 1558 * clean when we restructure accept also.
1da177e4
LT
1559 */
1560
4541e805
DB
1561int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1562 int __user *upeer_addrlen, int flags)
1da177e4
LT
1563{
1564 struct socket *sock, *newsock;
39d8c1b6 1565 struct file *newfile;
6cb153ca 1566 int err, len, newfd, fput_needed;
230b1839 1567 struct sockaddr_storage address;
1da177e4 1568
77d27200 1569 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1570 return -EINVAL;
1571
1572 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1573 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1574
6cb153ca 1575 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1576 if (!sock)
1577 goto out;
1578
1579 err = -ENFILE;
c6d409cf
ED
1580 newsock = sock_alloc();
1581 if (!newsock)
1da177e4
LT
1582 goto out_put;
1583
1584 newsock->type = sock->type;
1585 newsock->ops = sock->ops;
1586
1da177e4
LT
1587 /*
1588 * We don't need try_module_get here, as the listening socket (sock)
1589 * has the protocol module (sock->ops->owner) held.
1590 */
1591 __module_get(newsock->ops->owner);
1592
28407630 1593 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1594 if (unlikely(newfd < 0)) {
1595 err = newfd;
9a1875e6
DM
1596 sock_release(newsock);
1597 goto out_put;
39d8c1b6 1598 }
aab174f0 1599 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1600 if (IS_ERR(newfile)) {
28407630
AV
1601 err = PTR_ERR(newfile);
1602 put_unused_fd(newfd);
28407630
AV
1603 goto out_put;
1604 }
39d8c1b6 1605
a79af59e
FF
1606 err = security_socket_accept(sock, newsock);
1607 if (err)
39d8c1b6 1608 goto out_fd;
a79af59e 1609
cdfbabfb 1610 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1611 if (err < 0)
39d8c1b6 1612 goto out_fd;
1da177e4
LT
1613
1614 if (upeer_sockaddr) {
9b2c45d4
DV
1615 len = newsock->ops->getname(newsock,
1616 (struct sockaddr *)&address, 2);
1617 if (len < 0) {
1da177e4 1618 err = -ECONNABORTED;
39d8c1b6 1619 goto out_fd;
1da177e4 1620 }
43db362d 1621 err = move_addr_to_user(&address,
230b1839 1622 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1623 if (err < 0)
39d8c1b6 1624 goto out_fd;
1da177e4
LT
1625 }
1626
1627 /* File flags are not inherited via accept() unlike another OSes. */
1628
39d8c1b6
DM
1629 fd_install(newfd, newfile);
1630 err = newfd;
1da177e4 1631
1da177e4 1632out_put:
6cb153ca 1633 fput_light(sock->file, fput_needed);
1da177e4
LT
1634out:
1635 return err;
39d8c1b6 1636out_fd:
9606a216 1637 fput(newfile);
39d8c1b6 1638 put_unused_fd(newfd);
1da177e4
LT
1639 goto out_put;
1640}
1641
4541e805
DB
1642SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1643 int __user *, upeer_addrlen, int, flags)
1644{
1645 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1646}
1647
20f37034
HC
1648SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1649 int __user *, upeer_addrlen)
aaca0bdc 1650{
4541e805 1651 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1652}
1653
1da177e4
LT
1654/*
1655 * Attempt to connect to a socket with the server address. The address
1656 * is in user space so we verify it is OK and move it to kernel space.
1657 *
1658 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1659 * break bindings
1660 *
1661 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1662 * other SEQPACKET protocols that take time to connect() as it doesn't
1663 * include the -EINPROGRESS status for such sockets.
1664 */
1665
1387c2c2 1666int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1667{
1668 struct socket *sock;
230b1839 1669 struct sockaddr_storage address;
6cb153ca 1670 int err, fput_needed;
1da177e4 1671
6cb153ca 1672 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1673 if (!sock)
1674 goto out;
43db362d 1675 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1676 if (err < 0)
1677 goto out_put;
1678
89bddce5 1679 err =
230b1839 1680 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1681 if (err)
1682 goto out_put;
1683
230b1839 1684 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1685 sock->file->f_flags);
1686out_put:
6cb153ca 1687 fput_light(sock->file, fput_needed);
1da177e4
LT
1688out:
1689 return err;
1690}
1691
1387c2c2
DB
1692SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1693 int, addrlen)
1694{
1695 return __sys_connect(fd, uservaddr, addrlen);
1696}
1697
1da177e4
LT
1698/*
1699 * Get the local address ('name') of a socket object. Move the obtained
1700 * name to user space.
1701 */
1702
8882a107
DB
1703int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1704 int __user *usockaddr_len)
1da177e4
LT
1705{
1706 struct socket *sock;
230b1839 1707 struct sockaddr_storage address;
9b2c45d4 1708 int err, fput_needed;
89bddce5 1709
6cb153ca 1710 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1711 if (!sock)
1712 goto out;
1713
1714 err = security_socket_getsockname(sock);
1715 if (err)
1716 goto out_put;
1717
9b2c45d4
DV
1718 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1719 if (err < 0)
1da177e4 1720 goto out_put;
9b2c45d4
DV
1721 /* "err" is actually length in this case */
1722 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1723
1724out_put:
6cb153ca 1725 fput_light(sock->file, fput_needed);
1da177e4
LT
1726out:
1727 return err;
1728}
1729
8882a107
DB
1730SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1731 int __user *, usockaddr_len)
1732{
1733 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1734}
1735
1da177e4
LT
1736/*
1737 * Get the remote address ('name') of a socket object. Move the obtained
1738 * name to user space.
1739 */
1740
b21c8f83
DB
1741int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1742 int __user *usockaddr_len)
1da177e4
LT
1743{
1744 struct socket *sock;
230b1839 1745 struct sockaddr_storage address;
9b2c45d4 1746 int err, fput_needed;
1da177e4 1747
89bddce5
SH
1748 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1749 if (sock != NULL) {
1da177e4
LT
1750 err = security_socket_getpeername(sock);
1751 if (err) {
6cb153ca 1752 fput_light(sock->file, fput_needed);
1da177e4
LT
1753 return err;
1754 }
1755
9b2c45d4
DV
1756 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1757 if (err >= 0)
1758 /* "err" is actually length in this case */
1759 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1760 usockaddr_len);
6cb153ca 1761 fput_light(sock->file, fput_needed);
1da177e4
LT
1762 }
1763 return err;
1764}
1765
b21c8f83
DB
1766SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1767 int __user *, usockaddr_len)
1768{
1769 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1770}
1771
1da177e4
LT
1772/*
1773 * Send a datagram to a given address. We move the address into kernel
1774 * space and check the user space data area is readable before invoking
1775 * the protocol.
1776 */
211b634b
DB
1777int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1778 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1779{
1780 struct socket *sock;
230b1839 1781 struct sockaddr_storage address;
1da177e4
LT
1782 int err;
1783 struct msghdr msg;
1784 struct iovec iov;
6cb153ca 1785 int fput_needed;
6cb153ca 1786
602bd0e9
AV
1787 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1788 if (unlikely(err))
1789 return err;
de0fa95c
PE
1790 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1791 if (!sock)
4387ff75 1792 goto out;
6cb153ca 1793
89bddce5 1794 msg.msg_name = NULL;
89bddce5
SH
1795 msg.msg_control = NULL;
1796 msg.msg_controllen = 0;
1797 msg.msg_namelen = 0;
6cb153ca 1798 if (addr) {
43db362d 1799 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1800 if (err < 0)
1801 goto out_put;
230b1839 1802 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1803 msg.msg_namelen = addr_len;
1da177e4
LT
1804 }
1805 if (sock->file->f_flags & O_NONBLOCK)
1806 flags |= MSG_DONTWAIT;
1807 msg.msg_flags = flags;
d8725c86 1808 err = sock_sendmsg(sock, &msg);
1da177e4 1809
89bddce5 1810out_put:
de0fa95c 1811 fput_light(sock->file, fput_needed);
4387ff75 1812out:
1da177e4
LT
1813 return err;
1814}
1815
211b634b
DB
1816SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1817 unsigned int, flags, struct sockaddr __user *, addr,
1818 int, addr_len)
1819{
1820 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1821}
1822
1da177e4 1823/*
89bddce5 1824 * Send a datagram down a socket.
1da177e4
LT
1825 */
1826
3e0fa65f 1827SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1828 unsigned int, flags)
1da177e4 1829{
211b634b 1830 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1831}
1832
1833/*
89bddce5 1834 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1835 * sender. We verify the buffers are writable and if needed move the
1836 * sender address from kernel to user space.
1837 */
7a09e1eb
DB
1838int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
1839 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
1840{
1841 struct socket *sock;
1842 struct iovec iov;
1843 struct msghdr msg;
230b1839 1844 struct sockaddr_storage address;
89bddce5 1845 int err, err2;
6cb153ca
BL
1846 int fput_needed;
1847
602bd0e9
AV
1848 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1849 if (unlikely(err))
1850 return err;
de0fa95c 1851 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1852 if (!sock)
de0fa95c 1853 goto out;
1da177e4 1854
89bddce5
SH
1855 msg.msg_control = NULL;
1856 msg.msg_controllen = 0;
f3d33426
HFS
1857 /* Save some cycles and don't copy the address if not needed */
1858 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1859 /* We assume all kernel code knows the size of sockaddr_storage */
1860 msg.msg_namelen = 0;
130ed5d1 1861 msg.msg_iocb = NULL;
9f138fa6 1862 msg.msg_flags = 0;
1da177e4
LT
1863 if (sock->file->f_flags & O_NONBLOCK)
1864 flags |= MSG_DONTWAIT;
2da62906 1865 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1866
89bddce5 1867 if (err >= 0 && addr != NULL) {
43db362d 1868 err2 = move_addr_to_user(&address,
230b1839 1869 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1870 if (err2 < 0)
1871 err = err2;
1da177e4 1872 }
de0fa95c
PE
1873
1874 fput_light(sock->file, fput_needed);
4387ff75 1875out:
1da177e4
LT
1876 return err;
1877}
1878
7a09e1eb
DB
1879SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1880 unsigned int, flags, struct sockaddr __user *, addr,
1881 int __user *, addr_len)
1882{
1883 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
1884}
1885
1da177e4 1886/*
89bddce5 1887 * Receive a datagram from a socket.
1da177e4
LT
1888 */
1889
b7c0ddf5
JG
1890SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1891 unsigned int, flags)
1da177e4 1892{
7a09e1eb 1893 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
1894}
1895
1896/*
1897 * Set a socket option. Because we don't know the option lengths we have
1898 * to pass the user mode parameter for the protocols to sort out.
1899 */
1900
cc36dca0
DB
1901static int __sys_setsockopt(int fd, int level, int optname,
1902 char __user *optval, int optlen)
1da177e4 1903{
6cb153ca 1904 int err, fput_needed;
1da177e4
LT
1905 struct socket *sock;
1906
1907 if (optlen < 0)
1908 return -EINVAL;
89bddce5
SH
1909
1910 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1911 if (sock != NULL) {
1912 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1913 if (err)
1914 goto out_put;
1da177e4
LT
1915
1916 if (level == SOL_SOCKET)
89bddce5
SH
1917 err =
1918 sock_setsockopt(sock, level, optname, optval,
1919 optlen);
1da177e4 1920 else
89bddce5
SH
1921 err =
1922 sock->ops->setsockopt(sock, level, optname, optval,
1923 optlen);
6cb153ca
BL
1924out_put:
1925 fput_light(sock->file, fput_needed);
1da177e4
LT
1926 }
1927 return err;
1928}
1929
cc36dca0
DB
1930SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1931 char __user *, optval, int, optlen)
1932{
1933 return __sys_setsockopt(fd, level, optname, optval, optlen);
1934}
1935
1da177e4
LT
1936/*
1937 * Get a socket option. Because we don't know the option lengths we have
1938 * to pass a user mode parameter for the protocols to sort out.
1939 */
1940
13a2d70e
DB
1941static int __sys_getsockopt(int fd, int level, int optname,
1942 char __user *optval, int __user *optlen)
1da177e4 1943{
6cb153ca 1944 int err, fput_needed;
1da177e4
LT
1945 struct socket *sock;
1946
89bddce5
SH
1947 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1948 if (sock != NULL) {
6cb153ca
BL
1949 err = security_socket_getsockopt(sock, level, optname);
1950 if (err)
1951 goto out_put;
1da177e4
LT
1952
1953 if (level == SOL_SOCKET)
89bddce5
SH
1954 err =
1955 sock_getsockopt(sock, level, optname, optval,
1956 optlen);
1da177e4 1957 else
89bddce5
SH
1958 err =
1959 sock->ops->getsockopt(sock, level, optname, optval,
1960 optlen);
6cb153ca
BL
1961out_put:
1962 fput_light(sock->file, fput_needed);
1da177e4
LT
1963 }
1964 return err;
1965}
1966
13a2d70e
DB
1967SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1968 char __user *, optval, int __user *, optlen)
1969{
1970 return __sys_getsockopt(fd, level, optname, optval, optlen);
1971}
1972
1da177e4
LT
1973/*
1974 * Shutdown a socket.
1975 */
1976
005a1aea 1977int __sys_shutdown(int fd, int how)
1da177e4 1978{
6cb153ca 1979 int err, fput_needed;
1da177e4
LT
1980 struct socket *sock;
1981
89bddce5
SH
1982 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1983 if (sock != NULL) {
1da177e4 1984 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1985 if (!err)
1986 err = sock->ops->shutdown(sock, how);
1987 fput_light(sock->file, fput_needed);
1da177e4
LT
1988 }
1989 return err;
1990}
1991
005a1aea
DB
1992SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1993{
1994 return __sys_shutdown(fd, how);
1995}
1996
/*
 * Helper macros that yield the address of a msghdr field in either the
 * native or the compat (32-bit) user structure; the fields concerned have
 * the same type (int / unsigned) on our platforms.
 *
 * The choice is made by testing MSG_CMSG_COMPAT in a variable named
 * "flags", and the compat pointer is found by pasting "_compat" onto the
 * macro argument, so both "flags" and "<msg>_compat" must be in scope
 * wherever these are used.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2003
c71d8ebe
TH
2004struct used_address {
2005 struct sockaddr_storage name;
2006 unsigned int name_len;
2007};
2008
da184284
AV
2009static int copy_msghdr_from_user(struct msghdr *kmsg,
2010 struct user_msghdr __user *umsg,
2011 struct sockaddr __user **save_addr,
2012 struct iovec **iov)
1661bf36 2013{
ffb07550 2014 struct user_msghdr msg;
08adb7da
AV
2015 ssize_t err;
2016
ffb07550 2017 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2018 return -EFAULT;
dbb490b9 2019
864d9664 2020 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2021 kmsg->msg_controllen = msg.msg_controllen;
2022 kmsg->msg_flags = msg.msg_flags;
2023
2024 kmsg->msg_namelen = msg.msg_namelen;
2025 if (!msg.msg_name)
6a2a2b3a
AS
2026 kmsg->msg_namelen = 0;
2027
dbb490b9
ML
2028 if (kmsg->msg_namelen < 0)
2029 return -EINVAL;
2030
1661bf36 2031 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2032 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2033
2034 if (save_addr)
ffb07550 2035 *save_addr = msg.msg_name;
08adb7da 2036
ffb07550 2037 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2038 if (!save_addr) {
864d9664
PA
2039 err = move_addr_to_kernel(msg.msg_name,
2040 kmsg->msg_namelen,
08adb7da
AV
2041 kmsg->msg_name);
2042 if (err < 0)
2043 return err;
2044 }
2045 } else {
2046 kmsg->msg_name = NULL;
2047 kmsg->msg_namelen = 0;
2048 }
2049
ffb07550 2050 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2051 return -EMSGSIZE;
2052
0345f931 2053 kmsg->msg_iocb = NULL;
2054
ffb07550
AV
2055 return import_iovec(save_addr ? READ : WRITE,
2056 msg.msg_iov, msg.msg_iovlen,
da184284 2057 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
2058}
2059
666547ff 2060static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2061 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2062 struct used_address *used_address,
2063 unsigned int allowed_msghdr_flags)
1da177e4 2064{
89bddce5
SH
2065 struct compat_msghdr __user *msg_compat =
2066 (struct compat_msghdr __user *)msg;
230b1839 2067 struct sockaddr_storage address;
1da177e4 2068 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2069 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2070 __aligned(sizeof(__kernel_size_t));
89bddce5 2071 /* 20 is size of ipv6_pktinfo */
1da177e4 2072 unsigned char *ctl_buf = ctl;
d8725c86 2073 int ctl_len;
08adb7da 2074 ssize_t err;
89bddce5 2075
08adb7da 2076 msg_sys->msg_name = &address;
1da177e4 2077
08449320 2078 if (MSG_CMSG_COMPAT & flags)
08adb7da 2079 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2080 else
08adb7da 2081 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2082 if (err < 0)
da184284 2083 return err;
1da177e4
LT
2084
2085 err = -ENOBUFS;
2086
228e548e 2087 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2088 goto out_freeiov;
28a94d8f 2089 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2090 ctl_len = msg_sys->msg_controllen;
1da177e4 2091 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2092 err =
228e548e 2093 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2094 sizeof(ctl));
1da177e4
LT
2095 if (err)
2096 goto out_freeiov;
228e548e
AB
2097 ctl_buf = msg_sys->msg_control;
2098 ctl_len = msg_sys->msg_controllen;
1da177e4 2099 } else if (ctl_len) {
ac4340fc
DM
2100 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2101 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2102 if (ctl_len > sizeof(ctl)) {
1da177e4 2103 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2104 if (ctl_buf == NULL)
1da177e4
LT
2105 goto out_freeiov;
2106 }
2107 err = -EFAULT;
2108 /*
228e548e 2109 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2110 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2111 * checking falls down on this.
2112 */
fb8621bb 2113 if (copy_from_user(ctl_buf,
228e548e 2114 (void __user __force *)msg_sys->msg_control,
89bddce5 2115 ctl_len))
1da177e4 2116 goto out_freectl;
228e548e 2117 msg_sys->msg_control = ctl_buf;
1da177e4 2118 }
228e548e 2119 msg_sys->msg_flags = flags;
1da177e4
LT
2120
2121 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2122 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2123 /*
2124 * If this is sendmmsg() and current destination address is same as
2125 * previously succeeded address, omit asking LSM's decision.
2126 * used_address->name_len is initialized to UINT_MAX so that the first
2127 * destination address never matches.
2128 */
bc909d9d
MD
2129 if (used_address && msg_sys->msg_name &&
2130 used_address->name_len == msg_sys->msg_namelen &&
2131 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2132 used_address->name_len)) {
d8725c86 2133 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2134 goto out_freectl;
2135 }
d8725c86 2136 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2137 /*
2138 * If this is sendmmsg() and sending to current destination address was
2139 * successful, remember it.
2140 */
2141 if (used_address && err >= 0) {
2142 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2143 if (msg_sys->msg_name)
2144 memcpy(&used_address->name, msg_sys->msg_name,
2145 used_address->name_len);
c71d8ebe 2146 }
1da177e4
LT
2147
2148out_freectl:
89bddce5 2149 if (ctl_buf != ctl)
1da177e4
LT
2150 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2151out_freeiov:
da184284 2152 kfree(iov);
228e548e
AB
2153 return err;
2154}
2155
2156/*
2157 * BSD sendmsg interface
2158 */
2159
e1834a32
DB
2160long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2161 bool forbid_cmsg_compat)
228e548e
AB
2162{
2163 int fput_needed, err;
2164 struct msghdr msg_sys;
1be374a0
AL
2165 struct socket *sock;
2166
e1834a32
DB
2167 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2168 return -EINVAL;
2169
1be374a0 2170 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2171 if (!sock)
2172 goto out;
2173
28a94d8f 2174 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2175
6cb153ca 2176 fput_light(sock->file, fput_needed);
89bddce5 2177out:
1da177e4
LT
2178 return err;
2179}
2180
666547ff 2181SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2182{
e1834a32 2183 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2184}
2185
228e548e
AB
2186/*
2187 * Linux sendmmsg interface
2188 */
2189
2190int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2191 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2192{
2193 int fput_needed, err, datagrams;
2194 struct socket *sock;
2195 struct mmsghdr __user *entry;
2196 struct compat_mmsghdr __user *compat_entry;
2197 struct msghdr msg_sys;
c71d8ebe 2198 struct used_address used_address;
f092276d 2199 unsigned int oflags = flags;
228e548e 2200
e1834a32
DB
2201 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2202 return -EINVAL;
2203
98382f41
AB
2204 if (vlen > UIO_MAXIOV)
2205 vlen = UIO_MAXIOV;
228e548e
AB
2206
2207 datagrams = 0;
2208
2209 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2210 if (!sock)
2211 return err;
2212
c71d8ebe 2213 used_address.name_len = UINT_MAX;
228e548e
AB
2214 entry = mmsg;
2215 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2216 err = 0;
f092276d 2217 flags |= MSG_BATCH;
228e548e
AB
2218
2219 while (datagrams < vlen) {
f092276d
TH
2220 if (datagrams == vlen - 1)
2221 flags = oflags;
2222
228e548e 2223 if (MSG_CMSG_COMPAT & flags) {
666547ff 2224 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2225 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2226 if (err < 0)
2227 break;
2228 err = __put_user(err, &compat_entry->msg_len);
2229 ++compat_entry;
2230 } else {
a7526eb5 2231 err = ___sys_sendmsg(sock,
666547ff 2232 (struct user_msghdr __user *)entry,
28a94d8f 2233 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2234 if (err < 0)
2235 break;
2236 err = put_user(err, &entry->msg_len);
2237 ++entry;
2238 }
2239
2240 if (err)
2241 break;
2242 ++datagrams;
3023898b
SHY
2243 if (msg_data_left(&msg_sys))
2244 break;
a78cb84c 2245 cond_resched();
228e548e
AB
2246 }
2247
228e548e
AB
2248 fput_light(sock->file, fput_needed);
2249
728ffb86
AB
2250 /* We only return an error if no datagrams were able to be sent */
2251 if (datagrams != 0)
228e548e
AB
2252 return datagrams;
2253
228e548e
AB
2254 return err;
2255}
2256
2257SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2258 unsigned int, vlen, unsigned int, flags)
2259{
e1834a32 2260 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2261}
2262
666547ff 2263static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2264 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2265{
89bddce5
SH
2266 struct compat_msghdr __user *msg_compat =
2267 (struct compat_msghdr __user *)msg;
1da177e4 2268 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2269 struct iovec *iov = iovstack;
1da177e4 2270 unsigned long cmsg_ptr;
2da62906 2271 int len;
08adb7da 2272 ssize_t err;
1da177e4
LT
2273
2274 /* kernel mode address */
230b1839 2275 struct sockaddr_storage addr;
1da177e4
LT
2276
2277 /* user mode address pointers */
2278 struct sockaddr __user *uaddr;
08adb7da 2279 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2280
08adb7da 2281 msg_sys->msg_name = &addr;
1da177e4 2282
f3d33426 2283 if (MSG_CMSG_COMPAT & flags)
08adb7da 2284 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2285 else
08adb7da 2286 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2287 if (err < 0)
da184284 2288 return err;
1da177e4 2289
a2e27255
ACM
2290 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2291 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2292
f3d33426
HFS
2293 /* We assume all kernel code knows the size of sockaddr_storage */
2294 msg_sys->msg_namelen = 0;
2295
1da177e4
LT
2296 if (sock->file->f_flags & O_NONBLOCK)
2297 flags |= MSG_DONTWAIT;
2da62906 2298 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2299 if (err < 0)
2300 goto out_freeiov;
2301 len = err;
2302
2303 if (uaddr != NULL) {
43db362d 2304 err = move_addr_to_user(&addr,
a2e27255 2305 msg_sys->msg_namelen, uaddr,
89bddce5 2306 uaddr_len);
1da177e4
LT
2307 if (err < 0)
2308 goto out_freeiov;
2309 }
a2e27255 2310 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2311 COMPAT_FLAGS(msg));
1da177e4
LT
2312 if (err)
2313 goto out_freeiov;
2314 if (MSG_CMSG_COMPAT & flags)
a2e27255 2315 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2316 &msg_compat->msg_controllen);
2317 else
a2e27255 2318 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2319 &msg->msg_controllen);
2320 if (err)
2321 goto out_freeiov;
2322 err = len;
2323
2324out_freeiov:
da184284 2325 kfree(iov);
a2e27255
ACM
2326 return err;
2327}
2328
2329/*
2330 * BSD recvmsg interface
2331 */
2332
e1834a32
DB
2333long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2334 bool forbid_cmsg_compat)
a2e27255
ACM
2335{
2336 int fput_needed, err;
2337 struct msghdr msg_sys;
1be374a0
AL
2338 struct socket *sock;
2339
e1834a32
DB
2340 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2341 return -EINVAL;
2342
1be374a0 2343 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2344 if (!sock)
2345 goto out;
2346
a7526eb5 2347 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2348
6cb153ca 2349 fput_light(sock->file, fput_needed);
1da177e4
LT
2350out:
2351 return err;
2352}
2353
666547ff 2354SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2355 unsigned int, flags)
2356{
e1834a32 2357 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2358}
2359
a2e27255
ACM
2360/*
2361 * Linux recvmmsg interface
2362 */
2363
e11d4284
AB
2364static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2365 unsigned int vlen, unsigned int flags,
2366 struct timespec64 *timeout)
a2e27255
ACM
2367{
2368 int fput_needed, err, datagrams;
2369 struct socket *sock;
2370 struct mmsghdr __user *entry;
d7256d0e 2371 struct compat_mmsghdr __user *compat_entry;
a2e27255 2372 struct msghdr msg_sys;
766b9f92
DD
2373 struct timespec64 end_time;
2374 struct timespec64 timeout64;
a2e27255
ACM
2375
2376 if (timeout &&
2377 poll_select_set_timeout(&end_time, timeout->tv_sec,
2378 timeout->tv_nsec))
2379 return -EINVAL;
2380
2381 datagrams = 0;
2382
2383 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2384 if (!sock)
2385 return err;
2386
7797dc41
SHY
2387 if (likely(!(flags & MSG_ERRQUEUE))) {
2388 err = sock_error(sock->sk);
2389 if (err) {
2390 datagrams = err;
2391 goto out_put;
2392 }
e623a9e9 2393 }
a2e27255
ACM
2394
2395 entry = mmsg;
d7256d0e 2396 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2397
2398 while (datagrams < vlen) {
2399 /*
2400 * No need to ask LSM for more than the first datagram.
2401 */
d7256d0e 2402 if (MSG_CMSG_COMPAT & flags) {
666547ff 2403 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2404 &msg_sys, flags & ~MSG_WAITFORONE,
2405 datagrams);
d7256d0e
JMG
2406 if (err < 0)
2407 break;
2408 err = __put_user(err, &compat_entry->msg_len);
2409 ++compat_entry;
2410 } else {
a7526eb5 2411 err = ___sys_recvmsg(sock,
666547ff 2412 (struct user_msghdr __user *)entry,
a7526eb5
AL
2413 &msg_sys, flags & ~MSG_WAITFORONE,
2414 datagrams);
d7256d0e
JMG
2415 if (err < 0)
2416 break;
2417 err = put_user(err, &entry->msg_len);
2418 ++entry;
2419 }
2420
a2e27255
ACM
2421 if (err)
2422 break;
a2e27255
ACM
2423 ++datagrams;
2424
71c5c159
BB
2425 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2426 if (flags & MSG_WAITFORONE)
2427 flags |= MSG_DONTWAIT;
2428
a2e27255 2429 if (timeout) {
766b9f92 2430 ktime_get_ts64(&timeout64);
c2e6c856 2431 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2432 if (timeout->tv_sec < 0) {
2433 timeout->tv_sec = timeout->tv_nsec = 0;
2434 break;
2435 }
2436
2437 /* Timeout, return less than vlen datagrams */
2438 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2439 break;
2440 }
2441
2442 /* Out of band data, return right away */
2443 if (msg_sys.msg_flags & MSG_OOB)
2444 break;
a78cb84c 2445 cond_resched();
a2e27255
ACM
2446 }
2447
a2e27255 2448 if (err == 0)
34b88a68
ACM
2449 goto out_put;
2450
2451 if (datagrams == 0) {
2452 datagrams = err;
2453 goto out_put;
2454 }
a2e27255 2455
34b88a68
ACM
2456 /*
2457 * We may return less entries than requested (vlen) if the
2458 * sock is non block and there aren't enough datagrams...
2459 */
2460 if (err != -EAGAIN) {
a2e27255 2461 /*
34b88a68
ACM
2462 * ... or if recvmsg returns an error after we
2463 * received some datagrams, where we record the
2464 * error to return on the next call or if the
2465 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2466 */
34b88a68 2467 sock->sk->sk_err = -err;
a2e27255 2468 }
34b88a68
ACM
2469out_put:
2470 fput_light(sock->file, fput_needed);
a2e27255 2471
34b88a68 2472 return datagrams;
a2e27255
ACM
2473}
2474
e11d4284
AB
2475int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2476 unsigned int vlen, unsigned int flags,
2477 struct __kernel_timespec __user *timeout,
2478 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2479{
2480 int datagrams;
c2e6c856 2481 struct timespec64 timeout_sys;
a2e27255 2482
e11d4284
AB
2483 if (timeout && get_timespec64(&timeout_sys, timeout))
2484 return -EFAULT;
a2e27255 2485
e11d4284 2486 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2487 return -EFAULT;
2488
e11d4284
AB
2489 if (!timeout && !timeout32)
2490 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2491
2492 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2493
e11d4284
AB
2494 if (datagrams <= 0)
2495 return datagrams;
2496
2497 if (timeout && put_timespec64(&timeout_sys, timeout))
2498 datagrams = -EFAULT;
2499
2500 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2501 datagrams = -EFAULT;
2502
2503 return datagrams;
2504}
2505
1255e269
DB
2506SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2507 unsigned int, vlen, unsigned int, flags,
c2e6c856 2508 struct __kernel_timespec __user *, timeout)
1255e269 2509{
e11d4284
AB
2510 if (flags & MSG_CMSG_COMPAT)
2511 return -EINVAL;
2512
2513 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2514}
2515
#ifdef CONFIG_COMPAT_32BIT_TIME
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	/* MSG_CMSG_COMPAT is not accepted from this entry point. */
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	/* The timeout uses the old_timespec32 layout here. */
	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2527
a2e27255 2528#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, indexed by call number.
 * Each entry is the size in bytes of that call's argument block, i.e.
 * the argument count times sizeof(unsigned long).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3),
	AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2),
	AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2539
2540/*
89bddce5 2541 * System call vectors.
1da177e4
LT
2542 *
2543 * Argument checking cleaned up. Saved 20% in size.
2544 * This function doesn't need to set the kernel lock because
89bddce5 2545 * it is set by the callees.
1da177e4
LT
2546 */
2547
3e0fa65f 2548SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2549{
2950fa9d 2550 unsigned long a[AUDITSC_ARGS];
89bddce5 2551 unsigned long a0, a1;
1da177e4 2552 int err;
47379052 2553 unsigned int len;
1da177e4 2554
228e548e 2555 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2556 return -EINVAL;
c8e8cd57 2557 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2558
47379052
AV
2559 len = nargs[call];
2560 if (len > sizeof(a))
2561 return -EINVAL;
2562
1da177e4 2563 /* copy_from_user should be SMP safe. */
47379052 2564 if (copy_from_user(a, args, len))
1da177e4 2565 return -EFAULT;
3ec3b2fb 2566
2950fa9d
CG
2567 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2568 if (err)
2569 return err;
3ec3b2fb 2570
89bddce5
SH
2571 a0 = a[0];
2572 a1 = a[1];
2573
2574 switch (call) {
2575 case SYS_SOCKET:
9d6a15c3 2576 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2577 break;
2578 case SYS_BIND:
a87d35d8 2579 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2580 break;
2581 case SYS_CONNECT:
1387c2c2 2582 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2583 break;
2584 case SYS_LISTEN:
25e290ee 2585 err = __sys_listen(a0, a1);
89bddce5
SH
2586 break;
2587 case SYS_ACCEPT:
4541e805
DB
2588 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2589 (int __user *)a[2], 0);
89bddce5
SH
2590 break;
2591 case SYS_GETSOCKNAME:
2592 err =
8882a107
DB
2593 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2594 (int __user *)a[2]);
89bddce5
SH
2595 break;
2596 case SYS_GETPEERNAME:
2597 err =
b21c8f83
DB
2598 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2599 (int __user *)a[2]);
89bddce5
SH
2600 break;
2601 case SYS_SOCKETPAIR:
6debc8d8 2602 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2603 break;
2604 case SYS_SEND:
f3bf896b
DB
2605 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2606 NULL, 0);
89bddce5
SH
2607 break;
2608 case SYS_SENDTO:
211b634b
DB
2609 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2610 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2611 break;
2612 case SYS_RECV:
d27e9afc
DB
2613 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2614 NULL, NULL);
89bddce5
SH
2615 break;
2616 case SYS_RECVFROM:
7a09e1eb
DB
2617 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2618 (struct sockaddr __user *)a[4],
2619 (int __user *)a[5]);
89bddce5
SH
2620 break;
2621 case SYS_SHUTDOWN:
005a1aea 2622 err = __sys_shutdown(a0, a1);
89bddce5
SH
2623 break;
2624 case SYS_SETSOCKOPT:
cc36dca0
DB
2625 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2626 a[4]);
89bddce5
SH
2627 break;
2628 case SYS_GETSOCKOPT:
2629 err =
13a2d70e
DB
2630 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2631 (int __user *)a[4]);
89bddce5
SH
2632 break;
2633 case SYS_SENDMSG:
e1834a32
DB
2634 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2635 a[2], true);
89bddce5 2636 break;
228e548e 2637 case SYS_SENDMMSG:
e1834a32
DB
2638 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2639 a[3], true);
228e548e 2640 break;
89bddce5 2641 case SYS_RECVMSG:
e1834a32
DB
2642 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2643 a[2], true);
89bddce5 2644 break;
a2e27255 2645 case SYS_RECVMMSG:
e11d4284
AB
2646 if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
2647 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2648 a[2], a[3],
2649 (struct __kernel_timespec __user *)a[4],
2650 NULL);
2651 else
2652 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2653 a[2], a[3], NULL,
2654 (struct old_timespec32 __user *)a[4]);
a2e27255 2655 break;
de11defe 2656 case SYS_ACCEPT4:
4541e805
DB
2657 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2658 (int __user *)a[2], a[3]);
aaca0bdc 2659 break;
89bddce5
SH
2660 default:
2661 err = -EINVAL;
2662 break;
1da177e4
LT
2663 }
2664 return err;
2665}
2666
89bddce5 2667#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2668
55737fda
SH
2669/**
2670 * sock_register - add a socket protocol handler
2671 * @ops: description of protocol
2672 *
1da177e4
LT
2673 * This function is called by a protocol handler that wants to
2674 * advertise its address family, and have it linked into the
e793c0f7 2675 * socket interface. The value ops->family corresponds to the
55737fda 2676 * socket system call protocol family.
1da177e4 2677 */
f0fd27d4 2678int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2679{
2680 int err;
2681
2682 if (ops->family >= NPROTO) {
3410f22e 2683 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2684 return -ENOBUFS;
2685 }
55737fda
SH
2686
2687 spin_lock(&net_family_lock);
190683a9
ED
2688 if (rcu_dereference_protected(net_families[ops->family],
2689 lockdep_is_held(&net_family_lock)))
55737fda
SH
2690 err = -EEXIST;
2691 else {
cf778b00 2692 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2693 err = 0;
2694 }
55737fda
SH
2695 spin_unlock(&net_family_lock);
2696
3410f22e 2697 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2698 return err;
2699}
c6d409cf 2700EXPORT_SYMBOL(sock_register);
1da177e4 2701
55737fda
SH
2702/**
2703 * sock_unregister - remove a protocol handler
2704 * @family: protocol family to remove
2705 *
1da177e4
LT
2706 * This function is called by a protocol handler that wants to
2707 * remove its address family, and have it unlinked from the
55737fda
SH
2708 * new socket creation.
2709 *
2710 * If protocol handler is a module, then it can use module reference
2711 * counts to protect against new references. If protocol handler is not
2712 * a module then it needs to provide its own protection in
2713 * the ops->create routine.
1da177e4 2714 */
f0fd27d4 2715void sock_unregister(int family)
1da177e4 2716{
f0fd27d4 2717 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2718
55737fda 2719 spin_lock(&net_family_lock);
a9b3cd7f 2720 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2721 spin_unlock(&net_family_lock);
2722
2723 synchronize_rcu();
2724
3410f22e 2725 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2726}
c6d409cf 2727EXPORT_SYMBOL(sock_unregister);
1da177e4 2728
bf2ae2e4
XL
2729bool sock_is_registered(int family)
2730{
66b51b0a 2731 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
2732}
2733
77d76ea3 2734static int __init sock_init(void)
1da177e4 2735{
b3e19d92 2736 int err;
2ca794e5
EB
2737 /*
2738 * Initialize the network sysctl infrastructure.
2739 */
2740 err = net_sysctl_init();
2741 if (err)
2742 goto out;
b3e19d92 2743
1da177e4 2744 /*
89bddce5 2745 * Initialize skbuff SLAB cache
1da177e4
LT
2746 */
2747 skb_init();
1da177e4
LT
2748
2749 /*
89bddce5 2750 * Initialize the protocols module.
1da177e4
LT
2751 */
2752
2753 init_inodecache();
b3e19d92
NP
2754
2755 err = register_filesystem(&sock_fs_type);
2756 if (err)
2757 goto out_fs;
1da177e4 2758 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2759 if (IS_ERR(sock_mnt)) {
2760 err = PTR_ERR(sock_mnt);
2761 goto out_mount;
2762 }
77d76ea3
AK
2763
2764 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2765 */
2766
2767#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2768 err = netfilter_init();
2769 if (err)
2770 goto out;
1da177e4 2771#endif
cbeb321a 2772
408eccce 2773 ptp_classifier_init();
c1f19b51 2774
b3e19d92
NP
2775out:
2776 return err;
2777
2778out_mount:
2779 unregister_filesystem(&sock_fs_type);
2780out_fs:
2781 goto out;
1da177e4
LT
2782}
2783
77d76ea3
AK
2784core_initcall(sock_init); /* early initcall */
2785
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Print the "sockets: used N" line into @seq, where N is what
 * sock_inuse_get() returns for seq->private.
 */
void socket_seq_show(struct seq_file *seq)
{
	int in_use = sock_inuse_get(seq->private);

	seq_printf(seq, "sockets: used %d\n", in_use);
}
#endif				/* CONFIG_PROC_FS */
1da177e4 2793
89bbfc95 2794#ifdef CONFIG_COMPAT
6b96018b 2795static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2796 unsigned int cmd, void __user *up)
7a229387 2797{
7a229387
AB
2798 mm_segment_t old_fs = get_fs();
2799 struct timeval ktv;
2800 int err;
2801
2802 set_fs(KERNEL_DS);
63ff03ab 2803 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2804 set_fs(old_fs);
644595f8 2805 if (!err)
ed6fe9d6 2806 err = compat_put_timeval(&ktv, up);
644595f8 2807
7a229387
AB
2808 return err;
2809}
2810
6b96018b 2811static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2812 unsigned int cmd, void __user *up)
7a229387 2813{
7a229387
AB
2814 mm_segment_t old_fs = get_fs();
2815 struct timespec kts;
2816 int err;
2817
2818 set_fs(KERNEL_DS);
63ff03ab 2819 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2820 set_fs(old_fs);
644595f8 2821 if (!err)
ed6fe9d6 2822 err = compat_put_timespec(&kts, up);
644595f8 2823
7a229387
AB
2824 return err;
2825}
2826
36fd633e 2827static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2828{
6b96018b 2829 struct compat_ifconf ifc32;
7a229387 2830 struct ifconf ifc;
7a229387
AB
2831 int err;
2832
6b96018b 2833 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2834 return -EFAULT;
2835
36fd633e
AV
2836 ifc.ifc_len = ifc32.ifc_len;
2837 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 2838
36fd633e
AV
2839 rtnl_lock();
2840 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2841 rtnl_unlock();
7a229387
AB
2842 if (err)
2843 return err;
2844
36fd633e 2845 ifc32.ifc_len = ifc.ifc_len;
6b96018b 2846 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2847 return -EFAULT;
2848
2849 return 0;
2850}
2851
6b96018b 2852static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2853{
3a7da39d
BH
2854 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2855 bool convert_in = false, convert_out = false;
44c02a2c
AV
2856 size_t buf_size = 0;
2857 struct ethtool_rxnfc __user *rxnfc = NULL;
2858 struct ifreq ifr;
3a7da39d
BH
2859 u32 rule_cnt = 0, actual_rule_cnt;
2860 u32 ethcmd;
7a229387 2861 u32 data;
3a7da39d 2862 int ret;
7a229387 2863
3a7da39d
BH
2864 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2865 return -EFAULT;
7a229387 2866
3a7da39d
BH
2867 compat_rxnfc = compat_ptr(data);
2868
2869 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2870 return -EFAULT;
2871
3a7da39d
BH
2872 /* Most ethtool structures are defined without padding.
2873 * Unfortunately struct ethtool_rxnfc is an exception.
2874 */
2875 switch (ethcmd) {
2876 default:
2877 break;
2878 case ETHTOOL_GRXCLSRLALL:
2879 /* Buffer size is variable */
2880 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2881 return -EFAULT;
2882 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2883 return -ENOMEM;
2884 buf_size += rule_cnt * sizeof(u32);
2885 /* fall through */
2886 case ETHTOOL_GRXRINGS:
2887 case ETHTOOL_GRXCLSRLCNT:
2888 case ETHTOOL_GRXCLSRULE:
55664f32 2889 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2890 convert_out = true;
2891 /* fall through */
2892 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2893 buf_size += sizeof(struct ethtool_rxnfc);
2894 convert_in = true;
44c02a2c 2895 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
2896 break;
2897 }
2898
44c02a2c 2899 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2900 return -EFAULT;
2901
44c02a2c 2902 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 2903
3a7da39d 2904 if (convert_in) {
127fe533 2905 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2906 * fs.ring_cookie and at the end of fs, but nowhere else.
2907 */
127fe533
AD
2908 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2909 sizeof(compat_rxnfc->fs.m_ext) !=
2910 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2911 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2912 BUILD_BUG_ON(
2913 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2914 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2915 offsetof(struct ethtool_rxnfc, fs.location) -
2916 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2917
2918 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2919 (void __user *)(&rxnfc->fs.m_ext + 1) -
2920 (void __user *)rxnfc) ||
3a7da39d
BH
2921 copy_in_user(&rxnfc->fs.ring_cookie,
2922 &compat_rxnfc->fs.ring_cookie,
954b1244 2923 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
2924 (void __user *)&rxnfc->fs.ring_cookie))
2925 return -EFAULT;
2926 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2927 if (put_user(rule_cnt, &rxnfc->rule_cnt))
2928 return -EFAULT;
2929 } else if (copy_in_user(&rxnfc->rule_cnt,
2930 &compat_rxnfc->rule_cnt,
2931 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
2932 return -EFAULT;
2933 }
2934
44c02a2c 2935 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
2936 if (ret)
2937 return ret;
2938
2939 if (convert_out) {
2940 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2941 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2942 (const void __user *)rxnfc) ||
3a7da39d
BH
2943 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2944 &rxnfc->fs.ring_cookie,
954b1244
SH
2945 (const void __user *)(&rxnfc->fs.location + 1) -
2946 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2947 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2948 sizeof(rxnfc->rule_cnt)))
2949 return -EFAULT;
2950
2951 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2952 /* As an optimisation, we only copy the actual
2953 * number of rules that the underlying
2954 * function returned. Since Mallory might
2955 * change the rule count in user memory, we
2956 * check that it is less than the rule count
2957 * originally given (as the user buffer size),
2958 * which has been range-checked.
2959 */
2960 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2961 return -EFAULT;
2962 if (actual_rule_cnt < rule_cnt)
2963 rule_cnt = actual_rule_cnt;
2964 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2965 &rxnfc->rule_locs[0],
2966 rule_cnt * sizeof(u32)))
2967 return -EFAULT;
2968 }
2969 }
2970
2971 return 0;
7a229387
AB
2972}
2973
7a50a240
AB
2974static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2975{
7a50a240 2976 compat_uptr_t uptr32;
44c02a2c
AV
2977 struct ifreq ifr;
2978 void __user *saved;
2979 int err;
7a50a240 2980
44c02a2c 2981 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
2982 return -EFAULT;
2983
2984 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2985 return -EFAULT;
2986
44c02a2c
AV
2987 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
2988 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 2989
44c02a2c
AV
2990 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
2991 if (!err) {
2992 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
2993 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
2994 err = -EFAULT;
ccbd6a5a 2995 }
44c02a2c 2996 return err;
7a229387
AB
2997}
2998
590d4693
BH
2999/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3000static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3001 struct compat_ifreq __user *u_ifreq32)
7a229387 3002{
44c02a2c 3003 struct ifreq ifreq;
7a229387
AB
3004 u32 data32;
3005
44c02a2c 3006 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3007 return -EFAULT;
44c02a2c 3008 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3009 return -EFAULT;
44c02a2c 3010 ifreq.ifr_data = compat_ptr(data32);
7a229387 3011
44c02a2c 3012 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3013}
3014
37ac39bd
JB
3015static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3016 unsigned int cmd,
3017 struct compat_ifreq __user *uifr32)
3018{
3019 struct ifreq __user *uifr;
3020 int err;
3021
3022 /* Handle the fact that while struct ifreq has the same *layout* on
3023 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3024 * which are handled elsewhere, it still has different *size* due to
3025 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3026 * resulting in struct ifreq being 32 and 40 bytes respectively).
3027 * As a result, if the struct happens to be at the end of a page and
3028 * the next page isn't readable/writable, we get a fault. To prevent
3029 * that, copy back and forth to the full size.
3030 */
3031
3032 uifr = compat_alloc_user_space(sizeof(*uifr));
3033 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3034 return -EFAULT;
3035
3036 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3037
3038 if (!err) {
3039 switch (cmd) {
3040 case SIOCGIFFLAGS:
3041 case SIOCGIFMETRIC:
3042 case SIOCGIFMTU:
3043 case SIOCGIFMEM:
3044 case SIOCGIFHWADDR:
3045 case SIOCGIFINDEX:
3046 case SIOCGIFADDR:
3047 case SIOCGIFBRDADDR:
3048 case SIOCGIFDSTADDR:
3049 case SIOCGIFNETMASK:
3050 case SIOCGIFPFLAGS:
3051 case SIOCGIFTXQLEN:
3052 case SIOCGMIIPHY:
3053 case SIOCGMIIREG:
c6c9fee3 3054 case SIOCGIFNAME:
37ac39bd
JB
3055 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3056 err = -EFAULT;
3057 break;
3058 }
3059 }
3060 return err;
3061}
3062
a2116ed2
AB
3063static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3064 struct compat_ifreq __user *uifr32)
3065{
3066 struct ifreq ifr;
3067 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3068 int err;
3069
3070 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3071 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3072 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3073 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3074 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3075 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3076 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3077 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3078 if (err)
3079 return -EFAULT;
3080
44c02a2c 3081 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3082
3083 if (cmd == SIOCGIFMAP && !err) {
3084 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3085 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3086 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3087 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3088 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3089 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3090 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3091 if (err)
3092 err = -EFAULT;
3093 }
3094 return err;
3095}
3096
7a229387 3097struct rtentry32 {
c6d409cf 3098 u32 rt_pad1;
7a229387
AB
3099 struct sockaddr rt_dst; /* target address */
3100 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3101 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3102 unsigned short rt_flags;
3103 short rt_pad2;
3104 u32 rt_pad3;
3105 unsigned char rt_tos;
3106 unsigned char rt_class;
3107 short rt_pad4;
3108 short rt_metric; /* +1 for binary compatibility! */
7a229387 3109 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3110 u32 rt_mtu; /* per route MTU/Window */
3111 u32 rt_window; /* Window clamping */
7a229387
AB
3112 unsigned short rt_irtt; /* Initial RTT */
3113};
3114
3115struct in6_rtmsg32 {
3116 struct in6_addr rtmsg_dst;
3117 struct in6_addr rtmsg_src;
3118 struct in6_addr rtmsg_gateway;
3119 u32 rtmsg_type;
3120 u16 rtmsg_dst_len;
3121 u16 rtmsg_src_len;
3122 u32 rtmsg_metric;
3123 u32 rtmsg_info;
3124 u32 rtmsg_flags;
3125 s32 rtmsg_ifindex;
3126};
3127
6b96018b
AB
3128static int routing_ioctl(struct net *net, struct socket *sock,
3129 unsigned int cmd, void __user *argp)
7a229387
AB
3130{
3131 int ret;
3132 void *r = NULL;
3133 struct in6_rtmsg r6;
3134 struct rtentry r4;
3135 char devname[16];
3136 u32 rtdev;
3137 mm_segment_t old_fs = get_fs();
3138
6b96018b
AB
3139 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3140 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3141 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3142 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3143 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3144 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3145 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3146 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3147 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3148 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3149 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3150
3151 r = (void *) &r6;
3152 } else { /* ipv4 */
6b96018b 3153 struct rtentry32 __user *ur4 = argp;
c6d409cf 3154 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3155 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3156 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3157 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3158 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3159 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3160 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3161 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3162 if (rtdev) {
c6d409cf 3163 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3164 r4.rt_dev = (char __user __force *)devname;
3165 devname[15] = 0;
7a229387
AB
3166 } else
3167 r4.rt_dev = NULL;
3168
3169 r = (void *) &r4;
3170 }
3171
3172 if (ret) {
3173 ret = -EFAULT;
3174 goto out;
3175 }
3176
c6d409cf 3177 set_fs(KERNEL_DS);
63ff03ab 3178 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3179 set_fs(old_fs);
7a229387
AB
3180
3181out:
7a229387
AB
3182 return ret;
3183}
3184
3185/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3186 * for some operations; this forces use of the newer bridge-utils that
25985edc 3187 * use compatible ioctls
7a229387 3188 */
6b96018b 3189static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3190{
6b96018b 3191 compat_ulong_t tmp;
7a229387 3192
6b96018b 3193 if (get_user(tmp, argp))
7a229387
AB
3194 return -EFAULT;
3195 if (tmp == BRCTL_GET_VERSION)
3196 return BRCTL_VERSION + 1;
3197 return -EINVAL;
3198}
3199
6b96018b
AB
3200static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3201 unsigned int cmd, unsigned long arg)
3202{
3203 void __user *argp = compat_ptr(arg);
3204 struct sock *sk = sock->sk;
3205 struct net *net = sock_net(sk);
7a229387 3206
6b96018b 3207 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3208 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3209
3210 switch (cmd) {
3211 case SIOCSIFBR:
3212 case SIOCGIFBR:
3213 return old_bridge_ioctl(argp);
6b96018b 3214 case SIOCGIFCONF:
36fd633e 3215 return compat_dev_ifconf(net, argp);
6b96018b
AB
3216 case SIOCETHTOOL:
3217 return ethtool_ioctl(net, argp);
7a50a240
AB
3218 case SIOCWANDEV:
3219 return compat_siocwandev(net, argp);
a2116ed2
AB
3220 case SIOCGIFMAP:
3221 case SIOCSIFMAP:
3222 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3223 case SIOCADDRT:
3224 case SIOCDELRT:
3225 return routing_ioctl(net, sock, cmd, argp);
3226 case SIOCGSTAMP:
3227 return do_siocgstamp(net, sock, cmd, argp);
3228 case SIOCGSTAMPNS:
3229 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3230 case SIOCBONDSLAVEINFOQUERY:
3231 case SIOCBONDINFOQUERY:
a2116ed2 3232 case SIOCSHWTSTAMP:
fd468c74 3233 case SIOCGHWTSTAMP:
590d4693 3234 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3235
3236 case FIOSETOWN:
3237 case SIOCSPGRP:
3238 case FIOGETOWN:
3239 case SIOCGPGRP:
3240 case SIOCBRADDBR:
3241 case SIOCBRDELBR:
3242 case SIOCGIFVLAN:
3243 case SIOCSIFVLAN:
3244 case SIOCADDDLCI:
3245 case SIOCDELDLCI:
c62cce2c 3246 case SIOCGSKNS:
6b96018b
AB
3247 return sock_ioctl(file, cmd, arg);
3248
3249 case SIOCGIFFLAGS:
3250 case SIOCSIFFLAGS:
3251 case SIOCGIFMETRIC:
3252 case SIOCSIFMETRIC:
3253 case SIOCGIFMTU:
3254 case SIOCSIFMTU:
3255 case SIOCGIFMEM:
3256 case SIOCSIFMEM:
3257 case SIOCGIFHWADDR:
3258 case SIOCSIFHWADDR:
3259 case SIOCADDMULTI:
3260 case SIOCDELMULTI:
3261 case SIOCGIFINDEX:
6b96018b
AB
3262 case SIOCGIFADDR:
3263 case SIOCSIFADDR:
3264 case SIOCSIFHWBROADCAST:
6b96018b 3265 case SIOCDIFADDR:
6b96018b
AB
3266 case SIOCGIFBRDADDR:
3267 case SIOCSIFBRDADDR:
3268 case SIOCGIFDSTADDR:
3269 case SIOCSIFDSTADDR:
3270 case SIOCGIFNETMASK:
3271 case SIOCSIFNETMASK:
3272 case SIOCSIFPFLAGS:
3273 case SIOCGIFPFLAGS:
3274 case SIOCGIFTXQLEN:
3275 case SIOCSIFTXQLEN:
3276 case SIOCBRADDIF:
3277 case SIOCBRDELIF:
c6c9fee3 3278 case SIOCGIFNAME:
9177efd3
AB
3279 case SIOCSIFNAME:
3280 case SIOCGMIIPHY:
3281 case SIOCGMIIREG:
3282 case SIOCSMIIREG:
f92d4fc9
AV
3283 case SIOCBONDENSLAVE:
3284 case SIOCBONDRELEASE:
3285 case SIOCBONDSETHWADDR:
3286 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3287 return compat_ifreq_ioctl(net, sock, cmd, argp);
3288
6b96018b
AB
3289 case SIOCSARP:
3290 case SIOCGARP:
3291 case SIOCDARP:
6b96018b 3292 case SIOCATMARK:
63ff03ab 3293 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3294 }
3295
6b96018b
AB
3296 return -ENOIOCTLCMD;
3297}
7a229387 3298
95c96174 3299static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3300 unsigned long arg)
89bbfc95
SP
3301{
3302 struct socket *sock = file->private_data;
3303 int ret = -ENOIOCTLCMD;
87de87d5
DM
3304 struct sock *sk;
3305 struct net *net;
3306
3307 sk = sock->sk;
3308 net = sock_net(sk);
89bbfc95
SP
3309
3310 if (sock->ops->compat_ioctl)
3311 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3312
87de87d5
DM
3313 if (ret == -ENOIOCTLCMD &&
3314 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3315 ret = compat_wext_handle_ioctl(net, cmd, arg);
3316
6b96018b
AB
3317 if (ret == -ENOIOCTLCMD)
3318 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3319
89bbfc95
SP
3320 return ret;
3321}
3322#endif
3323
ac5a488e
SS
3324int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3325{
3326 return sock->ops->bind(sock, addr, addrlen);
3327}
c6d409cf 3328EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3329
3330int kernel_listen(struct socket *sock, int backlog)
3331{
3332 return sock->ops->listen(sock, backlog);
3333}
c6d409cf 3334EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3335
3336int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3337{
3338 struct sock *sk = sock->sk;
3339 int err;
3340
3341 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3342 newsock);
3343 if (err < 0)
3344 goto done;
3345
cdfbabfb 3346 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3347 if (err < 0) {
3348 sock_release(*newsock);
fa8705b0 3349 *newsock = NULL;
ac5a488e
SS
3350 goto done;
3351 }
3352
3353 (*newsock)->ops = sock->ops;
1b08534e 3354 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3355
3356done:
3357 return err;
3358}
c6d409cf 3359EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3360
3361int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3362 int flags)
ac5a488e
SS
3363{
3364 return sock->ops->connect(sock, addr, addrlen, flags);
3365}
c6d409cf 3366EXPORT_SYMBOL(kernel_connect);
ac5a488e 3367
9b2c45d4 3368int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3369{
9b2c45d4 3370 return sock->ops->getname(sock, addr, 0);
ac5a488e 3371}
c6d409cf 3372EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3373
9b2c45d4 3374int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3375{
9b2c45d4 3376 return sock->ops->getname(sock, addr, 1);
ac5a488e 3377}
c6d409cf 3378EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3379
3380int kernel_getsockopt(struct socket *sock, int level, int optname,
3381 char *optval, int *optlen)
3382{
3383 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3384 char __user *uoptval;
3385 int __user *uoptlen;
ac5a488e
SS
3386 int err;
3387
fb8621bb
NK
3388 uoptval = (char __user __force *) optval;
3389 uoptlen = (int __user __force *) optlen;
3390
ac5a488e
SS
3391 set_fs(KERNEL_DS);
3392 if (level == SOL_SOCKET)
fb8621bb 3393 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3394 else
fb8621bb
NK
3395 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3396 uoptlen);
ac5a488e
SS
3397 set_fs(oldfs);
3398 return err;
3399}
c6d409cf 3400EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3401
3402int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3403 char *optval, unsigned int optlen)
ac5a488e
SS
3404{
3405 mm_segment_t oldfs = get_fs();
fb8621bb 3406 char __user *uoptval;
ac5a488e
SS
3407 int err;
3408
fb8621bb
NK
3409 uoptval = (char __user __force *) optval;
3410
ac5a488e
SS
3411 set_fs(KERNEL_DS);
3412 if (level == SOL_SOCKET)
fb8621bb 3413 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3414 else
fb8621bb 3415 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3416 optlen);
3417 set_fs(oldfs);
3418 return err;
3419}
c6d409cf 3420EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3421
3422int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3423 size_t size, int flags)
3424{
3425 if (sock->ops->sendpage)
3426 return sock->ops->sendpage(sock, page, offset, size, flags);
3427
3428 return sock_no_sendpage(sock, page, offset, size, flags);
3429}
c6d409cf 3430EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3431
306b13eb
TH
3432int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3433 size_t size, int flags)
3434{
3435 struct socket *sock = sk->sk_socket;
3436
3437 if (sock->ops->sendpage_locked)
3438 return sock->ops->sendpage_locked(sk, page, offset, size,
3439 flags);
3440
3441 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3442}
3443EXPORT_SYMBOL(kernel_sendpage_locked);
3444
91cf45f0
TM
3445int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3446{
3447 return sock->ops->shutdown(sock, how);
3448}
91cf45f0 3449EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075
P
3450
3451/* This routine returns the IP overhead imposed by a socket i.e.
3452 * the length of the underlying IP header, depending on whether
3453 * this is an IPv4 or IPv6 socket and the length from IP options turned
57240d00 3454 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075
P
3455 */
3456u32 kernel_sock_ip_overhead(struct sock *sk)
3457{
3458 struct inet_sock *inet;
3459 struct ip_options_rcu *opt;
3460 u32 overhead = 0;
113c3075
P
3461#if IS_ENABLED(CONFIG_IPV6)
3462 struct ipv6_pinfo *np;
3463 struct ipv6_txoptions *optv6 = NULL;
3464#endif /* IS_ENABLED(CONFIG_IPV6) */
3465
3466 if (!sk)
3467 return overhead;
3468
113c3075
P
3469 switch (sk->sk_family) {
3470 case AF_INET:
3471 inet = inet_sk(sk);
3472 overhead += sizeof(struct iphdr);
3473 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3474 sock_owned_by_user(sk));
113c3075
P
3475 if (opt)
3476 overhead += opt->opt.optlen;
3477 return overhead;
3478#if IS_ENABLED(CONFIG_IPV6)
3479 case AF_INET6:
3480 np = inet6_sk(sk);
3481 overhead += sizeof(struct ipv6hdr);
3482 if (np)
3483 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3484 sock_owned_by_user(sk));
113c3075
P
3485 if (optv6)
3486 overhead += (optv6->opt_flen + optv6->opt_nflen);
3487 return overhead;
3488#endif /* IS_ENABLED(CONFIG_IPV6) */
3489 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3490 return overhead;
3491 }
3492}
3493EXPORT_SYMBOL(kernel_sock_ip_overhead);