scsi: sd: don't crash the host on invalid commands
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
c8e8cd57 92#include <linux/nospec.h>
1da177e4 93
7c0f6ba6 94#include <linux/uaccess.h>
1da177e4
LT
95#include <asm/unistd.h>
96
97#include <net/compat.h>
87de87d5 98#include <net/wext.h>
f8451725 99#include <net/cls_cgroup.h>
1da177e4
LT
100
101#include <net/sock.h>
102#include <linux/netfilter.h>
103
6b96018b
AB
104#include <linux/if_tun.h>
105#include <linux/ipv6_route.h>
106#include <linux/route.h>
6b96018b 107#include <linux/sockios.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Only defined when CONFIG_NET_RX_BUSY_POLL is enabled. */
unsigned int sysctl_net_busy_poll __read_mostly;
unsigned int sysctl_net_busy_read __read_mostly;
#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
121static __poll_t sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4 165/*
89bddce5
SH
166 * Support routines.
167 * Move socket addresses back and forth across the kernel/user
168 * divide and look after the messy bits.
1da177e4
LT
169 */
170
1da177e4
LT
171/**
172 * move_addr_to_kernel - copy a socket address into kernel space
173 * @uaddr: Address in user space
174 * @kaddr: Address in kernel space
175 * @ulen: Length in user space
176 *
177 * The address is copied into kernel space. If the provided address is
178 * too long an error code of -EINVAL is returned. If the copy gives
179 * invalid addresses -EFAULT is returned. On a success 0 is returned.
180 */
181
43db362d 182int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 183{
230b1839 184 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 185 return -EINVAL;
89bddce5 186 if (ulen == 0)
1da177e4 187 return 0;
89bddce5 188 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 189 return -EFAULT;
3ec3b2fb 190 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
191}
192
193/**
194 * move_addr_to_user - copy an address to user space
195 * @kaddr: kernel space address
196 * @klen: length of address in kernel
197 * @uaddr: user space address
198 * @ulen: pointer to user length field
199 *
200 * The value pointed to by ulen on entry is the buffer length available.
201 * This is overwritten with the buffer space used. -EINVAL is returned
202 * if an overlong buffer is specified or a negative buffer size. -EFAULT
203 * is returned if either the buffer or the length field are not
204 * accessible.
205 * After copying the data up to the limit the user specifies, the true
206 * length of the data is written over the length limit the user
207 * specified. Zero is returned for a success.
208 */
89bddce5 209
43db362d 210static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 211 void __user *uaddr, int __user *ulen)
1da177e4
LT
212{
213 int err;
214 int len;
215
68c6beb3 216 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
217 err = get_user(len, ulen);
218 if (err)
1da177e4 219 return err;
89bddce5
SH
220 if (len > klen)
221 len = klen;
68c6beb3 222 if (len < 0)
1da177e4 223 return -EINVAL;
89bddce5 224 if (len) {
d6fe3945
SG
225 if (audit_sockaddr(klen, kaddr))
226 return -ENOMEM;
89bddce5 227 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
228 return -EFAULT;
229 }
230 /*
89bddce5
SH
231 * "fromlen shall refer to the value before truncation.."
232 * 1003.1g
1da177e4
LT
233 */
234 return __put_user(klen, ulen);
235}
236
08009a76 237static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
238
239static struct inode *sock_alloc_inode(struct super_block *sb)
240{
241 struct socket_alloc *ei;
eaefd110 242 struct socket_wq *wq;
89bddce5 243
e94b1766 244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
245 if (!ei)
246 return NULL;
eaefd110
ED
247 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
248 if (!wq) {
43815482
ED
249 kmem_cache_free(sock_inode_cachep, ei);
250 return NULL;
251 }
eaefd110
ED
252 init_waitqueue_head(&wq->wait);
253 wq->fasync_list = NULL;
574aab1e 254 wq->flags = 0;
e6476c21 255 ei->socket.wq = wq;
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482
ED
268 struct socket_alloc *ei;
269
270 ei = container_of(inode, struct socket_alloc, vfs_inode);
e6476c21 271 kfree_rcu(ei->socket.wq, rcu);
43815482 272 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
273}
274
51cc5068 275static void init_once(void *foo)
1da177e4 276{
89bddce5 277 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 278
a35afb83 279 inode_init_once(&ei->vfs_inode);
1da177e4 280}
89bddce5 281
1e911632 282static void init_inodecache(void)
1da177e4
LT
283{
284 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
285 sizeof(struct socket_alloc),
286 0,
287 (SLAB_HWCACHE_ALIGN |
288 SLAB_RECLAIM_ACCOUNT |
5d097056 289 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 290 init_once);
1e911632 291 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
292}
293
b87221de 294static const struct super_operations sockfs_ops = {
c6d409cf
ED
295 .alloc_inode = sock_alloc_inode,
296 .destroy_inode = sock_destroy_inode,
297 .statfs = simple_statfs,
1da177e4
LT
298};
299
c23fbb6b
ED
300/*
301 * sockfs_dname() is called from d_path().
302 */
303static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
304{
305 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 306 d_inode(dentry)->i_ino);
c23fbb6b
ED
307}
308
3ba13d17 309static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 310 .d_dname = sockfs_dname,
1da177e4
LT
311};
312
bba0bd31
AG
313static int sockfs_xattr_get(const struct xattr_handler *handler,
314 struct dentry *dentry, struct inode *inode,
315 const char *suffix, void *value, size_t size)
316{
317 if (value) {
318 if (dentry->d_name.len + 1 > size)
319 return -ERANGE;
320 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
321 }
322 return dentry->d_name.len + 1;
323}
324
/* Attribute suffix, full attribute name, and the name's length without the NUL. */
#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
328
329static const struct xattr_handler sockfs_xattr_handler = {
330 .name = XATTR_NAME_SOCKPROTONAME,
331 .get = sockfs_xattr_get,
332};
333
4a590153
AG
334static int sockfs_security_xattr_set(const struct xattr_handler *handler,
335 struct dentry *dentry, struct inode *inode,
336 const char *suffix, const void *value,
337 size_t size, int flags)
338{
339 /* Handled by LSM. */
340 return -EAGAIN;
341}
342
343static const struct xattr_handler sockfs_security_xattr_handler = {
344 .prefix = XATTR_SECURITY_PREFIX,
345 .set = sockfs_security_xattr_set,
346};
347
bba0bd31
AG
348static const struct xattr_handler *sockfs_xattr_handlers[] = {
349 &sockfs_xattr_handler,
4a590153 350 &sockfs_security_xattr_handler,
bba0bd31
AG
351 NULL
352};
353
c74a1cbb
AV
354static struct dentry *sockfs_mount(struct file_system_type *fs_type,
355 int flags, const char *dev_name, void *data)
356{
bba0bd31
AG
357 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
358 sockfs_xattr_handlers,
359 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
360}
361
362static struct vfsmount *sock_mnt __read_mostly;
363
364static struct file_system_type sock_fs_type = {
365 .name = "sockfs",
366 .mount = sockfs_mount,
367 .kill_sb = kill_anon_super,
368};
369
1da177e4
LT
370/*
371 * Obtains the first available file descriptor and sets it up for use.
372 *
39d8c1b6
DM
 *	These functions create file structures and map them into the fd space
 *	of the current process. On success they return a file descriptor
1da177e4
LT
375 * and file struct implicitly stored in sock->file.
376 * Note that another thread may close file descriptor before we return
377 * from this function. We use the fact that now we do not refer
378 * to socket after mapping. If one day we will need it, this
379 * function will increment ref. count on file by 1.
380 *
381 * In any case returned fd MAY BE not valid!
382 * This race condition is unavoidable
383 * with shared fd spaces, we cannot solve it inside kernel,
384 * but we take care of internal coherence yet.
385 */
386
aab174f0 387struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 388{
7cbe66b6 389 struct file *file;
1da177e4 390
d93aa9d8
AV
391 if (!dname)
392 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 393
d93aa9d8
AV
394 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
395 O_RDWR | (flags & O_NONBLOCK),
396 &socket_file_ops);
b5ffe634 397 if (IS_ERR(file)) {
8e1611e2 398 sock_release(sock);
39b65252 399 return file;
cc3808f8
AV
400 }
401
402 sock->file = file;
39d8c1b6 403 file->private_data = sock;
28407630 404 return file;
39d8c1b6 405}
56b31d1c 406EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 407
56b31d1c 408static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
409{
410 struct file *newfile;
28407630 411 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
412 if (unlikely(fd < 0)) {
413 sock_release(sock);
28407630 414 return fd;
ce4bb04c 415 }
39d8c1b6 416
aab174f0 417 newfile = sock_alloc_file(sock, flags, NULL);
28407630 418 if (likely(!IS_ERR(newfile))) {
39d8c1b6 419 fd_install(fd, newfile);
28407630
AV
420 return fd;
421 }
7cbe66b6 422
28407630
AV
423 put_unused_fd(fd);
424 return PTR_ERR(newfile);
1da177e4
LT
425}
426
406a3c63 427struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 428{
6cb153ca
BL
429 if (file->f_op == &socket_file_ops)
430 return file->private_data; /* set in sock_map_fd */
431
23bb80d2
ED
432 *err = -ENOTSOCK;
433 return NULL;
6cb153ca 434}
406a3c63 435EXPORT_SYMBOL(sock_from_file);
6cb153ca 436
1da177e4 437/**
c6d409cf 438 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
439 * @fd: file handle
440 * @err: pointer to an error code return
441 *
442 * The file handle passed in is locked and the socket it is bound
241c4667 443 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
444 * with a negative errno code and NULL is returned. The function checks
445 * for both invalid handles and passing a handle which is not a socket.
446 *
447 * On a success the socket object pointer is returned.
448 */
449
450struct socket *sockfd_lookup(int fd, int *err)
451{
452 struct file *file;
1da177e4
LT
453 struct socket *sock;
454
89bddce5
SH
455 file = fget(fd);
456 if (!file) {
1da177e4
LT
457 *err = -EBADF;
458 return NULL;
459 }
89bddce5 460
6cb153ca
BL
461 sock = sock_from_file(file, err);
462 if (!sock)
1da177e4 463 fput(file);
6cb153ca
BL
464 return sock;
465}
c6d409cf 466EXPORT_SYMBOL(sockfd_lookup);
1da177e4 467
6cb153ca
BL
468static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
469{
00e188ef 470 struct fd f = fdget(fd);
6cb153ca
BL
471 struct socket *sock;
472
3672558c 473 *err = -EBADF;
00e188ef
AV
474 if (f.file) {
475 sock = sock_from_file(f.file, err);
476 if (likely(sock)) {
477 *fput_needed = f.flags;
6cb153ca 478 return sock;
00e188ef
AV
479 }
480 fdput(f);
1da177e4 481 }
6cb153ca 482 return NULL;
1da177e4
LT
483}
484
600e1779
MY
485static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
486 size_t size)
487{
488 ssize_t len;
489 ssize_t used = 0;
490
c5ef6035 491 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
492 if (len < 0)
493 return len;
494 used += len;
495 if (buffer) {
496 if (size < used)
497 return -ERANGE;
498 buffer += len;
499 }
500
501 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
502 used += len;
503 if (buffer) {
504 if (size < used)
505 return -ERANGE;
506 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
507 buffer += len;
508 }
509
510 return used;
511}
512
dc647ec8 513static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
514{
515 int err = simple_setattr(dentry, iattr);
516
e1a3a60a 517 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
518 struct socket *sock = SOCKET_I(d_inode(dentry));
519
6d8c50dc
CW
520 if (sock->sk)
521 sock->sk->sk_uid = iattr->ia_uid;
522 else
523 err = -ENOENT;
86741ec2
LC
524 }
525
526 return err;
527}
528
600e1779 529static const struct inode_operations sockfs_inode_ops = {
600e1779 530 .listxattr = sockfs_listxattr,
86741ec2 531 .setattr = sockfs_setattr,
600e1779
MY
532};
533
1da177e4
LT
534/**
535 * sock_alloc - allocate a socket
89bddce5 536 *
1da177e4
LT
537 * Allocate a new inode and socket object. The two are bound together
538 * and initialised. The socket is then returned. If we are out of inodes
539 * NULL is returned.
540 */
541
f4a00aac 542struct socket *sock_alloc(void)
1da177e4 543{
89bddce5
SH
544 struct inode *inode;
545 struct socket *sock;
1da177e4 546
a209dfc7 547 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
548 if (!inode)
549 return NULL;
550
551 sock = SOCKET_I(inode);
552
85fe4025 553 inode->i_ino = get_next_ino();
89bddce5 554 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
555 inode->i_uid = current_fsuid();
556 inode->i_gid = current_fsgid();
600e1779 557 inode->i_op = &sockfs_inode_ops;
1da177e4 558
1da177e4
LT
559 return sock;
560}
f4a00aac 561EXPORT_SYMBOL(sock_alloc);
1da177e4 562
1da177e4
LT
563/**
564 * sock_release - close a socket
565 * @sock: socket to close
566 *
567 * The socket is released from the protocol stack if it has a release
568 * callback, and the inode is then released if the socket is bound to
89bddce5 569 * an inode not a file.
1da177e4 570 */
89bddce5 571
6d8c50dc 572static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
573{
574 if (sock->ops) {
575 struct module *owner = sock->ops->owner;
576
6d8c50dc
CW
577 if (inode)
578 inode_lock(inode);
1da177e4 579 sock->ops->release(sock);
6d8c50dc
CW
580 if (inode)
581 inode_unlock(inode);
1da177e4
LT
582 sock->ops = NULL;
583 module_put(owner);
584 }
585
e6476c21 586 if (sock->wq->fasync_list)
3410f22e 587 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 588
1da177e4
LT
589 if (!sock->file) {
590 iput(SOCK_INODE(sock));
591 return;
592 }
89bddce5 593 sock->file = NULL;
1da177e4 594}
6d8c50dc
CW
595
596void sock_release(struct socket *sock)
597{
598 __sock_release(sock, NULL);
599}
c6d409cf 600EXPORT_SYMBOL(sock_release);
1da177e4 601
c14ac945 602void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 603{
140c55d4
ED
604 u8 flags = *tx_flags;
605
c14ac945 606 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
607 flags |= SKBTX_HW_TSTAMP;
608
c14ac945 609 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
610 flags |= SKBTX_SW_TSTAMP;
611
c14ac945 612 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
613 flags |= SKBTX_SCHED_TSTAMP;
614
140c55d4 615 *tx_flags = flags;
20d49473 616}
67cc0d40 617EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 618
d8725c86 619static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 620{
01e97e65 621 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
622 BUG_ON(ret == -EIOCBQUEUED);
623 return ret;
1da177e4
LT
624}
625
/*
 * Send @msg on @sock after consulting security_socket_sendmsg().  A non-zero
 * result from the security hook is returned as is; otherwise the result of
 * the protocol's sendmsg() is returned.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
634
635int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
636 struct kvec *vec, size_t num, size_t size)
637{
6aa24814 638 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 639 return sock_sendmsg(sock, msg);
1da177e4 640}
c6d409cf 641EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 642
306b13eb
TH
643int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
644 struct kvec *vec, size_t num, size_t size)
645{
646 struct socket *sock = sk->sk_socket;
647
648 if (!sock->ops->sendmsg_locked)
db5980d8 649 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb
TH
650
651 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
652
653 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
654}
655EXPORT_SYMBOL(kernel_sendmsg_locked);
656
8605330a
SHY
657static bool skb_is_err_queue(const struct sk_buff *skb)
658{
659 /* pkt_type of skbs enqueued on the error queue are set to
660 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
661 * in recvmsg, since skbs received on a local socket will never
662 * have a pkt_type of PACKET_OUTGOING.
663 */
664 return skb->pkt_type == PACKET_OUTGOING;
665}
666
b50a5c70
ML
667/* On transmit, software and hardware timestamps are returned independently.
668 * As the two skb clones share the hardware timestamp, which may be updated
669 * before the software timestamp is received, a hardware TX timestamp may be
670 * returned only if there is no software TX timestamp. Ignore false software
671 * timestamps, which may be made in the __sock_recv_timestamp() call when the
672 * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
673 * hardware timestamp.
674 */
675static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
676{
677 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
678}
679
aad9c8c4
ML
680static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
681{
682 struct scm_ts_pktinfo ts_pktinfo;
683 struct net_device *orig_dev;
684
685 if (!skb_mac_header_was_set(skb))
686 return;
687
688 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
689
690 rcu_read_lock();
691 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
692 if (orig_dev)
693 ts_pktinfo.if_index = orig_dev->ifindex;
694 rcu_read_unlock();
695
696 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
697 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
698 sizeof(ts_pktinfo), &ts_pktinfo);
699}
700
92f37fd2
ED
701/*
702 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
703 */
704void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
705 struct sk_buff *skb)
706{
20d49473 707 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 708 struct scm_timestamping tss;
b50a5c70 709 int empty = 1, false_tstamp = 0;
20d49473
PO
710 struct skb_shared_hwtstamps *shhwtstamps =
711 skb_hwtstamps(skb);
712
713 /* Race occurred between timestamp enabling and packet
714 receiving. Fill in the current time for now. */
b50a5c70 715 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 716 __net_timestamp(skb);
b50a5c70
ML
717 false_tstamp = 1;
718 }
20d49473
PO
719
720 if (need_software_tstamp) {
721 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
722 struct timeval tv;
723 skb_get_timestamp(skb, &tv);
724 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
725 sizeof(tv), &tv);
726 } else {
f24b9be5
WB
727 struct timespec ts;
728 skb_get_timestampns(skb, &ts);
20d49473 729 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 730 sizeof(ts), &ts);
20d49473
PO
731 }
732 }
733
f24b9be5 734 memset(&tss, 0, sizeof(tss));
c199105d 735 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 736 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 737 empty = 0;
4d276eb6 738 if (shhwtstamps &&
b9f40e21 739 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 740 !skb_is_swtx_tstamp(skb, false_tstamp) &&
aad9c8c4 741 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 742 empty = 0;
aad9c8c4
ML
743 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
744 !skb_is_err_queue(skb))
745 put_ts_pktinfo(msg, skb);
746 }
1c885808 747 if (!empty) {
20d49473 748 put_cmsg(msg, SOL_SOCKET,
f24b9be5 749 SCM_TIMESTAMPING, sizeof(tss), &tss);
1c885808 750
8605330a 751 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 752 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
753 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
754 skb->len, skb->data);
755 }
92f37fd2 756}
7c81fd8b
ACM
757EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
758
6e3e939f
JB
759void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
760 struct sk_buff *skb)
761{
762 int ack;
763
764 if (!sock_flag(sk, SOCK_WIFI_STATUS))
765 return;
766 if (!skb->wifi_acked_valid)
767 return;
768
769 ack = skb->wifi_acked;
770
771 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
772}
773EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
774
11165f14 775static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
776 struct sk_buff *skb)
3b885787 777{
744d5a3e 778 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 779 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 780 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
781}
782
/* Attach the timestamp messages first, then the drop-count message. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);	/* timestamps */
	sock_recv_drops(msg, sk, skb);		/* SO_RXQ_OVFL */
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 790
1b784140 791static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 792 int flags)
1da177e4 793{
2da62906 794 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
795}
796
/*
 * Receive into @msg from @sock after consulting security_socket_recvmsg().
 * A non-zero result from the security hook is returned as is; otherwise
 * the result of the protocol's recvmsg() is returned.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 804
c1249c0a
ML
805/**
806 * kernel_recvmsg - Receive a message from a socket (kernel space)
807 * @sock: The socket to receive the message from
808 * @msg: Received message
809 * @vec: Input s/g array for message data
810 * @num: Size of input s/g array
811 * @size: Number of bytes to read
812 * @flags: Message flags (MSG_DONTWAIT, etc...)
813 *
814 * On return the msg structure contains the scatter/gather array passed in the
815 * vec argument. The array is modified so that it consists of the unfilled
816 * portion of the original array.
817 *
818 * The returned value is the total number of bytes received, or an error.
819 */
89bddce5
SH
820int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
821 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
822{
823 mm_segment_t oldfs = get_fs();
824 int result;
825
6aa24814 826 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 827 set_fs(KERNEL_DS);
2da62906 828 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
829 set_fs(oldfs);
830 return result;
831}
c6d409cf 832EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 833
ce1d4d3e
CH
834static ssize_t sock_sendpage(struct file *file, struct page *page,
835 int offset, size_t size, loff_t *ppos, int more)
1da177e4 836{
1da177e4
LT
837 struct socket *sock;
838 int flags;
839
ce1d4d3e
CH
840 sock = file->private_data;
841
35f9c09f
ED
842 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
843 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
844 flags |= more;
ce1d4d3e 845
e6949583 846 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 847}
1da177e4 848
9c55e01c 849static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 850 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
851 unsigned int flags)
852{
853 struct socket *sock = file->private_data;
854
997b37da
RDC
855 if (unlikely(!sock->ops->splice_read))
856 return -EINVAL;
857
9c55e01c
JA
858 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
859}
860
8ae5e030 861static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 862{
6d652330
AV
863 struct file *file = iocb->ki_filp;
864 struct socket *sock = file->private_data;
0345f931 865 struct msghdr msg = {.msg_iter = *to,
866 .msg_iocb = iocb};
8ae5e030 867 ssize_t res;
ce1d4d3e 868
8ae5e030
AV
869 if (file->f_flags & O_NONBLOCK)
870 msg.msg_flags = MSG_DONTWAIT;
871
872 if (iocb->ki_pos != 0)
1da177e4 873 return -ESPIPE;
027445c3 874
66ee59af 875 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
876 return 0;
877
2da62906 878 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
879 *to = msg.msg_iter;
880 return res;
1da177e4
LT
881}
882
8ae5e030 883static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 884{
6d652330
AV
885 struct file *file = iocb->ki_filp;
886 struct socket *sock = file->private_data;
0345f931 887 struct msghdr msg = {.msg_iter = *from,
888 .msg_iocb = iocb};
8ae5e030 889 ssize_t res;
1da177e4 890
8ae5e030 891 if (iocb->ki_pos != 0)
ce1d4d3e 892 return -ESPIPE;
027445c3 893
8ae5e030
AV
894 if (file->f_flags & O_NONBLOCK)
895 msg.msg_flags = MSG_DONTWAIT;
896
6d652330
AV
897 if (sock->type == SOCK_SEQPACKET)
898 msg.msg_flags |= MSG_EOR;
899
d8725c86 900 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
901 *from = msg.msg_iter;
902 return res;
1da177e4
LT
903}
904
1da177e4
LT
905/*
906 * Atomic setting of ioctl hooks to avoid race
907 * with module unload.
908 */
909
4a3e2f71 910static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 911static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 912
881d966b 913void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 914{
4a3e2f71 915 mutex_lock(&br_ioctl_mutex);
1da177e4 916 br_ioctl_hook = hook;
4a3e2f71 917 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
918}
919EXPORT_SYMBOL(brioctl_set);
920
4a3e2f71 921static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 922static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 923
881d966b 924void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 925{
4a3e2f71 926 mutex_lock(&vlan_ioctl_mutex);
1da177e4 927 vlan_ioctl_hook = hook;
4a3e2f71 928 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
929}
930EXPORT_SYMBOL(vlan_ioctl_set);
931
4a3e2f71 932static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 933static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 934
89bddce5 935void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 936{
4a3e2f71 937 mutex_lock(&dlci_ioctl_mutex);
1da177e4 938 dlci_ioctl_hook = hook;
4a3e2f71 939 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
940}
941EXPORT_SYMBOL(dlci_ioctl_set);
942
6b96018b
AB
943static long sock_do_ioctl(struct net *net, struct socket *sock,
944 unsigned int cmd, unsigned long arg)
945{
946 int err;
947 void __user *argp = (void __user *)arg;
948
949 err = sock->ops->ioctl(sock, cmd, arg);
950
951 /*
952 * If this ioctl is unknown try to hand it down
953 * to the NIC driver.
954 */
36fd633e
AV
955 if (err != -ENOIOCTLCMD)
956 return err;
6b96018b 957
36fd633e
AV
958 if (cmd == SIOCGIFCONF) {
959 struct ifconf ifc;
960 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
961 return -EFAULT;
962 rtnl_lock();
963 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
964 rtnl_unlock();
965 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
966 err = -EFAULT;
44c02a2c
AV
967 } else {
968 struct ifreq ifr;
969 bool need_copyout;
970 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
971 return -EFAULT;
972 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
973 if (!err && need_copyout)
974 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
975 return -EFAULT;
36fd633e 976 }
6b96018b
AB
977 return err;
978}
979
1da177e4
LT
980/*
981 * With an ioctl, arg may well be a user mode pointer, but we don't know
982 * what to do with it - that's up to the protocol still.
983 */
984
d8d211a2 985struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
986{
987 return &get_net(container_of(ns, struct net, ns))->ns;
988}
d8d211a2 989EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 990
1da177e4
LT
991static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
992{
993 struct socket *sock;
881d966b 994 struct sock *sk;
1da177e4
LT
995 void __user *argp = (void __user *)arg;
996 int pid, err;
881d966b 997 struct net *net;
1da177e4 998
b69aee04 999 sock = file->private_data;
881d966b 1000 sk = sock->sk;
3b1e0a65 1001 net = sock_net(sk);
44c02a2c
AV
1002 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1003 struct ifreq ifr;
1004 bool need_copyout;
1005 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1006 return -EFAULT;
1007 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1008 if (!err && need_copyout)
1009 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1010 return -EFAULT;
1da177e4 1011 } else
3d23e349 1012#ifdef CONFIG_WEXT_CORE
1da177e4 1013 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1014 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1015 } else
3d23e349 1016#endif
89bddce5 1017 switch (cmd) {
1da177e4
LT
1018 case FIOSETOWN:
1019 case SIOCSPGRP:
1020 err = -EFAULT;
1021 if (get_user(pid, (int __user *)argp))
1022 break;
393cc3f5 1023 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1024 break;
1025 case FIOGETOWN:
1026 case SIOCGPGRP:
609d7fa9 1027 err = put_user(f_getown(sock->file),
89bddce5 1028 (int __user *)argp);
1da177e4
LT
1029 break;
1030 case SIOCGIFBR:
1031 case SIOCSIFBR:
1032 case SIOCBRADDBR:
1033 case SIOCBRDELBR:
1034 err = -ENOPKG;
1035 if (!br_ioctl_hook)
1036 request_module("bridge");
1037
4a3e2f71 1038 mutex_lock(&br_ioctl_mutex);
89bddce5 1039 if (br_ioctl_hook)
881d966b 1040 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1041 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1042 break;
1043 case SIOCGIFVLAN:
1044 case SIOCSIFVLAN:
1045 err = -ENOPKG;
1046 if (!vlan_ioctl_hook)
1047 request_module("8021q");
1048
4a3e2f71 1049 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1050 if (vlan_ioctl_hook)
881d966b 1051 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1052 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1053 break;
1da177e4
LT
1054 case SIOCADDDLCI:
1055 case SIOCDELDLCI:
1056 err = -ENOPKG;
1057 if (!dlci_ioctl_hook)
1058 request_module("dlci");
1059
7512cbf6
PE
1060 mutex_lock(&dlci_ioctl_mutex);
1061 if (dlci_ioctl_hook)
1da177e4 1062 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1063 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1064 break;
c62cce2c
AV
1065 case SIOCGSKNS:
1066 err = -EPERM;
1067 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1068 break;
1069
1070 err = open_related_ns(&net->ns, get_net_ns);
1071 break;
1da177e4 1072 default:
6b96018b 1073 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1074 break;
89bddce5 1075 }
1da177e4
LT
1076 return err;
1077}
1078
1079int sock_create_lite(int family, int type, int protocol, struct socket **res)
1080{
1081 int err;
1082 struct socket *sock = NULL;
89bddce5 1083
1da177e4
LT
1084 err = security_socket_create(family, type, protocol, 1);
1085 if (err)
1086 goto out;
1087
1088 sock = sock_alloc();
1089 if (!sock) {
1090 err = -ENOMEM;
1091 goto out;
1092 }
1093
1da177e4 1094 sock->type = type;
7420ed23
VY
1095 err = security_socket_post_create(sock, family, type, protocol, 1);
1096 if (err)
1097 goto out_release;
1098
1da177e4
LT
1099out:
1100 *res = sock;
1101 return err;
7420ed23
VY
1102out_release:
1103 sock_release(sock);
1104 sock = NULL;
1105 goto out;
1da177e4 1106}
c6d409cf 1107EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1108
1109/* No kernel lock held - perfect */
ade994f4 1110static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1111{
3cafb376 1112 struct socket *sock = file->private_data;
a331de3b 1113 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1114
e88958e6
CH
1115 if (!sock->ops->poll)
1116 return 0;
f641f13b 1117
a331de3b
CH
1118 if (sk_can_busy_loop(sock->sk)) {
1119 /* poll once if requested by the syscall */
1120 if (events & POLL_BUSY_LOOP)
1121 sk_busy_loop(sock->sk, 1);
1122
1123 /* if this socket can poll_ll, tell the system call */
1124 flag = POLL_BUSY_LOOP;
1125 }
1126
1127 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1128}
1129
89bddce5 1130static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1131{
b69aee04 1132 struct socket *sock = file->private_data;
1da177e4
LT
1133
1134 return sock->ops->mmap(file, sock, vma);
1135}
1136
/*
 * sock_close - release handler for socket files.
 *
 * Passes the socket obtained from @inode via SOCKET_I() to
 * __sock_release(). @filp is unused. Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1142
1143/*
1144 * Update the socket async list
1145 *
1146 * Fasync_list locking strategy.
1147 *
1148 * 1. fasync_list is modified only under process context socket lock
1149 * i.e. under semaphore.
1150 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1151 * or under socket lock
1da177e4
LT
1152 */
1153
1154static int sock_fasync(int fd, struct file *filp, int on)
1155{
989a2979
ED
1156 struct socket *sock = filp->private_data;
1157 struct sock *sk = sock->sk;
eaefd110 1158 struct socket_wq *wq;
1da177e4 1159
989a2979 1160 if (sk == NULL)
1da177e4 1161 return -EINVAL;
1da177e4
LT
1162
1163 lock_sock(sk);
e6476c21 1164 wq = sock->wq;
eaefd110 1165 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1166
eaefd110 1167 if (!wq->fasync_list)
989a2979
ED
1168 sock_reset_flag(sk, SOCK_FASYNC);
1169 else
bcdce719 1170 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1171
989a2979 1172 release_sock(sk);
1da177e4
LT
1173 return 0;
1174}
1175
ceb5d58b 1176/* This function may be called only under rcu_lock */
1da177e4 1177
ceb5d58b 1178int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1179{
ceb5d58b 1180 if (!wq || !wq->fasync_list)
1da177e4 1181 return -1;
ceb5d58b 1182
89bddce5 1183 switch (how) {
8d8ad9d7 1184 case SOCK_WAKE_WAITD:
ceb5d58b 1185 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1186 break;
1187 goto call_kill;
8d8ad9d7 1188 case SOCK_WAKE_SPACE:
ceb5d58b 1189 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1190 break;
1191 /* fall through */
8d8ad9d7 1192 case SOCK_WAKE_IO:
89bddce5 1193call_kill:
43815482 1194 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1195 break;
8d8ad9d7 1196 case SOCK_WAKE_URG:
43815482 1197 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1198 }
ceb5d58b 1199
1da177e4
LT
1200 return 0;
1201}
c6d409cf 1202EXPORT_SYMBOL(sock_wake_async);
1da177e4 1203
721db93a 1204int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1205 struct socket **res, int kern)
1da177e4
LT
1206{
1207 int err;
1208 struct socket *sock;
55737fda 1209 const struct net_proto_family *pf;
1da177e4
LT
1210
1211 /*
89bddce5 1212 * Check protocol is in range
1da177e4
LT
1213 */
1214 if (family < 0 || family >= NPROTO)
1215 return -EAFNOSUPPORT;
1216 if (type < 0 || type >= SOCK_MAX)
1217 return -EINVAL;
1218
1219 /* Compatibility.
1220
1221 This uglymoron is moved from INET layer to here to avoid
1222 deadlock in module load.
1223 */
1224 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1225 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1226 current->comm);
1da177e4
LT
1227 family = PF_PACKET;
1228 }
1229
1230 err = security_socket_create(family, type, protocol, kern);
1231 if (err)
1232 return err;
89bddce5 1233
55737fda
SH
1234 /*
1235 * Allocate the socket and allow the family to set things up. if
1236 * the protocol is 0, the family is instructed to select an appropriate
1237 * default.
1238 */
1239 sock = sock_alloc();
1240 if (!sock) {
e87cc472 1241 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1242 return -ENFILE; /* Not exactly a match, but its the
1243 closest posix thing */
1244 }
1245
1246 sock->type = type;
1247
95a5afca 1248#ifdef CONFIG_MODULES
89bddce5
SH
1249 /* Attempt to load a protocol module if the find failed.
1250 *
1251 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1252 * requested real, full-featured networking support upon configuration.
1253 * Otherwise module support will break!
1254 */
190683a9 1255 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1256 request_module("net-pf-%d", family);
1da177e4
LT
1257#endif
1258
55737fda
SH
1259 rcu_read_lock();
1260 pf = rcu_dereference(net_families[family]);
1261 err = -EAFNOSUPPORT;
1262 if (!pf)
1263 goto out_release;
1da177e4
LT
1264
1265 /*
1266 * We will call the ->create function, that possibly is in a loadable
1267 * module, so we have to bump that loadable module refcnt first.
1268 */
55737fda 1269 if (!try_module_get(pf->owner))
1da177e4
LT
1270 goto out_release;
1271
55737fda
SH
1272 /* Now protected by module ref count */
1273 rcu_read_unlock();
1274
3f378b68 1275 err = pf->create(net, sock, protocol, kern);
55737fda 1276 if (err < 0)
1da177e4 1277 goto out_module_put;
a79af59e 1278
1da177e4
LT
1279 /*
1280 * Now to bump the refcnt of the [loadable] module that owns this
1281 * socket at sock_release time we decrement its refcnt.
1282 */
55737fda
SH
1283 if (!try_module_get(sock->ops->owner))
1284 goto out_module_busy;
1285
1da177e4
LT
1286 /*
1287 * Now that we're done with the ->create function, the [loadable]
1288 * module can have its refcnt decremented
1289 */
55737fda 1290 module_put(pf->owner);
7420ed23
VY
1291 err = security_socket_post_create(sock, family, type, protocol, kern);
1292 if (err)
3b185525 1293 goto out_sock_release;
55737fda 1294 *res = sock;
1da177e4 1295
55737fda
SH
1296 return 0;
1297
1298out_module_busy:
1299 err = -EAFNOSUPPORT;
1da177e4 1300out_module_put:
55737fda
SH
1301 sock->ops = NULL;
1302 module_put(pf->owner);
1303out_sock_release:
1da177e4 1304 sock_release(sock);
55737fda
SH
1305 return err;
1306
1307out_release:
1308 rcu_read_unlock();
1309 goto out_sock_release;
1da177e4 1310}
721db93a 1311EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1312
1313int sock_create(int family, int type, int protocol, struct socket **res)
1314{
1b8d7ae4 1315 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1316}
c6d409cf 1317EXPORT_SYMBOL(sock_create);
1da177e4 1318
/*
 * sock_create_kern - create a socket in the namespace @net.
 *
 * Calls __sock_create() with kern == 1 and returns its result.
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1324
9d6a15c3 1325int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1326{
1327 int retval;
1328 struct socket *sock;
a677a039
UD
1329 int flags;
1330
e38b36f3
UD
1331 /* Check the SOCK_* constants for consistency. */
1332 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1333 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1334 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1335 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1336
a677a039 1337 flags = type & ~SOCK_TYPE_MASK;
77d27200 1338 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1339 return -EINVAL;
1340 type &= SOCK_TYPE_MASK;
1da177e4 1341
aaca0bdc
UD
1342 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1343 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1344
1da177e4
LT
1345 retval = sock_create(family, type, protocol, &sock);
1346 if (retval < 0)
8e1611e2 1347 return retval;
1da177e4 1348
8e1611e2 1349 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1350}
1351
9d6a15c3
DB
1352SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1353{
1354 return __sys_socket(family, type, protocol);
1355}
1356
1da177e4
LT
1357/*
1358 * Create a pair of connected sockets.
1359 */
1360
6debc8d8 1361int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1362{
1363 struct socket *sock1, *sock2;
1364 int fd1, fd2, err;
db349509 1365 struct file *newfile1, *newfile2;
a677a039
UD
1366 int flags;
1367
1368 flags = type & ~SOCK_TYPE_MASK;
77d27200 1369 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1370 return -EINVAL;
1371 type &= SOCK_TYPE_MASK;
1da177e4 1372
aaca0bdc
UD
1373 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1374 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1375
016a266b
AV
1376 /*
1377 * reserve descriptors and make sure we won't fail
1378 * to return them to userland.
1379 */
1380 fd1 = get_unused_fd_flags(flags);
1381 if (unlikely(fd1 < 0))
1382 return fd1;
1383
1384 fd2 = get_unused_fd_flags(flags);
1385 if (unlikely(fd2 < 0)) {
1386 put_unused_fd(fd1);
1387 return fd2;
1388 }
1389
1390 err = put_user(fd1, &usockvec[0]);
1391 if (err)
1392 goto out;
1393
1394 err = put_user(fd2, &usockvec[1]);
1395 if (err)
1396 goto out;
1397
1da177e4
LT
1398 /*
1399 * Obtain the first socket and check if the underlying protocol
1400 * supports the socketpair call.
1401 */
1402
1403 err = sock_create(family, type, protocol, &sock1);
016a266b 1404 if (unlikely(err < 0))
1da177e4
LT
1405 goto out;
1406
1407 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1408 if (unlikely(err < 0)) {
1409 sock_release(sock1);
1410 goto out;
bf3c23d1 1411 }
d73aa286 1412
d47cd945
DH
1413 err = security_socket_socketpair(sock1, sock2);
1414 if (unlikely(err)) {
1415 sock_release(sock2);
1416 sock_release(sock1);
1417 goto out;
1418 }
1419
016a266b
AV
1420 err = sock1->ops->socketpair(sock1, sock2);
1421 if (unlikely(err < 0)) {
1422 sock_release(sock2);
1423 sock_release(sock1);
1424 goto out;
28407630
AV
1425 }
1426
aab174f0 1427 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1428 if (IS_ERR(newfile1)) {
28407630 1429 err = PTR_ERR(newfile1);
016a266b
AV
1430 sock_release(sock2);
1431 goto out;
28407630
AV
1432 }
1433
aab174f0 1434 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1435 if (IS_ERR(newfile2)) {
1436 err = PTR_ERR(newfile2);
016a266b
AV
1437 fput(newfile1);
1438 goto out;
db349509
AV
1439 }
1440
157cf649 1441 audit_fd_pair(fd1, fd2);
d73aa286 1442
db349509
AV
1443 fd_install(fd1, newfile1);
1444 fd_install(fd2, newfile2);
d73aa286 1445 return 0;
1da177e4 1446
016a266b 1447out:
d73aa286 1448 put_unused_fd(fd2);
d73aa286 1449 put_unused_fd(fd1);
1da177e4
LT
1450 return err;
1451}
1452
6debc8d8
DB
1453SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1454 int __user *, usockvec)
1455{
1456 return __sys_socketpair(family, type, protocol, usockvec);
1457}
1458
1da177e4
LT
1459/*
1460 * Bind a name to a socket. Nothing much to do here since it's
1461 * the protocol's responsibility to handle the local address.
1462 *
1463 * We move the socket address to kernel space before we call
1464 * the protocol layer (having also checked the address is ok).
1465 */
1466
a87d35d8 1467int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1468{
1469 struct socket *sock;
230b1839 1470 struct sockaddr_storage address;
6cb153ca 1471 int err, fput_needed;
1da177e4 1472
89bddce5 1473 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1474 if (sock) {
43db362d 1475 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1476 if (err >= 0) {
1477 err = security_socket_bind(sock,
230b1839 1478 (struct sockaddr *)&address,
89bddce5 1479 addrlen);
6cb153ca
BL
1480 if (!err)
1481 err = sock->ops->bind(sock,
89bddce5 1482 (struct sockaddr *)
230b1839 1483 &address, addrlen);
1da177e4 1484 }
6cb153ca 1485 fput_light(sock->file, fput_needed);
89bddce5 1486 }
1da177e4
LT
1487 return err;
1488}
1489
a87d35d8
DB
1490SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1491{
1492 return __sys_bind(fd, umyaddr, addrlen);
1493}
1494
1da177e4
LT
1495/*
1496 * Perform a listen. Basically, we allow the protocol to do anything
1497 * necessary for a listen, and if that works, we mark the socket as
1498 * ready for listening.
1499 */
1500
25e290ee 1501int __sys_listen(int fd, int backlog)
1da177e4
LT
1502{
1503 struct socket *sock;
6cb153ca 1504 int err, fput_needed;
b8e1f9b5 1505 int somaxconn;
89bddce5
SH
1506
1507 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1508 if (sock) {
8efa6e93 1509 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1510 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1511 backlog = somaxconn;
1da177e4
LT
1512
1513 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1514 if (!err)
1515 err = sock->ops->listen(sock, backlog);
1da177e4 1516
6cb153ca 1517 fput_light(sock->file, fput_needed);
1da177e4
LT
1518 }
1519 return err;
1520}
1521
25e290ee
DB
1522SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1523{
1524 return __sys_listen(fd, backlog);
1525}
1526
1da177e4
LT
1527/*
1528 * For accept, we attempt to create a new socket, set up the link
1529 * with the client, wake up the client, then return the new
1530 * connected fd. We collect the address of the connector in kernel
1531 * space and move it to user at the very end. This is unclean because
1532 * we open the socket then return an error.
1533 *
1534 * 1003.1g adds the ability to recvmsg() to query connection pending
1535 * status to recvmsg. We need to add that support in a way that's
b903036a 1536 * clean when we restructure accept also.
1da177e4
LT
1537 */
1538
4541e805
DB
1539int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1540 int __user *upeer_addrlen, int flags)
1da177e4
LT
1541{
1542 struct socket *sock, *newsock;
39d8c1b6 1543 struct file *newfile;
6cb153ca 1544 int err, len, newfd, fput_needed;
230b1839 1545 struct sockaddr_storage address;
1da177e4 1546
77d27200 1547 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1548 return -EINVAL;
1549
1550 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1551 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1552
6cb153ca 1553 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1554 if (!sock)
1555 goto out;
1556
1557 err = -ENFILE;
c6d409cf
ED
1558 newsock = sock_alloc();
1559 if (!newsock)
1da177e4
LT
1560 goto out_put;
1561
1562 newsock->type = sock->type;
1563 newsock->ops = sock->ops;
1564
1da177e4
LT
1565 /*
1566 * We don't need try_module_get here, as the listening socket (sock)
1567 * has the protocol module (sock->ops->owner) held.
1568 */
1569 __module_get(newsock->ops->owner);
1570
28407630 1571 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1572 if (unlikely(newfd < 0)) {
1573 err = newfd;
9a1875e6
DM
1574 sock_release(newsock);
1575 goto out_put;
39d8c1b6 1576 }
aab174f0 1577 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1578 if (IS_ERR(newfile)) {
28407630
AV
1579 err = PTR_ERR(newfile);
1580 put_unused_fd(newfd);
28407630
AV
1581 goto out_put;
1582 }
39d8c1b6 1583
a79af59e
FF
1584 err = security_socket_accept(sock, newsock);
1585 if (err)
39d8c1b6 1586 goto out_fd;
a79af59e 1587
cdfbabfb 1588 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1589 if (err < 0)
39d8c1b6 1590 goto out_fd;
1da177e4
LT
1591
1592 if (upeer_sockaddr) {
9b2c45d4
DV
1593 len = newsock->ops->getname(newsock,
1594 (struct sockaddr *)&address, 2);
1595 if (len < 0) {
1da177e4 1596 err = -ECONNABORTED;
39d8c1b6 1597 goto out_fd;
1da177e4 1598 }
43db362d 1599 err = move_addr_to_user(&address,
230b1839 1600 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1601 if (err < 0)
39d8c1b6 1602 goto out_fd;
1da177e4
LT
1603 }
1604
1605 /* File flags are not inherited via accept() unlike another OSes. */
1606
39d8c1b6
DM
1607 fd_install(newfd, newfile);
1608 err = newfd;
1da177e4 1609
1da177e4 1610out_put:
6cb153ca 1611 fput_light(sock->file, fput_needed);
1da177e4
LT
1612out:
1613 return err;
39d8c1b6 1614out_fd:
9606a216 1615 fput(newfile);
39d8c1b6 1616 put_unused_fd(newfd);
1da177e4
LT
1617 goto out_put;
1618}
1619
4541e805
DB
1620SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1621 int __user *, upeer_addrlen, int, flags)
1622{
1623 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1624}
1625
20f37034
HC
1626SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1627 int __user *, upeer_addrlen)
aaca0bdc 1628{
4541e805 1629 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1630}
1631
1da177e4
LT
1632/*
1633 * Attempt to connect to a socket with the server address. The address
1634 * is in user space so we verify it is OK and move it to kernel space.
1635 *
1636 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1637 * break bindings
1638 *
1639 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1640 * other SEQPACKET protocols that take time to connect() as it doesn't
1641 * include the -EINPROGRESS status for such sockets.
1642 */
1643
1387c2c2 1644int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1645{
1646 struct socket *sock;
230b1839 1647 struct sockaddr_storage address;
6cb153ca 1648 int err, fput_needed;
1da177e4 1649
6cb153ca 1650 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1651 if (!sock)
1652 goto out;
43db362d 1653 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1654 if (err < 0)
1655 goto out_put;
1656
89bddce5 1657 err =
230b1839 1658 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1659 if (err)
1660 goto out_put;
1661
230b1839 1662 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1663 sock->file->f_flags);
1664out_put:
6cb153ca 1665 fput_light(sock->file, fput_needed);
1da177e4
LT
1666out:
1667 return err;
1668}
1669
1387c2c2
DB
1670SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1671 int, addrlen)
1672{
1673 return __sys_connect(fd, uservaddr, addrlen);
1674}
1675
1da177e4
LT
1676/*
1677 * Get the local address ('name') of a socket object. Move the obtained
1678 * name to user space.
1679 */
1680
8882a107
DB
1681int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1682 int __user *usockaddr_len)
1da177e4
LT
1683{
1684 struct socket *sock;
230b1839 1685 struct sockaddr_storage address;
9b2c45d4 1686 int err, fput_needed;
89bddce5 1687
6cb153ca 1688 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1689 if (!sock)
1690 goto out;
1691
1692 err = security_socket_getsockname(sock);
1693 if (err)
1694 goto out_put;
1695
9b2c45d4
DV
1696 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1697 if (err < 0)
1da177e4 1698 goto out_put;
9b2c45d4
DV
1699 /* "err" is actually length in this case */
1700 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1701
1702out_put:
6cb153ca 1703 fput_light(sock->file, fput_needed);
1da177e4
LT
1704out:
1705 return err;
1706}
1707
8882a107
DB
1708SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1709 int __user *, usockaddr_len)
1710{
1711 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1712}
1713
1da177e4
LT
1714/*
1715 * Get the remote address ('name') of a socket object. Move the obtained
1716 * name to user space.
1717 */
1718
b21c8f83
DB
1719int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1720 int __user *usockaddr_len)
1da177e4
LT
1721{
1722 struct socket *sock;
230b1839 1723 struct sockaddr_storage address;
9b2c45d4 1724 int err, fput_needed;
1da177e4 1725
89bddce5
SH
1726 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1727 if (sock != NULL) {
1da177e4
LT
1728 err = security_socket_getpeername(sock);
1729 if (err) {
6cb153ca 1730 fput_light(sock->file, fput_needed);
1da177e4
LT
1731 return err;
1732 }
1733
9b2c45d4
DV
1734 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1735 if (err >= 0)
1736 /* "err" is actually length in this case */
1737 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1738 usockaddr_len);
6cb153ca 1739 fput_light(sock->file, fput_needed);
1da177e4
LT
1740 }
1741 return err;
1742}
1743
b21c8f83
DB
1744SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1745 int __user *, usockaddr_len)
1746{
1747 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1748}
1749
1da177e4
LT
1750/*
1751 * Send a datagram to a given address. We move the address into kernel
1752 * space and check the user space data area is readable before invoking
1753 * the protocol.
1754 */
211b634b
DB
1755int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1756 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1757{
1758 struct socket *sock;
230b1839 1759 struct sockaddr_storage address;
1da177e4
LT
1760 int err;
1761 struct msghdr msg;
1762 struct iovec iov;
6cb153ca 1763 int fput_needed;
6cb153ca 1764
602bd0e9
AV
1765 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1766 if (unlikely(err))
1767 return err;
de0fa95c
PE
1768 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1769 if (!sock)
4387ff75 1770 goto out;
6cb153ca 1771
89bddce5 1772 msg.msg_name = NULL;
89bddce5
SH
1773 msg.msg_control = NULL;
1774 msg.msg_controllen = 0;
1775 msg.msg_namelen = 0;
6cb153ca 1776 if (addr) {
43db362d 1777 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1778 if (err < 0)
1779 goto out_put;
230b1839 1780 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1781 msg.msg_namelen = addr_len;
1da177e4
LT
1782 }
1783 if (sock->file->f_flags & O_NONBLOCK)
1784 flags |= MSG_DONTWAIT;
1785 msg.msg_flags = flags;
d8725c86 1786 err = sock_sendmsg(sock, &msg);
1da177e4 1787
89bddce5 1788out_put:
de0fa95c 1789 fput_light(sock->file, fput_needed);
4387ff75 1790out:
1da177e4
LT
1791 return err;
1792}
1793
211b634b
DB
1794SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1795 unsigned int, flags, struct sockaddr __user *, addr,
1796 int, addr_len)
1797{
1798 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1799}
1800
1da177e4 1801/*
89bddce5 1802 * Send a datagram down a socket.
1da177e4
LT
1803 */
1804
3e0fa65f 1805SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1806 unsigned int, flags)
1da177e4 1807{
211b634b 1808 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1809}
1810
1811/*
89bddce5 1812 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1813 * sender. We verify the buffers are writable and if needed move the
1814 * sender address from kernel to user space.
1815 */
7a09e1eb
DB
1816int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
1817 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
1818{
1819 struct socket *sock;
1820 struct iovec iov;
1821 struct msghdr msg;
230b1839 1822 struct sockaddr_storage address;
89bddce5 1823 int err, err2;
6cb153ca
BL
1824 int fput_needed;
1825
602bd0e9
AV
1826 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1827 if (unlikely(err))
1828 return err;
de0fa95c 1829 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1830 if (!sock)
de0fa95c 1831 goto out;
1da177e4 1832
89bddce5
SH
1833 msg.msg_control = NULL;
1834 msg.msg_controllen = 0;
f3d33426
HFS
1835 /* Save some cycles and don't copy the address if not needed */
1836 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1837 /* We assume all kernel code knows the size of sockaddr_storage */
1838 msg.msg_namelen = 0;
130ed5d1 1839 msg.msg_iocb = NULL;
9f138fa6 1840 msg.msg_flags = 0;
1da177e4
LT
1841 if (sock->file->f_flags & O_NONBLOCK)
1842 flags |= MSG_DONTWAIT;
2da62906 1843 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1844
89bddce5 1845 if (err >= 0 && addr != NULL) {
43db362d 1846 err2 = move_addr_to_user(&address,
230b1839 1847 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1848 if (err2 < 0)
1849 err = err2;
1da177e4 1850 }
de0fa95c
PE
1851
1852 fput_light(sock->file, fput_needed);
4387ff75 1853out:
1da177e4
LT
1854 return err;
1855}
1856
7a09e1eb
DB
1857SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1858 unsigned int, flags, struct sockaddr __user *, addr,
1859 int __user *, addr_len)
1860{
1861 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
1862}
1863
1da177e4 1864/*
89bddce5 1865 * Receive a datagram from a socket.
1da177e4
LT
1866 */
1867
b7c0ddf5
JG
1868SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1869 unsigned int, flags)
1da177e4 1870{
7a09e1eb 1871 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
1872}
1873
1874/*
1875 * Set a socket option. Because we don't know the option lengths we have
1876 * to pass the user mode parameter for the protocols to sort out.
1877 */
1878
cc36dca0
DB
1879static int __sys_setsockopt(int fd, int level, int optname,
1880 char __user *optval, int optlen)
1da177e4 1881{
6cb153ca 1882 int err, fput_needed;
1da177e4
LT
1883 struct socket *sock;
1884
1885 if (optlen < 0)
1886 return -EINVAL;
89bddce5
SH
1887
1888 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1889 if (sock != NULL) {
1890 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1891 if (err)
1892 goto out_put;
1da177e4
LT
1893
1894 if (level == SOL_SOCKET)
89bddce5
SH
1895 err =
1896 sock_setsockopt(sock, level, optname, optval,
1897 optlen);
1da177e4 1898 else
89bddce5
SH
1899 err =
1900 sock->ops->setsockopt(sock, level, optname, optval,
1901 optlen);
6cb153ca
BL
1902out_put:
1903 fput_light(sock->file, fput_needed);
1da177e4
LT
1904 }
1905 return err;
1906}
1907
cc36dca0
DB
1908SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1909 char __user *, optval, int, optlen)
1910{
1911 return __sys_setsockopt(fd, level, optname, optval, optlen);
1912}
1913
1da177e4
LT
1914/*
1915 * Get a socket option. Because we don't know the option lengths we have
1916 * to pass a user mode parameter for the protocols to sort out.
1917 */
1918
13a2d70e
DB
1919static int __sys_getsockopt(int fd, int level, int optname,
1920 char __user *optval, int __user *optlen)
1da177e4 1921{
6cb153ca 1922 int err, fput_needed;
1da177e4
LT
1923 struct socket *sock;
1924
89bddce5
SH
1925 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1926 if (sock != NULL) {
6cb153ca
BL
1927 err = security_socket_getsockopt(sock, level, optname);
1928 if (err)
1929 goto out_put;
1da177e4
LT
1930
1931 if (level == SOL_SOCKET)
89bddce5
SH
1932 err =
1933 sock_getsockopt(sock, level, optname, optval,
1934 optlen);
1da177e4 1935 else
89bddce5
SH
1936 err =
1937 sock->ops->getsockopt(sock, level, optname, optval,
1938 optlen);
6cb153ca
BL
1939out_put:
1940 fput_light(sock->file, fput_needed);
1da177e4
LT
1941 }
1942 return err;
1943}
1944
13a2d70e
DB
1945SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1946 char __user *, optval, int __user *, optlen)
1947{
1948 return __sys_getsockopt(fd, level, optname, optval, optlen);
1949}
1950
1da177e4
LT
1951/*
1952 * Shutdown a socket.
1953 */
1954
005a1aea 1955int __sys_shutdown(int fd, int how)
1da177e4 1956{
6cb153ca 1957 int err, fput_needed;
1da177e4
LT
1958 struct socket *sock;
1959
89bddce5
SH
1960 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1961 if (sock != NULL) {
1da177e4 1962 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1963 if (!err)
1964 err = sock->ops->shutdown(sock, how);
1965 fput_light(sock->file, fput_needed);
1da177e4
LT
1966 }
1967 return err;
1968}
1969
005a1aea
DB
1970SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1971{
1972 return __sys_shutdown(fd, how);
1973}
1974
/*
 * Helpers that yield the address of a msghdr field in either the native or
 * the compat layout; the fields involved have the same type (int / unsigned)
 * on our platforms.
 *
 * They expand in the caller's scope and rely on a local `flags` plus a local
 * named `<msg>_compat` (e.g. `msg_compat` for COMPAT_NAMELEN(msg)).
 */
#define COMPAT_MSG(msg, member) \
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1981
c71d8ebe
TH
1982struct used_address {
1983 struct sockaddr_storage name;
1984 unsigned int name_len;
1985};
1986
da184284
AV
1987static int copy_msghdr_from_user(struct msghdr *kmsg,
1988 struct user_msghdr __user *umsg,
1989 struct sockaddr __user **save_addr,
1990 struct iovec **iov)
1661bf36 1991{
ffb07550 1992 struct user_msghdr msg;
08adb7da
AV
1993 ssize_t err;
1994
ffb07550 1995 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 1996 return -EFAULT;
dbb490b9 1997
864d9664 1998 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
1999 kmsg->msg_controllen = msg.msg_controllen;
2000 kmsg->msg_flags = msg.msg_flags;
2001
2002 kmsg->msg_namelen = msg.msg_namelen;
2003 if (!msg.msg_name)
6a2a2b3a
AS
2004 kmsg->msg_namelen = 0;
2005
dbb490b9
ML
2006 if (kmsg->msg_namelen < 0)
2007 return -EINVAL;
2008
1661bf36 2009 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2010 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2011
2012 if (save_addr)
ffb07550 2013 *save_addr = msg.msg_name;
08adb7da 2014
ffb07550 2015 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2016 if (!save_addr) {
864d9664
PA
2017 err = move_addr_to_kernel(msg.msg_name,
2018 kmsg->msg_namelen,
08adb7da
AV
2019 kmsg->msg_name);
2020 if (err < 0)
2021 return err;
2022 }
2023 } else {
2024 kmsg->msg_name = NULL;
2025 kmsg->msg_namelen = 0;
2026 }
2027
ffb07550 2028 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2029 return -EMSGSIZE;
2030
0345f931 2031 kmsg->msg_iocb = NULL;
2032
ffb07550
AV
2033 return import_iovec(save_addr ? READ : WRITE,
2034 msg.msg_iov, msg.msg_iovlen,
da184284 2035 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
2036}
2037
666547ff 2038static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2039 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2040 struct used_address *used_address,
2041 unsigned int allowed_msghdr_flags)
1da177e4 2042{
89bddce5
SH
2043 struct compat_msghdr __user *msg_compat =
2044 (struct compat_msghdr __user *)msg;
230b1839 2045 struct sockaddr_storage address;
1da177e4 2046 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2047 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2048 __aligned(sizeof(__kernel_size_t));
89bddce5 2049 /* 20 is size of ipv6_pktinfo */
1da177e4 2050 unsigned char *ctl_buf = ctl;
d8725c86 2051 int ctl_len;
08adb7da 2052 ssize_t err;
89bddce5 2053
08adb7da 2054 msg_sys->msg_name = &address;
1da177e4 2055
08449320 2056 if (MSG_CMSG_COMPAT & flags)
08adb7da 2057 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2058 else
08adb7da 2059 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2060 if (err < 0)
da184284 2061 return err;
1da177e4
LT
2062
2063 err = -ENOBUFS;
2064
228e548e 2065 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2066 goto out_freeiov;
28a94d8f 2067 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2068 ctl_len = msg_sys->msg_controllen;
1da177e4 2069 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2070 err =
228e548e 2071 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2072 sizeof(ctl));
1da177e4
LT
2073 if (err)
2074 goto out_freeiov;
228e548e
AB
2075 ctl_buf = msg_sys->msg_control;
2076 ctl_len = msg_sys->msg_controllen;
1da177e4 2077 } else if (ctl_len) {
ac4340fc
DM
2078 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2079 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2080 if (ctl_len > sizeof(ctl)) {
1da177e4 2081 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2082 if (ctl_buf == NULL)
1da177e4
LT
2083 goto out_freeiov;
2084 }
2085 err = -EFAULT;
2086 /*
228e548e 2087 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2088 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2089 * checking falls down on this.
2090 */
fb8621bb 2091 if (copy_from_user(ctl_buf,
228e548e 2092 (void __user __force *)msg_sys->msg_control,
89bddce5 2093 ctl_len))
1da177e4 2094 goto out_freectl;
228e548e 2095 msg_sys->msg_control = ctl_buf;
1da177e4 2096 }
228e548e 2097 msg_sys->msg_flags = flags;
1da177e4
LT
2098
2099 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2100 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2101 /*
2102 * If this is sendmmsg() and current destination address is same as
2103 * previously succeeded address, omit asking LSM's decision.
2104 * used_address->name_len is initialized to UINT_MAX so that the first
2105 * destination address never matches.
2106 */
bc909d9d
MD
2107 if (used_address && msg_sys->msg_name &&
2108 used_address->name_len == msg_sys->msg_namelen &&
2109 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2110 used_address->name_len)) {
d8725c86 2111 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2112 goto out_freectl;
2113 }
d8725c86 2114 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2115 /*
2116 * If this is sendmmsg() and sending to current destination address was
2117 * successful, remember it.
2118 */
2119 if (used_address && err >= 0) {
2120 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2121 if (msg_sys->msg_name)
2122 memcpy(&used_address->name, msg_sys->msg_name,
2123 used_address->name_len);
c71d8ebe 2124 }
1da177e4
LT
2125
2126out_freectl:
89bddce5 2127 if (ctl_buf != ctl)
1da177e4
LT
2128 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2129out_freeiov:
da184284 2130 kfree(iov);
228e548e
AB
2131 return err;
2132}
2133
2134/*
2135 * BSD sendmsg interface
2136 */
2137
e1834a32
DB
2138long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2139 bool forbid_cmsg_compat)
228e548e
AB
2140{
2141 int fput_needed, err;
2142 struct msghdr msg_sys;
1be374a0
AL
2143 struct socket *sock;
2144
e1834a32
DB
2145 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2146 return -EINVAL;
2147
1be374a0 2148 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2149 if (!sock)
2150 goto out;
2151
28a94d8f 2152 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2153
6cb153ca 2154 fput_light(sock->file, fput_needed);
89bddce5 2155out:
1da177e4
LT
2156 return err;
2157}
2158
666547ff 2159SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2160{
e1834a32 2161 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2162}
2163
228e548e
AB
2164/*
2165 * Linux sendmmsg interface
2166 */
2167
2168int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2169 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2170{
2171 int fput_needed, err, datagrams;
2172 struct socket *sock;
2173 struct mmsghdr __user *entry;
2174 struct compat_mmsghdr __user *compat_entry;
2175 struct msghdr msg_sys;
c71d8ebe 2176 struct used_address used_address;
f092276d 2177 unsigned int oflags = flags;
228e548e 2178
e1834a32
DB
2179 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2180 return -EINVAL;
2181
98382f41
AB
2182 if (vlen > UIO_MAXIOV)
2183 vlen = UIO_MAXIOV;
228e548e
AB
2184
2185 datagrams = 0;
2186
2187 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2188 if (!sock)
2189 return err;
2190
c71d8ebe 2191 used_address.name_len = UINT_MAX;
228e548e
AB
2192 entry = mmsg;
2193 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2194 err = 0;
f092276d 2195 flags |= MSG_BATCH;
228e548e
AB
2196
2197 while (datagrams < vlen) {
f092276d
TH
2198 if (datagrams == vlen - 1)
2199 flags = oflags;
2200
228e548e 2201 if (MSG_CMSG_COMPAT & flags) {
666547ff 2202 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2203 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2204 if (err < 0)
2205 break;
2206 err = __put_user(err, &compat_entry->msg_len);
2207 ++compat_entry;
2208 } else {
a7526eb5 2209 err = ___sys_sendmsg(sock,
666547ff 2210 (struct user_msghdr __user *)entry,
28a94d8f 2211 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2212 if (err < 0)
2213 break;
2214 err = put_user(err, &entry->msg_len);
2215 ++entry;
2216 }
2217
2218 if (err)
2219 break;
2220 ++datagrams;
3023898b
SHY
2221 if (msg_data_left(&msg_sys))
2222 break;
a78cb84c 2223 cond_resched();
228e548e
AB
2224 }
2225
228e548e
AB
2226 fput_light(sock->file, fput_needed);
2227
728ffb86
AB
2228 /* We only return an error if no datagrams were able to be sent */
2229 if (datagrams != 0)
228e548e
AB
2230 return datagrams;
2231
228e548e
AB
2232 return err;
2233}
2234
2235SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2236 unsigned int, vlen, unsigned int, flags)
2237{
e1834a32 2238 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2239}
2240
666547ff 2241static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2242 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2243{
89bddce5
SH
2244 struct compat_msghdr __user *msg_compat =
2245 (struct compat_msghdr __user *)msg;
1da177e4 2246 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2247 struct iovec *iov = iovstack;
1da177e4 2248 unsigned long cmsg_ptr;
2da62906 2249 int len;
08adb7da 2250 ssize_t err;
1da177e4
LT
2251
2252 /* kernel mode address */
230b1839 2253 struct sockaddr_storage addr;
1da177e4
LT
2254
2255 /* user mode address pointers */
2256 struct sockaddr __user *uaddr;
08adb7da 2257 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2258
08adb7da 2259 msg_sys->msg_name = &addr;
1da177e4 2260
f3d33426 2261 if (MSG_CMSG_COMPAT & flags)
08adb7da 2262 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2263 else
08adb7da 2264 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2265 if (err < 0)
da184284 2266 return err;
1da177e4 2267
a2e27255
ACM
2268 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2269 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2270
f3d33426
HFS
2271 /* We assume all kernel code knows the size of sockaddr_storage */
2272 msg_sys->msg_namelen = 0;
2273
1da177e4
LT
2274 if (sock->file->f_flags & O_NONBLOCK)
2275 flags |= MSG_DONTWAIT;
2da62906 2276 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2277 if (err < 0)
2278 goto out_freeiov;
2279 len = err;
2280
2281 if (uaddr != NULL) {
43db362d 2282 err = move_addr_to_user(&addr,
a2e27255 2283 msg_sys->msg_namelen, uaddr,
89bddce5 2284 uaddr_len);
1da177e4
LT
2285 if (err < 0)
2286 goto out_freeiov;
2287 }
a2e27255 2288 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2289 COMPAT_FLAGS(msg));
1da177e4
LT
2290 if (err)
2291 goto out_freeiov;
2292 if (MSG_CMSG_COMPAT & flags)
a2e27255 2293 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2294 &msg_compat->msg_controllen);
2295 else
a2e27255 2296 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2297 &msg->msg_controllen);
2298 if (err)
2299 goto out_freeiov;
2300 err = len;
2301
2302out_freeiov:
da184284 2303 kfree(iov);
a2e27255
ACM
2304 return err;
2305}
2306
2307/*
2308 * BSD recvmsg interface
2309 */
2310
e1834a32
DB
2311long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2312 bool forbid_cmsg_compat)
a2e27255
ACM
2313{
2314 int fput_needed, err;
2315 struct msghdr msg_sys;
1be374a0
AL
2316 struct socket *sock;
2317
e1834a32
DB
2318 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2319 return -EINVAL;
2320
1be374a0 2321 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2322 if (!sock)
2323 goto out;
2324
a7526eb5 2325 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2326
6cb153ca 2327 fput_light(sock->file, fput_needed);
1da177e4
LT
2328out:
2329 return err;
2330}
2331
666547ff 2332SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2333 unsigned int, flags)
2334{
e1834a32 2335 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2336}
2337
a2e27255
ACM
2338/*
2339 * Linux recvmmsg interface
2340 */
2341
2342int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2343 unsigned int flags, struct timespec *timeout)
2344{
2345 int fput_needed, err, datagrams;
2346 struct socket *sock;
2347 struct mmsghdr __user *entry;
d7256d0e 2348 struct compat_mmsghdr __user *compat_entry;
a2e27255 2349 struct msghdr msg_sys;
766b9f92
DD
2350 struct timespec64 end_time;
2351 struct timespec64 timeout64;
a2e27255
ACM
2352
2353 if (timeout &&
2354 poll_select_set_timeout(&end_time, timeout->tv_sec,
2355 timeout->tv_nsec))
2356 return -EINVAL;
2357
2358 datagrams = 0;
2359
2360 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2361 if (!sock)
2362 return err;
2363
7797dc41
SHY
2364 if (likely(!(flags & MSG_ERRQUEUE))) {
2365 err = sock_error(sock->sk);
2366 if (err) {
2367 datagrams = err;
2368 goto out_put;
2369 }
e623a9e9 2370 }
a2e27255
ACM
2371
2372 entry = mmsg;
d7256d0e 2373 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2374
2375 while (datagrams < vlen) {
2376 /*
2377 * No need to ask LSM for more than the first datagram.
2378 */
d7256d0e 2379 if (MSG_CMSG_COMPAT & flags) {
666547ff 2380 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2381 &msg_sys, flags & ~MSG_WAITFORONE,
2382 datagrams);
d7256d0e
JMG
2383 if (err < 0)
2384 break;
2385 err = __put_user(err, &compat_entry->msg_len);
2386 ++compat_entry;
2387 } else {
a7526eb5 2388 err = ___sys_recvmsg(sock,
666547ff 2389 (struct user_msghdr __user *)entry,
a7526eb5
AL
2390 &msg_sys, flags & ~MSG_WAITFORONE,
2391 datagrams);
d7256d0e
JMG
2392 if (err < 0)
2393 break;
2394 err = put_user(err, &entry->msg_len);
2395 ++entry;
2396 }
2397
a2e27255
ACM
2398 if (err)
2399 break;
a2e27255
ACM
2400 ++datagrams;
2401
71c5c159
BB
2402 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2403 if (flags & MSG_WAITFORONE)
2404 flags |= MSG_DONTWAIT;
2405
a2e27255 2406 if (timeout) {
766b9f92
DD
2407 ktime_get_ts64(&timeout64);
2408 *timeout = timespec64_to_timespec(
2409 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2410 if (timeout->tv_sec < 0) {
2411 timeout->tv_sec = timeout->tv_nsec = 0;
2412 break;
2413 }
2414
2415 /* Timeout, return less than vlen datagrams */
2416 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2417 break;
2418 }
2419
2420 /* Out of band data, return right away */
2421 if (msg_sys.msg_flags & MSG_OOB)
2422 break;
a78cb84c 2423 cond_resched();
a2e27255
ACM
2424 }
2425
a2e27255 2426 if (err == 0)
34b88a68
ACM
2427 goto out_put;
2428
2429 if (datagrams == 0) {
2430 datagrams = err;
2431 goto out_put;
2432 }
a2e27255 2433
34b88a68
ACM
2434 /*
2435 * We may return less entries than requested (vlen) if the
2436 * sock is non block and there aren't enough datagrams...
2437 */
2438 if (err != -EAGAIN) {
a2e27255 2439 /*
34b88a68
ACM
2440 * ... or if recvmsg returns an error after we
2441 * received some datagrams, where we record the
2442 * error to return on the next call or if the
2443 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2444 */
34b88a68 2445 sock->sk->sk_err = -err;
a2e27255 2446 }
34b88a68
ACM
2447out_put:
2448 fput_light(sock->file, fput_needed);
a2e27255 2449
34b88a68 2450 return datagrams;
a2e27255
ACM
2451}
2452
1255e269
DB
2453static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2454 unsigned int vlen, unsigned int flags,
2455 struct timespec __user *timeout)
a2e27255
ACM
2456{
2457 int datagrams;
2458 struct timespec timeout_sys;
2459
1be374a0
AL
2460 if (flags & MSG_CMSG_COMPAT)
2461 return -EINVAL;
2462
a2e27255
ACM
2463 if (!timeout)
2464 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2465
2466 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2467 return -EFAULT;
2468
2469 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2470
2471 if (datagrams > 0 &&
2472 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2473 datagrams = -EFAULT;
2474
2475 return datagrams;
2476}
2477
1255e269
DB
2478SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2479 unsigned int, vlen, unsigned int, flags,
2480 struct timespec __user *, timeout)
2481{
2482 return do_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
2483}
2484
a2e27255 2485#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call number.
 * Entry 0 is unused (call numbers start at 1).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0: (unused) */
	AL(3),	/*  1 */
	AL(3),	/*  2 */
	AL(3),	/*  3 */
	AL(2),	/*  4 */
	AL(3),	/*  5 */
	AL(3),	/*  6 */
	AL(3),	/*  7 */
	AL(4),	/*  8 */
	AL(4),	/*  9 */
	AL(4),	/* 10 */
	AL(6),	/* 11 */
	AL(6),	/* 12 */
	AL(2),	/* 13 */
	AL(5),	/* 14 */
	AL(5),	/* 15 */
	AL(3),	/* 16 */
	AL(3),	/* 17 */
	AL(4),	/* 18 */
	AL(5),	/* 19 */
	AL(4),	/* 20 */
};

#undef AL
2496
2497/*
89bddce5 2498 * System call vectors.
1da177e4
LT
2499 *
2500 * Argument checking cleaned up. Saved 20% in size.
2501 * This function doesn't need to set the kernel lock because
89bddce5 2502 * it is set by the callees.
1da177e4
LT
2503 */
2504
3e0fa65f 2505SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2506{
2950fa9d 2507 unsigned long a[AUDITSC_ARGS];
89bddce5 2508 unsigned long a0, a1;
1da177e4 2509 int err;
47379052 2510 unsigned int len;
1da177e4 2511
228e548e 2512 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2513 return -EINVAL;
c8e8cd57 2514 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2515
47379052
AV
2516 len = nargs[call];
2517 if (len > sizeof(a))
2518 return -EINVAL;
2519
1da177e4 2520 /* copy_from_user should be SMP safe. */
47379052 2521 if (copy_from_user(a, args, len))
1da177e4 2522 return -EFAULT;
3ec3b2fb 2523
2950fa9d
CG
2524 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2525 if (err)
2526 return err;
3ec3b2fb 2527
89bddce5
SH
2528 a0 = a[0];
2529 a1 = a[1];
2530
2531 switch (call) {
2532 case SYS_SOCKET:
9d6a15c3 2533 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2534 break;
2535 case SYS_BIND:
a87d35d8 2536 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2537 break;
2538 case SYS_CONNECT:
1387c2c2 2539 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2540 break;
2541 case SYS_LISTEN:
25e290ee 2542 err = __sys_listen(a0, a1);
89bddce5
SH
2543 break;
2544 case SYS_ACCEPT:
4541e805
DB
2545 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2546 (int __user *)a[2], 0);
89bddce5
SH
2547 break;
2548 case SYS_GETSOCKNAME:
2549 err =
8882a107
DB
2550 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2551 (int __user *)a[2]);
89bddce5
SH
2552 break;
2553 case SYS_GETPEERNAME:
2554 err =
b21c8f83
DB
2555 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2556 (int __user *)a[2]);
89bddce5
SH
2557 break;
2558 case SYS_SOCKETPAIR:
6debc8d8 2559 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2560 break;
2561 case SYS_SEND:
f3bf896b
DB
2562 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2563 NULL, 0);
89bddce5
SH
2564 break;
2565 case SYS_SENDTO:
211b634b
DB
2566 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2567 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2568 break;
2569 case SYS_RECV:
d27e9afc
DB
2570 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2571 NULL, NULL);
89bddce5
SH
2572 break;
2573 case SYS_RECVFROM:
7a09e1eb
DB
2574 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2575 (struct sockaddr __user *)a[4],
2576 (int __user *)a[5]);
89bddce5
SH
2577 break;
2578 case SYS_SHUTDOWN:
005a1aea 2579 err = __sys_shutdown(a0, a1);
89bddce5
SH
2580 break;
2581 case SYS_SETSOCKOPT:
cc36dca0
DB
2582 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2583 a[4]);
89bddce5
SH
2584 break;
2585 case SYS_GETSOCKOPT:
2586 err =
13a2d70e
DB
2587 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2588 (int __user *)a[4]);
89bddce5
SH
2589 break;
2590 case SYS_SENDMSG:
e1834a32
DB
2591 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2592 a[2], true);
89bddce5 2593 break;
228e548e 2594 case SYS_SENDMMSG:
e1834a32
DB
2595 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2596 a[3], true);
228e548e 2597 break;
89bddce5 2598 case SYS_RECVMSG:
e1834a32
DB
2599 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2600 a[2], true);
89bddce5 2601 break;
a2e27255 2602 case SYS_RECVMMSG:
1255e269
DB
2603 err = do_sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2604 a[3], (struct timespec __user *)a[4]);
a2e27255 2605 break;
de11defe 2606 case SYS_ACCEPT4:
4541e805
DB
2607 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2608 (int __user *)a[2], a[3]);
aaca0bdc 2609 break;
89bddce5
SH
2610 default:
2611 err = -EINVAL;
2612 break;
1da177e4
LT
2613 }
2614 return err;
2615}
2616
89bddce5 2617#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2618
55737fda
SH
2619/**
2620 * sock_register - add a socket protocol handler
2621 * @ops: description of protocol
2622 *
1da177e4
LT
2623 * This function is called by a protocol handler that wants to
2624 * advertise its address family, and have it linked into the
e793c0f7 2625 * socket interface. The value ops->family corresponds to the
55737fda 2626 * socket system call protocol family.
1da177e4 2627 */
f0fd27d4 2628int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2629{
2630 int err;
2631
2632 if (ops->family >= NPROTO) {
3410f22e 2633 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2634 return -ENOBUFS;
2635 }
55737fda
SH
2636
2637 spin_lock(&net_family_lock);
190683a9
ED
2638 if (rcu_dereference_protected(net_families[ops->family],
2639 lockdep_is_held(&net_family_lock)))
55737fda
SH
2640 err = -EEXIST;
2641 else {
cf778b00 2642 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2643 err = 0;
2644 }
55737fda
SH
2645 spin_unlock(&net_family_lock);
2646
3410f22e 2647 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2648 return err;
2649}
c6d409cf 2650EXPORT_SYMBOL(sock_register);
1da177e4 2651
55737fda
SH
2652/**
2653 * sock_unregister - remove a protocol handler
2654 * @family: protocol family to remove
2655 *
1da177e4
LT
2656 * This function is called by a protocol handler that wants to
2657 * remove its address family, and have it unlinked from the
55737fda
SH
2658 * new socket creation.
2659 *
2660 * If protocol handler is a module, then it can use module reference
2661 * counts to protect against new references. If protocol handler is not
2662 * a module then it needs to provide its own protection in
2663 * the ops->create routine.
1da177e4 2664 */
f0fd27d4 2665void sock_unregister(int family)
1da177e4 2666{
f0fd27d4 2667 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2668
55737fda 2669 spin_lock(&net_family_lock);
a9b3cd7f 2670 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2671 spin_unlock(&net_family_lock);
2672
2673 synchronize_rcu();
2674
3410f22e 2675 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2676}
c6d409cf 2677EXPORT_SYMBOL(sock_unregister);
1da177e4 2678
bf2ae2e4
XL
2679bool sock_is_registered(int family)
2680{
66b51b0a 2681 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
2682}
2683
77d76ea3 2684static int __init sock_init(void)
1da177e4 2685{
b3e19d92 2686 int err;
2ca794e5
EB
2687 /*
2688 * Initialize the network sysctl infrastructure.
2689 */
2690 err = net_sysctl_init();
2691 if (err)
2692 goto out;
b3e19d92 2693
1da177e4 2694 /*
89bddce5 2695 * Initialize skbuff SLAB cache
1da177e4
LT
2696 */
2697 skb_init();
1da177e4
LT
2698
2699 /*
89bddce5 2700 * Initialize the protocols module.
1da177e4
LT
2701 */
2702
2703 init_inodecache();
b3e19d92
NP
2704
2705 err = register_filesystem(&sock_fs_type);
2706 if (err)
2707 goto out_fs;
1da177e4 2708 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2709 if (IS_ERR(sock_mnt)) {
2710 err = PTR_ERR(sock_mnt);
2711 goto out_mount;
2712 }
77d76ea3
AK
2713
2714 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2715 */
2716
2717#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2718 err = netfilter_init();
2719 if (err)
2720 goto out;
1da177e4 2721#endif
cbeb321a 2722
408eccce 2723 ptp_classifier_init();
c1f19b51 2724
b3e19d92
NP
2725out:
2726 return err;
2727
2728out_mount:
2729 unregister_filesystem(&sock_fs_type);
2730out_fs:
2731 goto out;
1da177e4
LT
2732}
2733
77d76ea3
AK
2734core_initcall(sock_init); /* early initcall */
2735
1da177e4
LT
2736#ifdef CONFIG_PROC_FS
2737void socket_seq_show(struct seq_file *seq)
2738{
648845ab
TZ
2739 seq_printf(seq, "sockets: used %d\n",
2740 sock_inuse_get(seq->private));
1da177e4 2741}
89bddce5 2742#endif /* CONFIG_PROC_FS */
1da177e4 2743
89bbfc95 2744#ifdef CONFIG_COMPAT
6b96018b 2745static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2746 unsigned int cmd, void __user *up)
7a229387 2747{
7a229387
AB
2748 mm_segment_t old_fs = get_fs();
2749 struct timeval ktv;
2750 int err;
2751
2752 set_fs(KERNEL_DS);
6b96018b 2753 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2754 set_fs(old_fs);
644595f8 2755 if (!err)
ed6fe9d6 2756 err = compat_put_timeval(&ktv, up);
644595f8 2757
7a229387
AB
2758 return err;
2759}
2760
6b96018b 2761static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2762 unsigned int cmd, void __user *up)
7a229387 2763{
7a229387
AB
2764 mm_segment_t old_fs = get_fs();
2765 struct timespec kts;
2766 int err;
2767
2768 set_fs(KERNEL_DS);
6b96018b 2769 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2770 set_fs(old_fs);
644595f8 2771 if (!err)
ed6fe9d6 2772 err = compat_put_timespec(&kts, up);
644595f8 2773
7a229387
AB
2774 return err;
2775}
2776
36fd633e 2777static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2778{
6b96018b 2779 struct compat_ifconf ifc32;
7a229387 2780 struct ifconf ifc;
7a229387
AB
2781 int err;
2782
6b96018b 2783 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2784 return -EFAULT;
2785
36fd633e
AV
2786 ifc.ifc_len = ifc32.ifc_len;
2787 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 2788
36fd633e
AV
2789 rtnl_lock();
2790 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2791 rtnl_unlock();
7a229387
AB
2792 if (err)
2793 return err;
2794
36fd633e 2795 ifc32.ifc_len = ifc.ifc_len;
6b96018b 2796 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2797 return -EFAULT;
2798
2799 return 0;
2800}
2801
6b96018b 2802static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2803{
3a7da39d
BH
2804 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2805 bool convert_in = false, convert_out = false;
44c02a2c
AV
2806 size_t buf_size = 0;
2807 struct ethtool_rxnfc __user *rxnfc = NULL;
2808 struct ifreq ifr;
3a7da39d
BH
2809 u32 rule_cnt = 0, actual_rule_cnt;
2810 u32 ethcmd;
7a229387 2811 u32 data;
3a7da39d 2812 int ret;
7a229387 2813
3a7da39d
BH
2814 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2815 return -EFAULT;
7a229387 2816
3a7da39d
BH
2817 compat_rxnfc = compat_ptr(data);
2818
2819 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2820 return -EFAULT;
2821
3a7da39d
BH
2822 /* Most ethtool structures are defined without padding.
2823 * Unfortunately struct ethtool_rxnfc is an exception.
2824 */
2825 switch (ethcmd) {
2826 default:
2827 break;
2828 case ETHTOOL_GRXCLSRLALL:
2829 /* Buffer size is variable */
2830 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2831 return -EFAULT;
2832 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2833 return -ENOMEM;
2834 buf_size += rule_cnt * sizeof(u32);
2835 /* fall through */
2836 case ETHTOOL_GRXRINGS:
2837 case ETHTOOL_GRXCLSRLCNT:
2838 case ETHTOOL_GRXCLSRULE:
55664f32 2839 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2840 convert_out = true;
2841 /* fall through */
2842 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2843 buf_size += sizeof(struct ethtool_rxnfc);
2844 convert_in = true;
44c02a2c 2845 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
2846 break;
2847 }
2848
44c02a2c 2849 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2850 return -EFAULT;
2851
44c02a2c 2852 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 2853
3a7da39d 2854 if (convert_in) {
127fe533 2855 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2856 * fs.ring_cookie and at the end of fs, but nowhere else.
2857 */
127fe533
AD
2858 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2859 sizeof(compat_rxnfc->fs.m_ext) !=
2860 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2861 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2862 BUILD_BUG_ON(
2863 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2864 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2865 offsetof(struct ethtool_rxnfc, fs.location) -
2866 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2867
2868 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2869 (void __user *)(&rxnfc->fs.m_ext + 1) -
2870 (void __user *)rxnfc) ||
3a7da39d
BH
2871 copy_in_user(&rxnfc->fs.ring_cookie,
2872 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2873 (void __user *)(&rxnfc->fs.location + 1) -
2874 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2875 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2876 sizeof(rxnfc->rule_cnt)))
2877 return -EFAULT;
2878 }
2879
44c02a2c 2880 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
2881 if (ret)
2882 return ret;
2883
2884 if (convert_out) {
2885 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2886 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2887 (const void __user *)rxnfc) ||
3a7da39d
BH
2888 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2889 &rxnfc->fs.ring_cookie,
954b1244
SH
2890 (const void __user *)(&rxnfc->fs.location + 1) -
2891 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2892 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2893 sizeof(rxnfc->rule_cnt)))
2894 return -EFAULT;
2895
2896 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2897 /* As an optimisation, we only copy the actual
2898 * number of rules that the underlying
2899 * function returned. Since Mallory might
2900 * change the rule count in user memory, we
2901 * check that it is less than the rule count
2902 * originally given (as the user buffer size),
2903 * which has been range-checked.
2904 */
2905 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2906 return -EFAULT;
2907 if (actual_rule_cnt < rule_cnt)
2908 rule_cnt = actual_rule_cnt;
2909 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2910 &rxnfc->rule_locs[0],
2911 rule_cnt * sizeof(u32)))
2912 return -EFAULT;
2913 }
2914 }
2915
2916 return 0;
7a229387
AB
2917}
2918
7a50a240
AB
2919static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2920{
7a50a240 2921 compat_uptr_t uptr32;
44c02a2c
AV
2922 struct ifreq ifr;
2923 void __user *saved;
2924 int err;
7a50a240 2925
44c02a2c 2926 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
2927 return -EFAULT;
2928
2929 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2930 return -EFAULT;
2931
44c02a2c
AV
2932 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
2933 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 2934
44c02a2c
AV
2935 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
2936 if (!err) {
2937 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
2938 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
2939 err = -EFAULT;
ccbd6a5a 2940 }
44c02a2c 2941 return err;
7a229387
AB
2942}
2943
590d4693
BH
2944/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2945static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2946 struct compat_ifreq __user *u_ifreq32)
7a229387 2947{
44c02a2c 2948 struct ifreq ifreq;
7a229387
AB
2949 u32 data32;
2950
44c02a2c 2951 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 2952 return -EFAULT;
44c02a2c 2953 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 2954 return -EFAULT;
44c02a2c 2955 ifreq.ifr_data = compat_ptr(data32);
7a229387 2956
44c02a2c 2957 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
2958}
2959
a2116ed2
AB
2960static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2961 struct compat_ifreq __user *uifr32)
2962{
2963 struct ifreq ifr;
2964 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
2965 int err;
2966
2967 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2968 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2969 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2970 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2971 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2972 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2973 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2974 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2975 if (err)
2976 return -EFAULT;
2977
44c02a2c 2978 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
2979
2980 if (cmd == SIOCGIFMAP && !err) {
2981 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2982 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2983 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2984 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2985 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2986 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2987 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2988 if (err)
2989 err = -EFAULT;
2990 }
2991 return err;
2992}
2993
7a229387 2994struct rtentry32 {
c6d409cf 2995 u32 rt_pad1;
7a229387
AB
2996 struct sockaddr rt_dst; /* target address */
2997 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2998 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2999 unsigned short rt_flags;
3000 short rt_pad2;
3001 u32 rt_pad3;
3002 unsigned char rt_tos;
3003 unsigned char rt_class;
3004 short rt_pad4;
3005 short rt_metric; /* +1 for binary compatibility! */
7a229387 3006 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3007 u32 rt_mtu; /* per route MTU/Window */
3008 u32 rt_window; /* Window clamping */
7a229387
AB
3009 unsigned short rt_irtt; /* Initial RTT */
3010};
3011
3012struct in6_rtmsg32 {
3013 struct in6_addr rtmsg_dst;
3014 struct in6_addr rtmsg_src;
3015 struct in6_addr rtmsg_gateway;
3016 u32 rtmsg_type;
3017 u16 rtmsg_dst_len;
3018 u16 rtmsg_src_len;
3019 u32 rtmsg_metric;
3020 u32 rtmsg_info;
3021 u32 rtmsg_flags;
3022 s32 rtmsg_ifindex;
3023};
3024
6b96018b
AB
3025static int routing_ioctl(struct net *net, struct socket *sock,
3026 unsigned int cmd, void __user *argp)
7a229387
AB
3027{
3028 int ret;
3029 void *r = NULL;
3030 struct in6_rtmsg r6;
3031 struct rtentry r4;
3032 char devname[16];
3033 u32 rtdev;
3034 mm_segment_t old_fs = get_fs();
3035
6b96018b
AB
3036 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3037 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3038 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3039 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3040 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3041 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3042 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3043 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3044 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3045 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3046 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3047
3048 r = (void *) &r6;
3049 } else { /* ipv4 */
6b96018b 3050 struct rtentry32 __user *ur4 = argp;
c6d409cf 3051 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3052 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3053 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3054 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3055 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3056 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3057 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3058 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3059 if (rtdev) {
c6d409cf 3060 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3061 r4.rt_dev = (char __user __force *)devname;
3062 devname[15] = 0;
7a229387
AB
3063 } else
3064 r4.rt_dev = NULL;
3065
3066 r = (void *) &r4;
3067 }
3068
3069 if (ret) {
3070 ret = -EFAULT;
3071 goto out;
3072 }
3073
c6d409cf 3074 set_fs(KERNEL_DS);
6b96018b 3075 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3076 set_fs(old_fs);
7a229387
AB
3077
3078out:
7a229387
AB
3079 return ret;
3080}
3081
3082/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3083 * for some operations; this forces use of the newer bridge-utils that
25985edc 3084 * use compatible ioctls
7a229387 3085 */
6b96018b 3086static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3087{
6b96018b 3088 compat_ulong_t tmp;
7a229387 3089
6b96018b 3090 if (get_user(tmp, argp))
7a229387
AB
3091 return -EFAULT;
3092 if (tmp == BRCTL_GET_VERSION)
3093 return BRCTL_VERSION + 1;
3094 return -EINVAL;
3095}
3096
6b96018b
AB
3097static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3098 unsigned int cmd, unsigned long arg)
3099{
3100 void __user *argp = compat_ptr(arg);
3101 struct sock *sk = sock->sk;
3102 struct net *net = sock_net(sk);
7a229387 3103
6b96018b 3104 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3105 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3106
3107 switch (cmd) {
3108 case SIOCSIFBR:
3109 case SIOCGIFBR:
3110 return old_bridge_ioctl(argp);
6b96018b 3111 case SIOCGIFCONF:
36fd633e 3112 return compat_dev_ifconf(net, argp);
6b96018b
AB
3113 case SIOCETHTOOL:
3114 return ethtool_ioctl(net, argp);
7a50a240
AB
3115 case SIOCWANDEV:
3116 return compat_siocwandev(net, argp);
a2116ed2
AB
3117 case SIOCGIFMAP:
3118 case SIOCSIFMAP:
3119 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3120 case SIOCADDRT:
3121 case SIOCDELRT:
3122 return routing_ioctl(net, sock, cmd, argp);
3123 case SIOCGSTAMP:
3124 return do_siocgstamp(net, sock, cmd, argp);
3125 case SIOCGSTAMPNS:
3126 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3127 case SIOCBONDSLAVEINFOQUERY:
3128 case SIOCBONDINFOQUERY:
a2116ed2 3129 case SIOCSHWTSTAMP:
fd468c74 3130 case SIOCGHWTSTAMP:
590d4693 3131 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3132
3133 case FIOSETOWN:
3134 case SIOCSPGRP:
3135 case FIOGETOWN:
3136 case SIOCGPGRP:
3137 case SIOCBRADDBR:
3138 case SIOCBRDELBR:
3139 case SIOCGIFVLAN:
3140 case SIOCSIFVLAN:
3141 case SIOCADDDLCI:
3142 case SIOCDELDLCI:
c62cce2c 3143 case SIOCGSKNS:
6b96018b
AB
3144 return sock_ioctl(file, cmd, arg);
3145
3146 case SIOCGIFFLAGS:
3147 case SIOCSIFFLAGS:
3148 case SIOCGIFMETRIC:
3149 case SIOCSIFMETRIC:
3150 case SIOCGIFMTU:
3151 case SIOCSIFMTU:
3152 case SIOCGIFMEM:
3153 case SIOCSIFMEM:
3154 case SIOCGIFHWADDR:
3155 case SIOCSIFHWADDR:
3156 case SIOCADDMULTI:
3157 case SIOCDELMULTI:
3158 case SIOCGIFINDEX:
6b96018b
AB
3159 case SIOCGIFADDR:
3160 case SIOCSIFADDR:
3161 case SIOCSIFHWBROADCAST:
6b96018b 3162 case SIOCDIFADDR:
6b96018b
AB
3163 case SIOCGIFBRDADDR:
3164 case SIOCSIFBRDADDR:
3165 case SIOCGIFDSTADDR:
3166 case SIOCSIFDSTADDR:
3167 case SIOCGIFNETMASK:
3168 case SIOCSIFNETMASK:
3169 case SIOCSIFPFLAGS:
3170 case SIOCGIFPFLAGS:
3171 case SIOCGIFTXQLEN:
3172 case SIOCSIFTXQLEN:
3173 case SIOCBRADDIF:
3174 case SIOCBRDELIF:
9177efd3
AB
3175 case SIOCSIFNAME:
3176 case SIOCGMIIPHY:
3177 case SIOCGMIIREG:
3178 case SIOCSMIIREG:
6b96018b
AB
3179 case SIOCSARP:
3180 case SIOCGARP:
3181 case SIOCDARP:
6b96018b 3182 case SIOCATMARK:
f92d4fc9
AV
3183 case SIOCBONDENSLAVE:
3184 case SIOCBONDRELEASE:
3185 case SIOCBONDSETHWADDR:
3186 case SIOCBONDCHANGEACTIVE:
4cf808e7 3187 case SIOCGIFNAME:
9177efd3
AB
3188 return sock_do_ioctl(net, sock, cmd, arg);
3189 }
3190
6b96018b
AB
3191 return -ENOIOCTLCMD;
3192}
7a229387 3193
95c96174 3194static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3195 unsigned long arg)
89bbfc95
SP
3196{
3197 struct socket *sock = file->private_data;
3198 int ret = -ENOIOCTLCMD;
87de87d5
DM
3199 struct sock *sk;
3200 struct net *net;
3201
3202 sk = sock->sk;
3203 net = sock_net(sk);
89bbfc95
SP
3204
3205 if (sock->ops->compat_ioctl)
3206 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3207
87de87d5
DM
3208 if (ret == -ENOIOCTLCMD &&
3209 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3210 ret = compat_wext_handle_ioctl(net, cmd, arg);
3211
6b96018b
AB
3212 if (ret == -ENOIOCTLCMD)
3213 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3214
89bbfc95
SP
3215 return ret;
3216}
3217#endif
3218
ac5a488e
SS
3219int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3220{
3221 return sock->ops->bind(sock, addr, addrlen);
3222}
c6d409cf 3223EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3224
3225int kernel_listen(struct socket *sock, int backlog)
3226{
3227 return sock->ops->listen(sock, backlog);
3228}
c6d409cf 3229EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3230
3231int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3232{
3233 struct sock *sk = sock->sk;
3234 int err;
3235
3236 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3237 newsock);
3238 if (err < 0)
3239 goto done;
3240
cdfbabfb 3241 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3242 if (err < 0) {
3243 sock_release(*newsock);
fa8705b0 3244 *newsock = NULL;
ac5a488e
SS
3245 goto done;
3246 }
3247
3248 (*newsock)->ops = sock->ops;
1b08534e 3249 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3250
3251done:
3252 return err;
3253}
c6d409cf 3254EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3255
3256int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3257 int flags)
ac5a488e
SS
3258{
3259 return sock->ops->connect(sock, addr, addrlen, flags);
3260}
c6d409cf 3261EXPORT_SYMBOL(kernel_connect);
ac5a488e 3262
9b2c45d4 3263int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3264{
9b2c45d4 3265 return sock->ops->getname(sock, addr, 0);
ac5a488e 3266}
c6d409cf 3267EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3268
9b2c45d4 3269int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3270{
9b2c45d4 3271 return sock->ops->getname(sock, addr, 1);
ac5a488e 3272}
c6d409cf 3273EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3274
3275int kernel_getsockopt(struct socket *sock, int level, int optname,
3276 char *optval, int *optlen)
3277{
3278 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3279 char __user *uoptval;
3280 int __user *uoptlen;
ac5a488e
SS
3281 int err;
3282
fb8621bb
NK
3283 uoptval = (char __user __force *) optval;
3284 uoptlen = (int __user __force *) optlen;
3285
ac5a488e
SS
3286 set_fs(KERNEL_DS);
3287 if (level == SOL_SOCKET)
fb8621bb 3288 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3289 else
fb8621bb
NK
3290 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3291 uoptlen);
ac5a488e
SS
3292 set_fs(oldfs);
3293 return err;
3294}
c6d409cf 3295EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3296
3297int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3298 char *optval, unsigned int optlen)
ac5a488e
SS
3299{
3300 mm_segment_t oldfs = get_fs();
fb8621bb 3301 char __user *uoptval;
ac5a488e
SS
3302 int err;
3303
fb8621bb
NK
3304 uoptval = (char __user __force *) optval;
3305
ac5a488e
SS
3306 set_fs(KERNEL_DS);
3307 if (level == SOL_SOCKET)
fb8621bb 3308 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3309 else
fb8621bb 3310 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3311 optlen);
3312 set_fs(oldfs);
3313 return err;
3314}
c6d409cf 3315EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3316
3317int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3318 size_t size, int flags)
3319{
3320 if (sock->ops->sendpage)
3321 return sock->ops->sendpage(sock, page, offset, size, flags);
3322
3323 return sock_no_sendpage(sock, page, offset, size, flags);
3324}
c6d409cf 3325EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3326
306b13eb
TH
3327int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3328 size_t size, int flags)
3329{
3330 struct socket *sock = sk->sk_socket;
3331
3332 if (sock->ops->sendpage_locked)
3333 return sock->ops->sendpage_locked(sk, page, offset, size,
3334 flags);
3335
3336 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3337}
3338EXPORT_SYMBOL(kernel_sendpage_locked);
3339
91cf45f0
TM
3340int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3341{
3342 return sock->ops->shutdown(sock, how);
3343}
91cf45f0 3344EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075
P
3345
3346/* This routine returns the IP overhead imposed by a socket i.e.
3347 * the length of the underlying IP header, depending on whether
3348 * this is an IPv4 or IPv6 socket and the length from IP options turned
57240d00 3349 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075
P
3350 */
3351u32 kernel_sock_ip_overhead(struct sock *sk)
3352{
3353 struct inet_sock *inet;
3354 struct ip_options_rcu *opt;
3355 u32 overhead = 0;
113c3075
P
3356#if IS_ENABLED(CONFIG_IPV6)
3357 struct ipv6_pinfo *np;
3358 struct ipv6_txoptions *optv6 = NULL;
3359#endif /* IS_ENABLED(CONFIG_IPV6) */
3360
3361 if (!sk)
3362 return overhead;
3363
113c3075
P
3364 switch (sk->sk_family) {
3365 case AF_INET:
3366 inet = inet_sk(sk);
3367 overhead += sizeof(struct iphdr);
3368 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3369 sock_owned_by_user(sk));
113c3075
P
3370 if (opt)
3371 overhead += opt->opt.optlen;
3372 return overhead;
3373#if IS_ENABLED(CONFIG_IPV6)
3374 case AF_INET6:
3375 np = inet6_sk(sk);
3376 overhead += sizeof(struct ipv6hdr);
3377 if (np)
3378 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3379 sock_owned_by_user(sk));
113c3075
P
3380 if (optv6)
3381 overhead += (optv6->opt_flen + optv6->opt_nflen);
3382 return overhead;
3383#endif /* IS_ENABLED(CONFIG_IPV6) */
3384 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3385 return overhead;
3386 }
3387}
3388EXPORT_SYMBOL(kernel_sock_ip_overhead);