ieee802154: hwsim: using right kind of iteration
[linux-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
c8e8cd57 92#include <linux/nospec.h>
1da177e4 93
7c0f6ba6 94#include <linux/uaccess.h>
1da177e4
LT
95#include <asm/unistd.h>
96
97#include <net/compat.h>
87de87d5 98#include <net/wext.h>
f8451725 99#include <net/cls_cgroup.h>
1da177e4
LT
100
101#include <net/sock.h>
102#include <linux/netfilter.h>
103
6b96018b
AB
104#include <linux/if_tun.h>
105#include <linux/ipv6_route.h>
106#include <linux/route.h>
6b96018b 107#include <linux/sockios.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
121static __poll_t sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4 165/*
89bddce5
SH
166 * Support routines.
167 * Move socket addresses back and forth across the kernel/user
168 * divide and look after the messy bits.
1da177e4
LT
169 */
170
1da177e4
LT
171/**
172 * move_addr_to_kernel - copy a socket address into kernel space
173 * @uaddr: Address in user space
174 * @kaddr: Address in kernel space
175 * @ulen: Length in user space
176 *
177 * The address is copied into kernel space. If the provided address is
178 * too long an error code of -EINVAL is returned. If the copy gives
179 * invalid addresses -EFAULT is returned. On a success 0 is returned.
180 */
181
43db362d 182int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 183{
230b1839 184 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 185 return -EINVAL;
89bddce5 186 if (ulen == 0)
1da177e4 187 return 0;
89bddce5 188 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 189 return -EFAULT;
3ec3b2fb 190 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
191}
192
193/**
194 * move_addr_to_user - copy an address to user space
195 * @kaddr: kernel space address
196 * @klen: length of address in kernel
197 * @uaddr: user space address
198 * @ulen: pointer to user length field
199 *
200 * The value pointed to by ulen on entry is the buffer length available.
201 * This is overwritten with the buffer space used. -EINVAL is returned
202 * if an overlong buffer is specified or a negative buffer size. -EFAULT
203 * is returned if either the buffer or the length field are not
204 * accessible.
205 * After copying the data up to the limit the user specifies, the true
206 * length of the data is written over the length limit the user
207 * specified. Zero is returned for a success.
208 */
89bddce5 209
43db362d 210static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 211 void __user *uaddr, int __user *ulen)
1da177e4
LT
212{
213 int err;
214 int len;
215
68c6beb3 216 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
217 err = get_user(len, ulen);
218 if (err)
1da177e4 219 return err;
89bddce5
SH
220 if (len > klen)
221 len = klen;
68c6beb3 222 if (len < 0)
1da177e4 223 return -EINVAL;
89bddce5 224 if (len) {
d6fe3945
SG
225 if (audit_sockaddr(klen, kaddr))
226 return -ENOMEM;
89bddce5 227 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
228 return -EFAULT;
229 }
230 /*
89bddce5
SH
231 * "fromlen shall refer to the value before truncation.."
232 * 1003.1g
1da177e4
LT
233 */
234 return __put_user(klen, ulen);
235}
236
08009a76 237static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
238
239static struct inode *sock_alloc_inode(struct super_block *sb)
240{
241 struct socket_alloc *ei;
eaefd110 242 struct socket_wq *wq;
89bddce5 243
e94b1766 244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
245 if (!ei)
246 return NULL;
eaefd110
ED
247 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
248 if (!wq) {
43815482
ED
249 kmem_cache_free(sock_inode_cachep, ei);
250 return NULL;
251 }
eaefd110
ED
252 init_waitqueue_head(&wq->wait);
253 wq->fasync_list = NULL;
574aab1e 254 wq->flags = 0;
e6476c21 255 ei->socket.wq = wq;
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482
ED
268 struct socket_alloc *ei;
269
270 ei = container_of(inode, struct socket_alloc, vfs_inode);
e6476c21 271 kfree_rcu(ei->socket.wq, rcu);
43815482 272 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
273}
274
51cc5068 275static void init_once(void *foo)
1da177e4 276{
89bddce5 277 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 278
a35afb83 279 inode_init_once(&ei->vfs_inode);
1da177e4 280}
89bddce5 281
1e911632 282static void init_inodecache(void)
1da177e4
LT
283{
284 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
285 sizeof(struct socket_alloc),
286 0,
287 (SLAB_HWCACHE_ALIGN |
288 SLAB_RECLAIM_ACCOUNT |
5d097056 289 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 290 init_once);
1e911632 291 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
292}
293
b87221de 294static const struct super_operations sockfs_ops = {
c6d409cf
ED
295 .alloc_inode = sock_alloc_inode,
296 .destroy_inode = sock_destroy_inode,
297 .statfs = simple_statfs,
1da177e4
LT
298};
299
c23fbb6b
ED
300/*
301 * sockfs_dname() is called from d_path().
302 */
303static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
304{
305 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 306 d_inode(dentry)->i_ino);
c23fbb6b
ED
307}
308
3ba13d17 309static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 310 .d_dname = sockfs_dname,
1da177e4
LT
311};
312
bba0bd31
AG
313static int sockfs_xattr_get(const struct xattr_handler *handler,
314 struct dentry *dentry, struct inode *inode,
315 const char *suffix, void *value, size_t size)
316{
317 if (value) {
318 if (dentry->d_name.len + 1 > size)
319 return -ERANGE;
320 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
321 }
322 return dentry->d_name.len + 1;
323}
324
325#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
326#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
327#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
328
329static const struct xattr_handler sockfs_xattr_handler = {
330 .name = XATTR_NAME_SOCKPROTONAME,
331 .get = sockfs_xattr_get,
332};
333
4a590153
AG
334static int sockfs_security_xattr_set(const struct xattr_handler *handler,
335 struct dentry *dentry, struct inode *inode,
336 const char *suffix, const void *value,
337 size_t size, int flags)
338{
339 /* Handled by LSM. */
340 return -EAGAIN;
341}
342
343static const struct xattr_handler sockfs_security_xattr_handler = {
344 .prefix = XATTR_SECURITY_PREFIX,
345 .set = sockfs_security_xattr_set,
346};
347
bba0bd31
AG
348static const struct xattr_handler *sockfs_xattr_handlers[] = {
349 &sockfs_xattr_handler,
4a590153 350 &sockfs_security_xattr_handler,
bba0bd31
AG
351 NULL
352};
353
c74a1cbb
AV
354static struct dentry *sockfs_mount(struct file_system_type *fs_type,
355 int flags, const char *dev_name, void *data)
356{
bba0bd31
AG
357 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
358 sockfs_xattr_handlers,
359 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
360}
361
362static struct vfsmount *sock_mnt __read_mostly;
363
364static struct file_system_type sock_fs_type = {
365 .name = "sockfs",
366 .mount = sockfs_mount,
367 .kill_sb = kill_anon_super,
368};
369
1da177e4
LT
370/*
371 * Obtains the first available file descriptor and sets it up for use.
372 *
39d8c1b6
DM
373 * These functions create file structures and maps them to fd space
374 * of the current process. On success it returns file descriptor
1da177e4
LT
375 * and file struct implicitly stored in sock->file.
376 * Note that another thread may close file descriptor before we return
377 * from this function. We use the fact that now we do not refer
378 * to socket after mapping. If one day we will need it, this
379 * function will increment ref. count on file by 1.
380 *
381 * In any case returned fd MAY BE not valid!
382 * This race condition is unavoidable
383 * with shared fd spaces, we cannot solve it inside kernel,
384 * but we take care of internal coherence yet.
385 */
386
aab174f0 387struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 388{
7cbe66b6 389 struct qstr name = { .name = "" };
2c48b9c4 390 struct path path;
7cbe66b6 391 struct file *file;
1da177e4 392
600e1779
MY
393 if (dname) {
394 name.name = dname;
395 name.len = strlen(name.name);
396 } else if (sock->sk) {
397 name.name = sock->sk->sk_prot_creator->name;
398 name.len = strlen(name.name);
399 }
4b936885 400 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
8e1611e2
AV
401 if (unlikely(!path.dentry)) {
402 sock_release(sock);
28407630 403 return ERR_PTR(-ENOMEM);
8e1611e2 404 }
2c48b9c4 405 path.mnt = mntget(sock_mnt);
39d8c1b6 406
2c48b9c4 407 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 408
2c48b9c4 409 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 410 &socket_file_ops);
b5ffe634 411 if (IS_ERR(file)) {
8e1611e2 412 /* drop dentry, keep inode for a bit */
c5ef6035 413 ihold(d_inode(path.dentry));
2c48b9c4 414 path_put(&path);
8e1611e2
AV
415 /* ... and now kill it properly */
416 sock_release(sock);
39b65252 417 return file;
cc3808f8
AV
418 }
419
420 sock->file = file;
77d27200 421 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 422 file->private_data = sock;
28407630 423 return file;
39d8c1b6 424}
56b31d1c 425EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 426
56b31d1c 427static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
428{
429 struct file *newfile;
28407630 430 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
431 if (unlikely(fd < 0)) {
432 sock_release(sock);
28407630 433 return fd;
ce4bb04c 434 }
39d8c1b6 435
aab174f0 436 newfile = sock_alloc_file(sock, flags, NULL);
28407630 437 if (likely(!IS_ERR(newfile))) {
39d8c1b6 438 fd_install(fd, newfile);
28407630
AV
439 return fd;
440 }
7cbe66b6 441
28407630
AV
442 put_unused_fd(fd);
443 return PTR_ERR(newfile);
1da177e4
LT
444}
445
406a3c63 446struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 447{
6cb153ca
BL
448 if (file->f_op == &socket_file_ops)
449 return file->private_data; /* set in sock_map_fd */
450
23bb80d2
ED
451 *err = -ENOTSOCK;
452 return NULL;
6cb153ca 453}
406a3c63 454EXPORT_SYMBOL(sock_from_file);
6cb153ca 455
1da177e4 456/**
c6d409cf 457 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
458 * @fd: file handle
459 * @err: pointer to an error code return
460 *
461 * The file handle passed in is locked and the socket it is bound
241c4667 462 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
463 * with a negative errno code and NULL is returned. The function checks
464 * for both invalid handles and passing a handle which is not a socket.
465 *
466 * On a success the socket object pointer is returned.
467 */
468
469struct socket *sockfd_lookup(int fd, int *err)
470{
471 struct file *file;
1da177e4
LT
472 struct socket *sock;
473
89bddce5
SH
474 file = fget(fd);
475 if (!file) {
1da177e4
LT
476 *err = -EBADF;
477 return NULL;
478 }
89bddce5 479
6cb153ca
BL
480 sock = sock_from_file(file, err);
481 if (!sock)
1da177e4 482 fput(file);
6cb153ca
BL
483 return sock;
484}
c6d409cf 485EXPORT_SYMBOL(sockfd_lookup);
1da177e4 486
6cb153ca
BL
487static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
488{
00e188ef 489 struct fd f = fdget(fd);
6cb153ca
BL
490 struct socket *sock;
491
3672558c 492 *err = -EBADF;
00e188ef
AV
493 if (f.file) {
494 sock = sock_from_file(f.file, err);
495 if (likely(sock)) {
496 *fput_needed = f.flags;
6cb153ca 497 return sock;
00e188ef
AV
498 }
499 fdput(f);
1da177e4 500 }
6cb153ca 501 return NULL;
1da177e4
LT
502}
503
600e1779
MY
504static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
505 size_t size)
506{
507 ssize_t len;
508 ssize_t used = 0;
509
c5ef6035 510 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
511 if (len < 0)
512 return len;
513 used += len;
514 if (buffer) {
515 if (size < used)
516 return -ERANGE;
517 buffer += len;
518 }
519
520 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
521 used += len;
522 if (buffer) {
523 if (size < used)
524 return -ERANGE;
525 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
526 buffer += len;
527 }
528
529 return used;
530}
531
dc647ec8 532static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
533{
534 int err = simple_setattr(dentry, iattr);
535
e1a3a60a 536 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
537 struct socket *sock = SOCKET_I(d_inode(dentry));
538
6d8c50dc
CW
539 if (sock->sk)
540 sock->sk->sk_uid = iattr->ia_uid;
541 else
542 err = -ENOENT;
86741ec2
LC
543 }
544
545 return err;
546}
547
600e1779 548static const struct inode_operations sockfs_inode_ops = {
600e1779 549 .listxattr = sockfs_listxattr,
86741ec2 550 .setattr = sockfs_setattr,
600e1779
MY
551};
552
1da177e4
LT
553/**
554 * sock_alloc - allocate a socket
89bddce5 555 *
1da177e4
LT
556 * Allocate a new inode and socket object. The two are bound together
557 * and initialised. The socket is then returned. If we are out of inodes
558 * NULL is returned.
559 */
560
f4a00aac 561struct socket *sock_alloc(void)
1da177e4 562{
89bddce5
SH
563 struct inode *inode;
564 struct socket *sock;
1da177e4 565
a209dfc7 566 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
567 if (!inode)
568 return NULL;
569
570 sock = SOCKET_I(inode);
571
85fe4025 572 inode->i_ino = get_next_ino();
89bddce5 573 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
574 inode->i_uid = current_fsuid();
575 inode->i_gid = current_fsgid();
600e1779 576 inode->i_op = &sockfs_inode_ops;
1da177e4 577
1da177e4
LT
578 return sock;
579}
f4a00aac 580EXPORT_SYMBOL(sock_alloc);
1da177e4 581
1da177e4
LT
582/**
583 * sock_release - close a socket
584 * @sock: socket to close
585 *
586 * The socket is released from the protocol stack if it has a release
587 * callback, and the inode is then released if the socket is bound to
89bddce5 588 * an inode not a file.
1da177e4 589 */
89bddce5 590
6d8c50dc 591static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
592{
593 if (sock->ops) {
594 struct module *owner = sock->ops->owner;
595
6d8c50dc
CW
596 if (inode)
597 inode_lock(inode);
1da177e4 598 sock->ops->release(sock);
6d8c50dc
CW
599 if (inode)
600 inode_unlock(inode);
1da177e4
LT
601 sock->ops = NULL;
602 module_put(owner);
603 }
604
e6476c21 605 if (sock->wq->fasync_list)
3410f22e 606 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 607
1da177e4
LT
608 if (!sock->file) {
609 iput(SOCK_INODE(sock));
610 return;
611 }
89bddce5 612 sock->file = NULL;
1da177e4 613}
6d8c50dc
CW
614
615void sock_release(struct socket *sock)
616{
617 __sock_release(sock, NULL);
618}
c6d409cf 619EXPORT_SYMBOL(sock_release);
1da177e4 620
c14ac945 621void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 622{
140c55d4
ED
623 u8 flags = *tx_flags;
624
c14ac945 625 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
626 flags |= SKBTX_HW_TSTAMP;
627
c14ac945 628 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
629 flags |= SKBTX_SW_TSTAMP;
630
c14ac945 631 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
632 flags |= SKBTX_SCHED_TSTAMP;
633
140c55d4 634 *tx_flags = flags;
20d49473 635}
67cc0d40 636EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 637
d8725c86 638static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 639{
01e97e65 640 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
641 BUG_ON(ret == -EIOCBQUEUED);
642 return ret;
1da177e4
LT
643}
644
/*
 * Send @msg on @sock after consulting the security hook; a non-zero
 * result from the hook is returned without sending anything.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
c6d409cf 652EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
653
654int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
655 struct kvec *vec, size_t num, size_t size)
656{
6aa24814 657 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 658 return sock_sendmsg(sock, msg);
1da177e4 659}
c6d409cf 660EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 661
306b13eb
TH
662int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
663 struct kvec *vec, size_t num, size_t size)
664{
665 struct socket *sock = sk->sk_socket;
666
667 if (!sock->ops->sendmsg_locked)
db5980d8 668 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb
TH
669
670 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
671
672 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
673}
674EXPORT_SYMBOL(kernel_sendmsg_locked);
675
8605330a
SHY
676static bool skb_is_err_queue(const struct sk_buff *skb)
677{
678 /* pkt_type of skbs enqueued on the error queue are set to
679 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
680 * in recvmsg, since skbs received on a local socket will never
681 * have a pkt_type of PACKET_OUTGOING.
682 */
683 return skb->pkt_type == PACKET_OUTGOING;
684}
685
b50a5c70
ML
686/* On transmit, software and hardware timestamps are returned independently.
687 * As the two skb clones share the hardware timestamp, which may be updated
688 * before the software timestamp is received, a hardware TX timestamp may be
689 * returned only if there is no software TX timestamp. Ignore false software
690 * timestamps, which may be made in the __sock_recv_timestamp() call when the
691 * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
692 * hardware timestamp.
693 */
694static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
695{
696 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
697}
698
aad9c8c4
ML
699static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
700{
701 struct scm_ts_pktinfo ts_pktinfo;
702 struct net_device *orig_dev;
703
704 if (!skb_mac_header_was_set(skb))
705 return;
706
707 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
708
709 rcu_read_lock();
710 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
711 if (orig_dev)
712 ts_pktinfo.if_index = orig_dev->ifindex;
713 rcu_read_unlock();
714
715 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
716 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
717 sizeof(ts_pktinfo), &ts_pktinfo);
718}
719
92f37fd2
ED
720/*
721 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
722 */
723void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
724 struct sk_buff *skb)
725{
20d49473 726 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 727 struct scm_timestamping tss;
b50a5c70 728 int empty = 1, false_tstamp = 0;
20d49473
PO
729 struct skb_shared_hwtstamps *shhwtstamps =
730 skb_hwtstamps(skb);
731
732 /* Race occurred between timestamp enabling and packet
733 receiving. Fill in the current time for now. */
b50a5c70 734 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 735 __net_timestamp(skb);
b50a5c70
ML
736 false_tstamp = 1;
737 }
20d49473
PO
738
739 if (need_software_tstamp) {
740 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
741 struct timeval tv;
742 skb_get_timestamp(skb, &tv);
743 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
744 sizeof(tv), &tv);
745 } else {
f24b9be5
WB
746 struct timespec ts;
747 skb_get_timestampns(skb, &ts);
20d49473 748 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 749 sizeof(ts), &ts);
20d49473
PO
750 }
751 }
752
f24b9be5 753 memset(&tss, 0, sizeof(tss));
c199105d 754 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 755 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 756 empty = 0;
4d276eb6 757 if (shhwtstamps &&
b9f40e21 758 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 759 !skb_is_swtx_tstamp(skb, false_tstamp) &&
aad9c8c4 760 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 761 empty = 0;
aad9c8c4
ML
762 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
763 !skb_is_err_queue(skb))
764 put_ts_pktinfo(msg, skb);
765 }
1c885808 766 if (!empty) {
20d49473 767 put_cmsg(msg, SOL_SOCKET,
f24b9be5 768 SCM_TIMESTAMPING, sizeof(tss), &tss);
1c885808 769
8605330a 770 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 771 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
772 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
773 skb->len, skb->data);
774 }
92f37fd2 775}
7c81fd8b
ACM
776EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
777
6e3e939f
JB
778void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
779 struct sk_buff *skb)
780{
781 int ack;
782
783 if (!sock_flag(sk, SOCK_WIFI_STATUS))
784 return;
785 if (!skb->wifi_acked_valid)
786 return;
787
788 ack = skb->wifi_acked;
789
790 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
791}
792EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
793
11165f14 794static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
795 struct sk_buff *skb)
3b885787 796{
744d5a3e 797 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 798 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 799 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
800}
801
/* Emit the timestamp cmsgs first, then the drop-count cmsg, for @skb. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
767dd033 808EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 809
1b784140 810static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 811 int flags)
1da177e4 812{
2da62906 813 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
814}
815
/*
 * Receive into @msg from @sock after consulting the security hook; a
 * non-zero result from the hook is returned without receiving.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
c6d409cf 822EXPORT_SYMBOL(sock_recvmsg);
1da177e4 823
c1249c0a
ML
824/**
825 * kernel_recvmsg - Receive a message from a socket (kernel space)
826 * @sock: The socket to receive the message from
827 * @msg: Received message
828 * @vec: Input s/g array for message data
829 * @num: Size of input s/g array
830 * @size: Number of bytes to read
831 * @flags: Message flags (MSG_DONTWAIT, etc...)
832 *
833 * On return the msg structure contains the scatter/gather array passed in the
834 * vec argument. The array is modified so that it consists of the unfilled
835 * portion of the original array.
836 *
837 * The returned value is the total number of bytes received, or an error.
838 */
89bddce5
SH
839int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
840 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
841{
842 mm_segment_t oldfs = get_fs();
843 int result;
844
6aa24814 845 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 846 set_fs(KERNEL_DS);
2da62906 847 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
848 set_fs(oldfs);
849 return result;
850}
c6d409cf 851EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 852
ce1d4d3e
CH
853static ssize_t sock_sendpage(struct file *file, struct page *page,
854 int offset, size_t size, loff_t *ppos, int more)
1da177e4 855{
1da177e4
LT
856 struct socket *sock;
857 int flags;
858
ce1d4d3e
CH
859 sock = file->private_data;
860
35f9c09f
ED
861 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
862 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
863 flags |= more;
ce1d4d3e 864
e6949583 865 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 866}
1da177e4 867
9c55e01c 868static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 869 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
870 unsigned int flags)
871{
872 struct socket *sock = file->private_data;
873
997b37da
RDC
874 if (unlikely(!sock->ops->splice_read))
875 return -EINVAL;
876
9c55e01c
JA
877 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
878}
879
8ae5e030 880static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 881{
6d652330
AV
882 struct file *file = iocb->ki_filp;
883 struct socket *sock = file->private_data;
0345f931 884 struct msghdr msg = {.msg_iter = *to,
885 .msg_iocb = iocb};
8ae5e030 886 ssize_t res;
ce1d4d3e 887
8ae5e030
AV
888 if (file->f_flags & O_NONBLOCK)
889 msg.msg_flags = MSG_DONTWAIT;
890
891 if (iocb->ki_pos != 0)
1da177e4 892 return -ESPIPE;
027445c3 893
66ee59af 894 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
895 return 0;
896
2da62906 897 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
898 *to = msg.msg_iter;
899 return res;
1da177e4
LT
900}
901
8ae5e030 902static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 903{
6d652330
AV
904 struct file *file = iocb->ki_filp;
905 struct socket *sock = file->private_data;
0345f931 906 struct msghdr msg = {.msg_iter = *from,
907 .msg_iocb = iocb};
8ae5e030 908 ssize_t res;
1da177e4 909
8ae5e030 910 if (iocb->ki_pos != 0)
ce1d4d3e 911 return -ESPIPE;
027445c3 912
8ae5e030
AV
913 if (file->f_flags & O_NONBLOCK)
914 msg.msg_flags = MSG_DONTWAIT;
915
6d652330
AV
916 if (sock->type == SOCK_SEQPACKET)
917 msg.msg_flags |= MSG_EOR;
918
d8725c86 919 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
920 *from = msg.msg_iter;
921 return res;
1da177e4
LT
922}
923
1da177e4
LT
924/*
925 * Atomic setting of ioctl hooks to avoid race
926 * with module unload.
927 */
928
4a3e2f71 929static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 930static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 931
881d966b 932void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 933{
4a3e2f71 934 mutex_lock(&br_ioctl_mutex);
1da177e4 935 br_ioctl_hook = hook;
4a3e2f71 936 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
937}
938EXPORT_SYMBOL(brioctl_set);
939
4a3e2f71 940static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 941static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 942
881d966b 943void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 944{
4a3e2f71 945 mutex_lock(&vlan_ioctl_mutex);
1da177e4 946 vlan_ioctl_hook = hook;
4a3e2f71 947 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
948}
949EXPORT_SYMBOL(vlan_ioctl_set);
950
4a3e2f71 951static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 952static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 953
89bddce5 954void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 955{
4a3e2f71 956 mutex_lock(&dlci_ioctl_mutex);
1da177e4 957 dlci_ioctl_hook = hook;
4a3e2f71 958 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
959}
960EXPORT_SYMBOL(dlci_ioctl_set);
961
6b96018b
AB
962static long sock_do_ioctl(struct net *net, struct socket *sock,
963 unsigned int cmd, unsigned long arg)
964{
965 int err;
966 void __user *argp = (void __user *)arg;
967
968 err = sock->ops->ioctl(sock, cmd, arg);
969
970 /*
971 * If this ioctl is unknown try to hand it down
972 * to the NIC driver.
973 */
36fd633e
AV
974 if (err != -ENOIOCTLCMD)
975 return err;
6b96018b 976
36fd633e
AV
977 if (cmd == SIOCGIFCONF) {
978 struct ifconf ifc;
979 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
980 return -EFAULT;
981 rtnl_lock();
982 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
983 rtnl_unlock();
984 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
985 err = -EFAULT;
44c02a2c
AV
986 } else {
987 struct ifreq ifr;
988 bool need_copyout;
989 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
990 return -EFAULT;
991 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
992 if (!err && need_copyout)
993 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
994 return -EFAULT;
36fd633e 995 }
6b96018b
AB
996 return err;
997}
998
1da177e4
LT
999/*
1000 * With an ioctl, arg may well be a user mode pointer, but we don't know
1001 * what to do with it - that's up to the protocol still.
1002 */
1003
d8d211a2 1004struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1005{
1006 return &get_net(container_of(ns, struct net, ns))->ns;
1007}
d8d211a2 1008EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1009
1da177e4
LT
1010static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1011{
1012 struct socket *sock;
881d966b 1013 struct sock *sk;
1da177e4
LT
1014 void __user *argp = (void __user *)arg;
1015 int pid, err;
881d966b 1016 struct net *net;
1da177e4 1017
b69aee04 1018 sock = file->private_data;
881d966b 1019 sk = sock->sk;
3b1e0a65 1020 net = sock_net(sk);
44c02a2c
AV
1021 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1022 struct ifreq ifr;
1023 bool need_copyout;
1024 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1025 return -EFAULT;
1026 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1027 if (!err && need_copyout)
1028 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1029 return -EFAULT;
1da177e4 1030 } else
3d23e349 1031#ifdef CONFIG_WEXT_CORE
1da177e4 1032 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1033 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1034 } else
3d23e349 1035#endif
89bddce5 1036 switch (cmd) {
1da177e4
LT
1037 case FIOSETOWN:
1038 case SIOCSPGRP:
1039 err = -EFAULT;
1040 if (get_user(pid, (int __user *)argp))
1041 break;
393cc3f5 1042 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1043 break;
1044 case FIOGETOWN:
1045 case SIOCGPGRP:
609d7fa9 1046 err = put_user(f_getown(sock->file),
89bddce5 1047 (int __user *)argp);
1da177e4
LT
1048 break;
1049 case SIOCGIFBR:
1050 case SIOCSIFBR:
1051 case SIOCBRADDBR:
1052 case SIOCBRDELBR:
1053 err = -ENOPKG;
1054 if (!br_ioctl_hook)
1055 request_module("bridge");
1056
4a3e2f71 1057 mutex_lock(&br_ioctl_mutex);
89bddce5 1058 if (br_ioctl_hook)
881d966b 1059 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1060 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1061 break;
1062 case SIOCGIFVLAN:
1063 case SIOCSIFVLAN:
1064 err = -ENOPKG;
1065 if (!vlan_ioctl_hook)
1066 request_module("8021q");
1067
4a3e2f71 1068 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1069 if (vlan_ioctl_hook)
881d966b 1070 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1071 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1072 break;
1da177e4
LT
1073 case SIOCADDDLCI:
1074 case SIOCDELDLCI:
1075 err = -ENOPKG;
1076 if (!dlci_ioctl_hook)
1077 request_module("dlci");
1078
7512cbf6
PE
1079 mutex_lock(&dlci_ioctl_mutex);
1080 if (dlci_ioctl_hook)
1da177e4 1081 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1082 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1083 break;
c62cce2c
AV
1084 case SIOCGSKNS:
1085 err = -EPERM;
1086 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1087 break;
1088
1089 err = open_related_ns(&net->ns, get_net_ns);
1090 break;
1da177e4 1091 default:
6b96018b 1092 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1093 break;
89bddce5 1094 }
1da177e4
LT
1095 return err;
1096}
1097
1098int sock_create_lite(int family, int type, int protocol, struct socket **res)
1099{
1100 int err;
1101 struct socket *sock = NULL;
89bddce5 1102
1da177e4
LT
1103 err = security_socket_create(family, type, protocol, 1);
1104 if (err)
1105 goto out;
1106
1107 sock = sock_alloc();
1108 if (!sock) {
1109 err = -ENOMEM;
1110 goto out;
1111 }
1112
1da177e4 1113 sock->type = type;
7420ed23
VY
1114 err = security_socket_post_create(sock, family, type, protocol, 1);
1115 if (err)
1116 goto out_release;
1117
1da177e4
LT
1118out:
1119 *res = sock;
1120 return err;
7420ed23
VY
1121out_release:
1122 sock_release(sock);
1123 sock = NULL;
1124 goto out;
1da177e4 1125}
c6d409cf 1126EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1127
1128/* No kernel lock held - perfect */
ade994f4 1129static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1130{
3cafb376 1131 struct socket *sock = file->private_data;
a331de3b 1132 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1133
e88958e6
CH
1134 if (!sock->ops->poll)
1135 return 0;
f641f13b 1136
a331de3b
CH
1137 if (sk_can_busy_loop(sock->sk)) {
1138 /* poll once if requested by the syscall */
1139 if (events & POLL_BUSY_LOOP)
1140 sk_busy_loop(sock->sk, 1);
1141
1142 /* if this socket can poll_ll, tell the system call */
1143 flag = POLL_BUSY_LOOP;
1144 }
1145
1146 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1147}
1148
89bddce5 1149static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1150{
b69aee04 1151 struct socket *sock = file->private_data;
1da177e4
LT
1152
1153 return sock->ops->mmap(file, sock, vma);
1154}
1155
/*
 * release file operation for sockets: releases the socket embedded in
 * @inode through __sock_release().  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1161
1162/*
1163 * Update the socket async list
1164 *
1165 * Fasync_list locking strategy.
1166 *
1167 * 1. fasync_list is modified only under process context socket lock
1168 * i.e. under semaphore.
1169 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1170 * or under socket lock
1da177e4
LT
1171 */
1172
1173static int sock_fasync(int fd, struct file *filp, int on)
1174{
989a2979
ED
1175 struct socket *sock = filp->private_data;
1176 struct sock *sk = sock->sk;
eaefd110 1177 struct socket_wq *wq;
1da177e4 1178
989a2979 1179 if (sk == NULL)
1da177e4 1180 return -EINVAL;
1da177e4
LT
1181
1182 lock_sock(sk);
e6476c21 1183 wq = sock->wq;
eaefd110 1184 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1185
eaefd110 1186 if (!wq->fasync_list)
989a2979
ED
1187 sock_reset_flag(sk, SOCK_FASYNC);
1188 else
bcdce719 1189 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1190
989a2979 1191 release_sock(sk);
1da177e4
LT
1192 return 0;
1193}
1194
ceb5d58b 1195/* This function may be called only under rcu_lock */
1da177e4 1196
ceb5d58b 1197int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1198{
ceb5d58b 1199 if (!wq || !wq->fasync_list)
1da177e4 1200 return -1;
ceb5d58b 1201
89bddce5 1202 switch (how) {
8d8ad9d7 1203 case SOCK_WAKE_WAITD:
ceb5d58b 1204 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1205 break;
1206 goto call_kill;
8d8ad9d7 1207 case SOCK_WAKE_SPACE:
ceb5d58b 1208 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1209 break;
1210 /* fall through */
8d8ad9d7 1211 case SOCK_WAKE_IO:
89bddce5 1212call_kill:
43815482 1213 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1214 break;
8d8ad9d7 1215 case SOCK_WAKE_URG:
43815482 1216 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1217 }
ceb5d58b 1218
1da177e4
LT
1219 return 0;
1220}
c6d409cf 1221EXPORT_SYMBOL(sock_wake_async);
1da177e4 1222
721db93a 1223int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1224 struct socket **res, int kern)
1da177e4
LT
1225{
1226 int err;
1227 struct socket *sock;
55737fda 1228 const struct net_proto_family *pf;
1da177e4
LT
1229
1230 /*
89bddce5 1231 * Check protocol is in range
1da177e4
LT
1232 */
1233 if (family < 0 || family >= NPROTO)
1234 return -EAFNOSUPPORT;
1235 if (type < 0 || type >= SOCK_MAX)
1236 return -EINVAL;
1237
1238 /* Compatibility.
1239
1240 This uglymoron is moved from INET layer to here to avoid
1241 deadlock in module load.
1242 */
1243 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1244 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1245 current->comm);
1da177e4
LT
1246 family = PF_PACKET;
1247 }
1248
1249 err = security_socket_create(family, type, protocol, kern);
1250 if (err)
1251 return err;
89bddce5 1252
55737fda
SH
1253 /*
1254 * Allocate the socket and allow the family to set things up. if
1255 * the protocol is 0, the family is instructed to select an appropriate
1256 * default.
1257 */
1258 sock = sock_alloc();
1259 if (!sock) {
e87cc472 1260 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1261 return -ENFILE; /* Not exactly a match, but its the
1262 closest posix thing */
1263 }
1264
1265 sock->type = type;
1266
95a5afca 1267#ifdef CONFIG_MODULES
89bddce5
SH
1268 /* Attempt to load a protocol module if the find failed.
1269 *
1270 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1271 * requested real, full-featured networking support upon configuration.
1272 * Otherwise module support will break!
1273 */
190683a9 1274 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1275 request_module("net-pf-%d", family);
1da177e4
LT
1276#endif
1277
55737fda
SH
1278 rcu_read_lock();
1279 pf = rcu_dereference(net_families[family]);
1280 err = -EAFNOSUPPORT;
1281 if (!pf)
1282 goto out_release;
1da177e4
LT
1283
1284 /*
1285 * We will call the ->create function, that possibly is in a loadable
1286 * module, so we have to bump that loadable module refcnt first.
1287 */
55737fda 1288 if (!try_module_get(pf->owner))
1da177e4
LT
1289 goto out_release;
1290
55737fda
SH
1291 /* Now protected by module ref count */
1292 rcu_read_unlock();
1293
3f378b68 1294 err = pf->create(net, sock, protocol, kern);
55737fda 1295 if (err < 0)
1da177e4 1296 goto out_module_put;
a79af59e 1297
1da177e4
LT
1298 /*
1299 * Now to bump the refcnt of the [loadable] module that owns this
1300 * socket at sock_release time we decrement its refcnt.
1301 */
55737fda
SH
1302 if (!try_module_get(sock->ops->owner))
1303 goto out_module_busy;
1304
1da177e4
LT
1305 /*
1306 * Now that we're done with the ->create function, the [loadable]
1307 * module can have its refcnt decremented
1308 */
55737fda 1309 module_put(pf->owner);
7420ed23
VY
1310 err = security_socket_post_create(sock, family, type, protocol, kern);
1311 if (err)
3b185525 1312 goto out_sock_release;
55737fda 1313 *res = sock;
1da177e4 1314
55737fda
SH
1315 return 0;
1316
1317out_module_busy:
1318 err = -EAFNOSUPPORT;
1da177e4 1319out_module_put:
55737fda
SH
1320 sock->ops = NULL;
1321 module_put(pf->owner);
1322out_sock_release:
1da177e4 1323 sock_release(sock);
55737fda
SH
1324 return err;
1325
1326out_release:
1327 rcu_read_unlock();
1328 goto out_sock_release;
1da177e4 1329}
721db93a 1330EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1331
1332int sock_create(int family, int type, int protocol, struct socket **res)
1333{
1b8d7ae4 1334 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1335}
c6d409cf 1336EXPORT_SYMBOL(sock_create);
1da177e4 1337
/*
 * Create a socket in namespace @net, passing 1 as the @kern argument of
 * __sock_create().
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1343
9d6a15c3 1344int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1345{
1346 int retval;
1347 struct socket *sock;
a677a039
UD
1348 int flags;
1349
e38b36f3
UD
1350 /* Check the SOCK_* constants for consistency. */
1351 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1352 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1353 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1354 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1355
a677a039 1356 flags = type & ~SOCK_TYPE_MASK;
77d27200 1357 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1358 return -EINVAL;
1359 type &= SOCK_TYPE_MASK;
1da177e4 1360
aaca0bdc
UD
1361 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1362 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1363
1da177e4
LT
1364 retval = sock_create(family, type, protocol, &sock);
1365 if (retval < 0)
8e1611e2 1366 return retval;
1da177e4 1367
8e1611e2 1368 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1369}
1370
9d6a15c3
DB
1371SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1372{
1373 return __sys_socket(family, type, protocol);
1374}
1375
1da177e4
LT
1376/*
1377 * Create a pair of connected sockets.
1378 */
1379
6debc8d8 1380int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1381{
1382 struct socket *sock1, *sock2;
1383 int fd1, fd2, err;
db349509 1384 struct file *newfile1, *newfile2;
a677a039
UD
1385 int flags;
1386
1387 flags = type & ~SOCK_TYPE_MASK;
77d27200 1388 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1389 return -EINVAL;
1390 type &= SOCK_TYPE_MASK;
1da177e4 1391
aaca0bdc
UD
1392 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1393 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1394
016a266b
AV
1395 /*
1396 * reserve descriptors and make sure we won't fail
1397 * to return them to userland.
1398 */
1399 fd1 = get_unused_fd_flags(flags);
1400 if (unlikely(fd1 < 0))
1401 return fd1;
1402
1403 fd2 = get_unused_fd_flags(flags);
1404 if (unlikely(fd2 < 0)) {
1405 put_unused_fd(fd1);
1406 return fd2;
1407 }
1408
1409 err = put_user(fd1, &usockvec[0]);
1410 if (err)
1411 goto out;
1412
1413 err = put_user(fd2, &usockvec[1]);
1414 if (err)
1415 goto out;
1416
1da177e4
LT
1417 /*
1418 * Obtain the first socket and check if the underlying protocol
1419 * supports the socketpair call.
1420 */
1421
1422 err = sock_create(family, type, protocol, &sock1);
016a266b 1423 if (unlikely(err < 0))
1da177e4
LT
1424 goto out;
1425
1426 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1427 if (unlikely(err < 0)) {
1428 sock_release(sock1);
1429 goto out;
bf3c23d1 1430 }
d73aa286 1431
d47cd945
DH
1432 err = security_socket_socketpair(sock1, sock2);
1433 if (unlikely(err)) {
1434 sock_release(sock2);
1435 sock_release(sock1);
1436 goto out;
1437 }
1438
016a266b
AV
1439 err = sock1->ops->socketpair(sock1, sock2);
1440 if (unlikely(err < 0)) {
1441 sock_release(sock2);
1442 sock_release(sock1);
1443 goto out;
28407630
AV
1444 }
1445
aab174f0 1446 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1447 if (IS_ERR(newfile1)) {
28407630 1448 err = PTR_ERR(newfile1);
016a266b
AV
1449 sock_release(sock2);
1450 goto out;
28407630
AV
1451 }
1452
aab174f0 1453 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1454 if (IS_ERR(newfile2)) {
1455 err = PTR_ERR(newfile2);
016a266b
AV
1456 fput(newfile1);
1457 goto out;
db349509
AV
1458 }
1459
157cf649 1460 audit_fd_pair(fd1, fd2);
d73aa286 1461
db349509
AV
1462 fd_install(fd1, newfile1);
1463 fd_install(fd2, newfile2);
d73aa286 1464 return 0;
1da177e4 1465
016a266b 1466out:
d73aa286 1467 put_unused_fd(fd2);
d73aa286 1468 put_unused_fd(fd1);
1da177e4
LT
1469 return err;
1470}
1471
6debc8d8
DB
1472SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1473 int __user *, usockvec)
1474{
1475 return __sys_socketpair(family, type, protocol, usockvec);
1476}
1477
1da177e4
LT
1478/*
1479 * Bind a name to a socket. Nothing much to do here since it's
1480 * the protocol's responsibility to handle the local address.
1481 *
1482 * We move the socket address to kernel space before we call
1483 * the protocol layer (having also checked the address is ok).
1484 */
1485
a87d35d8 1486int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1487{
1488 struct socket *sock;
230b1839 1489 struct sockaddr_storage address;
6cb153ca 1490 int err, fput_needed;
1da177e4 1491
89bddce5 1492 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1493 if (sock) {
43db362d 1494 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1495 if (err >= 0) {
1496 err = security_socket_bind(sock,
230b1839 1497 (struct sockaddr *)&address,
89bddce5 1498 addrlen);
6cb153ca
BL
1499 if (!err)
1500 err = sock->ops->bind(sock,
89bddce5 1501 (struct sockaddr *)
230b1839 1502 &address, addrlen);
1da177e4 1503 }
6cb153ca 1504 fput_light(sock->file, fput_needed);
89bddce5 1505 }
1da177e4
LT
1506 return err;
1507}
1508
a87d35d8
DB
1509SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1510{
1511 return __sys_bind(fd, umyaddr, addrlen);
1512}
1513
1da177e4
LT
1514/*
1515 * Perform a listen. Basically, we allow the protocol to do anything
1516 * necessary for a listen, and if that works, we mark the socket as
1517 * ready for listening.
1518 */
1519
25e290ee 1520int __sys_listen(int fd, int backlog)
1da177e4
LT
1521{
1522 struct socket *sock;
6cb153ca 1523 int err, fput_needed;
b8e1f9b5 1524 int somaxconn;
89bddce5
SH
1525
1526 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1527 if (sock) {
8efa6e93 1528 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1529 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1530 backlog = somaxconn;
1da177e4
LT
1531
1532 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1533 if (!err)
1534 err = sock->ops->listen(sock, backlog);
1da177e4 1535
6cb153ca 1536 fput_light(sock->file, fput_needed);
1da177e4
LT
1537 }
1538 return err;
1539}
1540
25e290ee
DB
1541SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1542{
1543 return __sys_listen(fd, backlog);
1544}
1545
1da177e4
LT
1546/*
1547 * For accept, we attempt to create a new socket, set up the link
1548 * with the client, wake up the client, then return the new
1549 * connected fd. We collect the address of the connector in kernel
1550 * space and move it to user at the very end. This is unclean because
1551 * we open the socket then return an error.
1552 *
1553 * 1003.1g adds the ability to recvmsg() to query connection pending
1554 * status to recvmsg. We need to add that support in a way thats
b903036a 1555 * clean when we restructure accept also.
1da177e4
LT
1556 */
1557
4541e805
DB
1558int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1559 int __user *upeer_addrlen, int flags)
1da177e4
LT
1560{
1561 struct socket *sock, *newsock;
39d8c1b6 1562 struct file *newfile;
6cb153ca 1563 int err, len, newfd, fput_needed;
230b1839 1564 struct sockaddr_storage address;
1da177e4 1565
77d27200 1566 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1567 return -EINVAL;
1568
1569 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1570 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1571
6cb153ca 1572 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1573 if (!sock)
1574 goto out;
1575
1576 err = -ENFILE;
c6d409cf
ED
1577 newsock = sock_alloc();
1578 if (!newsock)
1da177e4
LT
1579 goto out_put;
1580
1581 newsock->type = sock->type;
1582 newsock->ops = sock->ops;
1583
1da177e4
LT
1584 /*
1585 * We don't need try_module_get here, as the listening socket (sock)
1586 * has the protocol module (sock->ops->owner) held.
1587 */
1588 __module_get(newsock->ops->owner);
1589
28407630 1590 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1591 if (unlikely(newfd < 0)) {
1592 err = newfd;
9a1875e6
DM
1593 sock_release(newsock);
1594 goto out_put;
39d8c1b6 1595 }
aab174f0 1596 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1597 if (IS_ERR(newfile)) {
28407630
AV
1598 err = PTR_ERR(newfile);
1599 put_unused_fd(newfd);
28407630
AV
1600 goto out_put;
1601 }
39d8c1b6 1602
a79af59e
FF
1603 err = security_socket_accept(sock, newsock);
1604 if (err)
39d8c1b6 1605 goto out_fd;
a79af59e 1606
cdfbabfb 1607 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1608 if (err < 0)
39d8c1b6 1609 goto out_fd;
1da177e4
LT
1610
1611 if (upeer_sockaddr) {
9b2c45d4
DV
1612 len = newsock->ops->getname(newsock,
1613 (struct sockaddr *)&address, 2);
1614 if (len < 0) {
1da177e4 1615 err = -ECONNABORTED;
39d8c1b6 1616 goto out_fd;
1da177e4 1617 }
43db362d 1618 err = move_addr_to_user(&address,
230b1839 1619 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1620 if (err < 0)
39d8c1b6 1621 goto out_fd;
1da177e4
LT
1622 }
1623
1624 /* File flags are not inherited via accept() unlike another OSes. */
1625
39d8c1b6
DM
1626 fd_install(newfd, newfile);
1627 err = newfd;
1da177e4 1628
1da177e4 1629out_put:
6cb153ca 1630 fput_light(sock->file, fput_needed);
1da177e4
LT
1631out:
1632 return err;
39d8c1b6 1633out_fd:
9606a216 1634 fput(newfile);
39d8c1b6 1635 put_unused_fd(newfd);
1da177e4
LT
1636 goto out_put;
1637}
1638
4541e805
DB
1639SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1640 int __user *, upeer_addrlen, int, flags)
1641{
1642 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1643}
1644
20f37034
HC
1645SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1646 int __user *, upeer_addrlen)
aaca0bdc 1647{
4541e805 1648 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1649}
1650
1da177e4
LT
1651/*
1652 * Attempt to connect to a socket with the server address. The address
1653 * is in user space so we verify it is OK and move it to kernel space.
1654 *
1655 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1656 * break bindings
1657 *
1658 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1659 * other SEQPACKET protocols that take time to connect() as it doesn't
1660 * include the -EINPROGRESS status for such sockets.
1661 */
1662
1387c2c2 1663int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1664{
1665 struct socket *sock;
230b1839 1666 struct sockaddr_storage address;
6cb153ca 1667 int err, fput_needed;
1da177e4 1668
6cb153ca 1669 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1670 if (!sock)
1671 goto out;
43db362d 1672 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1673 if (err < 0)
1674 goto out_put;
1675
89bddce5 1676 err =
230b1839 1677 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1678 if (err)
1679 goto out_put;
1680
230b1839 1681 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1682 sock->file->f_flags);
1683out_put:
6cb153ca 1684 fput_light(sock->file, fput_needed);
1da177e4
LT
1685out:
1686 return err;
1687}
1688
1387c2c2
DB
1689SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1690 int, addrlen)
1691{
1692 return __sys_connect(fd, uservaddr, addrlen);
1693}
1694
1da177e4
LT
1695/*
1696 * Get the local address ('name') of a socket object. Move the obtained
1697 * name to user space.
1698 */
1699
8882a107
DB
1700int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1701 int __user *usockaddr_len)
1da177e4
LT
1702{
1703 struct socket *sock;
230b1839 1704 struct sockaddr_storage address;
9b2c45d4 1705 int err, fput_needed;
89bddce5 1706
6cb153ca 1707 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1708 if (!sock)
1709 goto out;
1710
1711 err = security_socket_getsockname(sock);
1712 if (err)
1713 goto out_put;
1714
9b2c45d4
DV
1715 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1716 if (err < 0)
1da177e4 1717 goto out_put;
9b2c45d4
DV
1718 /* "err" is actually length in this case */
1719 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1720
1721out_put:
6cb153ca 1722 fput_light(sock->file, fput_needed);
1da177e4
LT
1723out:
1724 return err;
1725}
1726
8882a107
DB
1727SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1728 int __user *, usockaddr_len)
1729{
1730 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1731}
1732
1da177e4
LT
1733/*
1734 * Get the remote address ('name') of a socket object. Move the obtained
1735 * name to user space.
1736 */
1737
b21c8f83
DB
1738int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1739 int __user *usockaddr_len)
1da177e4
LT
1740{
1741 struct socket *sock;
230b1839 1742 struct sockaddr_storage address;
9b2c45d4 1743 int err, fput_needed;
1da177e4 1744
89bddce5
SH
1745 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1746 if (sock != NULL) {
1da177e4
LT
1747 err = security_socket_getpeername(sock);
1748 if (err) {
6cb153ca 1749 fput_light(sock->file, fput_needed);
1da177e4
LT
1750 return err;
1751 }
1752
9b2c45d4
DV
1753 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1754 if (err >= 0)
1755 /* "err" is actually length in this case */
1756 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1757 usockaddr_len);
6cb153ca 1758 fput_light(sock->file, fput_needed);
1da177e4
LT
1759 }
1760 return err;
1761}
1762
b21c8f83
DB
1763SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1764 int __user *, usockaddr_len)
1765{
1766 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1767}
1768
1da177e4
LT
1769/*
1770 * Send a datagram to a given address. We move the address into kernel
1771 * space and check the user space data area is readable before invoking
1772 * the protocol.
1773 */
211b634b
DB
1774int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1775 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1776{
1777 struct socket *sock;
230b1839 1778 struct sockaddr_storage address;
1da177e4
LT
1779 int err;
1780 struct msghdr msg;
1781 struct iovec iov;
6cb153ca 1782 int fput_needed;
6cb153ca 1783
602bd0e9
AV
1784 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1785 if (unlikely(err))
1786 return err;
de0fa95c
PE
1787 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1788 if (!sock)
4387ff75 1789 goto out;
6cb153ca 1790
89bddce5 1791 msg.msg_name = NULL;
89bddce5
SH
1792 msg.msg_control = NULL;
1793 msg.msg_controllen = 0;
1794 msg.msg_namelen = 0;
6cb153ca 1795 if (addr) {
43db362d 1796 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1797 if (err < 0)
1798 goto out_put;
230b1839 1799 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1800 msg.msg_namelen = addr_len;
1da177e4
LT
1801 }
1802 if (sock->file->f_flags & O_NONBLOCK)
1803 flags |= MSG_DONTWAIT;
1804 msg.msg_flags = flags;
d8725c86 1805 err = sock_sendmsg(sock, &msg);
1da177e4 1806
89bddce5 1807out_put:
de0fa95c 1808 fput_light(sock->file, fput_needed);
4387ff75 1809out:
1da177e4
LT
1810 return err;
1811}
1812
211b634b
DB
1813SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1814 unsigned int, flags, struct sockaddr __user *, addr,
1815 int, addr_len)
1816{
1817 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1818}
1819
1da177e4 1820/*
89bddce5 1821 * Send a datagram down a socket.
1da177e4
LT
1822 */
1823
3e0fa65f 1824SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1825 unsigned int, flags)
1da177e4 1826{
211b634b 1827 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1828}
1829
1830/*
89bddce5 1831 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1832 * sender. We verify the buffers are writable and if needed move the
1833 * sender address from kernel to user space.
1834 */
7a09e1eb
DB
1835int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
1836 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
1837{
1838 struct socket *sock;
1839 struct iovec iov;
1840 struct msghdr msg;
230b1839 1841 struct sockaddr_storage address;
89bddce5 1842 int err, err2;
6cb153ca
BL
1843 int fput_needed;
1844
602bd0e9
AV
1845 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1846 if (unlikely(err))
1847 return err;
de0fa95c 1848 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1849 if (!sock)
de0fa95c 1850 goto out;
1da177e4 1851
89bddce5
SH
1852 msg.msg_control = NULL;
1853 msg.msg_controllen = 0;
f3d33426
HFS
1854 /* Save some cycles and don't copy the address if not needed */
1855 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1856 /* We assume all kernel code knows the size of sockaddr_storage */
1857 msg.msg_namelen = 0;
130ed5d1 1858 msg.msg_iocb = NULL;
9f138fa6 1859 msg.msg_flags = 0;
1da177e4
LT
1860 if (sock->file->f_flags & O_NONBLOCK)
1861 flags |= MSG_DONTWAIT;
2da62906 1862 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1863
89bddce5 1864 if (err >= 0 && addr != NULL) {
43db362d 1865 err2 = move_addr_to_user(&address,
230b1839 1866 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1867 if (err2 < 0)
1868 err = err2;
1da177e4 1869 }
de0fa95c
PE
1870
1871 fput_light(sock->file, fput_needed);
4387ff75 1872out:
1da177e4
LT
1873 return err;
1874}
1875
7a09e1eb
DB
1876SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1877 unsigned int, flags, struct sockaddr __user *, addr,
1878 int __user *, addr_len)
1879{
1880 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
1881}
1882
1da177e4 1883/*
89bddce5 1884 * Receive a datagram from a socket.
1da177e4
LT
1885 */
1886
b7c0ddf5
JG
1887SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1888 unsigned int, flags)
1da177e4 1889{
7a09e1eb 1890 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
1891}
1892
1893/*
1894 * Set a socket option. Because we don't know the option lengths we have
1895 * to pass the user mode parameter for the protocols to sort out.
1896 */
1897
cc36dca0
DB
1898static int __sys_setsockopt(int fd, int level, int optname,
1899 char __user *optval, int optlen)
1da177e4 1900{
6cb153ca 1901 int err, fput_needed;
1da177e4
LT
1902 struct socket *sock;
1903
1904 if (optlen < 0)
1905 return -EINVAL;
89bddce5
SH
1906
1907 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1908 if (sock != NULL) {
1909 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1910 if (err)
1911 goto out_put;
1da177e4
LT
1912
1913 if (level == SOL_SOCKET)
89bddce5
SH
1914 err =
1915 sock_setsockopt(sock, level, optname, optval,
1916 optlen);
1da177e4 1917 else
89bddce5
SH
1918 err =
1919 sock->ops->setsockopt(sock, level, optname, optval,
1920 optlen);
6cb153ca
BL
1921out_put:
1922 fput_light(sock->file, fput_needed);
1da177e4
LT
1923 }
1924 return err;
1925}
1926
cc36dca0
DB
1927SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1928 char __user *, optval, int, optlen)
1929{
1930 return __sys_setsockopt(fd, level, optname, optval, optlen);
1931}
1932
1da177e4
LT
1933/*
1934 * Get a socket option. Because we don't know the option lengths we have
1935 * to pass a user mode parameter for the protocols to sort out.
1936 */
1937
13a2d70e
DB
1938static int __sys_getsockopt(int fd, int level, int optname,
1939 char __user *optval, int __user *optlen)
1da177e4 1940{
6cb153ca 1941 int err, fput_needed;
1da177e4
LT
1942 struct socket *sock;
1943
89bddce5
SH
1944 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1945 if (sock != NULL) {
6cb153ca
BL
1946 err = security_socket_getsockopt(sock, level, optname);
1947 if (err)
1948 goto out_put;
1da177e4
LT
1949
1950 if (level == SOL_SOCKET)
89bddce5
SH
1951 err =
1952 sock_getsockopt(sock, level, optname, optval,
1953 optlen);
1da177e4 1954 else
89bddce5
SH
1955 err =
1956 sock->ops->getsockopt(sock, level, optname, optval,
1957 optlen);
6cb153ca
BL
1958out_put:
1959 fput_light(sock->file, fput_needed);
1da177e4
LT
1960 }
1961 return err;
1962}
1963
13a2d70e
DB
1964SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1965 char __user *, optval, int __user *, optlen)
1966{
1967 return __sys_getsockopt(fd, level, optname, optval, optlen);
1968}
1969
1da177e4
LT
1970/*
1971 * Shutdown a socket.
1972 */
1973
005a1aea 1974int __sys_shutdown(int fd, int how)
1da177e4 1975{
6cb153ca 1976 int err, fput_needed;
1da177e4
LT
1977 struct socket *sock;
1978
89bddce5
SH
1979 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1980 if (sock != NULL) {
1da177e4 1981 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1982 if (!err)
1983 err = sock->ops->shutdown(sock, how);
1984 fput_light(sock->file, fput_needed);
1da177e4
LT
1985 }
1986 return err;
1987}
1988
005a1aea
DB
1989SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1990{
1991 return __sys_shutdown(fd, how);
1992}
1993
/*
 * Helpers yielding the address of a msghdr field in either the native or
 * the compat layout; the fields have the same type (int / unsigned) in both
 * on our platforms.
 *
 * These macros are not hygienic: the expansion reads a variable named
 * "flags" and a pointer named "<msg>_compat" from the scope of the caller.
 */
#define COMPAT_MSG(msg, member)					\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2000
c71d8ebe
TH
/*
 * Destination address remembered across the messages of one sendmmsg()
 * call: ___sys_sendmsg() stores the address of a successful send here and
 * compares later destinations against it.
 */
struct used_address {
	struct sockaddr_storage name;	/* copy of the destination address */
	unsigned int name_len;		/* number of valid bytes in @name */
};
2005
da184284
AV
2006static int copy_msghdr_from_user(struct msghdr *kmsg,
2007 struct user_msghdr __user *umsg,
2008 struct sockaddr __user **save_addr,
2009 struct iovec **iov)
1661bf36 2010{
ffb07550 2011 struct user_msghdr msg;
08adb7da
AV
2012 ssize_t err;
2013
ffb07550 2014 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2015 return -EFAULT;
dbb490b9 2016
864d9664 2017 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2018 kmsg->msg_controllen = msg.msg_controllen;
2019 kmsg->msg_flags = msg.msg_flags;
2020
2021 kmsg->msg_namelen = msg.msg_namelen;
2022 if (!msg.msg_name)
6a2a2b3a
AS
2023 kmsg->msg_namelen = 0;
2024
dbb490b9
ML
2025 if (kmsg->msg_namelen < 0)
2026 return -EINVAL;
2027
1661bf36 2028 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2029 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2030
2031 if (save_addr)
ffb07550 2032 *save_addr = msg.msg_name;
08adb7da 2033
ffb07550 2034 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2035 if (!save_addr) {
864d9664
PA
2036 err = move_addr_to_kernel(msg.msg_name,
2037 kmsg->msg_namelen,
08adb7da
AV
2038 kmsg->msg_name);
2039 if (err < 0)
2040 return err;
2041 }
2042 } else {
2043 kmsg->msg_name = NULL;
2044 kmsg->msg_namelen = 0;
2045 }
2046
ffb07550 2047 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2048 return -EMSGSIZE;
2049
0345f931 2050 kmsg->msg_iocb = NULL;
2051
ffb07550
AV
2052 return import_iovec(save_addr ? READ : WRITE,
2053 msg.msg_iov, msg.msg_iovlen,
da184284 2054 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
2055}
2056
666547ff 2057static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2058 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2059 struct used_address *used_address,
2060 unsigned int allowed_msghdr_flags)
1da177e4 2061{
89bddce5
SH
2062 struct compat_msghdr __user *msg_compat =
2063 (struct compat_msghdr __user *)msg;
230b1839 2064 struct sockaddr_storage address;
1da177e4 2065 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2066 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2067 __aligned(sizeof(__kernel_size_t));
89bddce5 2068 /* 20 is size of ipv6_pktinfo */
1da177e4 2069 unsigned char *ctl_buf = ctl;
d8725c86 2070 int ctl_len;
08adb7da 2071 ssize_t err;
89bddce5 2072
08adb7da 2073 msg_sys->msg_name = &address;
1da177e4 2074
08449320 2075 if (MSG_CMSG_COMPAT & flags)
08adb7da 2076 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2077 else
08adb7da 2078 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2079 if (err < 0)
da184284 2080 return err;
1da177e4
LT
2081
2082 err = -ENOBUFS;
2083
228e548e 2084 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2085 goto out_freeiov;
28a94d8f 2086 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2087 ctl_len = msg_sys->msg_controllen;
1da177e4 2088 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2089 err =
228e548e 2090 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2091 sizeof(ctl));
1da177e4
LT
2092 if (err)
2093 goto out_freeiov;
228e548e
AB
2094 ctl_buf = msg_sys->msg_control;
2095 ctl_len = msg_sys->msg_controllen;
1da177e4 2096 } else if (ctl_len) {
ac4340fc
DM
2097 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2098 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2099 if (ctl_len > sizeof(ctl)) {
1da177e4 2100 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2101 if (ctl_buf == NULL)
1da177e4
LT
2102 goto out_freeiov;
2103 }
2104 err = -EFAULT;
2105 /*
228e548e 2106 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2107 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2108 * checking falls down on this.
2109 */
fb8621bb 2110 if (copy_from_user(ctl_buf,
228e548e 2111 (void __user __force *)msg_sys->msg_control,
89bddce5 2112 ctl_len))
1da177e4 2113 goto out_freectl;
228e548e 2114 msg_sys->msg_control = ctl_buf;
1da177e4 2115 }
228e548e 2116 msg_sys->msg_flags = flags;
1da177e4
LT
2117
2118 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2119 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2120 /*
2121 * If this is sendmmsg() and current destination address is same as
2122 * previously succeeded address, omit asking LSM's decision.
2123 * used_address->name_len is initialized to UINT_MAX so that the first
2124 * destination address never matches.
2125 */
bc909d9d
MD
2126 if (used_address && msg_sys->msg_name &&
2127 used_address->name_len == msg_sys->msg_namelen &&
2128 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2129 used_address->name_len)) {
d8725c86 2130 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2131 goto out_freectl;
2132 }
d8725c86 2133 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2134 /*
2135 * If this is sendmmsg() and sending to current destination address was
2136 * successful, remember it.
2137 */
2138 if (used_address && err >= 0) {
2139 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2140 if (msg_sys->msg_name)
2141 memcpy(&used_address->name, msg_sys->msg_name,
2142 used_address->name_len);
c71d8ebe 2143 }
1da177e4
LT
2144
2145out_freectl:
89bddce5 2146 if (ctl_buf != ctl)
1da177e4
LT
2147 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2148out_freeiov:
da184284 2149 kfree(iov);
228e548e
AB
2150 return err;
2151}
2152
2153/*
2154 * BSD sendmsg interface
2155 */
2156
e1834a32
DB
2157long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2158 bool forbid_cmsg_compat)
228e548e
AB
2159{
2160 int fput_needed, err;
2161 struct msghdr msg_sys;
1be374a0
AL
2162 struct socket *sock;
2163
e1834a32
DB
2164 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2165 return -EINVAL;
2166
1be374a0 2167 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2168 if (!sock)
2169 goto out;
2170
28a94d8f 2171 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2172
6cb153ca 2173 fput_light(sock->file, fput_needed);
89bddce5 2174out:
1da177e4
LT
2175 return err;
2176}
2177
666547ff 2178SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2179{
e1834a32 2180 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2181}
2182
228e548e
AB
2183/*
2184 * Linux sendmmsg interface
2185 */
2186
2187int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2188 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2189{
2190 int fput_needed, err, datagrams;
2191 struct socket *sock;
2192 struct mmsghdr __user *entry;
2193 struct compat_mmsghdr __user *compat_entry;
2194 struct msghdr msg_sys;
c71d8ebe 2195 struct used_address used_address;
f092276d 2196 unsigned int oflags = flags;
228e548e 2197
e1834a32
DB
2198 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2199 return -EINVAL;
2200
98382f41
AB
2201 if (vlen > UIO_MAXIOV)
2202 vlen = UIO_MAXIOV;
228e548e
AB
2203
2204 datagrams = 0;
2205
2206 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2207 if (!sock)
2208 return err;
2209
c71d8ebe 2210 used_address.name_len = UINT_MAX;
228e548e
AB
2211 entry = mmsg;
2212 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2213 err = 0;
f092276d 2214 flags |= MSG_BATCH;
228e548e
AB
2215
2216 while (datagrams < vlen) {
f092276d
TH
2217 if (datagrams == vlen - 1)
2218 flags = oflags;
2219
228e548e 2220 if (MSG_CMSG_COMPAT & flags) {
666547ff 2221 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2222 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2223 if (err < 0)
2224 break;
2225 err = __put_user(err, &compat_entry->msg_len);
2226 ++compat_entry;
2227 } else {
a7526eb5 2228 err = ___sys_sendmsg(sock,
666547ff 2229 (struct user_msghdr __user *)entry,
28a94d8f 2230 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2231 if (err < 0)
2232 break;
2233 err = put_user(err, &entry->msg_len);
2234 ++entry;
2235 }
2236
2237 if (err)
2238 break;
2239 ++datagrams;
3023898b
SHY
2240 if (msg_data_left(&msg_sys))
2241 break;
a78cb84c 2242 cond_resched();
228e548e
AB
2243 }
2244
228e548e
AB
2245 fput_light(sock->file, fput_needed);
2246
728ffb86
AB
2247 /* We only return an error if no datagrams were able to be sent */
2248 if (datagrams != 0)
228e548e
AB
2249 return datagrams;
2250
228e548e
AB
2251 return err;
2252}
2253
2254SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2255 unsigned int, vlen, unsigned int, flags)
2256{
e1834a32 2257 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2258}
2259
666547ff 2260static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2261 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2262{
89bddce5
SH
2263 struct compat_msghdr __user *msg_compat =
2264 (struct compat_msghdr __user *)msg;
1da177e4 2265 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2266 struct iovec *iov = iovstack;
1da177e4 2267 unsigned long cmsg_ptr;
2da62906 2268 int len;
08adb7da 2269 ssize_t err;
1da177e4
LT
2270
2271 /* kernel mode address */
230b1839 2272 struct sockaddr_storage addr;
1da177e4
LT
2273
2274 /* user mode address pointers */
2275 struct sockaddr __user *uaddr;
08adb7da 2276 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2277
08adb7da 2278 msg_sys->msg_name = &addr;
1da177e4 2279
f3d33426 2280 if (MSG_CMSG_COMPAT & flags)
08adb7da 2281 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2282 else
08adb7da 2283 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2284 if (err < 0)
da184284 2285 return err;
1da177e4 2286
a2e27255
ACM
2287 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2288 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2289
f3d33426
HFS
2290 /* We assume all kernel code knows the size of sockaddr_storage */
2291 msg_sys->msg_namelen = 0;
2292
1da177e4
LT
2293 if (sock->file->f_flags & O_NONBLOCK)
2294 flags |= MSG_DONTWAIT;
2da62906 2295 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2296 if (err < 0)
2297 goto out_freeiov;
2298 len = err;
2299
2300 if (uaddr != NULL) {
43db362d 2301 err = move_addr_to_user(&addr,
a2e27255 2302 msg_sys->msg_namelen, uaddr,
89bddce5 2303 uaddr_len);
1da177e4
LT
2304 if (err < 0)
2305 goto out_freeiov;
2306 }
a2e27255 2307 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2308 COMPAT_FLAGS(msg));
1da177e4
LT
2309 if (err)
2310 goto out_freeiov;
2311 if (MSG_CMSG_COMPAT & flags)
a2e27255 2312 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2313 &msg_compat->msg_controllen);
2314 else
a2e27255 2315 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2316 &msg->msg_controllen);
2317 if (err)
2318 goto out_freeiov;
2319 err = len;
2320
2321out_freeiov:
da184284 2322 kfree(iov);
a2e27255
ACM
2323 return err;
2324}
2325
2326/*
2327 * BSD recvmsg interface
2328 */
2329
e1834a32
DB
2330long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2331 bool forbid_cmsg_compat)
a2e27255
ACM
2332{
2333 int fput_needed, err;
2334 struct msghdr msg_sys;
1be374a0
AL
2335 struct socket *sock;
2336
e1834a32
DB
2337 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2338 return -EINVAL;
2339
1be374a0 2340 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2341 if (!sock)
2342 goto out;
2343
a7526eb5 2344 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2345
6cb153ca 2346 fput_light(sock->file, fput_needed);
1da177e4
LT
2347out:
2348 return err;
2349}
2350
666547ff 2351SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2352 unsigned int, flags)
2353{
e1834a32 2354 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2355}
2356
a2e27255
ACM
2357/*
2358 * Linux recvmmsg interface
2359 */
2360
2361int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2362 unsigned int flags, struct timespec *timeout)
2363{
2364 int fput_needed, err, datagrams;
2365 struct socket *sock;
2366 struct mmsghdr __user *entry;
d7256d0e 2367 struct compat_mmsghdr __user *compat_entry;
a2e27255 2368 struct msghdr msg_sys;
766b9f92
DD
2369 struct timespec64 end_time;
2370 struct timespec64 timeout64;
a2e27255
ACM
2371
2372 if (timeout &&
2373 poll_select_set_timeout(&end_time, timeout->tv_sec,
2374 timeout->tv_nsec))
2375 return -EINVAL;
2376
2377 datagrams = 0;
2378
2379 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2380 if (!sock)
2381 return err;
2382
7797dc41
SHY
2383 if (likely(!(flags & MSG_ERRQUEUE))) {
2384 err = sock_error(sock->sk);
2385 if (err) {
2386 datagrams = err;
2387 goto out_put;
2388 }
e623a9e9 2389 }
a2e27255
ACM
2390
2391 entry = mmsg;
d7256d0e 2392 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2393
2394 while (datagrams < vlen) {
2395 /*
2396 * No need to ask LSM for more than the first datagram.
2397 */
d7256d0e 2398 if (MSG_CMSG_COMPAT & flags) {
666547ff 2399 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2400 &msg_sys, flags & ~MSG_WAITFORONE,
2401 datagrams);
d7256d0e
JMG
2402 if (err < 0)
2403 break;
2404 err = __put_user(err, &compat_entry->msg_len);
2405 ++compat_entry;
2406 } else {
a7526eb5 2407 err = ___sys_recvmsg(sock,
666547ff 2408 (struct user_msghdr __user *)entry,
a7526eb5
AL
2409 &msg_sys, flags & ~MSG_WAITFORONE,
2410 datagrams);
d7256d0e
JMG
2411 if (err < 0)
2412 break;
2413 err = put_user(err, &entry->msg_len);
2414 ++entry;
2415 }
2416
a2e27255
ACM
2417 if (err)
2418 break;
a2e27255
ACM
2419 ++datagrams;
2420
71c5c159
BB
2421 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2422 if (flags & MSG_WAITFORONE)
2423 flags |= MSG_DONTWAIT;
2424
a2e27255 2425 if (timeout) {
766b9f92
DD
2426 ktime_get_ts64(&timeout64);
2427 *timeout = timespec64_to_timespec(
2428 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2429 if (timeout->tv_sec < 0) {
2430 timeout->tv_sec = timeout->tv_nsec = 0;
2431 break;
2432 }
2433
2434 /* Timeout, return less than vlen datagrams */
2435 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2436 break;
2437 }
2438
2439 /* Out of band data, return right away */
2440 if (msg_sys.msg_flags & MSG_OOB)
2441 break;
a78cb84c 2442 cond_resched();
a2e27255
ACM
2443 }
2444
a2e27255 2445 if (err == 0)
34b88a68
ACM
2446 goto out_put;
2447
2448 if (datagrams == 0) {
2449 datagrams = err;
2450 goto out_put;
2451 }
a2e27255 2452
34b88a68
ACM
2453 /*
2454 * We may return less entries than requested (vlen) if the
2455 * sock is non block and there aren't enough datagrams...
2456 */
2457 if (err != -EAGAIN) {
a2e27255 2458 /*
34b88a68
ACM
2459 * ... or if recvmsg returns an error after we
2460 * received some datagrams, where we record the
2461 * error to return on the next call or if the
2462 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2463 */
34b88a68 2464 sock->sk->sk_err = -err;
a2e27255 2465 }
34b88a68
ACM
2466out_put:
2467 fput_light(sock->file, fput_needed);
a2e27255 2468
34b88a68 2469 return datagrams;
a2e27255
ACM
2470}
2471
1255e269
DB
2472static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2473 unsigned int vlen, unsigned int flags,
2474 struct timespec __user *timeout)
a2e27255
ACM
2475{
2476 int datagrams;
2477 struct timespec timeout_sys;
2478
1be374a0
AL
2479 if (flags & MSG_CMSG_COMPAT)
2480 return -EINVAL;
2481
a2e27255
ACM
2482 if (!timeout)
2483 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2484
2485 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2486 return -EFAULT;
2487
2488 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2489
2490 if (datagrams > 0 &&
2491 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2492 datagrams = -EFAULT;
2493
2494 return datagrams;
2495}
2496
1255e269
DB
2497SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2498 unsigned int, vlen, unsigned int, flags,
2499 struct timespec __user *, timeout)
2500{
2501 return do_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
2502}
2503
a2e27255 2504#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call number.
 * AL(n) is the size of n unsigned long arguments.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0 */
	AL(3),	/*  1 */
	AL(3),	/*  2 */
	AL(3),	/*  3 */
	AL(2),	/*  4 */
	AL(3),	/*  5 */
	AL(3),	/*  6 */
	AL(3),	/*  7 */
	AL(4),	/*  8 */
	AL(4),	/*  9 */
	AL(4),	/* 10 */
	AL(6),	/* 11 */
	AL(6),	/* 12 */
	AL(2),	/* 13 */
	AL(5),	/* 14 */
	AL(5),	/* 15 */
	AL(3),	/* 16 */
	AL(3),	/* 17 */
	AL(4),	/* 18 */
	AL(5),	/* 19 */
	AL(4),	/* 20 */
};

#undef AL
2515
2516/*
89bddce5 2517 * System call vectors.
1da177e4
LT
2518 *
2519 * Argument checking cleaned up. Saved 20% in size.
2520 * This function doesn't need to set the kernel lock because
89bddce5 2521 * it is set by the callees.
1da177e4
LT
2522 */
2523
3e0fa65f 2524SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2525{
2950fa9d 2526 unsigned long a[AUDITSC_ARGS];
89bddce5 2527 unsigned long a0, a1;
1da177e4 2528 int err;
47379052 2529 unsigned int len;
1da177e4 2530
228e548e 2531 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2532 return -EINVAL;
c8e8cd57 2533 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2534
47379052
AV
2535 len = nargs[call];
2536 if (len > sizeof(a))
2537 return -EINVAL;
2538
1da177e4 2539 /* copy_from_user should be SMP safe. */
47379052 2540 if (copy_from_user(a, args, len))
1da177e4 2541 return -EFAULT;
3ec3b2fb 2542
2950fa9d
CG
2543 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2544 if (err)
2545 return err;
3ec3b2fb 2546
89bddce5
SH
2547 a0 = a[0];
2548 a1 = a[1];
2549
2550 switch (call) {
2551 case SYS_SOCKET:
9d6a15c3 2552 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2553 break;
2554 case SYS_BIND:
a87d35d8 2555 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2556 break;
2557 case SYS_CONNECT:
1387c2c2 2558 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2559 break;
2560 case SYS_LISTEN:
25e290ee 2561 err = __sys_listen(a0, a1);
89bddce5
SH
2562 break;
2563 case SYS_ACCEPT:
4541e805
DB
2564 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2565 (int __user *)a[2], 0);
89bddce5
SH
2566 break;
2567 case SYS_GETSOCKNAME:
2568 err =
8882a107
DB
2569 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2570 (int __user *)a[2]);
89bddce5
SH
2571 break;
2572 case SYS_GETPEERNAME:
2573 err =
b21c8f83
DB
2574 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2575 (int __user *)a[2]);
89bddce5
SH
2576 break;
2577 case SYS_SOCKETPAIR:
6debc8d8 2578 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2579 break;
2580 case SYS_SEND:
f3bf896b
DB
2581 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2582 NULL, 0);
89bddce5
SH
2583 break;
2584 case SYS_SENDTO:
211b634b
DB
2585 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2586 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2587 break;
2588 case SYS_RECV:
d27e9afc
DB
2589 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2590 NULL, NULL);
89bddce5
SH
2591 break;
2592 case SYS_RECVFROM:
7a09e1eb
DB
2593 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2594 (struct sockaddr __user *)a[4],
2595 (int __user *)a[5]);
89bddce5
SH
2596 break;
2597 case SYS_SHUTDOWN:
005a1aea 2598 err = __sys_shutdown(a0, a1);
89bddce5
SH
2599 break;
2600 case SYS_SETSOCKOPT:
cc36dca0
DB
2601 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2602 a[4]);
89bddce5
SH
2603 break;
2604 case SYS_GETSOCKOPT:
2605 err =
13a2d70e
DB
2606 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2607 (int __user *)a[4]);
89bddce5
SH
2608 break;
2609 case SYS_SENDMSG:
e1834a32
DB
2610 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2611 a[2], true);
89bddce5 2612 break;
228e548e 2613 case SYS_SENDMMSG:
e1834a32
DB
2614 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2615 a[3], true);
228e548e 2616 break;
89bddce5 2617 case SYS_RECVMSG:
e1834a32
DB
2618 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2619 a[2], true);
89bddce5 2620 break;
a2e27255 2621 case SYS_RECVMMSG:
1255e269
DB
2622 err = do_sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2623 a[3], (struct timespec __user *)a[4]);
a2e27255 2624 break;
de11defe 2625 case SYS_ACCEPT4:
4541e805
DB
2626 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2627 (int __user *)a[2], a[3]);
aaca0bdc 2628 break;
89bddce5
SH
2629 default:
2630 err = -EINVAL;
2631 break;
1da177e4
LT
2632 }
2633 return err;
2634}
2635
89bddce5 2636#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2637
55737fda
SH
2638/**
2639 * sock_register - add a socket protocol handler
2640 * @ops: description of protocol
2641 *
1da177e4
LT
2642 * This function is called by a protocol handler that wants to
2643 * advertise its address family, and have it linked into the
e793c0f7 2644 * socket interface. The value ops->family corresponds to the
55737fda 2645 * socket system call protocol family.
1da177e4 2646 */
f0fd27d4 2647int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2648{
2649 int err;
2650
2651 if (ops->family >= NPROTO) {
3410f22e 2652 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2653 return -ENOBUFS;
2654 }
55737fda
SH
2655
2656 spin_lock(&net_family_lock);
190683a9
ED
2657 if (rcu_dereference_protected(net_families[ops->family],
2658 lockdep_is_held(&net_family_lock)))
55737fda
SH
2659 err = -EEXIST;
2660 else {
cf778b00 2661 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2662 err = 0;
2663 }
55737fda
SH
2664 spin_unlock(&net_family_lock);
2665
3410f22e 2666 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2667 return err;
2668}
c6d409cf 2669EXPORT_SYMBOL(sock_register);
1da177e4 2670
55737fda
SH
2671/**
2672 * sock_unregister - remove a protocol handler
2673 * @family: protocol family to remove
2674 *
1da177e4
LT
2675 * This function is called by a protocol handler that wants to
2676 * remove its address family, and have it unlinked from the
55737fda
SH
2677 * new socket creation.
2678 *
2679 * If protocol handler is a module, then it can use module reference
2680 * counts to protect against new references. If protocol handler is not
2681 * a module then it needs to provide its own protection in
2682 * the ops->create routine.
1da177e4 2683 */
f0fd27d4 2684void sock_unregister(int family)
1da177e4 2685{
f0fd27d4 2686 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2687
55737fda 2688 spin_lock(&net_family_lock);
a9b3cd7f 2689 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2690 spin_unlock(&net_family_lock);
2691
2692 synchronize_rcu();
2693
3410f22e 2694 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2695}
c6d409cf 2696EXPORT_SYMBOL(sock_unregister);
1da177e4 2697
bf2ae2e4
XL
2698bool sock_is_registered(int family)
2699{
e978de7a
JC
2700 return family < NPROTO &&
2701 rcu_access_pointer(net_families[array_index_nospec(family, NPROTO)]);
bf2ae2e4
XL
2702}
2703
77d76ea3 2704static int __init sock_init(void)
1da177e4 2705{
b3e19d92 2706 int err;
2ca794e5
EB
2707 /*
2708 * Initialize the network sysctl infrastructure.
2709 */
2710 err = net_sysctl_init();
2711 if (err)
2712 goto out;
b3e19d92 2713
1da177e4 2714 /*
89bddce5 2715 * Initialize skbuff SLAB cache
1da177e4
LT
2716 */
2717 skb_init();
1da177e4
LT
2718
2719 /*
89bddce5 2720 * Initialize the protocols module.
1da177e4
LT
2721 */
2722
2723 init_inodecache();
b3e19d92
NP
2724
2725 err = register_filesystem(&sock_fs_type);
2726 if (err)
2727 goto out_fs;
1da177e4 2728 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2729 if (IS_ERR(sock_mnt)) {
2730 err = PTR_ERR(sock_mnt);
2731 goto out_mount;
2732 }
77d76ea3
AK
2733
2734 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2735 */
2736
2737#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2738 err = netfilter_init();
2739 if (err)
2740 goto out;
1da177e4 2741#endif
cbeb321a 2742
408eccce 2743 ptp_classifier_init();
c1f19b51 2744
b3e19d92
NP
2745out:
2746 return err;
2747
2748out_mount:
2749 unregister_filesystem(&sock_fs_type);
2750out_fs:
2751 goto out;
1da177e4
LT
2752}
2753
77d76ea3
AK
2754core_initcall(sock_init); /* early initcall */
2755
1da177e4
LT
2756#ifdef CONFIG_PROC_FS
2757void socket_seq_show(struct seq_file *seq)
2758{
648845ab
TZ
2759 seq_printf(seq, "sockets: used %d\n",
2760 sock_inuse_get(seq->private));
1da177e4 2761}
89bddce5 2762#endif /* CONFIG_PROC_FS */
1da177e4 2763
89bbfc95 2764#ifdef CONFIG_COMPAT
6b96018b 2765static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2766 unsigned int cmd, void __user *up)
7a229387 2767{
7a229387
AB
2768 mm_segment_t old_fs = get_fs();
2769 struct timeval ktv;
2770 int err;
2771
2772 set_fs(KERNEL_DS);
6b96018b 2773 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2774 set_fs(old_fs);
644595f8 2775 if (!err)
ed6fe9d6 2776 err = compat_put_timeval(&ktv, up);
644595f8 2777
7a229387
AB
2778 return err;
2779}
2780
6b96018b 2781static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2782 unsigned int cmd, void __user *up)
7a229387 2783{
7a229387
AB
2784 mm_segment_t old_fs = get_fs();
2785 struct timespec kts;
2786 int err;
2787
2788 set_fs(KERNEL_DS);
6b96018b 2789 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2790 set_fs(old_fs);
644595f8 2791 if (!err)
ed6fe9d6 2792 err = compat_put_timespec(&kts, up);
644595f8 2793
7a229387
AB
2794 return err;
2795}
2796
36fd633e 2797static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2798{
6b96018b 2799 struct compat_ifconf ifc32;
7a229387 2800 struct ifconf ifc;
7a229387
AB
2801 int err;
2802
6b96018b 2803 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2804 return -EFAULT;
2805
36fd633e
AV
2806 ifc.ifc_len = ifc32.ifc_len;
2807 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 2808
36fd633e
AV
2809 rtnl_lock();
2810 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2811 rtnl_unlock();
7a229387
AB
2812 if (err)
2813 return err;
2814
36fd633e 2815 ifc32.ifc_len = ifc.ifc_len;
6b96018b 2816 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2817 return -EFAULT;
2818
2819 return 0;
2820}
2821
6b96018b 2822static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2823{
3a7da39d
BH
2824 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2825 bool convert_in = false, convert_out = false;
44c02a2c
AV
2826 size_t buf_size = 0;
2827 struct ethtool_rxnfc __user *rxnfc = NULL;
2828 struct ifreq ifr;
3a7da39d
BH
2829 u32 rule_cnt = 0, actual_rule_cnt;
2830 u32 ethcmd;
7a229387 2831 u32 data;
3a7da39d 2832 int ret;
7a229387 2833
3a7da39d
BH
2834 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2835 return -EFAULT;
7a229387 2836
3a7da39d
BH
2837 compat_rxnfc = compat_ptr(data);
2838
2839 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2840 return -EFAULT;
2841
3a7da39d
BH
2842 /* Most ethtool structures are defined without padding.
2843 * Unfortunately struct ethtool_rxnfc is an exception.
2844 */
2845 switch (ethcmd) {
2846 default:
2847 break;
2848 case ETHTOOL_GRXCLSRLALL:
2849 /* Buffer size is variable */
2850 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2851 return -EFAULT;
2852 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2853 return -ENOMEM;
2854 buf_size += rule_cnt * sizeof(u32);
2855 /* fall through */
2856 case ETHTOOL_GRXRINGS:
2857 case ETHTOOL_GRXCLSRLCNT:
2858 case ETHTOOL_GRXCLSRULE:
55664f32 2859 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2860 convert_out = true;
2861 /* fall through */
2862 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2863 buf_size += sizeof(struct ethtool_rxnfc);
2864 convert_in = true;
44c02a2c 2865 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
2866 break;
2867 }
2868
44c02a2c 2869 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2870 return -EFAULT;
2871
44c02a2c 2872 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 2873
3a7da39d 2874 if (convert_in) {
127fe533 2875 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2876 * fs.ring_cookie and at the end of fs, but nowhere else.
2877 */
127fe533
AD
2878 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2879 sizeof(compat_rxnfc->fs.m_ext) !=
2880 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2881 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2882 BUILD_BUG_ON(
2883 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2884 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2885 offsetof(struct ethtool_rxnfc, fs.location) -
2886 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2887
2888 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2889 (void __user *)(&rxnfc->fs.m_ext + 1) -
2890 (void __user *)rxnfc) ||
3a7da39d
BH
2891 copy_in_user(&rxnfc->fs.ring_cookie,
2892 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2893 (void __user *)(&rxnfc->fs.location + 1) -
2894 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2895 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2896 sizeof(rxnfc->rule_cnt)))
2897 return -EFAULT;
2898 }
2899
44c02a2c 2900 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
2901 if (ret)
2902 return ret;
2903
2904 if (convert_out) {
2905 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2906 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2907 (const void __user *)rxnfc) ||
3a7da39d
BH
2908 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2909 &rxnfc->fs.ring_cookie,
954b1244
SH
2910 (const void __user *)(&rxnfc->fs.location + 1) -
2911 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2912 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2913 sizeof(rxnfc->rule_cnt)))
2914 return -EFAULT;
2915
2916 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2917 /* As an optimisation, we only copy the actual
2918 * number of rules that the underlying
2919 * function returned. Since Mallory might
2920 * change the rule count in user memory, we
2921 * check that it is less than the rule count
2922 * originally given (as the user buffer size),
2923 * which has been range-checked.
2924 */
2925 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2926 return -EFAULT;
2927 if (actual_rule_cnt < rule_cnt)
2928 rule_cnt = actual_rule_cnt;
2929 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2930 &rxnfc->rule_locs[0],
2931 rule_cnt * sizeof(u32)))
2932 return -EFAULT;
2933 }
2934 }
2935
2936 return 0;
7a229387
AB
2937}
2938
7a50a240
AB
2939static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2940{
7a50a240 2941 compat_uptr_t uptr32;
44c02a2c
AV
2942 struct ifreq ifr;
2943 void __user *saved;
2944 int err;
7a50a240 2945
44c02a2c 2946 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
2947 return -EFAULT;
2948
2949 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2950 return -EFAULT;
2951
44c02a2c
AV
2952 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
2953 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 2954
44c02a2c
AV
2955 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
2956 if (!err) {
2957 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
2958 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
2959 err = -EFAULT;
ccbd6a5a 2960 }
44c02a2c 2961 return err;
7a229387
AB
2962}
2963
590d4693
BH
2964/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2965static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2966 struct compat_ifreq __user *u_ifreq32)
7a229387 2967{
44c02a2c 2968 struct ifreq ifreq;
7a229387
AB
2969 u32 data32;
2970
44c02a2c 2971 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 2972 return -EFAULT;
44c02a2c 2973 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 2974 return -EFAULT;
44c02a2c 2975 ifreq.ifr_data = compat_ptr(data32);
7a229387 2976
44c02a2c 2977 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
2978}
2979
a2116ed2
AB
2980static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2981 struct compat_ifreq __user *uifr32)
2982{
2983 struct ifreq ifr;
2984 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
2985 int err;
2986
2987 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2988 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2989 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2990 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2991 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2992 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2993 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2994 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2995 if (err)
2996 return -EFAULT;
2997
44c02a2c 2998 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
2999
3000 if (cmd == SIOCGIFMAP && !err) {
3001 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3002 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3003 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3004 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3005 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3006 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3007 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3008 if (err)
3009 err = -EFAULT;
3010 }
3011 return err;
3012}
3013
7a229387 3014struct rtentry32 {
c6d409cf 3015 u32 rt_pad1;
7a229387
AB
3016 struct sockaddr rt_dst; /* target address */
3017 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3018 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3019 unsigned short rt_flags;
3020 short rt_pad2;
3021 u32 rt_pad3;
3022 unsigned char rt_tos;
3023 unsigned char rt_class;
3024 short rt_pad4;
3025 short rt_metric; /* +1 for binary compatibility! */
7a229387 3026 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3027 u32 rt_mtu; /* per route MTU/Window */
3028 u32 rt_window; /* Window clamping */
7a229387
AB
3029 unsigned short rt_irtt; /* Initial RTT */
3030};
3031
3032struct in6_rtmsg32 {
3033 struct in6_addr rtmsg_dst;
3034 struct in6_addr rtmsg_src;
3035 struct in6_addr rtmsg_gateway;
3036 u32 rtmsg_type;
3037 u16 rtmsg_dst_len;
3038 u16 rtmsg_src_len;
3039 u32 rtmsg_metric;
3040 u32 rtmsg_info;
3041 u32 rtmsg_flags;
3042 s32 rtmsg_ifindex;
3043};
3044
6b96018b
AB
3045static int routing_ioctl(struct net *net, struct socket *sock,
3046 unsigned int cmd, void __user *argp)
7a229387
AB
3047{
3048 int ret;
3049 void *r = NULL;
3050 struct in6_rtmsg r6;
3051 struct rtentry r4;
3052 char devname[16];
3053 u32 rtdev;
3054 mm_segment_t old_fs = get_fs();
3055
6b96018b
AB
3056 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3057 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3058 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3059 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3060 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3061 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3062 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3063 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3064 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3065 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3066 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3067
3068 r = (void *) &r6;
3069 } else { /* ipv4 */
6b96018b 3070 struct rtentry32 __user *ur4 = argp;
c6d409cf 3071 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3072 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3073 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3074 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3075 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3076 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3077 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3078 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3079 if (rtdev) {
c6d409cf 3080 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3081 r4.rt_dev = (char __user __force *)devname;
3082 devname[15] = 0;
7a229387
AB
3083 } else
3084 r4.rt_dev = NULL;
3085
3086 r = (void *) &r4;
3087 }
3088
3089 if (ret) {
3090 ret = -EFAULT;
3091 goto out;
3092 }
3093
c6d409cf 3094 set_fs(KERNEL_DS);
6b96018b 3095 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3096 set_fs(old_fs);
7a229387
AB
3097
3098out:
7a229387
AB
3099 return ret;
3100}
3101
3102/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3103 * for some operations; this forces use of the newer bridge-utils that
25985edc 3104 * use compatible ioctls
7a229387 3105 */
6b96018b 3106static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3107{
6b96018b 3108 compat_ulong_t tmp;
7a229387 3109
6b96018b 3110 if (get_user(tmp, argp))
7a229387
AB
3111 return -EFAULT;
3112 if (tmp == BRCTL_GET_VERSION)
3113 return BRCTL_VERSION + 1;
3114 return -EINVAL;
3115}
3116
6b96018b
AB
3117static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3118 unsigned int cmd, unsigned long arg)
3119{
3120 void __user *argp = compat_ptr(arg);
3121 struct sock *sk = sock->sk;
3122 struct net *net = sock_net(sk);
7a229387 3123
6b96018b 3124 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3125 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3126
3127 switch (cmd) {
3128 case SIOCSIFBR:
3129 case SIOCGIFBR:
3130 return old_bridge_ioctl(argp);
6b96018b 3131 case SIOCGIFCONF:
36fd633e 3132 return compat_dev_ifconf(net, argp);
6b96018b
AB
3133 case SIOCETHTOOL:
3134 return ethtool_ioctl(net, argp);
7a50a240
AB
3135 case SIOCWANDEV:
3136 return compat_siocwandev(net, argp);
a2116ed2
AB
3137 case SIOCGIFMAP:
3138 case SIOCSIFMAP:
3139 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3140 case SIOCADDRT:
3141 case SIOCDELRT:
3142 return routing_ioctl(net, sock, cmd, argp);
3143 case SIOCGSTAMP:
3144 return do_siocgstamp(net, sock, cmd, argp);
3145 case SIOCGSTAMPNS:
3146 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3147 case SIOCBONDSLAVEINFOQUERY:
3148 case SIOCBONDINFOQUERY:
a2116ed2 3149 case SIOCSHWTSTAMP:
fd468c74 3150 case SIOCGHWTSTAMP:
590d4693 3151 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3152
3153 case FIOSETOWN:
3154 case SIOCSPGRP:
3155 case FIOGETOWN:
3156 case SIOCGPGRP:
3157 case SIOCBRADDBR:
3158 case SIOCBRDELBR:
3159 case SIOCGIFVLAN:
3160 case SIOCSIFVLAN:
3161 case SIOCADDDLCI:
3162 case SIOCDELDLCI:
c62cce2c 3163 case SIOCGSKNS:
6b96018b
AB
3164 return sock_ioctl(file, cmd, arg);
3165
3166 case SIOCGIFFLAGS:
3167 case SIOCSIFFLAGS:
3168 case SIOCGIFMETRIC:
3169 case SIOCSIFMETRIC:
3170 case SIOCGIFMTU:
3171 case SIOCSIFMTU:
3172 case SIOCGIFMEM:
3173 case SIOCSIFMEM:
3174 case SIOCGIFHWADDR:
3175 case SIOCSIFHWADDR:
3176 case SIOCADDMULTI:
3177 case SIOCDELMULTI:
3178 case SIOCGIFINDEX:
6b96018b
AB
3179 case SIOCGIFADDR:
3180 case SIOCSIFADDR:
3181 case SIOCSIFHWBROADCAST:
6b96018b 3182 case SIOCDIFADDR:
6b96018b
AB
3183 case SIOCGIFBRDADDR:
3184 case SIOCSIFBRDADDR:
3185 case SIOCGIFDSTADDR:
3186 case SIOCSIFDSTADDR:
3187 case SIOCGIFNETMASK:
3188 case SIOCSIFNETMASK:
3189 case SIOCSIFPFLAGS:
3190 case SIOCGIFPFLAGS:
3191 case SIOCGIFTXQLEN:
3192 case SIOCSIFTXQLEN:
3193 case SIOCBRADDIF:
3194 case SIOCBRDELIF:
9177efd3
AB
3195 case SIOCSIFNAME:
3196 case SIOCGMIIPHY:
3197 case SIOCGMIIREG:
3198 case SIOCSMIIREG:
6b96018b
AB
3199 case SIOCSARP:
3200 case SIOCGARP:
3201 case SIOCDARP:
6b96018b 3202 case SIOCATMARK:
f92d4fc9
AV
3203 case SIOCBONDENSLAVE:
3204 case SIOCBONDRELEASE:
3205 case SIOCBONDSETHWADDR:
3206 case SIOCBONDCHANGEACTIVE:
4cf808e7 3207 case SIOCGIFNAME:
9177efd3
AB
3208 return sock_do_ioctl(net, sock, cmd, arg);
3209 }
3210
6b96018b
AB
3211 return -ENOIOCTLCMD;
3212}
7a229387 3213
95c96174 3214static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3215 unsigned long arg)
89bbfc95
SP
3216{
3217 struct socket *sock = file->private_data;
3218 int ret = -ENOIOCTLCMD;
87de87d5
DM
3219 struct sock *sk;
3220 struct net *net;
3221
3222 sk = sock->sk;
3223 net = sock_net(sk);
89bbfc95
SP
3224
3225 if (sock->ops->compat_ioctl)
3226 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3227
87de87d5
DM
3228 if (ret == -ENOIOCTLCMD &&
3229 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3230 ret = compat_wext_handle_ioctl(net, cmd, arg);
3231
6b96018b
AB
3232 if (ret == -ENOIOCTLCMD)
3233 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3234
89bbfc95
SP
3235 return ret;
3236}
3237#endif
3238
ac5a488e
SS
3239int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3240{
3241 return sock->ops->bind(sock, addr, addrlen);
3242}
c6d409cf 3243EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3244
3245int kernel_listen(struct socket *sock, int backlog)
3246{
3247 return sock->ops->listen(sock, backlog);
3248}
c6d409cf 3249EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3250
3251int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3252{
3253 struct sock *sk = sock->sk;
3254 int err;
3255
3256 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3257 newsock);
3258 if (err < 0)
3259 goto done;
3260
cdfbabfb 3261 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3262 if (err < 0) {
3263 sock_release(*newsock);
fa8705b0 3264 *newsock = NULL;
ac5a488e
SS
3265 goto done;
3266 }
3267
3268 (*newsock)->ops = sock->ops;
1b08534e 3269 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3270
3271done:
3272 return err;
3273}
c6d409cf 3274EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3275
3276int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3277 int flags)
ac5a488e
SS
3278{
3279 return sock->ops->connect(sock, addr, addrlen, flags);
3280}
c6d409cf 3281EXPORT_SYMBOL(kernel_connect);
ac5a488e 3282
9b2c45d4 3283int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3284{
9b2c45d4 3285 return sock->ops->getname(sock, addr, 0);
ac5a488e 3286}
c6d409cf 3287EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3288
9b2c45d4 3289int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3290{
9b2c45d4 3291 return sock->ops->getname(sock, addr, 1);
ac5a488e 3292}
c6d409cf 3293EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3294
3295int kernel_getsockopt(struct socket *sock, int level, int optname,
3296 char *optval, int *optlen)
3297{
3298 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3299 char __user *uoptval;
3300 int __user *uoptlen;
ac5a488e
SS
3301 int err;
3302
fb8621bb
NK
3303 uoptval = (char __user __force *) optval;
3304 uoptlen = (int __user __force *) optlen;
3305
ac5a488e
SS
3306 set_fs(KERNEL_DS);
3307 if (level == SOL_SOCKET)
fb8621bb 3308 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3309 else
fb8621bb
NK
3310 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3311 uoptlen);
ac5a488e
SS
3312 set_fs(oldfs);
3313 return err;
3314}
c6d409cf 3315EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3316
3317int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3318 char *optval, unsigned int optlen)
ac5a488e
SS
3319{
3320 mm_segment_t oldfs = get_fs();
fb8621bb 3321 char __user *uoptval;
ac5a488e
SS
3322 int err;
3323
fb8621bb
NK
3324 uoptval = (char __user __force *) optval;
3325
ac5a488e
SS
3326 set_fs(KERNEL_DS);
3327 if (level == SOL_SOCKET)
fb8621bb 3328 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3329 else
fb8621bb 3330 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3331 optlen);
3332 set_fs(oldfs);
3333 return err;
3334}
c6d409cf 3335EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3336
3337int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3338 size_t size, int flags)
3339{
3340 if (sock->ops->sendpage)
3341 return sock->ops->sendpage(sock, page, offset, size, flags);
3342
3343 return sock_no_sendpage(sock, page, offset, size, flags);
3344}
c6d409cf 3345EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3346
306b13eb
TH
3347int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3348 size_t size, int flags)
3349{
3350 struct socket *sock = sk->sk_socket;
3351
3352 if (sock->ops->sendpage_locked)
3353 return sock->ops->sendpage_locked(sk, page, offset, size,
3354 flags);
3355
3356 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3357}
3358EXPORT_SYMBOL(kernel_sendpage_locked);
3359
91cf45f0
TM
3360int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3361{
3362 return sock->ops->shutdown(sock, how);
3363}
91cf45f0 3364EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075
P
3365
3366/* This routine returns the IP overhead imposed by a socket i.e.
3367 * the length of the underlying IP header, depending on whether
3368 * this is an IPv4 or IPv6 socket and the length from IP options turned
57240d00 3369 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075
P
3370 */
3371u32 kernel_sock_ip_overhead(struct sock *sk)
3372{
3373 struct inet_sock *inet;
3374 struct ip_options_rcu *opt;
3375 u32 overhead = 0;
113c3075
P
3376#if IS_ENABLED(CONFIG_IPV6)
3377 struct ipv6_pinfo *np;
3378 struct ipv6_txoptions *optv6 = NULL;
3379#endif /* IS_ENABLED(CONFIG_IPV6) */
3380
3381 if (!sk)
3382 return overhead;
3383
113c3075
P
3384 switch (sk->sk_family) {
3385 case AF_INET:
3386 inet = inet_sk(sk);
3387 overhead += sizeof(struct iphdr);
3388 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3389 sock_owned_by_user(sk));
113c3075
P
3390 if (opt)
3391 overhead += opt->opt.optlen;
3392 return overhead;
3393#if IS_ENABLED(CONFIG_IPV6)
3394 case AF_INET6:
3395 np = inet6_sk(sk);
3396 overhead += sizeof(struct ipv6hdr);
3397 if (np)
3398 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3399 sock_owned_by_user(sk));
113c3075
P
3400 if (optv6)
3401 overhead += (optv6->opt_flen + optv6->opt_nflen);
3402 return overhead;
3403#endif /* IS_ENABLED(CONFIG_IPV6) */
3404 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3405 return overhead;
3406 }
3407}
3408EXPORT_SYMBOL(kernel_sock_ip_overhead);