Merge branch 'overlayfs-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszer...
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
#ifdef CONFIG_NET_RX_BUSY_POLL
/*
 * Busy-poll settings, only built with CONFIG_NET_RX_BUSY_POLL.
 * NOTE(review): the sysctl_ prefix suggests these are exposed through
 * sysctl; the registration is not visible here - confirm.
 */
unsigned int sysctl_net_busy_poll __read_mostly;
unsigned int sysctl_net_busy_read __read_mostly;
#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1da177e4
LT
290static int init_inodecache(void)
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1da177e4
LT
299 if (sock_inode_cachep == NULL)
300 return -ENOMEM;
301 return 0;
302}
303
b87221de 304static const struct super_operations sockfs_ops = {
c6d409cf
ED
305 .alloc_inode = sock_alloc_inode,
306 .destroy_inode = sock_destroy_inode,
307 .statfs = simple_statfs,
1da177e4
LT
308};
309
c23fbb6b
ED
310/*
311 * sockfs_dname() is called from d_path().
312 */
313static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
314{
315 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 316 d_inode(dentry)->i_ino);
c23fbb6b
ED
317}
318
3ba13d17 319static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 320 .d_dname = sockfs_dname,
1da177e4
LT
321};
322
c74a1cbb
AV
323static struct dentry *sockfs_mount(struct file_system_type *fs_type,
324 int flags, const char *dev_name, void *data)
325{
326 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
327 &sockfs_dentry_operations, SOCKFS_MAGIC);
328}
329
330static struct vfsmount *sock_mnt __read_mostly;
331
332static struct file_system_type sock_fs_type = {
333 .name = "sockfs",
334 .mount = sockfs_mount,
335 .kill_sb = kill_anon_super,
336};
337
1da177e4
LT
338/*
339 * Obtains the first available file descriptor and sets it up for use.
340 *
39d8c1b6
DM
341 * These functions create file structures and maps them to fd space
342 * of the current process. On success it returns file descriptor
1da177e4
LT
343 * and file struct implicitly stored in sock->file.
344 * Note that another thread may close file descriptor before we return
345 * from this function. We use the fact that now we do not refer
346 * to socket after mapping. If one day we will need it, this
347 * function will increment ref. count on file by 1.
348 *
349 * In any case returned fd MAY BE not valid!
350 * This race condition is unavoidable
351 * with shared fd spaces, we cannot solve it inside kernel,
352 * but we take care of internal coherence yet.
353 */
354
aab174f0 355struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 356{
7cbe66b6 357 struct qstr name = { .name = "" };
2c48b9c4 358 struct path path;
7cbe66b6 359 struct file *file;
1da177e4 360
600e1779
MY
361 if (dname) {
362 name.name = dname;
363 name.len = strlen(name.name);
364 } else if (sock->sk) {
365 name.name = sock->sk->sk_prot_creator->name;
366 name.len = strlen(name.name);
367 }
4b936885 368 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
369 if (unlikely(!path.dentry))
370 return ERR_PTR(-ENOMEM);
2c48b9c4 371 path.mnt = mntget(sock_mnt);
39d8c1b6 372
2c48b9c4 373 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 374
2c48b9c4 375 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 376 &socket_file_ops);
b5ffe634 377 if (IS_ERR(file)) {
cc3808f8 378 /* drop dentry, keep inode */
c5ef6035 379 ihold(d_inode(path.dentry));
2c48b9c4 380 path_put(&path);
39b65252 381 return file;
cc3808f8
AV
382 }
383
384 sock->file = file;
77d27200 385 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 386 file->private_data = sock;
28407630 387 return file;
39d8c1b6 388}
56b31d1c 389EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 390
56b31d1c 391static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
392{
393 struct file *newfile;
28407630
AV
394 int fd = get_unused_fd_flags(flags);
395 if (unlikely(fd < 0))
396 return fd;
39d8c1b6 397
aab174f0 398 newfile = sock_alloc_file(sock, flags, NULL);
28407630 399 if (likely(!IS_ERR(newfile))) {
39d8c1b6 400 fd_install(fd, newfile);
28407630
AV
401 return fd;
402 }
7cbe66b6 403
28407630
AV
404 put_unused_fd(fd);
405 return PTR_ERR(newfile);
1da177e4
LT
406}
407
406a3c63 408struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 409{
6cb153ca
BL
410 if (file->f_op == &socket_file_ops)
411 return file->private_data; /* set in sock_map_fd */
412
23bb80d2
ED
413 *err = -ENOTSOCK;
414 return NULL;
6cb153ca 415}
406a3c63 416EXPORT_SYMBOL(sock_from_file);
6cb153ca 417
1da177e4 418/**
c6d409cf 419 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
420 * @fd: file handle
421 * @err: pointer to an error code return
422 *
423 * The file handle passed in is locked and the socket it is bound
424 * too is returned. If an error occurs the err pointer is overwritten
425 * with a negative errno code and NULL is returned. The function checks
426 * for both invalid handles and passing a handle which is not a socket.
427 *
428 * On a success the socket object pointer is returned.
429 */
430
431struct socket *sockfd_lookup(int fd, int *err)
432{
433 struct file *file;
1da177e4
LT
434 struct socket *sock;
435
89bddce5
SH
436 file = fget(fd);
437 if (!file) {
1da177e4
LT
438 *err = -EBADF;
439 return NULL;
440 }
89bddce5 441
6cb153ca
BL
442 sock = sock_from_file(file, err);
443 if (!sock)
1da177e4 444 fput(file);
6cb153ca
BL
445 return sock;
446}
c6d409cf 447EXPORT_SYMBOL(sockfd_lookup);
1da177e4 448
6cb153ca
BL
449static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
450{
00e188ef 451 struct fd f = fdget(fd);
6cb153ca
BL
452 struct socket *sock;
453
3672558c 454 *err = -EBADF;
00e188ef
AV
455 if (f.file) {
456 sock = sock_from_file(f.file, err);
457 if (likely(sock)) {
458 *fput_needed = f.flags;
6cb153ca 459 return sock;
00e188ef
AV
460 }
461 fdput(f);
1da177e4 462 }
6cb153ca 463 return NULL;
1da177e4
LT
464}
465
600e1779
MY
466#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
467#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
468#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
ce23e640 469static ssize_t sockfs_getxattr(struct dentry *dentry, struct inode *inode,
600e1779
MY
470 const char *name, void *value, size_t size)
471{
472 const char *proto_name;
473 size_t proto_size;
474 int error;
475
476 error = -ENODATA;
477 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
478 proto_name = dentry->d_name.name;
479 proto_size = strlen(proto_name);
480
481 if (value) {
482 error = -ERANGE;
483 if (proto_size + 1 > size)
484 goto out;
485
486 strncpy(value, proto_name, proto_size + 1);
487 }
488 error = proto_size + 1;
489 }
490
491out:
492 return error;
493}
494
495static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
496 size_t size)
497{
498 ssize_t len;
499 ssize_t used = 0;
500
c5ef6035 501 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
502 if (len < 0)
503 return len;
504 used += len;
505 if (buffer) {
506 if (size < used)
507 return -ERANGE;
508 buffer += len;
509 }
510
511 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
512 used += len;
513 if (buffer) {
514 if (size < used)
515 return -ERANGE;
516 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
517 buffer += len;
518 }
519
520 return used;
521}
522
523static const struct inode_operations sockfs_inode_ops = {
524 .getxattr = sockfs_getxattr,
525 .listxattr = sockfs_listxattr,
526};
527
1da177e4
LT
528/**
529 * sock_alloc - allocate a socket
89bddce5 530 *
1da177e4
LT
531 * Allocate a new inode and socket object. The two are bound together
532 * and initialised. The socket is then returned. If we are out of inodes
533 * NULL is returned.
534 */
535
f4a00aac 536struct socket *sock_alloc(void)
1da177e4 537{
89bddce5
SH
538 struct inode *inode;
539 struct socket *sock;
1da177e4 540
a209dfc7 541 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
542 if (!inode)
543 return NULL;
544
545 sock = SOCKET_I(inode);
546
29a020d3 547 kmemcheck_annotate_bitfield(sock, type);
85fe4025 548 inode->i_ino = get_next_ino();
89bddce5 549 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
550 inode->i_uid = current_fsuid();
551 inode->i_gid = current_fsgid();
600e1779 552 inode->i_op = &sockfs_inode_ops;
1da177e4 553
19e8d69c 554 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
555 return sock;
556}
f4a00aac 557EXPORT_SYMBOL(sock_alloc);
1da177e4 558
1da177e4
LT
559/**
560 * sock_release - close a socket
561 * @sock: socket to close
562 *
563 * The socket is released from the protocol stack if it has a release
564 * callback, and the inode is then released if the socket is bound to
89bddce5 565 * an inode not a file.
1da177e4 566 */
89bddce5 567
1da177e4
LT
568void sock_release(struct socket *sock)
569{
570 if (sock->ops) {
571 struct module *owner = sock->ops->owner;
572
573 sock->ops->release(sock);
574 sock->ops = NULL;
575 module_put(owner);
576 }
577
eaefd110 578 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 579 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 580
19e8d69c 581 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
582 if (!sock->file) {
583 iput(SOCK_INODE(sock));
584 return;
585 }
89bddce5 586 sock->file = NULL;
1da177e4 587}
c6d409cf 588EXPORT_SYMBOL(sock_release);
1da177e4 589
c14ac945 590void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 591{
140c55d4
ED
592 u8 flags = *tx_flags;
593
c14ac945 594 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
595 flags |= SKBTX_HW_TSTAMP;
596
c14ac945 597 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
598 flags |= SKBTX_SW_TSTAMP;
599
c14ac945 600 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
601 flags |= SKBTX_SCHED_TSTAMP;
602
140c55d4 603 *tx_flags = flags;
20d49473 604}
67cc0d40 605EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 606
d8725c86 607static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 608{
01e97e65 609 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
610 BUG_ON(ret == -EIOCBQUEUED);
611 return ret;
1da177e4
LT
612}
613
/*
 * Send @msg on @sock: run the security_socket_sendmsg() hook first and
 * return its error if it is non-zero, otherwise pass the message on to
 * the protocol and return that result.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
622
623int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
624 struct kvec *vec, size_t num, size_t size)
625{
6aa24814 626 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 627 return sock_sendmsg(sock, msg);
1da177e4 628}
c6d409cf 629EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 630
92f37fd2
ED
631/*
632 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
633 */
634void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
635 struct sk_buff *skb)
636{
20d49473 637 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 638 struct scm_timestamping tss;
20d49473
PO
639 int empty = 1;
640 struct skb_shared_hwtstamps *shhwtstamps =
641 skb_hwtstamps(skb);
642
643 /* Race occurred between timestamp enabling and packet
644 receiving. Fill in the current time for now. */
645 if (need_software_tstamp && skb->tstamp.tv64 == 0)
646 __net_timestamp(skb);
647
648 if (need_software_tstamp) {
649 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
650 struct timeval tv;
651 skb_get_timestamp(skb, &tv);
652 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
653 sizeof(tv), &tv);
654 } else {
f24b9be5
WB
655 struct timespec ts;
656 skb_get_timestampns(skb, &ts);
20d49473 657 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 658 sizeof(ts), &ts);
20d49473
PO
659 }
660 }
661
f24b9be5 662 memset(&tss, 0, sizeof(tss));
c199105d 663 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 664 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 665 empty = 0;
4d276eb6 666 if (shhwtstamps &&
b9f40e21 667 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 668 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 669 empty = 0;
20d49473
PO
670 if (!empty)
671 put_cmsg(msg, SOL_SOCKET,
f24b9be5 672 SCM_TIMESTAMPING, sizeof(tss), &tss);
92f37fd2 673}
7c81fd8b
ACM
674EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
675
6e3e939f
JB
676void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
677 struct sk_buff *skb)
678{
679 int ack;
680
681 if (!sock_flag(sk, SOCK_WIFI_STATUS))
682 return;
683 if (!skb->wifi_acked_valid)
684 return;
685
686 ack = skb->wifi_acked;
687
688 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
689}
690EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
691
11165f14 692static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
693 struct sk_buff *skb)
3b885787 694{
744d5a3e 695 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 696 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 697 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
698}
699
/*
 * Attach both kinds of receive metadata for @skb to @msg: first the
 * timestamp information, then the drop count.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	/* timestamps go in before the drop counter */
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 707
1b784140 708static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 709 int flags)
1da177e4 710{
2da62906 711 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
712}
713
/*
 * Receive into @msg from @sock: run the security_socket_recvmsg() hook
 * first and return its error if it is non-zero, otherwise return the
 * result of the protocol's recvmsg operation.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 721
c1249c0a
ML
722/**
723 * kernel_recvmsg - Receive a message from a socket (kernel space)
724 * @sock: The socket to receive the message from
725 * @msg: Received message
726 * @vec: Input s/g array for message data
727 * @num: Size of input s/g array
728 * @size: Number of bytes to read
729 * @flags: Message flags (MSG_DONTWAIT, etc...)
730 *
731 * On return the msg structure contains the scatter/gather array passed in the
732 * vec argument. The array is modified so that it consists of the unfilled
733 * portion of the original array.
734 *
735 * The returned value is the total number of bytes received, or an error.
736 */
89bddce5
SH
737int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
738 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
739{
740 mm_segment_t oldfs = get_fs();
741 int result;
742
6aa24814 743 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 744 set_fs(KERNEL_DS);
2da62906 745 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
746 set_fs(oldfs);
747 return result;
748}
c6d409cf 749EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 750
ce1d4d3e
CH
751static ssize_t sock_sendpage(struct file *file, struct page *page,
752 int offset, size_t size, loff_t *ppos, int more)
1da177e4 753{
1da177e4
LT
754 struct socket *sock;
755 int flags;
756
ce1d4d3e
CH
757 sock = file->private_data;
758
35f9c09f
ED
759 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
760 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
761 flags |= more;
ce1d4d3e 762
e6949583 763 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 764}
1da177e4 765
9c55e01c 766static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 767 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
768 unsigned int flags)
769{
770 struct socket *sock = file->private_data;
771
997b37da
RDC
772 if (unlikely(!sock->ops->splice_read))
773 return -EINVAL;
774
9c55e01c
JA
775 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
776}
777
8ae5e030 778static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 779{
6d652330
AV
780 struct file *file = iocb->ki_filp;
781 struct socket *sock = file->private_data;
0345f931 782 struct msghdr msg = {.msg_iter = *to,
783 .msg_iocb = iocb};
8ae5e030 784 ssize_t res;
ce1d4d3e 785
8ae5e030
AV
786 if (file->f_flags & O_NONBLOCK)
787 msg.msg_flags = MSG_DONTWAIT;
788
789 if (iocb->ki_pos != 0)
1da177e4 790 return -ESPIPE;
027445c3 791
66ee59af 792 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
793 return 0;
794
2da62906 795 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
796 *to = msg.msg_iter;
797 return res;
1da177e4
LT
798}
799
8ae5e030 800static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 801{
6d652330
AV
802 struct file *file = iocb->ki_filp;
803 struct socket *sock = file->private_data;
0345f931 804 struct msghdr msg = {.msg_iter = *from,
805 .msg_iocb = iocb};
8ae5e030 806 ssize_t res;
1da177e4 807
8ae5e030 808 if (iocb->ki_pos != 0)
ce1d4d3e 809 return -ESPIPE;
027445c3 810
8ae5e030
AV
811 if (file->f_flags & O_NONBLOCK)
812 msg.msg_flags = MSG_DONTWAIT;
813
6d652330
AV
814 if (sock->type == SOCK_SEQPACKET)
815 msg.msg_flags |= MSG_EOR;
816
d8725c86 817 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
818 *from = msg.msg_iter;
819 return res;
1da177e4
LT
820}
821
1da177e4
LT
822/*
823 * Atomic setting of ioctl hooks to avoid race
824 * with module unload.
825 */
826
4a3e2f71 827static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 828static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 829
881d966b 830void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 831{
4a3e2f71 832 mutex_lock(&br_ioctl_mutex);
1da177e4 833 br_ioctl_hook = hook;
4a3e2f71 834 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
835}
836EXPORT_SYMBOL(brioctl_set);
837
4a3e2f71 838static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 839static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 840
881d966b 841void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 842{
4a3e2f71 843 mutex_lock(&vlan_ioctl_mutex);
1da177e4 844 vlan_ioctl_hook = hook;
4a3e2f71 845 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
846}
847EXPORT_SYMBOL(vlan_ioctl_set);
848
4a3e2f71 849static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 850static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 851
89bddce5 852void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 853{
4a3e2f71 854 mutex_lock(&dlci_ioctl_mutex);
1da177e4 855 dlci_ioctl_hook = hook;
4a3e2f71 856 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
857}
858EXPORT_SYMBOL(dlci_ioctl_set);
859
6b96018b
AB
860static long sock_do_ioctl(struct net *net, struct socket *sock,
861 unsigned int cmd, unsigned long arg)
862{
863 int err;
864 void __user *argp = (void __user *)arg;
865
866 err = sock->ops->ioctl(sock, cmd, arg);
867
868 /*
869 * If this ioctl is unknown try to hand it down
870 * to the NIC driver.
871 */
872 if (err == -ENOIOCTLCMD)
873 err = dev_ioctl(net, cmd, argp);
874
875 return err;
876}
877
1da177e4
LT
878/*
879 * With an ioctl, arg may well be a user mode pointer, but we don't know
880 * what to do with it - that's up to the protocol still.
881 */
882
883static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
884{
885 struct socket *sock;
881d966b 886 struct sock *sk;
1da177e4
LT
887 void __user *argp = (void __user *)arg;
888 int pid, err;
881d966b 889 struct net *net;
1da177e4 890
b69aee04 891 sock = file->private_data;
881d966b 892 sk = sock->sk;
3b1e0a65 893 net = sock_net(sk);
1da177e4 894 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 895 err = dev_ioctl(net, cmd, argp);
1da177e4 896 } else
3d23e349 897#ifdef CONFIG_WEXT_CORE
1da177e4 898 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 899 err = dev_ioctl(net, cmd, argp);
1da177e4 900 } else
3d23e349 901#endif
89bddce5 902 switch (cmd) {
1da177e4
LT
903 case FIOSETOWN:
904 case SIOCSPGRP:
905 err = -EFAULT;
906 if (get_user(pid, (int __user *)argp))
907 break;
e0b93edd
JL
908 f_setown(sock->file, pid, 1);
909 err = 0;
1da177e4
LT
910 break;
911 case FIOGETOWN:
912 case SIOCGPGRP:
609d7fa9 913 err = put_user(f_getown(sock->file),
89bddce5 914 (int __user *)argp);
1da177e4
LT
915 break;
916 case SIOCGIFBR:
917 case SIOCSIFBR:
918 case SIOCBRADDBR:
919 case SIOCBRDELBR:
920 err = -ENOPKG;
921 if (!br_ioctl_hook)
922 request_module("bridge");
923
4a3e2f71 924 mutex_lock(&br_ioctl_mutex);
89bddce5 925 if (br_ioctl_hook)
881d966b 926 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 927 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
928 break;
929 case SIOCGIFVLAN:
930 case SIOCSIFVLAN:
931 err = -ENOPKG;
932 if (!vlan_ioctl_hook)
933 request_module("8021q");
934
4a3e2f71 935 mutex_lock(&vlan_ioctl_mutex);
1da177e4 936 if (vlan_ioctl_hook)
881d966b 937 err = vlan_ioctl_hook(net, argp);
4a3e2f71 938 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 939 break;
1da177e4
LT
940 case SIOCADDDLCI:
941 case SIOCDELDLCI:
942 err = -ENOPKG;
943 if (!dlci_ioctl_hook)
944 request_module("dlci");
945
7512cbf6
PE
946 mutex_lock(&dlci_ioctl_mutex);
947 if (dlci_ioctl_hook)
1da177e4 948 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 949 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
950 break;
951 default:
6b96018b 952 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 953 break;
89bddce5 954 }
1da177e4
LT
955 return err;
956}
957
958int sock_create_lite(int family, int type, int protocol, struct socket **res)
959{
960 int err;
961 struct socket *sock = NULL;
89bddce5 962
1da177e4
LT
963 err = security_socket_create(family, type, protocol, 1);
964 if (err)
965 goto out;
966
967 sock = sock_alloc();
968 if (!sock) {
969 err = -ENOMEM;
970 goto out;
971 }
972
1da177e4 973 sock->type = type;
7420ed23
VY
974 err = security_socket_post_create(sock, family, type, protocol, 1);
975 if (err)
976 goto out_release;
977
1da177e4
LT
978out:
979 *res = sock;
980 return err;
7420ed23
VY
981out_release:
982 sock_release(sock);
983 sock = NULL;
984 goto out;
1da177e4 985}
c6d409cf 986EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
987
988/* No kernel lock held - perfect */
89bddce5 989static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 990{
cbf55001 991 unsigned int busy_flag = 0;
1da177e4
LT
992 struct socket *sock;
993
994 /*
89bddce5 995 * We can't return errors to poll, so it's either yes or no.
1da177e4 996 */
b69aee04 997 sock = file->private_data;
2d48d67f 998
cbf55001 999 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1000 /* this socket can poll_ll so tell the system call */
cbf55001 1001 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1002
1003 /* once, only if requested by syscall */
cbf55001
ET
1004 if (wait && (wait->_key & POLL_BUSY_LOOP))
1005 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1006 }
1007
cbf55001 1008 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1009}
1010
89bddce5 1011static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1012{
b69aee04 1013 struct socket *sock = file->private_data;
1da177e4
LT
1014
1015 return sock->ops->mmap(file, sock, vma);
1016}
1017
/* Release handler for socket files: releases the socket behind @inode. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1023
1024/*
1025 * Update the socket async list
1026 *
1027 * Fasync_list locking strategy.
1028 *
1029 * 1. fasync_list is modified only under process context socket lock
1030 * i.e. under semaphore.
1031 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1032 * or under socket lock
1da177e4
LT
1033 */
1034
1035static int sock_fasync(int fd, struct file *filp, int on)
1036{
989a2979
ED
1037 struct socket *sock = filp->private_data;
1038 struct sock *sk = sock->sk;
eaefd110 1039 struct socket_wq *wq;
1da177e4 1040
989a2979 1041 if (sk == NULL)
1da177e4 1042 return -EINVAL;
1da177e4
LT
1043
1044 lock_sock(sk);
1e1d04e6 1045 wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
eaefd110 1046 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1047
eaefd110 1048 if (!wq->fasync_list)
989a2979
ED
1049 sock_reset_flag(sk, SOCK_FASYNC);
1050 else
bcdce719 1051 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1052
989a2979 1053 release_sock(sk);
1da177e4
LT
1054 return 0;
1055}
1056
ceb5d58b 1057/* This function may be called only under rcu_lock */
1da177e4 1058
ceb5d58b 1059int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1060{
ceb5d58b 1061 if (!wq || !wq->fasync_list)
1da177e4 1062 return -1;
ceb5d58b 1063
89bddce5 1064 switch (how) {
8d8ad9d7 1065 case SOCK_WAKE_WAITD:
ceb5d58b 1066 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1067 break;
1068 goto call_kill;
8d8ad9d7 1069 case SOCK_WAKE_SPACE:
ceb5d58b 1070 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1071 break;
1072 /* fall through */
8d8ad9d7 1073 case SOCK_WAKE_IO:
89bddce5 1074call_kill:
43815482 1075 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1076 break;
8d8ad9d7 1077 case SOCK_WAKE_URG:
43815482 1078 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1079 }
ceb5d58b 1080
1da177e4
LT
1081 return 0;
1082}
c6d409cf 1083EXPORT_SYMBOL(sock_wake_async);
1da177e4 1084
721db93a 1085int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1086 struct socket **res, int kern)
1da177e4
LT
1087{
1088 int err;
1089 struct socket *sock;
55737fda 1090 const struct net_proto_family *pf;
1da177e4
LT
1091
1092 /*
89bddce5 1093 * Check protocol is in range
1da177e4
LT
1094 */
1095 if (family < 0 || family >= NPROTO)
1096 return -EAFNOSUPPORT;
1097 if (type < 0 || type >= SOCK_MAX)
1098 return -EINVAL;
1099
1100 /* Compatibility.
1101
1102 This uglymoron is moved from INET layer to here to avoid
1103 deadlock in module load.
1104 */
1105 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1106 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1107 current->comm);
1da177e4
LT
1108 family = PF_PACKET;
1109 }
1110
1111 err = security_socket_create(family, type, protocol, kern);
1112 if (err)
1113 return err;
89bddce5 1114
55737fda
SH
1115 /*
1116 * Allocate the socket and allow the family to set things up. if
1117 * the protocol is 0, the family is instructed to select an appropriate
1118 * default.
1119 */
1120 sock = sock_alloc();
1121 if (!sock) {
e87cc472 1122 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1123 return -ENFILE; /* Not exactly a match, but its the
1124 closest posix thing */
1125 }
1126
1127 sock->type = type;
1128
95a5afca 1129#ifdef CONFIG_MODULES
89bddce5
SH
1130 /* Attempt to load a protocol module if the find failed.
1131 *
1132 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1133 * requested real, full-featured networking support upon configuration.
1134 * Otherwise module support will break!
1135 */
190683a9 1136 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1137 request_module("net-pf-%d", family);
1da177e4
LT
1138#endif
1139
55737fda
SH
1140 rcu_read_lock();
1141 pf = rcu_dereference(net_families[family]);
1142 err = -EAFNOSUPPORT;
1143 if (!pf)
1144 goto out_release;
1da177e4
LT
1145
1146 /*
1147 * We will call the ->create function, that possibly is in a loadable
1148 * module, so we have to bump that loadable module refcnt first.
1149 */
55737fda 1150 if (!try_module_get(pf->owner))
1da177e4
LT
1151 goto out_release;
1152
55737fda
SH
1153 /* Now protected by module ref count */
1154 rcu_read_unlock();
1155
3f378b68 1156 err = pf->create(net, sock, protocol, kern);
55737fda 1157 if (err < 0)
1da177e4 1158 goto out_module_put;
a79af59e 1159
1da177e4
LT
1160 /*
1161 * Now to bump the refcnt of the [loadable] module that owns this
1162 * socket at sock_release time we decrement its refcnt.
1163 */
55737fda
SH
1164 if (!try_module_get(sock->ops->owner))
1165 goto out_module_busy;
1166
1da177e4
LT
1167 /*
1168 * Now that we're done with the ->create function, the [loadable]
1169 * module can have its refcnt decremented
1170 */
55737fda 1171 module_put(pf->owner);
7420ed23
VY
1172 err = security_socket_post_create(sock, family, type, protocol, kern);
1173 if (err)
3b185525 1174 goto out_sock_release;
55737fda 1175 *res = sock;
1da177e4 1176
55737fda
SH
1177 return 0;
1178
1179out_module_busy:
1180 err = -EAFNOSUPPORT;
1da177e4 1181out_module_put:
55737fda
SH
1182 sock->ops = NULL;
1183 module_put(pf->owner);
1184out_sock_release:
1da177e4 1185 sock_release(sock);
55737fda
SH
1186 return err;
1187
1188out_release:
1189 rcu_read_unlock();
1190 goto out_sock_release;
1da177e4 1191}
721db93a 1192EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1193
1194int sock_create(int family, int type, int protocol, struct socket **res)
1195{
1b8d7ae4 1196 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1197}
c6d409cf 1198EXPORT_SYMBOL(sock_create);
1da177e4 1199
/* Create a socket in @net on behalf of the kernel (kern == 1). */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1205
3e0fa65f 1206SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1207{
1208 int retval;
1209 struct socket *sock;
a677a039
UD
1210 int flags;
1211
e38b36f3
UD
1212 /* Check the SOCK_* constants for consistency. */
1213 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1214 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1215 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1216 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1217
a677a039 1218 flags = type & ~SOCK_TYPE_MASK;
77d27200 1219 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1220 return -EINVAL;
1221 type &= SOCK_TYPE_MASK;
1da177e4 1222
aaca0bdc
UD
1223 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1224 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1225
1da177e4
LT
1226 retval = sock_create(family, type, protocol, &sock);
1227 if (retval < 0)
1228 goto out;
1229
77d27200 1230 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1231 if (retval < 0)
1232 goto out_release;
1233
1234out:
1235 /* It may be already another descriptor 8) Not kernel problem. */
1236 return retval;
1237
1238out_release:
1239 sock_release(sock);
1240 return retval;
1241}
1242
1243/*
1244 * Create a pair of connected sockets.
1245 */
1246
3e0fa65f
HC
1247SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1248 int __user *, usockvec)
1da177e4
LT
1249{
1250 struct socket *sock1, *sock2;
1251 int fd1, fd2, err;
db349509 1252 struct file *newfile1, *newfile2;
a677a039
UD
1253 int flags;
1254
1255 flags = type & ~SOCK_TYPE_MASK;
77d27200 1256 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1257 return -EINVAL;
1258 type &= SOCK_TYPE_MASK;
1da177e4 1259
aaca0bdc
UD
1260 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1261 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1262
1da177e4
LT
1263 /*
1264 * Obtain the first socket and check if the underlying protocol
1265 * supports the socketpair call.
1266 */
1267
1268 err = sock_create(family, type, protocol, &sock1);
1269 if (err < 0)
1270 goto out;
1271
1272 err = sock_create(family, type, protocol, &sock2);
1273 if (err < 0)
1274 goto out_release_1;
1275
1276 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1277 if (err < 0)
1da177e4
LT
1278 goto out_release_both;
1279
28407630 1280 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1281 if (unlikely(fd1 < 0)) {
1282 err = fd1;
db349509 1283 goto out_release_both;
bf3c23d1 1284 }
d73aa286 1285
28407630 1286 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1287 if (unlikely(fd2 < 0)) {
1288 err = fd2;
d73aa286 1289 goto out_put_unused_1;
28407630
AV
1290 }
1291
aab174f0 1292 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1293 if (IS_ERR(newfile1)) {
28407630 1294 err = PTR_ERR(newfile1);
d73aa286 1295 goto out_put_unused_both;
28407630
AV
1296 }
1297
aab174f0 1298 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1299 if (IS_ERR(newfile2)) {
1300 err = PTR_ERR(newfile2);
d73aa286 1301 goto out_fput_1;
db349509
AV
1302 }
1303
d73aa286
YD
1304 err = put_user(fd1, &usockvec[0]);
1305 if (err)
1306 goto out_fput_both;
1307
1308 err = put_user(fd2, &usockvec[1]);
1309 if (err)
1310 goto out_fput_both;
1311
157cf649 1312 audit_fd_pair(fd1, fd2);
d73aa286 1313
db349509
AV
1314 fd_install(fd1, newfile1);
1315 fd_install(fd2, newfile2);
1da177e4
LT
1316 /* fd1 and fd2 may be already another descriptors.
1317 * Not kernel problem.
1318 */
1319
d73aa286 1320 return 0;
1da177e4 1321
d73aa286
YD
1322out_fput_both:
1323 fput(newfile2);
1324 fput(newfile1);
1325 put_unused_fd(fd2);
1326 put_unused_fd(fd1);
1327 goto out;
1328
1329out_fput_1:
1330 fput(newfile1);
1331 put_unused_fd(fd2);
1332 put_unused_fd(fd1);
1333 sock_release(sock2);
1334 goto out;
1da177e4 1335
d73aa286
YD
1336out_put_unused_both:
1337 put_unused_fd(fd2);
1338out_put_unused_1:
1339 put_unused_fd(fd1);
1da177e4 1340out_release_both:
89bddce5 1341 sock_release(sock2);
1da177e4 1342out_release_1:
89bddce5 1343 sock_release(sock1);
1da177e4
LT
1344out:
1345 return err;
1346}
1347
1da177e4
LT
1348/*
1349 * Bind a name to a socket. Nothing much to do here since it's
1350 * the protocol's responsibility to handle the local address.
1351 *
1352 * We move the socket address to kernel space before we call
1353 * the protocol layer (having also checked the address is ok).
1354 */
1355
20f37034 1356SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1357{
1358 struct socket *sock;
230b1839 1359 struct sockaddr_storage address;
6cb153ca 1360 int err, fput_needed;
1da177e4 1361
89bddce5 1362 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1363 if (sock) {
43db362d 1364 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1365 if (err >= 0) {
1366 err = security_socket_bind(sock,
230b1839 1367 (struct sockaddr *)&address,
89bddce5 1368 addrlen);
6cb153ca
BL
1369 if (!err)
1370 err = sock->ops->bind(sock,
89bddce5 1371 (struct sockaddr *)
230b1839 1372 &address, addrlen);
1da177e4 1373 }
6cb153ca 1374 fput_light(sock->file, fput_needed);
89bddce5 1375 }
1da177e4
LT
1376 return err;
1377}
1378
1da177e4
LT
1379/*
1380 * Perform a listen. Basically, we allow the protocol to do anything
1381 * necessary for a listen, and if that works, we mark the socket as
1382 * ready for listening.
1383 */
1384
3e0fa65f 1385SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1386{
1387 struct socket *sock;
6cb153ca 1388 int err, fput_needed;
b8e1f9b5 1389 int somaxconn;
89bddce5
SH
1390
1391 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1392 if (sock) {
8efa6e93 1393 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1394 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1395 backlog = somaxconn;
1da177e4
LT
1396
1397 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1398 if (!err)
1399 err = sock->ops->listen(sock, backlog);
1da177e4 1400
6cb153ca 1401 fput_light(sock->file, fput_needed);
1da177e4
LT
1402 }
1403 return err;
1404}
1405
1da177e4
LT
1406/*
1407 * For accept, we attempt to create a new socket, set up the link
1408 * with the client, wake up the client, then return the new
1409 * connected fd. We collect the address of the connector in kernel
1410 * space and move it to user at the very end. This is unclean because
1411 * we open the socket then return an error.
1412 *
1413 * 1003.1g adds the ability to recvmsg() to query connection pending
1414 * status to recvmsg. We need to add that support in a way thats
1415 * clean when we restucture accept also.
1416 */
1417
20f37034
HC
1418SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1419 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1420{
1421 struct socket *sock, *newsock;
39d8c1b6 1422 struct file *newfile;
6cb153ca 1423 int err, len, newfd, fput_needed;
230b1839 1424 struct sockaddr_storage address;
1da177e4 1425
77d27200 1426 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1427 return -EINVAL;
1428
1429 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1430 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1431
6cb153ca 1432 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1433 if (!sock)
1434 goto out;
1435
1436 err = -ENFILE;
c6d409cf
ED
1437 newsock = sock_alloc();
1438 if (!newsock)
1da177e4
LT
1439 goto out_put;
1440
1441 newsock->type = sock->type;
1442 newsock->ops = sock->ops;
1443
1da177e4
LT
1444 /*
1445 * We don't need try_module_get here, as the listening socket (sock)
1446 * has the protocol module (sock->ops->owner) held.
1447 */
1448 __module_get(newsock->ops->owner);
1449
28407630 1450 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1451 if (unlikely(newfd < 0)) {
1452 err = newfd;
9a1875e6
DM
1453 sock_release(newsock);
1454 goto out_put;
39d8c1b6 1455 }
aab174f0 1456 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1457 if (IS_ERR(newfile)) {
28407630
AV
1458 err = PTR_ERR(newfile);
1459 put_unused_fd(newfd);
1460 sock_release(newsock);
1461 goto out_put;
1462 }
39d8c1b6 1463
a79af59e
FF
1464 err = security_socket_accept(sock, newsock);
1465 if (err)
39d8c1b6 1466 goto out_fd;
a79af59e 1467
1da177e4
LT
1468 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1469 if (err < 0)
39d8c1b6 1470 goto out_fd;
1da177e4
LT
1471
1472 if (upeer_sockaddr) {
230b1839 1473 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1474 &len, 2) < 0) {
1da177e4 1475 err = -ECONNABORTED;
39d8c1b6 1476 goto out_fd;
1da177e4 1477 }
43db362d 1478 err = move_addr_to_user(&address,
230b1839 1479 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1480 if (err < 0)
39d8c1b6 1481 goto out_fd;
1da177e4
LT
1482 }
1483
1484 /* File flags are not inherited via accept() unlike another OSes. */
1485
39d8c1b6
DM
1486 fd_install(newfd, newfile);
1487 err = newfd;
1da177e4 1488
1da177e4 1489out_put:
6cb153ca 1490 fput_light(sock->file, fput_needed);
1da177e4
LT
1491out:
1492 return err;
39d8c1b6 1493out_fd:
9606a216 1494 fput(newfile);
39d8c1b6 1495 put_unused_fd(newfd);
1da177e4
LT
1496 goto out_put;
1497}
1498
20f37034
HC
1499SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1500 int __user *, upeer_addrlen)
aaca0bdc 1501{
de11defe 1502 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1503}
1504
1da177e4
LT
1505/*
1506 * Attempt to connect to a socket with the server address. The address
1507 * is in user space so we verify it is OK and move it to kernel space.
1508 *
1509 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1510 * break bindings
1511 *
1512 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1513 * other SEQPACKET protocols that take time to connect() as it doesn't
1514 * include the -EINPROGRESS status for such sockets.
1515 */
1516
20f37034
HC
1517SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1518 int, addrlen)
1da177e4
LT
1519{
1520 struct socket *sock;
230b1839 1521 struct sockaddr_storage address;
6cb153ca 1522 int err, fput_needed;
1da177e4 1523
6cb153ca 1524 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1525 if (!sock)
1526 goto out;
43db362d 1527 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1528 if (err < 0)
1529 goto out_put;
1530
89bddce5 1531 err =
230b1839 1532 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1533 if (err)
1534 goto out_put;
1535
230b1839 1536 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1537 sock->file->f_flags);
1538out_put:
6cb153ca 1539 fput_light(sock->file, fput_needed);
1da177e4
LT
1540out:
1541 return err;
1542}
1543
1544/*
1545 * Get the local address ('name') of a socket object. Move the obtained
1546 * name to user space.
1547 */
1548
20f37034
HC
1549SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1550 int __user *, usockaddr_len)
1da177e4
LT
1551{
1552 struct socket *sock;
230b1839 1553 struct sockaddr_storage address;
6cb153ca 1554 int len, err, fput_needed;
89bddce5 1555
6cb153ca 1556 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1557 if (!sock)
1558 goto out;
1559
1560 err = security_socket_getsockname(sock);
1561 if (err)
1562 goto out_put;
1563
230b1839 1564 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1565 if (err)
1566 goto out_put;
43db362d 1567 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1568
1569out_put:
6cb153ca 1570 fput_light(sock->file, fput_needed);
1da177e4
LT
1571out:
1572 return err;
1573}
1574
1575/*
1576 * Get the remote address ('name') of a socket object. Move the obtained
1577 * name to user space.
1578 */
1579
20f37034
HC
1580SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1581 int __user *, usockaddr_len)
1da177e4
LT
1582{
1583 struct socket *sock;
230b1839 1584 struct sockaddr_storage address;
6cb153ca 1585 int len, err, fput_needed;
1da177e4 1586
89bddce5
SH
1587 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1588 if (sock != NULL) {
1da177e4
LT
1589 err = security_socket_getpeername(sock);
1590 if (err) {
6cb153ca 1591 fput_light(sock->file, fput_needed);
1da177e4
LT
1592 return err;
1593 }
1594
89bddce5 1595 err =
230b1839 1596 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1597 1);
1da177e4 1598 if (!err)
43db362d 1599 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1600 usockaddr_len);
6cb153ca 1601 fput_light(sock->file, fput_needed);
1da177e4
LT
1602 }
1603 return err;
1604}
1605
1606/*
1607 * Send a datagram to a given address. We move the address into kernel
1608 * space and check the user space data area is readable before invoking
1609 * the protocol.
1610 */
1611
3e0fa65f 1612SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1613 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1614 int, addr_len)
1da177e4
LT
1615{
1616 struct socket *sock;
230b1839 1617 struct sockaddr_storage address;
1da177e4
LT
1618 int err;
1619 struct msghdr msg;
1620 struct iovec iov;
6cb153ca 1621 int fput_needed;
6cb153ca 1622
602bd0e9
AV
1623 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1624 if (unlikely(err))
1625 return err;
de0fa95c
PE
1626 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1627 if (!sock)
4387ff75 1628 goto out;
6cb153ca 1629
89bddce5 1630 msg.msg_name = NULL;
89bddce5
SH
1631 msg.msg_control = NULL;
1632 msg.msg_controllen = 0;
1633 msg.msg_namelen = 0;
6cb153ca 1634 if (addr) {
43db362d 1635 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1636 if (err < 0)
1637 goto out_put;
230b1839 1638 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1639 msg.msg_namelen = addr_len;
1da177e4
LT
1640 }
1641 if (sock->file->f_flags & O_NONBLOCK)
1642 flags |= MSG_DONTWAIT;
1643 msg.msg_flags = flags;
d8725c86 1644 err = sock_sendmsg(sock, &msg);
1da177e4 1645
89bddce5 1646out_put:
de0fa95c 1647 fput_light(sock->file, fput_needed);
4387ff75 1648out:
1da177e4
LT
1649 return err;
1650}
1651
1652/*
89bddce5 1653 * Send a datagram down a socket.
1da177e4
LT
1654 */
1655
3e0fa65f 1656SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1657 unsigned int, flags)
1da177e4
LT
1658{
1659 return sys_sendto(fd, buff, len, flags, NULL, 0);
1660}
1661
1662/*
89bddce5 1663 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1664 * sender. We verify the buffers are writable and if needed move the
1665 * sender address from kernel to user space.
1666 */
1667
3e0fa65f 1668SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1669 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1670 int __user *, addr_len)
1da177e4
LT
1671{
1672 struct socket *sock;
1673 struct iovec iov;
1674 struct msghdr msg;
230b1839 1675 struct sockaddr_storage address;
89bddce5 1676 int err, err2;
6cb153ca
BL
1677 int fput_needed;
1678
602bd0e9
AV
1679 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1680 if (unlikely(err))
1681 return err;
de0fa95c 1682 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1683 if (!sock)
de0fa95c 1684 goto out;
1da177e4 1685
89bddce5
SH
1686 msg.msg_control = NULL;
1687 msg.msg_controllen = 0;
f3d33426
HFS
1688 /* Save some cycles and don't copy the address if not needed */
1689 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1690 /* We assume all kernel code knows the size of sockaddr_storage */
1691 msg.msg_namelen = 0;
130ed5d1 1692 msg.msg_iocb = NULL;
1da177e4
LT
1693 if (sock->file->f_flags & O_NONBLOCK)
1694 flags |= MSG_DONTWAIT;
2da62906 1695 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1696
89bddce5 1697 if (err >= 0 && addr != NULL) {
43db362d 1698 err2 = move_addr_to_user(&address,
230b1839 1699 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1700 if (err2 < 0)
1701 err = err2;
1da177e4 1702 }
de0fa95c
PE
1703
1704 fput_light(sock->file, fput_needed);
4387ff75 1705out:
1da177e4
LT
1706 return err;
1707}
1708
1709/*
89bddce5 1710 * Receive a datagram from a socket.
1da177e4
LT
1711 */
1712
b7c0ddf5
JG
1713SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1714 unsigned int, flags)
1da177e4
LT
1715{
1716 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1717}
1718
1719/*
1720 * Set a socket option. Because we don't know the option lengths we have
1721 * to pass the user mode parameter for the protocols to sort out.
1722 */
1723
20f37034
HC
1724SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1725 char __user *, optval, int, optlen)
1da177e4 1726{
6cb153ca 1727 int err, fput_needed;
1da177e4
LT
1728 struct socket *sock;
1729
1730 if (optlen < 0)
1731 return -EINVAL;
89bddce5
SH
1732
1733 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1734 if (sock != NULL) {
1735 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1736 if (err)
1737 goto out_put;
1da177e4
LT
1738
1739 if (level == SOL_SOCKET)
89bddce5
SH
1740 err =
1741 sock_setsockopt(sock, level, optname, optval,
1742 optlen);
1da177e4 1743 else
89bddce5
SH
1744 err =
1745 sock->ops->setsockopt(sock, level, optname, optval,
1746 optlen);
6cb153ca
BL
1747out_put:
1748 fput_light(sock->file, fput_needed);
1da177e4
LT
1749 }
1750 return err;
1751}
1752
1753/*
1754 * Get a socket option. Because we don't know the option lengths we have
1755 * to pass a user mode parameter for the protocols to sort out.
1756 */
1757
20f37034
HC
1758SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1759 char __user *, optval, int __user *, optlen)
1da177e4 1760{
6cb153ca 1761 int err, fput_needed;
1da177e4
LT
1762 struct socket *sock;
1763
89bddce5
SH
1764 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1765 if (sock != NULL) {
6cb153ca
BL
1766 err = security_socket_getsockopt(sock, level, optname);
1767 if (err)
1768 goto out_put;
1da177e4
LT
1769
1770 if (level == SOL_SOCKET)
89bddce5
SH
1771 err =
1772 sock_getsockopt(sock, level, optname, optval,
1773 optlen);
1da177e4 1774 else
89bddce5
SH
1775 err =
1776 sock->ops->getsockopt(sock, level, optname, optval,
1777 optlen);
6cb153ca
BL
1778out_put:
1779 fput_light(sock->file, fput_needed);
1da177e4
LT
1780 }
1781 return err;
1782}
1783
1da177e4
LT
1784/*
1785 * Shutdown a socket.
1786 */
1787
754fe8d2 1788SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1789{
6cb153ca 1790 int err, fput_needed;
1da177e4
LT
1791 struct socket *sock;
1792
89bddce5
SH
1793 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1794 if (sock != NULL) {
1da177e4 1795 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1796 if (!err)
1797 err = sock->ops->shutdown(sock, how);
1798 fput_light(sock->file, fput_needed);
1da177e4
LT
1799 }
1800 return err;
1801}
1802
/* Helper macros yielding the address of a msghdr field that has the same
 * type (int / unsigned) in the native and compat layouts.
 *
 * They expand in the caller's scope and rely on a local named `flags`
 * plus a `<msg>_compat` pointer alongside `<msg>`: the compat pointer is
 * used when MSG_CMSG_COMPAT is set in flags, the native one otherwise.
 */
#define COMPAT_MSG(msg, member)						\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1809
c71d8ebe
TH
1810struct used_address {
1811 struct sockaddr_storage name;
1812 unsigned int name_len;
1813};
1814
da184284
AV
1815static int copy_msghdr_from_user(struct msghdr *kmsg,
1816 struct user_msghdr __user *umsg,
1817 struct sockaddr __user **save_addr,
1818 struct iovec **iov)
1661bf36 1819{
08adb7da
AV
1820 struct sockaddr __user *uaddr;
1821 struct iovec __user *uiov;
c0371da6 1822 size_t nr_segs;
08adb7da
AV
1823 ssize_t err;
1824
1825 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1826 __get_user(uaddr, &umsg->msg_name) ||
1827 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1828 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1829 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1830 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1831 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1832 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1833 return -EFAULT;
dbb490b9 1834
08adb7da 1835 if (!uaddr)
6a2a2b3a
AS
1836 kmsg->msg_namelen = 0;
1837
dbb490b9
ML
1838 if (kmsg->msg_namelen < 0)
1839 return -EINVAL;
1840
1661bf36 1841 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1842 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1843
1844 if (save_addr)
1845 *save_addr = uaddr;
1846
1847 if (uaddr && kmsg->msg_namelen) {
1848 if (!save_addr) {
1849 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1850 kmsg->msg_name);
1851 if (err < 0)
1852 return err;
1853 }
1854 } else {
1855 kmsg->msg_name = NULL;
1856 kmsg->msg_namelen = 0;
1857 }
1858
c0371da6 1859 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1860 return -EMSGSIZE;
1861
0345f931 1862 kmsg->msg_iocb = NULL;
1863
da184284
AV
1864 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1865 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1866}
1867
666547ff 1868static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1869 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1870 struct used_address *used_address,
1871 unsigned int allowed_msghdr_flags)
1da177e4 1872{
89bddce5
SH
1873 struct compat_msghdr __user *msg_compat =
1874 (struct compat_msghdr __user *)msg;
230b1839 1875 struct sockaddr_storage address;
1da177e4 1876 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1877 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1878 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1879 /* 20 is size of ipv6_pktinfo */
1da177e4 1880 unsigned char *ctl_buf = ctl;
d8725c86 1881 int ctl_len;
08adb7da 1882 ssize_t err;
89bddce5 1883
08adb7da 1884 msg_sys->msg_name = &address;
1da177e4 1885
08449320 1886 if (MSG_CMSG_COMPAT & flags)
08adb7da 1887 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1888 else
08adb7da 1889 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1890 if (err < 0)
da184284 1891 return err;
1da177e4
LT
1892
1893 err = -ENOBUFS;
1894
228e548e 1895 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1896 goto out_freeiov;
28a94d8f 1897 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 1898 ctl_len = msg_sys->msg_controllen;
1da177e4 1899 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1900 err =
228e548e 1901 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1902 sizeof(ctl));
1da177e4
LT
1903 if (err)
1904 goto out_freeiov;
228e548e
AB
1905 ctl_buf = msg_sys->msg_control;
1906 ctl_len = msg_sys->msg_controllen;
1da177e4 1907 } else if (ctl_len) {
89bddce5 1908 if (ctl_len > sizeof(ctl)) {
1da177e4 1909 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1910 if (ctl_buf == NULL)
1da177e4
LT
1911 goto out_freeiov;
1912 }
1913 err = -EFAULT;
1914 /*
228e548e 1915 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1916 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1917 * checking falls down on this.
1918 */
fb8621bb 1919 if (copy_from_user(ctl_buf,
228e548e 1920 (void __user __force *)msg_sys->msg_control,
89bddce5 1921 ctl_len))
1da177e4 1922 goto out_freectl;
228e548e 1923 msg_sys->msg_control = ctl_buf;
1da177e4 1924 }
228e548e 1925 msg_sys->msg_flags = flags;
1da177e4
LT
1926
1927 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1928 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1929 /*
1930 * If this is sendmmsg() and current destination address is same as
1931 * previously succeeded address, omit asking LSM's decision.
1932 * used_address->name_len is initialized to UINT_MAX so that the first
1933 * destination address never matches.
1934 */
bc909d9d
MD
1935 if (used_address && msg_sys->msg_name &&
1936 used_address->name_len == msg_sys->msg_namelen &&
1937 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1938 used_address->name_len)) {
d8725c86 1939 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1940 goto out_freectl;
1941 }
d8725c86 1942 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1943 /*
1944 * If this is sendmmsg() and sending to current destination address was
1945 * successful, remember it.
1946 */
1947 if (used_address && err >= 0) {
1948 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1949 if (msg_sys->msg_name)
1950 memcpy(&used_address->name, msg_sys->msg_name,
1951 used_address->name_len);
c71d8ebe 1952 }
1da177e4
LT
1953
1954out_freectl:
89bddce5 1955 if (ctl_buf != ctl)
1da177e4
LT
1956 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1957out_freeiov:
da184284 1958 kfree(iov);
228e548e
AB
1959 return err;
1960}
1961
1962/*
1963 * BSD sendmsg interface
1964 */
1965
666547ff 1966long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
1967{
1968 int fput_needed, err;
1969 struct msghdr msg_sys;
1be374a0
AL
1970 struct socket *sock;
1971
1be374a0 1972 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
1973 if (!sock)
1974 goto out;
1975
28a94d8f 1976 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 1977
6cb153ca 1978 fput_light(sock->file, fput_needed);
89bddce5 1979out:
1da177e4
LT
1980 return err;
1981}
1982
666547ff 1983SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
1984{
1985 if (flags & MSG_CMSG_COMPAT)
1986 return -EINVAL;
1987 return __sys_sendmsg(fd, msg, flags);
1988}
1989
228e548e
AB
1990/*
1991 * Linux sendmmsg interface
1992 */
1993
1994int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
1995 unsigned int flags)
1996{
1997 int fput_needed, err, datagrams;
1998 struct socket *sock;
1999 struct mmsghdr __user *entry;
2000 struct compat_mmsghdr __user *compat_entry;
2001 struct msghdr msg_sys;
c71d8ebe 2002 struct used_address used_address;
f092276d 2003 unsigned int oflags = flags;
228e548e 2004
98382f41
AB
2005 if (vlen > UIO_MAXIOV)
2006 vlen = UIO_MAXIOV;
228e548e
AB
2007
2008 datagrams = 0;
2009
2010 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2011 if (!sock)
2012 return err;
2013
c71d8ebe 2014 used_address.name_len = UINT_MAX;
228e548e
AB
2015 entry = mmsg;
2016 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2017 err = 0;
f092276d 2018 flags |= MSG_BATCH;
228e548e
AB
2019
2020 while (datagrams < vlen) {
f092276d
TH
2021 if (datagrams == vlen - 1)
2022 flags = oflags;
2023
228e548e 2024 if (MSG_CMSG_COMPAT & flags) {
666547ff 2025 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2026 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2027 if (err < 0)
2028 break;
2029 err = __put_user(err, &compat_entry->msg_len);
2030 ++compat_entry;
2031 } else {
a7526eb5 2032 err = ___sys_sendmsg(sock,
666547ff 2033 (struct user_msghdr __user *)entry,
28a94d8f 2034 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2035 if (err < 0)
2036 break;
2037 err = put_user(err, &entry->msg_len);
2038 ++entry;
2039 }
2040
2041 if (err)
2042 break;
2043 ++datagrams;
a78cb84c 2044 cond_resched();
228e548e
AB
2045 }
2046
228e548e
AB
2047 fput_light(sock->file, fput_needed);
2048
728ffb86
AB
2049 /* We only return an error if no datagrams were able to be sent */
2050 if (datagrams != 0)
228e548e
AB
2051 return datagrams;
2052
228e548e
AB
2053 return err;
2054}
2055
2056SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2057 unsigned int, vlen, unsigned int, flags)
2058{
1be374a0
AL
2059 if (flags & MSG_CMSG_COMPAT)
2060 return -EINVAL;
228e548e
AB
2061 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2062}
2063
666547ff 2064static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2065 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2066{
89bddce5
SH
2067 struct compat_msghdr __user *msg_compat =
2068 (struct compat_msghdr __user *)msg;
1da177e4 2069 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2070 struct iovec *iov = iovstack;
1da177e4 2071 unsigned long cmsg_ptr;
2da62906 2072 int len;
08adb7da 2073 ssize_t err;
1da177e4
LT
2074
2075 /* kernel mode address */
230b1839 2076 struct sockaddr_storage addr;
1da177e4
LT
2077
2078 /* user mode address pointers */
2079 struct sockaddr __user *uaddr;
08adb7da 2080 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2081
08adb7da 2082 msg_sys->msg_name = &addr;
1da177e4 2083
f3d33426 2084 if (MSG_CMSG_COMPAT & flags)
08adb7da 2085 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2086 else
08adb7da 2087 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2088 if (err < 0)
da184284 2089 return err;
1da177e4 2090
a2e27255
ACM
2091 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2092 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2093
f3d33426
HFS
2094 /* We assume all kernel code knows the size of sockaddr_storage */
2095 msg_sys->msg_namelen = 0;
2096
1da177e4
LT
2097 if (sock->file->f_flags & O_NONBLOCK)
2098 flags |= MSG_DONTWAIT;
2da62906 2099 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2100 if (err < 0)
2101 goto out_freeiov;
2102 len = err;
2103
2104 if (uaddr != NULL) {
43db362d 2105 err = move_addr_to_user(&addr,
a2e27255 2106 msg_sys->msg_namelen, uaddr,
89bddce5 2107 uaddr_len);
1da177e4
LT
2108 if (err < 0)
2109 goto out_freeiov;
2110 }
a2e27255 2111 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2112 COMPAT_FLAGS(msg));
1da177e4
LT
2113 if (err)
2114 goto out_freeiov;
2115 if (MSG_CMSG_COMPAT & flags)
a2e27255 2116 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2117 &msg_compat->msg_controllen);
2118 else
a2e27255 2119 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2120 &msg->msg_controllen);
2121 if (err)
2122 goto out_freeiov;
2123 err = len;
2124
2125out_freeiov:
da184284 2126 kfree(iov);
a2e27255
ACM
2127 return err;
2128}
2129
2130/*
2131 * BSD recvmsg interface
2132 */
2133
666547ff 2134long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2135{
2136 int fput_needed, err;
2137 struct msghdr msg_sys;
1be374a0
AL
2138 struct socket *sock;
2139
1be374a0 2140 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2141 if (!sock)
2142 goto out;
2143
a7526eb5 2144 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2145
6cb153ca 2146 fput_light(sock->file, fput_needed);
1da177e4
LT
2147out:
2148 return err;
2149}
2150
666547ff 2151SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2152 unsigned int, flags)
2153{
2154 if (flags & MSG_CMSG_COMPAT)
2155 return -EINVAL;
2156 return __sys_recvmsg(fd, msg, flags);
2157}
2158
a2e27255
ACM
2159/*
2160 * Linux recvmmsg interface
2161 */
2162
2163int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2164 unsigned int flags, struct timespec *timeout)
2165{
2166 int fput_needed, err, datagrams;
2167 struct socket *sock;
2168 struct mmsghdr __user *entry;
d7256d0e 2169 struct compat_mmsghdr __user *compat_entry;
a2e27255 2170 struct msghdr msg_sys;
766b9f92
DD
2171 struct timespec64 end_time;
2172 struct timespec64 timeout64;
a2e27255
ACM
2173
2174 if (timeout &&
2175 poll_select_set_timeout(&end_time, timeout->tv_sec,
2176 timeout->tv_nsec))
2177 return -EINVAL;
2178
2179 datagrams = 0;
2180
2181 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2182 if (!sock)
2183 return err;
2184
2185 err = sock_error(sock->sk);
2186 if (err)
2187 goto out_put;
2188
2189 entry = mmsg;
d7256d0e 2190 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2191
2192 while (datagrams < vlen) {
2193 /*
2194 * No need to ask LSM for more than the first datagram.
2195 */
d7256d0e 2196 if (MSG_CMSG_COMPAT & flags) {
666547ff 2197 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2198 &msg_sys, flags & ~MSG_WAITFORONE,
2199 datagrams);
d7256d0e
JMG
2200 if (err < 0)
2201 break;
2202 err = __put_user(err, &compat_entry->msg_len);
2203 ++compat_entry;
2204 } else {
a7526eb5 2205 err = ___sys_recvmsg(sock,
666547ff 2206 (struct user_msghdr __user *)entry,
a7526eb5
AL
2207 &msg_sys, flags & ~MSG_WAITFORONE,
2208 datagrams);
d7256d0e
JMG
2209 if (err < 0)
2210 break;
2211 err = put_user(err, &entry->msg_len);
2212 ++entry;
2213 }
2214
a2e27255
ACM
2215 if (err)
2216 break;
a2e27255
ACM
2217 ++datagrams;
2218
71c5c159
BB
2219 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2220 if (flags & MSG_WAITFORONE)
2221 flags |= MSG_DONTWAIT;
2222
a2e27255 2223 if (timeout) {
766b9f92
DD
2224 ktime_get_ts64(&timeout64);
2225 *timeout = timespec64_to_timespec(
2226 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2227 if (timeout->tv_sec < 0) {
2228 timeout->tv_sec = timeout->tv_nsec = 0;
2229 break;
2230 }
2231
2232 /* Timeout, return less than vlen datagrams */
2233 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2234 break;
2235 }
2236
2237 /* Out of band data, return right away */
2238 if (msg_sys.msg_flags & MSG_OOB)
2239 break;
a78cb84c 2240 cond_resched();
a2e27255
ACM
2241 }
2242
a2e27255 2243 if (err == 0)
34b88a68
ACM
2244 goto out_put;
2245
2246 if (datagrams == 0) {
2247 datagrams = err;
2248 goto out_put;
2249 }
a2e27255 2250
34b88a68
ACM
2251 /*
2252 * We may return less entries than requested (vlen) if the
2253 * sock is non block and there aren't enough datagrams...
2254 */
2255 if (err != -EAGAIN) {
a2e27255 2256 /*
34b88a68
ACM
2257 * ... or if recvmsg returns an error after we
2258 * received some datagrams, where we record the
2259 * error to return on the next call or if the
2260 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2261 */
34b88a68 2262 sock->sk->sk_err = -err;
a2e27255 2263 }
34b88a68
ACM
2264out_put:
2265 fput_light(sock->file, fput_needed);
a2e27255 2266
34b88a68 2267 return datagrams;
a2e27255
ACM
2268}
2269
2270SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2271 unsigned int, vlen, unsigned int, flags,
2272 struct timespec __user *, timeout)
2273{
2274 int datagrams;
2275 struct timespec timeout_sys;
2276
1be374a0
AL
2277 if (flags & MSG_CMSG_COMPAT)
2278 return -EINVAL;
2279
a2e27255
ACM
2280 if (!timeout)
2281 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2282
2283 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2284 return -EFAULT;
2285
2286 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2287
2288 if (datagrams > 0 &&
2289 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2290 datagrams = -EFAULT;
2291
2292 return datagrams;
2293}
2294
2295#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Size in bytes of the argument block for each sys_socketcall() call
 * number; the table is indexed by call number and entry 0 is unused.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),					/* call 0 */
	AL(3), AL(3), AL(3), AL(2),		/* calls 1..4 */
	AL(3), AL(3), AL(3), AL(4),		/* calls 5..8 */
	AL(4), AL(4), AL(6), AL(6),		/* calls 9..12 */
	AL(2), AL(5), AL(5), AL(3),		/* calls 13..16 */
	AL(3), AL(4), AL(5), AL(4)		/* calls 17..20 */
};

#undef AL
2306
2307/*
89bddce5 2308 * System call vectors.
1da177e4
LT
2309 *
2310 * Argument checking cleaned up. Saved 20% in size.
2311 * This function doesn't need to set the kernel lock because
89bddce5 2312 * it is set by the callees.
1da177e4
LT
2313 */
2314
3e0fa65f 2315SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2316{
2950fa9d 2317 unsigned long a[AUDITSC_ARGS];
89bddce5 2318 unsigned long a0, a1;
1da177e4 2319 int err;
47379052 2320 unsigned int len;
1da177e4 2321
228e548e 2322 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2323 return -EINVAL;
2324
47379052
AV
2325 len = nargs[call];
2326 if (len > sizeof(a))
2327 return -EINVAL;
2328
1da177e4 2329 /* copy_from_user should be SMP safe. */
47379052 2330 if (copy_from_user(a, args, len))
1da177e4 2331 return -EFAULT;
3ec3b2fb 2332
2950fa9d
CG
2333 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2334 if (err)
2335 return err;
3ec3b2fb 2336
89bddce5
SH
2337 a0 = a[0];
2338 a1 = a[1];
2339
2340 switch (call) {
2341 case SYS_SOCKET:
2342 err = sys_socket(a0, a1, a[2]);
2343 break;
2344 case SYS_BIND:
2345 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2346 break;
2347 case SYS_CONNECT:
2348 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2349 break;
2350 case SYS_LISTEN:
2351 err = sys_listen(a0, a1);
2352 break;
2353 case SYS_ACCEPT:
de11defe
UD
2354 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2355 (int __user *)a[2], 0);
89bddce5
SH
2356 break;
2357 case SYS_GETSOCKNAME:
2358 err =
2359 sys_getsockname(a0, (struct sockaddr __user *)a1,
2360 (int __user *)a[2]);
2361 break;
2362 case SYS_GETPEERNAME:
2363 err =
2364 sys_getpeername(a0, (struct sockaddr __user *)a1,
2365 (int __user *)a[2]);
2366 break;
2367 case SYS_SOCKETPAIR:
2368 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2369 break;
2370 case SYS_SEND:
2371 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2372 break;
2373 case SYS_SENDTO:
2374 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2375 (struct sockaddr __user *)a[4], a[5]);
2376 break;
2377 case SYS_RECV:
2378 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2379 break;
2380 case SYS_RECVFROM:
2381 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2382 (struct sockaddr __user *)a[4],
2383 (int __user *)a[5]);
2384 break;
2385 case SYS_SHUTDOWN:
2386 err = sys_shutdown(a0, a1);
2387 break;
2388 case SYS_SETSOCKOPT:
2389 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2390 break;
2391 case SYS_GETSOCKOPT:
2392 err =
2393 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2394 (int __user *)a[4]);
2395 break;
2396 case SYS_SENDMSG:
666547ff 2397 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2398 break;
228e548e
AB
2399 case SYS_SENDMMSG:
2400 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2401 break;
89bddce5 2402 case SYS_RECVMSG:
666547ff 2403 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2404 break;
a2e27255
ACM
2405 case SYS_RECVMMSG:
2406 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2407 (struct timespec __user *)a[4]);
2408 break;
de11defe
UD
2409 case SYS_ACCEPT4:
2410 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2411 (int __user *)a[2], a[3]);
aaca0bdc 2412 break;
89bddce5
SH
2413 default:
2414 err = -EINVAL;
2415 break;
1da177e4
LT
2416 }
2417 return err;
2418}
2419
89bddce5 2420#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2421
55737fda
SH
2422/**
2423 * sock_register - add a socket protocol handler
2424 * @ops: description of protocol
2425 *
1da177e4
LT
2426 * This function is called by a protocol handler that wants to
2427 * advertise its address family, and have it linked into the
e793c0f7 2428 * socket interface. The value ops->family corresponds to the
55737fda 2429 * socket system call protocol family.
1da177e4 2430 */
f0fd27d4 2431int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2432{
2433 int err;
2434
2435 if (ops->family >= NPROTO) {
3410f22e 2436 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2437 return -ENOBUFS;
2438 }
55737fda
SH
2439
2440 spin_lock(&net_family_lock);
190683a9
ED
2441 if (rcu_dereference_protected(net_families[ops->family],
2442 lockdep_is_held(&net_family_lock)))
55737fda
SH
2443 err = -EEXIST;
2444 else {
cf778b00 2445 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2446 err = 0;
2447 }
55737fda
SH
2448 spin_unlock(&net_family_lock);
2449
3410f22e 2450 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2451 return err;
2452}
c6d409cf 2453EXPORT_SYMBOL(sock_register);
1da177e4 2454
55737fda
SH
2455/**
2456 * sock_unregister - remove a protocol handler
2457 * @family: protocol family to remove
2458 *
1da177e4
LT
2459 * This function is called by a protocol handler that wants to
2460 * remove its address family, and have it unlinked from the
55737fda
SH
2461 * new socket creation.
2462 *
2463 * If protocol handler is a module, then it can use module reference
2464 * counts to protect against new references. If protocol handler is not
2465 * a module then it needs to provide its own protection in
2466 * the ops->create routine.
1da177e4 2467 */
f0fd27d4 2468void sock_unregister(int family)
1da177e4 2469{
f0fd27d4 2470 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2471
55737fda 2472 spin_lock(&net_family_lock);
a9b3cd7f 2473 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2474 spin_unlock(&net_family_lock);
2475
2476 synchronize_rcu();
2477
3410f22e 2478 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2479}
c6d409cf 2480EXPORT_SYMBOL(sock_unregister);
1da177e4 2481
77d76ea3 2482static int __init sock_init(void)
1da177e4 2483{
b3e19d92 2484 int err;
2ca794e5
EB
2485 /*
2486 * Initialize the network sysctl infrastructure.
2487 */
2488 err = net_sysctl_init();
2489 if (err)
2490 goto out;
b3e19d92 2491
1da177e4 2492 /*
89bddce5 2493 * Initialize skbuff SLAB cache
1da177e4
LT
2494 */
2495 skb_init();
1da177e4
LT
2496
2497 /*
89bddce5 2498 * Initialize the protocols module.
1da177e4
LT
2499 */
2500
2501 init_inodecache();
b3e19d92
NP
2502
2503 err = register_filesystem(&sock_fs_type);
2504 if (err)
2505 goto out_fs;
1da177e4 2506 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2507 if (IS_ERR(sock_mnt)) {
2508 err = PTR_ERR(sock_mnt);
2509 goto out_mount;
2510 }
77d76ea3
AK
2511
2512 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2513 */
2514
2515#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2516 err = netfilter_init();
2517 if (err)
2518 goto out;
1da177e4 2519#endif
cbeb321a 2520
408eccce 2521 ptp_classifier_init();
c1f19b51 2522
b3e19d92
NP
2523out:
2524 return err;
2525
2526out_mount:
2527 unregister_filesystem(&sock_fs_type);
2528out_fs:
2529 goto out;
1da177e4
LT
2530}
2531
77d76ea3
AK
2532core_initcall(sock_init); /* early initcall */
2533
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Print the number of sockets in use to @seq.  The figure is the sum of the
 * per-CPU sockets_in_use counters over all possible CPUs; a negative sum is
 * reported as 0.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		total += per_cpu(sockets_in_use, cpu);
	}

	/* It can be negative, by the way. 8) */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif				/* CONFIG_PROC_FS */
1da177e4 2550
89bbfc95 2551#ifdef CONFIG_COMPAT
6b96018b 2552static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2553 unsigned int cmd, void __user *up)
7a229387 2554{
7a229387
AB
2555 mm_segment_t old_fs = get_fs();
2556 struct timeval ktv;
2557 int err;
2558
2559 set_fs(KERNEL_DS);
6b96018b 2560 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2561 set_fs(old_fs);
644595f8 2562 if (!err)
ed6fe9d6 2563 err = compat_put_timeval(&ktv, up);
644595f8 2564
7a229387
AB
2565 return err;
2566}
2567
6b96018b 2568static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2569 unsigned int cmd, void __user *up)
7a229387 2570{
7a229387
AB
2571 mm_segment_t old_fs = get_fs();
2572 struct timespec kts;
2573 int err;
2574
2575 set_fs(KERNEL_DS);
6b96018b 2576 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2577 set_fs(old_fs);
644595f8 2578 if (!err)
ed6fe9d6 2579 err = compat_put_timespec(&kts, up);
644595f8 2580
7a229387
AB
2581 return err;
2582}
2583
6b96018b 2584static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2585{
2586 struct ifreq __user *uifr;
2587 int err;
2588
2589 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2590 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2591 return -EFAULT;
2592
6b96018b 2593 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2594 if (err)
2595 return err;
2596
6b96018b 2597 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2598 return -EFAULT;
2599
2600 return 0;
2601}
2602
6b96018b 2603static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2604{
6b96018b 2605 struct compat_ifconf ifc32;
7a229387
AB
2606 struct ifconf ifc;
2607 struct ifconf __user *uifc;
6b96018b 2608 struct compat_ifreq __user *ifr32;
7a229387
AB
2609 struct ifreq __user *ifr;
2610 unsigned int i, j;
2611 int err;
2612
6b96018b 2613 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2614 return -EFAULT;
2615
43da5f2e 2616 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2617 if (ifc32.ifcbuf == 0) {
2618 ifc32.ifc_len = 0;
2619 ifc.ifc_len = 0;
2620 ifc.ifc_req = NULL;
2621 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2622 } else {
c6d409cf
ED
2623 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2624 sizeof(struct ifreq);
7a229387
AB
2625 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2626 ifc.ifc_len = len;
2627 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2628 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2629 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2630 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2631 return -EFAULT;
2632 ifr++;
2633 ifr32++;
2634 }
2635 }
2636 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2637 return -EFAULT;
2638
6b96018b 2639 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2640 if (err)
2641 return err;
2642
2643 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2644 return -EFAULT;
2645
2646 ifr = ifc.ifc_req;
2647 ifr32 = compat_ptr(ifc32.ifcbuf);
2648 for (i = 0, j = 0;
c6d409cf
ED
2649 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2650 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2651 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2652 return -EFAULT;
2653 ifr32++;
2654 ifr++;
2655 }
2656
2657 if (ifc32.ifcbuf == 0) {
2658 /* Translate from 64-bit structure multiple to
2659 * a 32-bit one.
2660 */
2661 i = ifc.ifc_len;
6b96018b 2662 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2663 ifc32.ifc_len = i;
2664 } else {
2665 ifc32.ifc_len = i;
2666 }
6b96018b 2667 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2668 return -EFAULT;
2669
2670 return 0;
2671}
2672
6b96018b 2673static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2674{
3a7da39d
BH
2675 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2676 bool convert_in = false, convert_out = false;
2677 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2678 struct ethtool_rxnfc __user *rxnfc;
7a229387 2679 struct ifreq __user *ifr;
3a7da39d
BH
2680 u32 rule_cnt = 0, actual_rule_cnt;
2681 u32 ethcmd;
7a229387 2682 u32 data;
3a7da39d 2683 int ret;
7a229387 2684
3a7da39d
BH
2685 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2686 return -EFAULT;
7a229387 2687
3a7da39d
BH
2688 compat_rxnfc = compat_ptr(data);
2689
2690 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2691 return -EFAULT;
2692
3a7da39d
BH
2693 /* Most ethtool structures are defined without padding.
2694 * Unfortunately struct ethtool_rxnfc is an exception.
2695 */
2696 switch (ethcmd) {
2697 default:
2698 break;
2699 case ETHTOOL_GRXCLSRLALL:
2700 /* Buffer size is variable */
2701 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2702 return -EFAULT;
2703 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2704 return -ENOMEM;
2705 buf_size += rule_cnt * sizeof(u32);
2706 /* fall through */
2707 case ETHTOOL_GRXRINGS:
2708 case ETHTOOL_GRXCLSRLCNT:
2709 case ETHTOOL_GRXCLSRULE:
55664f32 2710 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2711 convert_out = true;
2712 /* fall through */
2713 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2714 buf_size += sizeof(struct ethtool_rxnfc);
2715 convert_in = true;
2716 break;
2717 }
2718
2719 ifr = compat_alloc_user_space(buf_size);
954b1244 2720 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2721
2722 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2723 return -EFAULT;
2724
3a7da39d
BH
2725 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2726 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2727 return -EFAULT;
2728
3a7da39d 2729 if (convert_in) {
127fe533 2730 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2731 * fs.ring_cookie and at the end of fs, but nowhere else.
2732 */
127fe533
AD
2733 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2734 sizeof(compat_rxnfc->fs.m_ext) !=
2735 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2736 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2737 BUILD_BUG_ON(
2738 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2739 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2740 offsetof(struct ethtool_rxnfc, fs.location) -
2741 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2742
2743 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2744 (void __user *)(&rxnfc->fs.m_ext + 1) -
2745 (void __user *)rxnfc) ||
3a7da39d
BH
2746 copy_in_user(&rxnfc->fs.ring_cookie,
2747 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2748 (void __user *)(&rxnfc->fs.location + 1) -
2749 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2750 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2751 sizeof(rxnfc->rule_cnt)))
2752 return -EFAULT;
2753 }
2754
2755 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2756 if (ret)
2757 return ret;
2758
2759 if (convert_out) {
2760 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2761 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2762 (const void __user *)rxnfc) ||
3a7da39d
BH
2763 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2764 &rxnfc->fs.ring_cookie,
954b1244
SH
2765 (const void __user *)(&rxnfc->fs.location + 1) -
2766 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2767 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2768 sizeof(rxnfc->rule_cnt)))
2769 return -EFAULT;
2770
2771 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2772 /* As an optimisation, we only copy the actual
2773 * number of rules that the underlying
2774 * function returned. Since Mallory might
2775 * change the rule count in user memory, we
2776 * check that it is less than the rule count
2777 * originally given (as the user buffer size),
2778 * which has been range-checked.
2779 */
2780 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2781 return -EFAULT;
2782 if (actual_rule_cnt < rule_cnt)
2783 rule_cnt = actual_rule_cnt;
2784 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2785 &rxnfc->rule_locs[0],
2786 rule_cnt * sizeof(u32)))
2787 return -EFAULT;
2788 }
2789 }
2790
2791 return 0;
7a229387
AB
2792}
2793
7a50a240
AB
2794static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2795{
2796 void __user *uptr;
2797 compat_uptr_t uptr32;
2798 struct ifreq __user *uifr;
2799
c6d409cf 2800 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2801 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2802 return -EFAULT;
2803
2804 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2805 return -EFAULT;
2806
2807 uptr = compat_ptr(uptr32);
2808
2809 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2810 return -EFAULT;
2811
2812 return dev_ioctl(net, SIOCWANDEV, uifr);
2813}
2814
6b96018b
AB
2815static int bond_ioctl(struct net *net, unsigned int cmd,
2816 struct compat_ifreq __user *ifr32)
7a229387
AB
2817{
2818 struct ifreq kifr;
7a229387
AB
2819 mm_segment_t old_fs;
2820 int err;
7a229387
AB
2821
2822 switch (cmd) {
2823 case SIOCBONDENSLAVE:
2824 case SIOCBONDRELEASE:
2825 case SIOCBONDSETHWADDR:
2826 case SIOCBONDCHANGEACTIVE:
6b96018b 2827 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2828 return -EFAULT;
2829
2830 old_fs = get_fs();
c6d409cf 2831 set_fs(KERNEL_DS);
c3f52ae6 2832 err = dev_ioctl(net, cmd,
2833 (struct ifreq __user __force *) &kifr);
c6d409cf 2834 set_fs(old_fs);
7a229387
AB
2835
2836 return err;
7a229387 2837 default:
07d106d0 2838 return -ENOIOCTLCMD;
ccbd6a5a 2839 }
7a229387
AB
2840}
2841
590d4693
BH
2842/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2843static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2844 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2845{
2846 struct ifreq __user *u_ifreq64;
7a229387
AB
2847 char tmp_buf[IFNAMSIZ];
2848 void __user *data64;
2849 u32 data32;
2850
2851 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2852 IFNAMSIZ))
2853 return -EFAULT;
417c3522 2854 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2855 return -EFAULT;
2856 data64 = compat_ptr(data32);
2857
2858 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2859
7a229387
AB
2860 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2861 IFNAMSIZ))
2862 return -EFAULT;
417c3522 2863 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2864 return -EFAULT;
2865
6b96018b 2866 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2867}
2868
6b96018b
AB
2869static int dev_ifsioc(struct net *net, struct socket *sock,
2870 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2871{
a2116ed2 2872 struct ifreq __user *uifr;
7a229387
AB
2873 int err;
2874
a2116ed2
AB
2875 uifr = compat_alloc_user_space(sizeof(*uifr));
2876 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2877 return -EFAULT;
2878
2879 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2880
7a229387
AB
2881 if (!err) {
2882 switch (cmd) {
2883 case SIOCGIFFLAGS:
2884 case SIOCGIFMETRIC:
2885 case SIOCGIFMTU:
2886 case SIOCGIFMEM:
2887 case SIOCGIFHWADDR:
2888 case SIOCGIFINDEX:
2889 case SIOCGIFADDR:
2890 case SIOCGIFBRDADDR:
2891 case SIOCGIFDSTADDR:
2892 case SIOCGIFNETMASK:
fab2532b 2893 case SIOCGIFPFLAGS:
7a229387 2894 case SIOCGIFTXQLEN:
fab2532b
AB
2895 case SIOCGMIIPHY:
2896 case SIOCGMIIREG:
a2116ed2 2897 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2898 err = -EFAULT;
2899 break;
2900 }
2901 }
2902 return err;
2903}
2904
a2116ed2
AB
2905static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2906 struct compat_ifreq __user *uifr32)
2907{
2908 struct ifreq ifr;
2909 struct compat_ifmap __user *uifmap32;
2910 mm_segment_t old_fs;
2911 int err;
2912
2913 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2914 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2915 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2916 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2917 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2918 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2919 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2920 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2921 if (err)
2922 return -EFAULT;
2923
2924 old_fs = get_fs();
c6d409cf 2925 set_fs(KERNEL_DS);
c3f52ae6 2926 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2927 set_fs(old_fs);
a2116ed2
AB
2928
2929 if (cmd == SIOCGIFMAP && !err) {
2930 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2931 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2932 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2933 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2934 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2935 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2936 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2937 if (err)
2938 err = -EFAULT;
2939 }
2940 return err;
2941}
2942
7a229387 2943struct rtentry32 {
c6d409cf 2944 u32 rt_pad1;
7a229387
AB
2945 struct sockaddr rt_dst; /* target address */
2946 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2947 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2948 unsigned short rt_flags;
2949 short rt_pad2;
2950 u32 rt_pad3;
2951 unsigned char rt_tos;
2952 unsigned char rt_class;
2953 short rt_pad4;
2954 short rt_metric; /* +1 for binary compatibility! */
7a229387 2955 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
2956 u32 rt_mtu; /* per route MTU/Window */
2957 u32 rt_window; /* Window clamping */
7a229387
AB
2958 unsigned short rt_irtt; /* Initial RTT */
2959};
2960
2961struct in6_rtmsg32 {
2962 struct in6_addr rtmsg_dst;
2963 struct in6_addr rtmsg_src;
2964 struct in6_addr rtmsg_gateway;
2965 u32 rtmsg_type;
2966 u16 rtmsg_dst_len;
2967 u16 rtmsg_src_len;
2968 u32 rtmsg_metric;
2969 u32 rtmsg_info;
2970 u32 rtmsg_flags;
2971 s32 rtmsg_ifindex;
2972};
2973
6b96018b
AB
2974static int routing_ioctl(struct net *net, struct socket *sock,
2975 unsigned int cmd, void __user *argp)
7a229387
AB
2976{
2977 int ret;
2978 void *r = NULL;
2979 struct in6_rtmsg r6;
2980 struct rtentry r4;
2981 char devname[16];
2982 u32 rtdev;
2983 mm_segment_t old_fs = get_fs();
2984
6b96018b
AB
2985 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2986 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 2987 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 2988 3 * sizeof(struct in6_addr));
3ddc5b46
MD
2989 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
2990 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
2991 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
2992 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
2993 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
2994 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
2995 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
2996
2997 r = (void *) &r6;
2998 } else { /* ipv4 */
6b96018b 2999 struct rtentry32 __user *ur4 = argp;
c6d409cf 3000 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3001 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3002 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3003 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3004 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3005 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3006 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3007 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3008 if (rtdev) {
c6d409cf 3009 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3010 r4.rt_dev = (char __user __force *)devname;
3011 devname[15] = 0;
7a229387
AB
3012 } else
3013 r4.rt_dev = NULL;
3014
3015 r = (void *) &r4;
3016 }
3017
3018 if (ret) {
3019 ret = -EFAULT;
3020 goto out;
3021 }
3022
c6d409cf 3023 set_fs(KERNEL_DS);
6b96018b 3024 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3025 set_fs(old_fs);
7a229387
AB
3026
3027out:
7a229387
AB
3028 return ret;
3029}
3030
3031/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3032 * for some operations; this forces use of the newer bridge-utils that
25985edc 3033 * use compatible ioctls
7a229387 3034 */
6b96018b 3035static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3036{
6b96018b 3037 compat_ulong_t tmp;
7a229387 3038
6b96018b 3039 if (get_user(tmp, argp))
7a229387
AB
3040 return -EFAULT;
3041 if (tmp == BRCTL_GET_VERSION)
3042 return BRCTL_VERSION + 1;
3043 return -EINVAL;
3044}
3045
6b96018b
AB
3046static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3047 unsigned int cmd, unsigned long arg)
3048{
3049 void __user *argp = compat_ptr(arg);
3050 struct sock *sk = sock->sk;
3051 struct net *net = sock_net(sk);
7a229387 3052
6b96018b 3053 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3054 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3055
3056 switch (cmd) {
3057 case SIOCSIFBR:
3058 case SIOCGIFBR:
3059 return old_bridge_ioctl(argp);
3060 case SIOCGIFNAME:
3061 return dev_ifname32(net, argp);
3062 case SIOCGIFCONF:
3063 return dev_ifconf(net, argp);
3064 case SIOCETHTOOL:
3065 return ethtool_ioctl(net, argp);
7a50a240
AB
3066 case SIOCWANDEV:
3067 return compat_siocwandev(net, argp);
a2116ed2
AB
3068 case SIOCGIFMAP:
3069 case SIOCSIFMAP:
3070 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3071 case SIOCBONDENSLAVE:
3072 case SIOCBONDRELEASE:
3073 case SIOCBONDSETHWADDR:
6b96018b
AB
3074 case SIOCBONDCHANGEACTIVE:
3075 return bond_ioctl(net, cmd, argp);
3076 case SIOCADDRT:
3077 case SIOCDELRT:
3078 return routing_ioctl(net, sock, cmd, argp);
3079 case SIOCGSTAMP:
3080 return do_siocgstamp(net, sock, cmd, argp);
3081 case SIOCGSTAMPNS:
3082 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3083 case SIOCBONDSLAVEINFOQUERY:
3084 case SIOCBONDINFOQUERY:
a2116ed2 3085 case SIOCSHWTSTAMP:
fd468c74 3086 case SIOCGHWTSTAMP:
590d4693 3087 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3088
3089 case FIOSETOWN:
3090 case SIOCSPGRP:
3091 case FIOGETOWN:
3092 case SIOCGPGRP:
3093 case SIOCBRADDBR:
3094 case SIOCBRDELBR:
3095 case SIOCGIFVLAN:
3096 case SIOCSIFVLAN:
3097 case SIOCADDDLCI:
3098 case SIOCDELDLCI:
3099 return sock_ioctl(file, cmd, arg);
3100
3101 case SIOCGIFFLAGS:
3102 case SIOCSIFFLAGS:
3103 case SIOCGIFMETRIC:
3104 case SIOCSIFMETRIC:
3105 case SIOCGIFMTU:
3106 case SIOCSIFMTU:
3107 case SIOCGIFMEM:
3108 case SIOCSIFMEM:
3109 case SIOCGIFHWADDR:
3110 case SIOCSIFHWADDR:
3111 case SIOCADDMULTI:
3112 case SIOCDELMULTI:
3113 case SIOCGIFINDEX:
6b96018b
AB
3114 case SIOCGIFADDR:
3115 case SIOCSIFADDR:
3116 case SIOCSIFHWBROADCAST:
6b96018b 3117 case SIOCDIFADDR:
6b96018b
AB
3118 case SIOCGIFBRDADDR:
3119 case SIOCSIFBRDADDR:
3120 case SIOCGIFDSTADDR:
3121 case SIOCSIFDSTADDR:
3122 case SIOCGIFNETMASK:
3123 case SIOCSIFNETMASK:
3124 case SIOCSIFPFLAGS:
3125 case SIOCGIFPFLAGS:
3126 case SIOCGIFTXQLEN:
3127 case SIOCSIFTXQLEN:
3128 case SIOCBRADDIF:
3129 case SIOCBRDELIF:
9177efd3
AB
3130 case SIOCSIFNAME:
3131 case SIOCGMIIPHY:
3132 case SIOCGMIIREG:
3133 case SIOCSMIIREG:
6b96018b 3134 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3135
6b96018b
AB
3136 case SIOCSARP:
3137 case SIOCGARP:
3138 case SIOCDARP:
6b96018b 3139 case SIOCATMARK:
9177efd3
AB
3140 return sock_do_ioctl(net, sock, cmd, arg);
3141 }
3142
6b96018b
AB
3143 return -ENOIOCTLCMD;
3144}
7a229387 3145
95c96174 3146static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3147 unsigned long arg)
89bbfc95
SP
3148{
3149 struct socket *sock = file->private_data;
3150 int ret = -ENOIOCTLCMD;
87de87d5
DM
3151 struct sock *sk;
3152 struct net *net;
3153
3154 sk = sock->sk;
3155 net = sock_net(sk);
89bbfc95
SP
3156
3157 if (sock->ops->compat_ioctl)
3158 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3159
87de87d5
DM
3160 if (ret == -ENOIOCTLCMD &&
3161 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3162 ret = compat_wext_handle_ioctl(net, cmd, arg);
3163
6b96018b
AB
3164 if (ret == -ENOIOCTLCMD)
3165 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3166
89bbfc95
SP
3167 return ret;
3168}
3169#endif
3170
ac5a488e
SS
3171int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3172{
3173 return sock->ops->bind(sock, addr, addrlen);
3174}
c6d409cf 3175EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3176
3177int kernel_listen(struct socket *sock, int backlog)
3178{
3179 return sock->ops->listen(sock, backlog);
3180}
c6d409cf 3181EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3182
3183int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3184{
3185 struct sock *sk = sock->sk;
3186 int err;
3187
3188 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3189 newsock);
3190 if (err < 0)
3191 goto done;
3192
3193 err = sock->ops->accept(sock, *newsock, flags);
3194 if (err < 0) {
3195 sock_release(*newsock);
fa8705b0 3196 *newsock = NULL;
ac5a488e
SS
3197 goto done;
3198 }
3199
3200 (*newsock)->ops = sock->ops;
1b08534e 3201 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3202
3203done:
3204 return err;
3205}
c6d409cf 3206EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3207
3208int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3209 int flags)
ac5a488e
SS
3210{
3211 return sock->ops->connect(sock, addr, addrlen, flags);
3212}
c6d409cf 3213EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3214
3215int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3216 int *addrlen)
3217{
3218 return sock->ops->getname(sock, addr, addrlen, 0);
3219}
c6d409cf 3220EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3221
3222int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3223 int *addrlen)
3224{
3225 return sock->ops->getname(sock, addr, addrlen, 1);
3226}
c6d409cf 3227EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3228
3229int kernel_getsockopt(struct socket *sock, int level, int optname,
3230 char *optval, int *optlen)
3231{
3232 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3233 char __user *uoptval;
3234 int __user *uoptlen;
ac5a488e
SS
3235 int err;
3236
fb8621bb
NK
3237 uoptval = (char __user __force *) optval;
3238 uoptlen = (int __user __force *) optlen;
3239
ac5a488e
SS
3240 set_fs(KERNEL_DS);
3241 if (level == SOL_SOCKET)
fb8621bb 3242 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3243 else
fb8621bb
NK
3244 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3245 uoptlen);
ac5a488e
SS
3246 set_fs(oldfs);
3247 return err;
3248}
c6d409cf 3249EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3250
3251int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3252 char *optval, unsigned int optlen)
ac5a488e
SS
3253{
3254 mm_segment_t oldfs = get_fs();
fb8621bb 3255 char __user *uoptval;
ac5a488e
SS
3256 int err;
3257
fb8621bb
NK
3258 uoptval = (char __user __force *) optval;
3259
ac5a488e
SS
3260 set_fs(KERNEL_DS);
3261 if (level == SOL_SOCKET)
fb8621bb 3262 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3263 else
fb8621bb 3264 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3265 optlen);
3266 set_fs(oldfs);
3267 return err;
3268}
c6d409cf 3269EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3270
3271int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3272 size_t size, int flags)
3273{
3274 if (sock->ops->sendpage)
3275 return sock->ops->sendpage(sock, page, offset, size, flags);
3276
3277 return sock_no_sendpage(sock, page, offset, size, flags);
3278}
c6d409cf 3279EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3280
3281int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3282{
3283 mm_segment_t oldfs = get_fs();
3284 int err;
3285
3286 set_fs(KERNEL_DS);
3287 err = sock->ops->ioctl(sock, cmd, arg);
3288 set_fs(oldfs);
3289
3290 return err;
3291}
c6d409cf 3292EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3293
91cf45f0
TM
3294int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3295{
3296 return sock->ops->shutdown(sock, how);
3297}
91cf45f0 3298EXPORT_SYMBOL(kernel_sock_shutdown);