PCI: Tolerate hierarchies with no Root Port
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Globals that exist only when RX busy polling is configured in. */
unsigned int sysctl_net_busy_poll __read_mostly;
unsigned int sysctl_net_busy_read __read_mostly;
#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
260 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 261
1da177e4
LT
262 ei->socket.state = SS_UNCONNECTED;
263 ei->socket.flags = 0;
264 ei->socket.ops = NULL;
265 ei->socket.sk = NULL;
266 ei->socket.file = NULL;
1da177e4
LT
267
268 return &ei->vfs_inode;
269}
270
271static void sock_destroy_inode(struct inode *inode)
272{
43815482 273 struct socket_alloc *ei;
eaefd110 274 struct socket_wq *wq;
43815482
ED
275
276 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 277 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 278 kfree_rcu(wq, rcu);
43815482 279 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
280}
281
51cc5068 282static void init_once(void *foo)
1da177e4 283{
89bddce5 284 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 285
a35afb83 286 inode_init_once(&ei->vfs_inode);
1da177e4 287}
89bddce5 288
1da177e4
LT
289static int init_inodecache(void)
290{
291 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
292 sizeof(struct socket_alloc),
293 0,
294 (SLAB_HWCACHE_ALIGN |
295 SLAB_RECLAIM_ACCOUNT |
296 SLAB_MEM_SPREAD),
20c2df83 297 init_once);
1da177e4
LT
298 if (sock_inode_cachep == NULL)
299 return -ENOMEM;
300 return 0;
301}
302
b87221de 303static const struct super_operations sockfs_ops = {
c6d409cf
ED
304 .alloc_inode = sock_alloc_inode,
305 .destroy_inode = sock_destroy_inode,
306 .statfs = simple_statfs,
1da177e4
LT
307};
308
c23fbb6b
ED
309/*
310 * sockfs_dname() is called from d_path().
311 */
312static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
313{
314 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 315 d_inode(dentry)->i_ino);
c23fbb6b
ED
316}
317
3ba13d17 318static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 319 .d_dname = sockfs_dname,
1da177e4
LT
320};
321
c74a1cbb
AV
322static struct dentry *sockfs_mount(struct file_system_type *fs_type,
323 int flags, const char *dev_name, void *data)
324{
325 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
326 &sockfs_dentry_operations, SOCKFS_MAGIC);
327}
328
329static struct vfsmount *sock_mnt __read_mostly;
330
331static struct file_system_type sock_fs_type = {
332 .name = "sockfs",
333 .mount = sockfs_mount,
334 .kill_sb = kill_anon_super,
335};
336
1da177e4
LT
337/*
338 * Obtains the first available file descriptor and sets it up for use.
339 *
39d8c1b6
DM
340 * These functions create file structures and map them to fd space
341 * of the current process. On success it returns file descriptor
1da177e4
LT
342 * and file struct implicitly stored in sock->file.
343 * Note that another thread may close file descriptor before we return
344 * from this function. We use the fact that now we do not refer
345 * to socket after mapping. If one day we will need it, this
346 * function will increment ref. count on file by 1.
347 *
348 * In any case returned fd MAY BE not valid!
349 * This race condition is unavoidable
350 * with shared fd spaces, we cannot solve it inside kernel,
351 * but we take care of internal coherence yet.
352 */
353
aab174f0 354struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 355{
7cbe66b6 356 struct qstr name = { .name = "" };
2c48b9c4 357 struct path path;
7cbe66b6 358 struct file *file;
1da177e4 359
600e1779
MY
360 if (dname) {
361 name.name = dname;
362 name.len = strlen(name.name);
363 } else if (sock->sk) {
364 name.name = sock->sk->sk_prot_creator->name;
365 name.len = strlen(name.name);
366 }
4b936885 367 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
368 if (unlikely(!path.dentry))
369 return ERR_PTR(-ENOMEM);
2c48b9c4 370 path.mnt = mntget(sock_mnt);
39d8c1b6 371
2c48b9c4 372 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 373
2c48b9c4 374 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 375 &socket_file_ops);
39b65252 376 if (unlikely(IS_ERR(file))) {
cc3808f8 377 /* drop dentry, keep inode */
c5ef6035 378 ihold(d_inode(path.dentry));
2c48b9c4 379 path_put(&path);
39b65252 380 return file;
cc3808f8
AV
381 }
382
383 sock->file = file;
77d27200 384 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 385 file->private_data = sock;
28407630 386 return file;
39d8c1b6 387}
56b31d1c 388EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 389
56b31d1c 390static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
391{
392 struct file *newfile;
28407630
AV
393 int fd = get_unused_fd_flags(flags);
394 if (unlikely(fd < 0))
395 return fd;
39d8c1b6 396
aab174f0 397 newfile = sock_alloc_file(sock, flags, NULL);
28407630 398 if (likely(!IS_ERR(newfile))) {
39d8c1b6 399 fd_install(fd, newfile);
28407630
AV
400 return fd;
401 }
7cbe66b6 402
28407630
AV
403 put_unused_fd(fd);
404 return PTR_ERR(newfile);
1da177e4
LT
405}
406
406a3c63 407struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 408{
6cb153ca
BL
409 if (file->f_op == &socket_file_ops)
410 return file->private_data; /* set in sock_map_fd */
411
23bb80d2
ED
412 *err = -ENOTSOCK;
413 return NULL;
6cb153ca 414}
406a3c63 415EXPORT_SYMBOL(sock_from_file);
6cb153ca 416
1da177e4 417/**
c6d409cf 418 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
419 * @fd: file handle
420 * @err: pointer to an error code return
421 *
422 * The file handle passed in is locked and the socket it is bound
423 * too is returned. If an error occurs the err pointer is overwritten
424 * with a negative errno code and NULL is returned. The function checks
425 * for both invalid handles and passing a handle which is not a socket.
426 *
427 * On a success the socket object pointer is returned.
428 */
429
430struct socket *sockfd_lookup(int fd, int *err)
431{
432 struct file *file;
1da177e4
LT
433 struct socket *sock;
434
89bddce5
SH
435 file = fget(fd);
436 if (!file) {
1da177e4
LT
437 *err = -EBADF;
438 return NULL;
439 }
89bddce5 440
6cb153ca
BL
441 sock = sock_from_file(file, err);
442 if (!sock)
1da177e4 443 fput(file);
6cb153ca
BL
444 return sock;
445}
c6d409cf 446EXPORT_SYMBOL(sockfd_lookup);
1da177e4 447
6cb153ca
BL
448static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
449{
00e188ef 450 struct fd f = fdget(fd);
6cb153ca
BL
451 struct socket *sock;
452
3672558c 453 *err = -EBADF;
00e188ef
AV
454 if (f.file) {
455 sock = sock_from_file(f.file, err);
456 if (likely(sock)) {
457 *fput_needed = f.flags;
6cb153ca 458 return sock;
00e188ef
AV
459 }
460 fdput(f);
1da177e4 461 }
6cb153ca 462 return NULL;
1da177e4
LT
463}
464
600e1779
MY
465#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
466#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
467#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
468static ssize_t sockfs_getxattr(struct dentry *dentry,
469 const char *name, void *value, size_t size)
470{
471 const char *proto_name;
472 size_t proto_size;
473 int error;
474
475 error = -ENODATA;
476 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
477 proto_name = dentry->d_name.name;
478 proto_size = strlen(proto_name);
479
480 if (value) {
481 error = -ERANGE;
482 if (proto_size + 1 > size)
483 goto out;
484
485 strncpy(value, proto_name, proto_size + 1);
486 }
487 error = proto_size + 1;
488 }
489
490out:
491 return error;
492}
493
494static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
495 size_t size)
496{
497 ssize_t len;
498 ssize_t used = 0;
499
c5ef6035 500 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
501 if (len < 0)
502 return len;
503 used += len;
504 if (buffer) {
505 if (size < used)
506 return -ERANGE;
507 buffer += len;
508 }
509
510 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
511 used += len;
512 if (buffer) {
513 if (size < used)
514 return -ERANGE;
515 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
516 buffer += len;
517 }
518
519 return used;
520}
521
522static const struct inode_operations sockfs_inode_ops = {
523 .getxattr = sockfs_getxattr,
524 .listxattr = sockfs_listxattr,
525};
526
1da177e4
LT
527/**
528 * sock_alloc - allocate a socket
89bddce5 529 *
1da177e4
LT
530 * Allocate a new inode and socket object. The two are bound together
531 * and initialised. The socket is then returned. If we are out of inodes
532 * NULL is returned.
533 */
534
535static struct socket *sock_alloc(void)
536{
89bddce5
SH
537 struct inode *inode;
538 struct socket *sock;
1da177e4 539
a209dfc7 540 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
541 if (!inode)
542 return NULL;
543
544 sock = SOCKET_I(inode);
545
29a020d3 546 kmemcheck_annotate_bitfield(sock, type);
85fe4025 547 inode->i_ino = get_next_ino();
89bddce5 548 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
549 inode->i_uid = current_fsuid();
550 inode->i_gid = current_fsgid();
600e1779 551 inode->i_op = &sockfs_inode_ops;
1da177e4 552
19e8d69c 553 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
554 return sock;
555}
556
1da177e4
LT
557/**
558 * sock_release - close a socket
559 * @sock: socket to close
560 *
561 * The socket is released from the protocol stack if it has a release
562 * callback, and the inode is then released if the socket is bound to
89bddce5 563 * an inode not a file.
1da177e4 564 */
89bddce5 565
1da177e4
LT
566void sock_release(struct socket *sock)
567{
568 if (sock->ops) {
569 struct module *owner = sock->ops->owner;
570
571 sock->ops->release(sock);
572 sock->ops = NULL;
573 module_put(owner);
574 }
575
eaefd110 576 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 577 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 578
19e8d69c 579 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
580 if (!sock->file) {
581 iput(SOCK_INODE(sock));
582 return;
583 }
89bddce5 584 sock->file = NULL;
1da177e4 585}
c6d409cf 586EXPORT_SYMBOL(sock_release);
1da177e4 587
67cc0d40 588void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
20d49473 589{
140c55d4
ED
590 u8 flags = *tx_flags;
591
b9f40e21 592 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
593 flags |= SKBTX_HW_TSTAMP;
594
b9f40e21 595 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
596 flags |= SKBTX_SW_TSTAMP;
597
e7fd2885 598 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
599 flags |= SKBTX_SCHED_TSTAMP;
600
e1c8a607 601 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
140c55d4 602 flags |= SKBTX_ACK_TSTAMP;
e7fd2885 603
140c55d4 604 *tx_flags = flags;
20d49473 605}
67cc0d40 606EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 607
d8725c86 608static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 609{
01e97e65 610 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
611 BUG_ON(ret == -EIOCBQUEUED);
612 return ret;
1da177e4
LT
613}
614
/*
 * Send @msg on @sock after consulting the security hook. A non-zero
 * result from the hook is returned as-is and nothing is sent.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
623
624int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
625 struct kvec *vec, size_t num, size_t size)
626{
6aa24814 627 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 628 return sock_sendmsg(sock, msg);
1da177e4 629}
c6d409cf 630EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 631
92f37fd2
ED
632/*
633 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
634 */
635void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
636 struct sk_buff *skb)
637{
20d49473 638 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 639 struct scm_timestamping tss;
20d49473
PO
640 int empty = 1;
641 struct skb_shared_hwtstamps *shhwtstamps =
642 skb_hwtstamps(skb);
643
644 /* Race occurred between timestamp enabling and packet
645 receiving. Fill in the current time for now. */
646 if (need_software_tstamp && skb->tstamp.tv64 == 0)
647 __net_timestamp(skb);
648
649 if (need_software_tstamp) {
650 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
651 struct timeval tv;
652 skb_get_timestamp(skb, &tv);
653 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
654 sizeof(tv), &tv);
655 } else {
f24b9be5
WB
656 struct timespec ts;
657 skb_get_timestampns(skb, &ts);
20d49473 658 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 659 sizeof(ts), &ts);
20d49473
PO
660 }
661 }
662
f24b9be5 663 memset(&tss, 0, sizeof(tss));
c199105d 664 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 665 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 666 empty = 0;
4d276eb6 667 if (shhwtstamps &&
b9f40e21 668 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 669 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 670 empty = 0;
20d49473
PO
671 if (!empty)
672 put_cmsg(msg, SOL_SOCKET,
f24b9be5 673 SCM_TIMESTAMPING, sizeof(tss), &tss);
92f37fd2 674}
7c81fd8b
ACM
675EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
676
6e3e939f
JB
677void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
678 struct sk_buff *skb)
679{
680 int ack;
681
682 if (!sock_flag(sk, SOCK_WIFI_STATUS))
683 return;
684 if (!skb->wifi_acked_valid)
685 return;
686
687 ack = skb->wifi_acked;
688
689 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
690}
691EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
692
11165f14 693static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
694 struct sk_buff *skb)
3b885787 695{
744d5a3e 696 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 697 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 698 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
699}
700
/* Emit the timestamp control messages, then the drop counter, for @skb. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 708
1b784140
YX
709static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
710 size_t size, int flags)
1da177e4 711{
1b784140 712 return sock->ops->recvmsg(sock, msg, size, flags);
1da177e4
LT
713}
714
1b784140
YX
715int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
716 int flags)
a2e27255
ACM
717{
718 int err = security_socket_recvmsg(sock, msg, size, flags);
719
1b784140 720 return err ?: sock_recvmsg_nosec(sock, msg, size, flags);
1da177e4 721}
c6d409cf 722EXPORT_SYMBOL(sock_recvmsg);
1da177e4 723
c1249c0a
ML
724/**
725 * kernel_recvmsg - Receive a message from a socket (kernel space)
726 * @sock: The socket to receive the message from
727 * @msg: Received message
728 * @vec: Input s/g array for message data
729 * @num: Size of input s/g array
730 * @size: Number of bytes to read
731 * @flags: Message flags (MSG_DONTWAIT, etc...)
732 *
733 * On return the msg structure contains the scatter/gather array passed in the
734 * vec argument. The array is modified so that it consists of the unfilled
735 * portion of the original array.
736 *
737 * The returned value is the total number of bytes received, or an error.
738 */
89bddce5
SH
739int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
740 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
741{
742 mm_segment_t oldfs = get_fs();
743 int result;
744
6aa24814 745 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 746 set_fs(KERNEL_DS);
1da177e4
LT
747 result = sock_recvmsg(sock, msg, size, flags);
748 set_fs(oldfs);
749 return result;
750}
c6d409cf 751EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 752
ce1d4d3e
CH
753static ssize_t sock_sendpage(struct file *file, struct page *page,
754 int offset, size_t size, loff_t *ppos, int more)
1da177e4 755{
1da177e4
LT
756 struct socket *sock;
757 int flags;
758
ce1d4d3e
CH
759 sock = file->private_data;
760
35f9c09f
ED
761 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
762 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
763 flags |= more;
ce1d4d3e 764
e6949583 765 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 766}
1da177e4 767
9c55e01c 768static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 769 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
770 unsigned int flags)
771{
772 struct socket *sock = file->private_data;
773
997b37da
RDC
774 if (unlikely(!sock->ops->splice_read))
775 return -EINVAL;
776
9c55e01c
JA
777 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
778}
779
8ae5e030 780static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 781{
6d652330
AV
782 struct file *file = iocb->ki_filp;
783 struct socket *sock = file->private_data;
0345f931 784 struct msghdr msg = {.msg_iter = *to,
785 .msg_iocb = iocb};
8ae5e030 786 ssize_t res;
ce1d4d3e 787
8ae5e030
AV
788 if (file->f_flags & O_NONBLOCK)
789 msg.msg_flags = MSG_DONTWAIT;
790
791 if (iocb->ki_pos != 0)
1da177e4 792 return -ESPIPE;
027445c3 793
66ee59af 794 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
795 return 0;
796
237dae88 797 res = sock_recvmsg(sock, &msg, iov_iter_count(to), msg.msg_flags);
8ae5e030
AV
798 *to = msg.msg_iter;
799 return res;
1da177e4
LT
800}
801
8ae5e030 802static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 803{
6d652330
AV
804 struct file *file = iocb->ki_filp;
805 struct socket *sock = file->private_data;
0345f931 806 struct msghdr msg = {.msg_iter = *from,
807 .msg_iocb = iocb};
8ae5e030 808 ssize_t res;
1da177e4 809
8ae5e030 810 if (iocb->ki_pos != 0)
ce1d4d3e 811 return -ESPIPE;
027445c3 812
8ae5e030
AV
813 if (file->f_flags & O_NONBLOCK)
814 msg.msg_flags = MSG_DONTWAIT;
815
6d652330
AV
816 if (sock->type == SOCK_SEQPACKET)
817 msg.msg_flags |= MSG_EOR;
818
d8725c86 819 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
820 *from = msg.msg_iter;
821 return res;
1da177e4
LT
822}
823
1da177e4
LT
824/*
825 * Atomic setting of ioctl hooks to avoid race
826 * with module unload.
827 */
828
4a3e2f71 829static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 830static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 831
881d966b 832void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 833{
4a3e2f71 834 mutex_lock(&br_ioctl_mutex);
1da177e4 835 br_ioctl_hook = hook;
4a3e2f71 836 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
837}
838EXPORT_SYMBOL(brioctl_set);
839
4a3e2f71 840static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 841static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 842
881d966b 843void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 844{
4a3e2f71 845 mutex_lock(&vlan_ioctl_mutex);
1da177e4 846 vlan_ioctl_hook = hook;
4a3e2f71 847 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
848}
849EXPORT_SYMBOL(vlan_ioctl_set);
850
4a3e2f71 851static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 852static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 853
89bddce5 854void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 855{
4a3e2f71 856 mutex_lock(&dlci_ioctl_mutex);
1da177e4 857 dlci_ioctl_hook = hook;
4a3e2f71 858 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
859}
860EXPORT_SYMBOL(dlci_ioctl_set);
861
6b96018b
AB
862static long sock_do_ioctl(struct net *net, struct socket *sock,
863 unsigned int cmd, unsigned long arg)
864{
865 int err;
866 void __user *argp = (void __user *)arg;
867
868 err = sock->ops->ioctl(sock, cmd, arg);
869
870 /*
871 * If this ioctl is unknown try to hand it down
872 * to the NIC driver.
873 */
874 if (err == -ENOIOCTLCMD)
875 err = dev_ioctl(net, cmd, argp);
876
877 return err;
878}
879
1da177e4
LT
880/*
881 * With an ioctl, arg may well be a user mode pointer, but we don't know
882 * what to do with it - that's up to the protocol still.
883 */
884
885static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
886{
887 struct socket *sock;
881d966b 888 struct sock *sk;
1da177e4
LT
889 void __user *argp = (void __user *)arg;
890 int pid, err;
881d966b 891 struct net *net;
1da177e4 892
b69aee04 893 sock = file->private_data;
881d966b 894 sk = sock->sk;
3b1e0a65 895 net = sock_net(sk);
1da177e4 896 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 897 err = dev_ioctl(net, cmd, argp);
1da177e4 898 } else
3d23e349 899#ifdef CONFIG_WEXT_CORE
1da177e4 900 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 901 err = dev_ioctl(net, cmd, argp);
1da177e4 902 } else
3d23e349 903#endif
89bddce5 904 switch (cmd) {
1da177e4
LT
905 case FIOSETOWN:
906 case SIOCSPGRP:
907 err = -EFAULT;
908 if (get_user(pid, (int __user *)argp))
909 break;
e0b93edd
JL
910 f_setown(sock->file, pid, 1);
911 err = 0;
1da177e4
LT
912 break;
913 case FIOGETOWN:
914 case SIOCGPGRP:
609d7fa9 915 err = put_user(f_getown(sock->file),
89bddce5 916 (int __user *)argp);
1da177e4
LT
917 break;
918 case SIOCGIFBR:
919 case SIOCSIFBR:
920 case SIOCBRADDBR:
921 case SIOCBRDELBR:
922 err = -ENOPKG;
923 if (!br_ioctl_hook)
924 request_module("bridge");
925
4a3e2f71 926 mutex_lock(&br_ioctl_mutex);
89bddce5 927 if (br_ioctl_hook)
881d966b 928 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 929 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
930 break;
931 case SIOCGIFVLAN:
932 case SIOCSIFVLAN:
933 err = -ENOPKG;
934 if (!vlan_ioctl_hook)
935 request_module("8021q");
936
4a3e2f71 937 mutex_lock(&vlan_ioctl_mutex);
1da177e4 938 if (vlan_ioctl_hook)
881d966b 939 err = vlan_ioctl_hook(net, argp);
4a3e2f71 940 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 941 break;
1da177e4
LT
942 case SIOCADDDLCI:
943 case SIOCDELDLCI:
944 err = -ENOPKG;
945 if (!dlci_ioctl_hook)
946 request_module("dlci");
947
7512cbf6
PE
948 mutex_lock(&dlci_ioctl_mutex);
949 if (dlci_ioctl_hook)
1da177e4 950 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 951 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
952 break;
953 default:
6b96018b 954 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 955 break;
89bddce5 956 }
1da177e4
LT
957 return err;
958}
959
960int sock_create_lite(int family, int type, int protocol, struct socket **res)
961{
962 int err;
963 struct socket *sock = NULL;
89bddce5 964
1da177e4
LT
965 err = security_socket_create(family, type, protocol, 1);
966 if (err)
967 goto out;
968
969 sock = sock_alloc();
970 if (!sock) {
971 err = -ENOMEM;
972 goto out;
973 }
974
1da177e4 975 sock->type = type;
7420ed23
VY
976 err = security_socket_post_create(sock, family, type, protocol, 1);
977 if (err)
978 goto out_release;
979
1da177e4
LT
980out:
981 *res = sock;
982 return err;
7420ed23
VY
983out_release:
984 sock_release(sock);
985 sock = NULL;
986 goto out;
1da177e4 987}
c6d409cf 988EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
989
990/* No kernel lock held - perfect */
89bddce5 991static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 992{
cbf55001 993 unsigned int busy_flag = 0;
1da177e4
LT
994 struct socket *sock;
995
996 /*
89bddce5 997 * We can't return errors to poll, so it's either yes or no.
1da177e4 998 */
b69aee04 999 sock = file->private_data;
2d48d67f 1000
cbf55001 1001 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1002 /* this socket can poll_ll so tell the system call */
cbf55001 1003 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1004
1005 /* once, only if requested by syscall */
cbf55001
ET
1006 if (wait && (wait->_key & POLL_BUSY_LOOP))
1007 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1008 }
1009
cbf55001 1010 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1011}
1012
89bddce5 1013static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1014{
b69aee04 1015 struct socket *sock = file->private_data;
1da177e4
LT
1016
1017 return sock->ops->mmap(file, sock, vma);
1018}
1019
/*
 * release() entry point for socket files: release the socket that is
 * embedded in @inode.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1025
1026/*
1027 * Update the socket async list
1028 *
1029 * Fasync_list locking strategy.
1030 *
1031 * 1. fasync_list is modified only under process context socket lock
1032 * i.e. under semaphore.
1033 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1034 * or under socket lock
1da177e4
LT
1035 */
1036
1037static int sock_fasync(int fd, struct file *filp, int on)
1038{
989a2979
ED
1039 struct socket *sock = filp->private_data;
1040 struct sock *sk = sock->sk;
eaefd110 1041 struct socket_wq *wq;
1da177e4 1042
989a2979 1043 if (sk == NULL)
1da177e4 1044 return -EINVAL;
1da177e4
LT
1045
1046 lock_sock(sk);
eaefd110
ED
1047 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1048 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1049
eaefd110 1050 if (!wq->fasync_list)
989a2979
ED
1051 sock_reset_flag(sk, SOCK_FASYNC);
1052 else
bcdce719 1053 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1054
989a2979 1055 release_sock(sk);
1da177e4
LT
1056 return 0;
1057}
1058
43815482 1059/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1060
1061int sock_wake_async(struct socket *sock, int how, int band)
1062{
43815482
ED
1063 struct socket_wq *wq;
1064
1065 if (!sock)
1066 return -1;
1067 rcu_read_lock();
1068 wq = rcu_dereference(sock->wq);
1069 if (!wq || !wq->fasync_list) {
1070 rcu_read_unlock();
1da177e4 1071 return -1;
43815482 1072 }
89bddce5 1073 switch (how) {
8d8ad9d7 1074 case SOCK_WAKE_WAITD:
1da177e4
LT
1075 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1076 break;
1077 goto call_kill;
8d8ad9d7 1078 case SOCK_WAKE_SPACE:
1da177e4
LT
1079 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1080 break;
1081 /* fall through */
8d8ad9d7 1082 case SOCK_WAKE_IO:
89bddce5 1083call_kill:
43815482 1084 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1085 break;
8d8ad9d7 1086 case SOCK_WAKE_URG:
43815482 1087 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1088 }
43815482 1089 rcu_read_unlock();
1da177e4
LT
1090 return 0;
1091}
c6d409cf 1092EXPORT_SYMBOL(sock_wake_async);
1da177e4 1093
721db93a 1094int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1095 struct socket **res, int kern)
1da177e4
LT
1096{
1097 int err;
1098 struct socket *sock;
55737fda 1099 const struct net_proto_family *pf;
1da177e4
LT
1100
1101 /*
89bddce5 1102 * Check protocol is in range
1da177e4
LT
1103 */
1104 if (family < 0 || family >= NPROTO)
1105 return -EAFNOSUPPORT;
1106 if (type < 0 || type >= SOCK_MAX)
1107 return -EINVAL;
1108
1109 /* Compatibility.
1110
1111 This uglymoron is moved from INET layer to here to avoid
1112 deadlock in module load.
1113 */
1114 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1115 static int warned;
1da177e4
LT
1116 if (!warned) {
1117 warned = 1;
3410f22e
YY
1118 pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1119 current->comm);
1da177e4
LT
1120 }
1121 family = PF_PACKET;
1122 }
1123
1124 err = security_socket_create(family, type, protocol, kern);
1125 if (err)
1126 return err;
89bddce5 1127
55737fda
SH
1128 /*
1129 * Allocate the socket and allow the family to set things up. if
1130 * the protocol is 0, the family is instructed to select an appropriate
1131 * default.
1132 */
1133 sock = sock_alloc();
1134 if (!sock) {
e87cc472 1135 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1136 return -ENFILE; /* Not exactly a match, but its the
1137 closest posix thing */
1138 }
1139
1140 sock->type = type;
1141
95a5afca 1142#ifdef CONFIG_MODULES
89bddce5
SH
1143 /* Attempt to load a protocol module if the find failed.
1144 *
1145 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1146 * requested real, full-featured networking support upon configuration.
1147 * Otherwise module support will break!
1148 */
190683a9 1149 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1150 request_module("net-pf-%d", family);
1da177e4
LT
1151#endif
1152
55737fda
SH
1153 rcu_read_lock();
1154 pf = rcu_dereference(net_families[family]);
1155 err = -EAFNOSUPPORT;
1156 if (!pf)
1157 goto out_release;
1da177e4
LT
1158
1159 /*
1160 * We will call the ->create function, that possibly is in a loadable
1161 * module, so we have to bump that loadable module refcnt first.
1162 */
55737fda 1163 if (!try_module_get(pf->owner))
1da177e4
LT
1164 goto out_release;
1165
55737fda
SH
1166 /* Now protected by module ref count */
1167 rcu_read_unlock();
1168
3f378b68 1169 err = pf->create(net, sock, protocol, kern);
55737fda 1170 if (err < 0)
1da177e4 1171 goto out_module_put;
a79af59e 1172
1da177e4
LT
1173 /*
1174 * Now to bump the refcnt of the [loadable] module that owns this
1175 * socket at sock_release time we decrement its refcnt.
1176 */
55737fda
SH
1177 if (!try_module_get(sock->ops->owner))
1178 goto out_module_busy;
1179
1da177e4
LT
1180 /*
1181 * Now that we're done with the ->create function, the [loadable]
1182 * module can have its refcnt decremented
1183 */
55737fda 1184 module_put(pf->owner);
7420ed23
VY
1185 err = security_socket_post_create(sock, family, type, protocol, kern);
1186 if (err)
3b185525 1187 goto out_sock_release;
55737fda 1188 *res = sock;
1da177e4 1189
55737fda
SH
1190 return 0;
1191
1192out_module_busy:
1193 err = -EAFNOSUPPORT;
1da177e4 1194out_module_put:
55737fda
SH
1195 sock->ops = NULL;
1196 module_put(pf->owner);
1197out_sock_release:
1da177e4 1198 sock_release(sock);
55737fda
SH
1199 return err;
1200
1201out_release:
1202 rcu_read_unlock();
1203 goto out_sock_release;
1da177e4 1204}
721db93a 1205EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1206
1207int sock_create(int family, int type, int protocol, struct socket **res)
1208{
1b8d7ae4 1209 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1210}
c6d409cf 1211EXPORT_SYMBOL(sock_create);
1da177e4 1212
/*
 * Create a socket in namespace @net, passing 1 as the 'kern' argument of
 * __sock_create().
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1218
3e0fa65f 1219SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1220{
1221 int retval;
1222 struct socket *sock;
a677a039
UD
1223 int flags;
1224
e38b36f3
UD
1225 /* Check the SOCK_* constants for consistency. */
1226 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1227 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1228 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1229 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1230
a677a039 1231 flags = type & ~SOCK_TYPE_MASK;
77d27200 1232 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1233 return -EINVAL;
1234 type &= SOCK_TYPE_MASK;
1da177e4 1235
aaca0bdc
UD
1236 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1237 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1238
1da177e4
LT
1239 retval = sock_create(family, type, protocol, &sock);
1240 if (retval < 0)
1241 goto out;
1242
77d27200 1243 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1244 if (retval < 0)
1245 goto out_release;
1246
1247out:
1248 /* It may be already another descriptor 8) Not kernel problem. */
1249 return retval;
1250
1251out_release:
1252 sock_release(sock);
1253 return retval;
1254}
1255
1256/*
1257 * Create a pair of connected sockets.
1258 */
1259
3e0fa65f
HC
1260SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1261 int __user *, usockvec)
1da177e4
LT
1262{
1263 struct socket *sock1, *sock2;
1264 int fd1, fd2, err;
db349509 1265 struct file *newfile1, *newfile2;
a677a039
UD
1266 int flags;
1267
1268 flags = type & ~SOCK_TYPE_MASK;
77d27200 1269 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1270 return -EINVAL;
1271 type &= SOCK_TYPE_MASK;
1da177e4 1272
aaca0bdc
UD
1273 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1274 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1275
1da177e4
LT
1276 /*
1277 * Obtain the first socket and check if the underlying protocol
1278 * supports the socketpair call.
1279 */
1280
1281 err = sock_create(family, type, protocol, &sock1);
1282 if (err < 0)
1283 goto out;
1284
1285 err = sock_create(family, type, protocol, &sock2);
1286 if (err < 0)
1287 goto out_release_1;
1288
1289 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1290 if (err < 0)
1da177e4
LT
1291 goto out_release_both;
1292
28407630 1293 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1294 if (unlikely(fd1 < 0)) {
1295 err = fd1;
db349509 1296 goto out_release_both;
bf3c23d1 1297 }
d73aa286 1298
28407630 1299 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1300 if (unlikely(fd2 < 0)) {
1301 err = fd2;
d73aa286 1302 goto out_put_unused_1;
28407630
AV
1303 }
1304
aab174f0 1305 newfile1 = sock_alloc_file(sock1, flags, NULL);
28407630
AV
1306 if (unlikely(IS_ERR(newfile1))) {
1307 err = PTR_ERR(newfile1);
d73aa286 1308 goto out_put_unused_both;
28407630
AV
1309 }
1310
aab174f0 1311 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1312 if (IS_ERR(newfile2)) {
1313 err = PTR_ERR(newfile2);
d73aa286 1314 goto out_fput_1;
db349509
AV
1315 }
1316
d73aa286
YD
1317 err = put_user(fd1, &usockvec[0]);
1318 if (err)
1319 goto out_fput_both;
1320
1321 err = put_user(fd2, &usockvec[1]);
1322 if (err)
1323 goto out_fput_both;
1324
157cf649 1325 audit_fd_pair(fd1, fd2);
d73aa286 1326
db349509
AV
1327 fd_install(fd1, newfile1);
1328 fd_install(fd2, newfile2);
1da177e4
LT
1329 /* fd1 and fd2 may be already another descriptors.
1330 * Not kernel problem.
1331 */
1332
d73aa286 1333 return 0;
1da177e4 1334
d73aa286
YD
1335out_fput_both:
1336 fput(newfile2);
1337 fput(newfile1);
1338 put_unused_fd(fd2);
1339 put_unused_fd(fd1);
1340 goto out;
1341
1342out_fput_1:
1343 fput(newfile1);
1344 put_unused_fd(fd2);
1345 put_unused_fd(fd1);
1346 sock_release(sock2);
1347 goto out;
1da177e4 1348
d73aa286
YD
1349out_put_unused_both:
1350 put_unused_fd(fd2);
1351out_put_unused_1:
1352 put_unused_fd(fd1);
1da177e4 1353out_release_both:
89bddce5 1354 sock_release(sock2);
1da177e4 1355out_release_1:
89bddce5 1356 sock_release(sock1);
1da177e4
LT
1357out:
1358 return err;
1359}
1360
1da177e4
LT
1361/*
1362 * Bind a name to a socket. Nothing much to do here since it's
1363 * the protocol's responsibility to handle the local address.
1364 *
1365 * We move the socket address to kernel space before we call
1366 * the protocol layer (having also checked the address is ok).
1367 */
1368
20f37034 1369SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1370{
1371 struct socket *sock;
230b1839 1372 struct sockaddr_storage address;
6cb153ca 1373 int err, fput_needed;
1da177e4 1374
89bddce5 1375 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1376 if (sock) {
43db362d 1377 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1378 if (err >= 0) {
1379 err = security_socket_bind(sock,
230b1839 1380 (struct sockaddr *)&address,
89bddce5 1381 addrlen);
6cb153ca
BL
1382 if (!err)
1383 err = sock->ops->bind(sock,
89bddce5 1384 (struct sockaddr *)
230b1839 1385 &address, addrlen);
1da177e4 1386 }
6cb153ca 1387 fput_light(sock->file, fput_needed);
89bddce5 1388 }
1da177e4
LT
1389 return err;
1390}
1391
1da177e4
LT
1392/*
1393 * Perform a listen. Basically, we allow the protocol to do anything
1394 * necessary for a listen, and if that works, we mark the socket as
1395 * ready for listening.
1396 */
1397
3e0fa65f 1398SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1399{
1400 struct socket *sock;
6cb153ca 1401 int err, fput_needed;
b8e1f9b5 1402 int somaxconn;
89bddce5
SH
1403
1404 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1405 if (sock) {
8efa6e93 1406 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1407 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1408 backlog = somaxconn;
1da177e4
LT
1409
1410 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1411 if (!err)
1412 err = sock->ops->listen(sock, backlog);
1da177e4 1413
6cb153ca 1414 fput_light(sock->file, fput_needed);
1da177e4
LT
1415 }
1416 return err;
1417}
1418
1da177e4
LT
1419/*
1420 * For accept, we attempt to create a new socket, set up the link
1421 * with the client, wake up the client, then return the new
1422 * connected fd. We collect the address of the connector in kernel
1423 * space and move it to user at the very end. This is unclean because
1424 * we open the socket then return an error.
1425 *
1426 * 1003.1g adds the ability to recvmsg() to query connection pending
1427 * status to recvmsg. We need to add that support in a way thats
1428 * clean when we restucture accept also.
1429 */
1430
20f37034
HC
1431SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1432 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1433{
1434 struct socket *sock, *newsock;
39d8c1b6 1435 struct file *newfile;
6cb153ca 1436 int err, len, newfd, fput_needed;
230b1839 1437 struct sockaddr_storage address;
1da177e4 1438
77d27200 1439 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1440 return -EINVAL;
1441
1442 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1443 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1444
6cb153ca 1445 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1446 if (!sock)
1447 goto out;
1448
1449 err = -ENFILE;
c6d409cf
ED
1450 newsock = sock_alloc();
1451 if (!newsock)
1da177e4
LT
1452 goto out_put;
1453
1454 newsock->type = sock->type;
1455 newsock->ops = sock->ops;
1456
1da177e4
LT
1457 /*
1458 * We don't need try_module_get here, as the listening socket (sock)
1459 * has the protocol module (sock->ops->owner) held.
1460 */
1461 __module_get(newsock->ops->owner);
1462
28407630 1463 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1464 if (unlikely(newfd < 0)) {
1465 err = newfd;
9a1875e6
DM
1466 sock_release(newsock);
1467 goto out_put;
39d8c1b6 1468 }
aab174f0 1469 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
28407630
AV
1470 if (unlikely(IS_ERR(newfile))) {
1471 err = PTR_ERR(newfile);
1472 put_unused_fd(newfd);
1473 sock_release(newsock);
1474 goto out_put;
1475 }
39d8c1b6 1476
a79af59e
FF
1477 err = security_socket_accept(sock, newsock);
1478 if (err)
39d8c1b6 1479 goto out_fd;
a79af59e 1480
1da177e4
LT
1481 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1482 if (err < 0)
39d8c1b6 1483 goto out_fd;
1da177e4
LT
1484
1485 if (upeer_sockaddr) {
230b1839 1486 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1487 &len, 2) < 0) {
1da177e4 1488 err = -ECONNABORTED;
39d8c1b6 1489 goto out_fd;
1da177e4 1490 }
43db362d 1491 err = move_addr_to_user(&address,
230b1839 1492 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1493 if (err < 0)
39d8c1b6 1494 goto out_fd;
1da177e4
LT
1495 }
1496
1497 /* File flags are not inherited via accept() unlike another OSes. */
1498
39d8c1b6
DM
1499 fd_install(newfd, newfile);
1500 err = newfd;
1da177e4 1501
1da177e4 1502out_put:
6cb153ca 1503 fput_light(sock->file, fput_needed);
1da177e4
LT
1504out:
1505 return err;
39d8c1b6 1506out_fd:
9606a216 1507 fput(newfile);
39d8c1b6 1508 put_unused_fd(newfd);
1da177e4
LT
1509 goto out_put;
1510}
1511
20f37034
HC
1512SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1513 int __user *, upeer_addrlen)
aaca0bdc 1514{
de11defe 1515 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1516}
1517
1da177e4
LT
1518/*
1519 * Attempt to connect to a socket with the server address. The address
1520 * is in user space so we verify it is OK and move it to kernel space.
1521 *
1522 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1523 * break bindings
1524 *
1525 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1526 * other SEQPACKET protocols that take time to connect() as it doesn't
1527 * include the -EINPROGRESS status for such sockets.
1528 */
1529
20f37034
HC
1530SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1531 int, addrlen)
1da177e4
LT
1532{
1533 struct socket *sock;
230b1839 1534 struct sockaddr_storage address;
6cb153ca 1535 int err, fput_needed;
1da177e4 1536
6cb153ca 1537 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1538 if (!sock)
1539 goto out;
43db362d 1540 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1541 if (err < 0)
1542 goto out_put;
1543
89bddce5 1544 err =
230b1839 1545 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1546 if (err)
1547 goto out_put;
1548
230b1839 1549 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1550 sock->file->f_flags);
1551out_put:
6cb153ca 1552 fput_light(sock->file, fput_needed);
1da177e4
LT
1553out:
1554 return err;
1555}
1556
1557/*
1558 * Get the local address ('name') of a socket object. Move the obtained
1559 * name to user space.
1560 */
1561
20f37034
HC
1562SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1563 int __user *, usockaddr_len)
1da177e4
LT
1564{
1565 struct socket *sock;
230b1839 1566 struct sockaddr_storage address;
6cb153ca 1567 int len, err, fput_needed;
89bddce5 1568
6cb153ca 1569 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1570 if (!sock)
1571 goto out;
1572
1573 err = security_socket_getsockname(sock);
1574 if (err)
1575 goto out_put;
1576
230b1839 1577 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1578 if (err)
1579 goto out_put;
43db362d 1580 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1581
1582out_put:
6cb153ca 1583 fput_light(sock->file, fput_needed);
1da177e4
LT
1584out:
1585 return err;
1586}
1587
1588/*
1589 * Get the remote address ('name') of a socket object. Move the obtained
1590 * name to user space.
1591 */
1592
20f37034
HC
1593SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1594 int __user *, usockaddr_len)
1da177e4
LT
1595{
1596 struct socket *sock;
230b1839 1597 struct sockaddr_storage address;
6cb153ca 1598 int len, err, fput_needed;
1da177e4 1599
89bddce5
SH
1600 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1601 if (sock != NULL) {
1da177e4
LT
1602 err = security_socket_getpeername(sock);
1603 if (err) {
6cb153ca 1604 fput_light(sock->file, fput_needed);
1da177e4
LT
1605 return err;
1606 }
1607
89bddce5 1608 err =
230b1839 1609 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1610 1);
1da177e4 1611 if (!err)
43db362d 1612 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1613 usockaddr_len);
6cb153ca 1614 fput_light(sock->file, fput_needed);
1da177e4
LT
1615 }
1616 return err;
1617}
1618
1619/*
1620 * Send a datagram to a given address. We move the address into kernel
1621 * space and check the user space data area is readable before invoking
1622 * the protocol.
1623 */
1624
3e0fa65f 1625SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1626 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1627 int, addr_len)
1da177e4
LT
1628{
1629 struct socket *sock;
230b1839 1630 struct sockaddr_storage address;
1da177e4
LT
1631 int err;
1632 struct msghdr msg;
1633 struct iovec iov;
6cb153ca 1634 int fput_needed;
6cb153ca 1635
602bd0e9
AV
1636 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1637 if (unlikely(err))
1638 return err;
de0fa95c
PE
1639 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1640 if (!sock)
4387ff75 1641 goto out;
6cb153ca 1642
89bddce5 1643 msg.msg_name = NULL;
89bddce5
SH
1644 msg.msg_control = NULL;
1645 msg.msg_controllen = 0;
1646 msg.msg_namelen = 0;
6cb153ca 1647 if (addr) {
43db362d 1648 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1649 if (err < 0)
1650 goto out_put;
230b1839 1651 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1652 msg.msg_namelen = addr_len;
1da177e4
LT
1653 }
1654 if (sock->file->f_flags & O_NONBLOCK)
1655 flags |= MSG_DONTWAIT;
1656 msg.msg_flags = flags;
d8725c86 1657 err = sock_sendmsg(sock, &msg);
1da177e4 1658
89bddce5 1659out_put:
de0fa95c 1660 fput_light(sock->file, fput_needed);
4387ff75 1661out:
1da177e4
LT
1662 return err;
1663}
1664
1665/*
89bddce5 1666 * Send a datagram down a socket.
1da177e4
LT
1667 */
1668
3e0fa65f 1669SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1670 unsigned int, flags)
1da177e4
LT
1671{
1672 return sys_sendto(fd, buff, len, flags, NULL, 0);
1673}
1674
1675/*
89bddce5 1676 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1677 * sender. We verify the buffers are writable and if needed move the
1678 * sender address from kernel to user space.
1679 */
1680
3e0fa65f 1681SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1682 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1683 int __user *, addr_len)
1da177e4
LT
1684{
1685 struct socket *sock;
1686 struct iovec iov;
1687 struct msghdr msg;
230b1839 1688 struct sockaddr_storage address;
89bddce5 1689 int err, err2;
6cb153ca
BL
1690 int fput_needed;
1691
602bd0e9
AV
1692 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1693 if (unlikely(err))
1694 return err;
de0fa95c 1695 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1696 if (!sock)
de0fa95c 1697 goto out;
1da177e4 1698
89bddce5
SH
1699 msg.msg_control = NULL;
1700 msg.msg_controllen = 0;
f3d33426
HFS
1701 /* Save some cycles and don't copy the address if not needed */
1702 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1703 /* We assume all kernel code knows the size of sockaddr_storage */
1704 msg.msg_namelen = 0;
1da177e4
LT
1705 if (sock->file->f_flags & O_NONBLOCK)
1706 flags |= MSG_DONTWAIT;
602bd0e9 1707 err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags);
1da177e4 1708
89bddce5 1709 if (err >= 0 && addr != NULL) {
43db362d 1710 err2 = move_addr_to_user(&address,
230b1839 1711 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1712 if (err2 < 0)
1713 err = err2;
1da177e4 1714 }
de0fa95c
PE
1715
1716 fput_light(sock->file, fput_needed);
4387ff75 1717out:
1da177e4
LT
1718 return err;
1719}
1720
1721/*
89bddce5 1722 * Receive a datagram from a socket.
1da177e4
LT
1723 */
1724
b7c0ddf5
JG
1725SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1726 unsigned int, flags)
1da177e4
LT
1727{
1728 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1729}
1730
1731/*
1732 * Set a socket option. Because we don't know the option lengths we have
1733 * to pass the user mode parameter for the protocols to sort out.
1734 */
1735
20f37034
HC
1736SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1737 char __user *, optval, int, optlen)
1da177e4 1738{
6cb153ca 1739 int err, fput_needed;
1da177e4
LT
1740 struct socket *sock;
1741
1742 if (optlen < 0)
1743 return -EINVAL;
89bddce5
SH
1744
1745 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1746 if (sock != NULL) {
1747 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1748 if (err)
1749 goto out_put;
1da177e4
LT
1750
1751 if (level == SOL_SOCKET)
89bddce5
SH
1752 err =
1753 sock_setsockopt(sock, level, optname, optval,
1754 optlen);
1da177e4 1755 else
89bddce5
SH
1756 err =
1757 sock->ops->setsockopt(sock, level, optname, optval,
1758 optlen);
6cb153ca
BL
1759out_put:
1760 fput_light(sock->file, fput_needed);
1da177e4
LT
1761 }
1762 return err;
1763}
1764
1765/*
1766 * Get a socket option. Because we don't know the option lengths we have
1767 * to pass a user mode parameter for the protocols to sort out.
1768 */
1769
20f37034
HC
1770SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1771 char __user *, optval, int __user *, optlen)
1da177e4 1772{
6cb153ca 1773 int err, fput_needed;
1da177e4
LT
1774 struct socket *sock;
1775
89bddce5
SH
1776 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1777 if (sock != NULL) {
6cb153ca
BL
1778 err = security_socket_getsockopt(sock, level, optname);
1779 if (err)
1780 goto out_put;
1da177e4
LT
1781
1782 if (level == SOL_SOCKET)
89bddce5
SH
1783 err =
1784 sock_getsockopt(sock, level, optname, optval,
1785 optlen);
1da177e4 1786 else
89bddce5
SH
1787 err =
1788 sock->ops->getsockopt(sock, level, optname, optval,
1789 optlen);
6cb153ca
BL
1790out_put:
1791 fput_light(sock->file, fput_needed);
1da177e4
LT
1792 }
1793 return err;
1794}
1795
1da177e4
LT
1796/*
1797 * Shutdown a socket.
1798 */
1799
754fe8d2 1800SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1801{
6cb153ca 1802 int err, fput_needed;
1da177e4
LT
1803 struct socket *sock;
1804
89bddce5
SH
1805 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1806 if (sock != NULL) {
1da177e4 1807 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1808 if (!err)
1809 err = sock->ops->shutdown(sock, how);
1810 fput_light(sock->file, fput_needed);
1da177e4
LT
1811 }
1812 return err;
1813}
1814
/*
 * Helpers that yield the address of a message-header field which has the
 * same type (int / unsigned) in the 32-bit and 64-bit layouts on our
 * platforms.
 *
 * They expand in the caller's scope: a variable named 'flags' must be
 * visible, and for a header pointer 'msg' a companion pointer named
 * 'msg_compat' must exist.  MSG_CMSG_COMPAT in 'flags' selects the compat
 * layout.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1821
c71d8ebe
TH
1822struct used_address {
1823 struct sockaddr_storage name;
1824 unsigned int name_len;
1825};
1826
da184284
AV
1827static int copy_msghdr_from_user(struct msghdr *kmsg,
1828 struct user_msghdr __user *umsg,
1829 struct sockaddr __user **save_addr,
1830 struct iovec **iov)
1661bf36 1831{
08adb7da
AV
1832 struct sockaddr __user *uaddr;
1833 struct iovec __user *uiov;
c0371da6 1834 size_t nr_segs;
08adb7da
AV
1835 ssize_t err;
1836
1837 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1838 __get_user(uaddr, &umsg->msg_name) ||
1839 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1840 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1841 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1842 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1843 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1844 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1845 return -EFAULT;
dbb490b9 1846
08adb7da 1847 if (!uaddr)
6a2a2b3a
AS
1848 kmsg->msg_namelen = 0;
1849
dbb490b9
ML
1850 if (kmsg->msg_namelen < 0)
1851 return -EINVAL;
1852
1661bf36 1853 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1854 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1855
1856 if (save_addr)
1857 *save_addr = uaddr;
1858
1859 if (uaddr && kmsg->msg_namelen) {
1860 if (!save_addr) {
1861 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1862 kmsg->msg_name);
1863 if (err < 0)
1864 return err;
1865 }
1866 } else {
1867 kmsg->msg_name = NULL;
1868 kmsg->msg_namelen = 0;
1869 }
1870
c0371da6 1871 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1872 return -EMSGSIZE;
1873
0345f931 1874 kmsg->msg_iocb = NULL;
1875
da184284
AV
1876 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1877 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1878}
1879
666547ff 1880static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1881 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1882 struct used_address *used_address)
1da177e4 1883{
89bddce5
SH
1884 struct compat_msghdr __user *msg_compat =
1885 (struct compat_msghdr __user *)msg;
230b1839 1886 struct sockaddr_storage address;
1da177e4 1887 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1888 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1889 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1890 /* 20 is size of ipv6_pktinfo */
1da177e4 1891 unsigned char *ctl_buf = ctl;
d8725c86 1892 int ctl_len;
08adb7da 1893 ssize_t err;
89bddce5 1894
08adb7da 1895 msg_sys->msg_name = &address;
1da177e4 1896
08449320 1897 if (MSG_CMSG_COMPAT & flags)
08adb7da 1898 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1899 else
08adb7da 1900 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1901 if (err < 0)
da184284 1902 return err;
1da177e4
LT
1903
1904 err = -ENOBUFS;
1905
228e548e 1906 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1907 goto out_freeiov;
228e548e 1908 ctl_len = msg_sys->msg_controllen;
1da177e4 1909 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1910 err =
228e548e 1911 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1912 sizeof(ctl));
1da177e4
LT
1913 if (err)
1914 goto out_freeiov;
228e548e
AB
1915 ctl_buf = msg_sys->msg_control;
1916 ctl_len = msg_sys->msg_controllen;
1da177e4 1917 } else if (ctl_len) {
89bddce5 1918 if (ctl_len > sizeof(ctl)) {
1da177e4 1919 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1920 if (ctl_buf == NULL)
1da177e4
LT
1921 goto out_freeiov;
1922 }
1923 err = -EFAULT;
1924 /*
228e548e 1925 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1926 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1927 * checking falls down on this.
1928 */
fb8621bb 1929 if (copy_from_user(ctl_buf,
228e548e 1930 (void __user __force *)msg_sys->msg_control,
89bddce5 1931 ctl_len))
1da177e4 1932 goto out_freectl;
228e548e 1933 msg_sys->msg_control = ctl_buf;
1da177e4 1934 }
228e548e 1935 msg_sys->msg_flags = flags;
1da177e4
LT
1936
1937 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1938 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1939 /*
1940 * If this is sendmmsg() and current destination address is same as
1941 * previously succeeded address, omit asking LSM's decision.
1942 * used_address->name_len is initialized to UINT_MAX so that the first
1943 * destination address never matches.
1944 */
bc909d9d
MD
1945 if (used_address && msg_sys->msg_name &&
1946 used_address->name_len == msg_sys->msg_namelen &&
1947 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1948 used_address->name_len)) {
d8725c86 1949 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1950 goto out_freectl;
1951 }
d8725c86 1952 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1953 /*
1954 * If this is sendmmsg() and sending to current destination address was
1955 * successful, remember it.
1956 */
1957 if (used_address && err >= 0) {
1958 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1959 if (msg_sys->msg_name)
1960 memcpy(&used_address->name, msg_sys->msg_name,
1961 used_address->name_len);
c71d8ebe 1962 }
1da177e4
LT
1963
1964out_freectl:
89bddce5 1965 if (ctl_buf != ctl)
1da177e4
LT
1966 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1967out_freeiov:
da184284 1968 kfree(iov);
228e548e
AB
1969 return err;
1970}
1971
1972/*
1973 * BSD sendmsg interface
1974 */
1975
666547ff 1976long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
1977{
1978 int fput_needed, err;
1979 struct msghdr msg_sys;
1be374a0
AL
1980 struct socket *sock;
1981
1be374a0 1982 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
1983 if (!sock)
1984 goto out;
1985
a7526eb5 1986 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 1987
6cb153ca 1988 fput_light(sock->file, fput_needed);
89bddce5 1989out:
1da177e4
LT
1990 return err;
1991}
1992
666547ff 1993SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
1994{
1995 if (flags & MSG_CMSG_COMPAT)
1996 return -EINVAL;
1997 return __sys_sendmsg(fd, msg, flags);
1998}
1999
228e548e
AB
2000/*
2001 * Linux sendmmsg interface
2002 */
2003
2004int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2005 unsigned int flags)
2006{
2007 int fput_needed, err, datagrams;
2008 struct socket *sock;
2009 struct mmsghdr __user *entry;
2010 struct compat_mmsghdr __user *compat_entry;
2011 struct msghdr msg_sys;
c71d8ebe 2012 struct used_address used_address;
228e548e 2013
98382f41
AB
2014 if (vlen > UIO_MAXIOV)
2015 vlen = UIO_MAXIOV;
228e548e
AB
2016
2017 datagrams = 0;
2018
2019 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2020 if (!sock)
2021 return err;
2022
c71d8ebe 2023 used_address.name_len = UINT_MAX;
228e548e
AB
2024 entry = mmsg;
2025 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2026 err = 0;
228e548e
AB
2027
2028 while (datagrams < vlen) {
228e548e 2029 if (MSG_CMSG_COMPAT & flags) {
666547ff 2030 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5 2031 &msg_sys, flags, &used_address);
228e548e
AB
2032 if (err < 0)
2033 break;
2034 err = __put_user(err, &compat_entry->msg_len);
2035 ++compat_entry;
2036 } else {
a7526eb5 2037 err = ___sys_sendmsg(sock,
666547ff 2038 (struct user_msghdr __user *)entry,
a7526eb5 2039 &msg_sys, flags, &used_address);
228e548e
AB
2040 if (err < 0)
2041 break;
2042 err = put_user(err, &entry->msg_len);
2043 ++entry;
2044 }
2045
2046 if (err)
2047 break;
2048 ++datagrams;
2049 }
2050
228e548e
AB
2051 fput_light(sock->file, fput_needed);
2052
728ffb86
AB
2053 /* We only return an error if no datagrams were able to be sent */
2054 if (datagrams != 0)
228e548e
AB
2055 return datagrams;
2056
228e548e
AB
2057 return err;
2058}
2059
2060SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2061 unsigned int, vlen, unsigned int, flags)
2062{
1be374a0
AL
2063 if (flags & MSG_CMSG_COMPAT)
2064 return -EINVAL;
228e548e
AB
2065 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2066}
2067
666547ff 2068static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2069 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2070{
89bddce5
SH
2071 struct compat_msghdr __user *msg_compat =
2072 (struct compat_msghdr __user *)msg;
1da177e4 2073 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2074 struct iovec *iov = iovstack;
1da177e4 2075 unsigned long cmsg_ptr;
08adb7da
AV
2076 int total_len, len;
2077 ssize_t err;
1da177e4
LT
2078
2079 /* kernel mode address */
230b1839 2080 struct sockaddr_storage addr;
1da177e4
LT
2081
2082 /* user mode address pointers */
2083 struct sockaddr __user *uaddr;
08adb7da 2084 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2085
08adb7da 2086 msg_sys->msg_name = &addr;
1da177e4 2087
f3d33426 2088 if (MSG_CMSG_COMPAT & flags)
08adb7da 2089 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2090 else
08adb7da 2091 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2092 if (err < 0)
da184284
AV
2093 return err;
2094 total_len = iov_iter_count(&msg_sys->msg_iter);
1da177e4 2095
a2e27255
ACM
2096 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2097 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2098
f3d33426
HFS
2099 /* We assume all kernel code knows the size of sockaddr_storage */
2100 msg_sys->msg_namelen = 0;
2101
1da177e4
LT
2102 if (sock->file->f_flags & O_NONBLOCK)
2103 flags |= MSG_DONTWAIT;
a2e27255
ACM
2104 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2105 total_len, flags);
1da177e4
LT
2106 if (err < 0)
2107 goto out_freeiov;
2108 len = err;
2109
2110 if (uaddr != NULL) {
43db362d 2111 err = move_addr_to_user(&addr,
a2e27255 2112 msg_sys->msg_namelen, uaddr,
89bddce5 2113 uaddr_len);
1da177e4
LT
2114 if (err < 0)
2115 goto out_freeiov;
2116 }
a2e27255 2117 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2118 COMPAT_FLAGS(msg));
1da177e4
LT
2119 if (err)
2120 goto out_freeiov;
2121 if (MSG_CMSG_COMPAT & flags)
a2e27255 2122 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2123 &msg_compat->msg_controllen);
2124 else
a2e27255 2125 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2126 &msg->msg_controllen);
2127 if (err)
2128 goto out_freeiov;
2129 err = len;
2130
2131out_freeiov:
da184284 2132 kfree(iov);
a2e27255
ACM
2133 return err;
2134}
2135
2136/*
2137 * BSD recvmsg interface
2138 */
2139
666547ff 2140long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2141{
2142 int fput_needed, err;
2143 struct msghdr msg_sys;
1be374a0
AL
2144 struct socket *sock;
2145
1be374a0 2146 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2147 if (!sock)
2148 goto out;
2149
a7526eb5 2150 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2151
6cb153ca 2152 fput_light(sock->file, fput_needed);
1da177e4
LT
2153out:
2154 return err;
2155}
2156
666547ff 2157SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2158 unsigned int, flags)
2159{
2160 if (flags & MSG_CMSG_COMPAT)
2161 return -EINVAL;
2162 return __sys_recvmsg(fd, msg, flags);
2163}
2164
a2e27255
ACM
2165/*
2166 * Linux recvmmsg interface
2167 */
2168
2169int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2170 unsigned int flags, struct timespec *timeout)
2171{
2172 int fput_needed, err, datagrams;
2173 struct socket *sock;
2174 struct mmsghdr __user *entry;
d7256d0e 2175 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2176 struct msghdr msg_sys;
2177 struct timespec end_time;
2178
2179 if (timeout &&
2180 poll_select_set_timeout(&end_time, timeout->tv_sec,
2181 timeout->tv_nsec))
2182 return -EINVAL;
2183
2184 datagrams = 0;
2185
2186 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2187 if (!sock)
2188 return err;
2189
2190 err = sock_error(sock->sk);
2191 if (err)
2192 goto out_put;
2193
2194 entry = mmsg;
d7256d0e 2195 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2196
2197 while (datagrams < vlen) {
2198 /*
2199 * No need to ask LSM for more than the first datagram.
2200 */
d7256d0e 2201 if (MSG_CMSG_COMPAT & flags) {
666547ff 2202 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2203 &msg_sys, flags & ~MSG_WAITFORONE,
2204 datagrams);
d7256d0e
JMG
2205 if (err < 0)
2206 break;
2207 err = __put_user(err, &compat_entry->msg_len);
2208 ++compat_entry;
2209 } else {
a7526eb5 2210 err = ___sys_recvmsg(sock,
666547ff 2211 (struct user_msghdr __user *)entry,
a7526eb5
AL
2212 &msg_sys, flags & ~MSG_WAITFORONE,
2213 datagrams);
d7256d0e
JMG
2214 if (err < 0)
2215 break;
2216 err = put_user(err, &entry->msg_len);
2217 ++entry;
2218 }
2219
a2e27255
ACM
2220 if (err)
2221 break;
a2e27255
ACM
2222 ++datagrams;
2223
71c5c159
BB
2224 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2225 if (flags & MSG_WAITFORONE)
2226 flags |= MSG_DONTWAIT;
2227
a2e27255
ACM
2228 if (timeout) {
2229 ktime_get_ts(timeout);
2230 *timeout = timespec_sub(end_time, *timeout);
2231 if (timeout->tv_sec < 0) {
2232 timeout->tv_sec = timeout->tv_nsec = 0;
2233 break;
2234 }
2235
2236 /* Timeout, return less than vlen datagrams */
2237 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2238 break;
2239 }
2240
2241 /* Out of band data, return right away */
2242 if (msg_sys.msg_flags & MSG_OOB)
2243 break;
2244 }
2245
2246out_put:
2247 fput_light(sock->file, fput_needed);
1da177e4 2248
a2e27255
ACM
2249 if (err == 0)
2250 return datagrams;
2251
2252 if (datagrams != 0) {
2253 /*
2254 * We may return less entries than requested (vlen) if the
2255 * sock is non block and there aren't enough datagrams...
2256 */
2257 if (err != -EAGAIN) {
2258 /*
2259 * ... or if recvmsg returns an error after we
2260 * received some datagrams, where we record the
2261 * error to return on the next call or if the
2262 * app asks about it using getsockopt(SO_ERROR).
2263 */
2264 sock->sk->sk_err = -err;
2265 }
2266
2267 return datagrams;
2268 }
2269
2270 return err;
2271}
2272
2273SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2274 unsigned int, vlen, unsigned int, flags,
2275 struct timespec __user *, timeout)
2276{
2277 int datagrams;
2278 struct timespec timeout_sys;
2279
1be374a0
AL
2280 if (flags & MSG_CMSG_COMPAT)
2281 return -EINVAL;
2282
a2e27255
ACM
2283 if (!timeout)
2284 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2285
2286 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2287 return -EFAULT;
2288
2289 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2290
2291 if (datagrams > 0 &&
2292 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2293 datagrams = -EFAULT;
2294
2295 return datagrams;
2296}
2297
2298#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
2299/* Argument list sizes for sys_socketcall */
/* AL(n): size in bytes of an argument block holding n unsigned longs. */
#define AL(x) ((x) * sizeof(unsigned long))
/* Indexed by the socketcall 'call' number; entry 0 is unused. */
static const unsigned char nargs[21] = {
	AL(0),	/*  0 */
	AL(3),	/*  1 */
	AL(3),	/*  2 */
	AL(3),	/*  3 */
	AL(2),	/*  4 */
	AL(3),	/*  5 */
	AL(3),	/*  6 */
	AL(3),	/*  7 */
	AL(4),	/*  8 */
	AL(4),	/*  9 */
	AL(4),	/* 10 */
	AL(6),	/* 11 */
	AL(6),	/* 12 */
	AL(2),	/* 13 */
	AL(5),	/* 14 */
	AL(5),	/* 15 */
	AL(3),	/* 16 */
	AL(3),	/* 17 */
	AL(4),	/* 18 */
	AL(5),	/* 19 */
	AL(4),	/* 20 */
};

#undef AL
2309
2310/*
89bddce5 2311 * System call vectors.
1da177e4
LT
2312 *
2313 * Argument checking cleaned up. Saved 20% in size.
2314 * This function doesn't need to set the kernel lock because
89bddce5 2315 * it is set by the callees.
1da177e4
LT
2316 */
2317
3e0fa65f 2318SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2319{
2950fa9d 2320 unsigned long a[AUDITSC_ARGS];
89bddce5 2321 unsigned long a0, a1;
1da177e4 2322 int err;
47379052 2323 unsigned int len;
1da177e4 2324
228e548e 2325 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2326 return -EINVAL;
2327
47379052
AV
2328 len = nargs[call];
2329 if (len > sizeof(a))
2330 return -EINVAL;
2331
1da177e4 2332 /* copy_from_user should be SMP safe. */
47379052 2333 if (copy_from_user(a, args, len))
1da177e4 2334 return -EFAULT;
3ec3b2fb 2335
2950fa9d
CG
2336 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2337 if (err)
2338 return err;
3ec3b2fb 2339
89bddce5
SH
2340 a0 = a[0];
2341 a1 = a[1];
2342
2343 switch (call) {
2344 case SYS_SOCKET:
2345 err = sys_socket(a0, a1, a[2]);
2346 break;
2347 case SYS_BIND:
2348 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2349 break;
2350 case SYS_CONNECT:
2351 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2352 break;
2353 case SYS_LISTEN:
2354 err = sys_listen(a0, a1);
2355 break;
2356 case SYS_ACCEPT:
de11defe
UD
2357 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2358 (int __user *)a[2], 0);
89bddce5
SH
2359 break;
2360 case SYS_GETSOCKNAME:
2361 err =
2362 sys_getsockname(a0, (struct sockaddr __user *)a1,
2363 (int __user *)a[2]);
2364 break;
2365 case SYS_GETPEERNAME:
2366 err =
2367 sys_getpeername(a0, (struct sockaddr __user *)a1,
2368 (int __user *)a[2]);
2369 break;
2370 case SYS_SOCKETPAIR:
2371 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2372 break;
2373 case SYS_SEND:
2374 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2375 break;
2376 case SYS_SENDTO:
2377 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2378 (struct sockaddr __user *)a[4], a[5]);
2379 break;
2380 case SYS_RECV:
2381 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2382 break;
2383 case SYS_RECVFROM:
2384 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2385 (struct sockaddr __user *)a[4],
2386 (int __user *)a[5]);
2387 break;
2388 case SYS_SHUTDOWN:
2389 err = sys_shutdown(a0, a1);
2390 break;
2391 case SYS_SETSOCKOPT:
2392 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2393 break;
2394 case SYS_GETSOCKOPT:
2395 err =
2396 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2397 (int __user *)a[4]);
2398 break;
2399 case SYS_SENDMSG:
666547ff 2400 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2401 break;
228e548e
AB
2402 case SYS_SENDMMSG:
2403 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2404 break;
89bddce5 2405 case SYS_RECVMSG:
666547ff 2406 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2407 break;
a2e27255
ACM
2408 case SYS_RECVMMSG:
2409 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2410 (struct timespec __user *)a[4]);
2411 break;
de11defe
UD
2412 case SYS_ACCEPT4:
2413 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2414 (int __user *)a[2], a[3]);
aaca0bdc 2415 break;
89bddce5
SH
2416 default:
2417 err = -EINVAL;
2418 break;
1da177e4
LT
2419 }
2420 return err;
2421}
2422
89bddce5 2423#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2424
55737fda
SH
2425/**
2426 * sock_register - add a socket protocol handler
2427 * @ops: description of protocol
2428 *
1da177e4
LT
2429 * This function is called by a protocol handler that wants to
2430 * advertise its address family, and have it linked into the
e793c0f7 2431 * socket interface. The value ops->family corresponds to the
55737fda 2432 * socket system call protocol family.
1da177e4 2433 */
f0fd27d4 2434int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2435{
2436 int err;
2437
2438 if (ops->family >= NPROTO) {
3410f22e 2439 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2440 return -ENOBUFS;
2441 }
55737fda
SH
2442
2443 spin_lock(&net_family_lock);
190683a9
ED
2444 if (rcu_dereference_protected(net_families[ops->family],
2445 lockdep_is_held(&net_family_lock)))
55737fda
SH
2446 err = -EEXIST;
2447 else {
cf778b00 2448 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2449 err = 0;
2450 }
55737fda
SH
2451 spin_unlock(&net_family_lock);
2452
3410f22e 2453 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2454 return err;
2455}
c6d409cf 2456EXPORT_SYMBOL(sock_register);
1da177e4 2457
55737fda
SH
2458/**
2459 * sock_unregister - remove a protocol handler
2460 * @family: protocol family to remove
2461 *
1da177e4
LT
2462 * This function is called by a protocol handler that wants to
2463 * remove its address family, and have it unlinked from the
55737fda
SH
2464 * new socket creation.
2465 *
2466 * If protocol handler is a module, then it can use module reference
2467 * counts to protect against new references. If protocol handler is not
2468 * a module then it needs to provide its own protection in
2469 * the ops->create routine.
1da177e4 2470 */
f0fd27d4 2471void sock_unregister(int family)
1da177e4 2472{
f0fd27d4 2473 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2474
55737fda 2475 spin_lock(&net_family_lock);
a9b3cd7f 2476 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2477 spin_unlock(&net_family_lock);
2478
2479 synchronize_rcu();
2480
3410f22e 2481 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2482}
c6d409cf 2483EXPORT_SYMBOL(sock_unregister);
1da177e4 2484
77d76ea3 2485static int __init sock_init(void)
1da177e4 2486{
b3e19d92 2487 int err;
2ca794e5
EB
2488 /*
2489 * Initialize the network sysctl infrastructure.
2490 */
2491 err = net_sysctl_init();
2492 if (err)
2493 goto out;
b3e19d92 2494
1da177e4 2495 /*
89bddce5 2496 * Initialize skbuff SLAB cache
1da177e4
LT
2497 */
2498 skb_init();
1da177e4
LT
2499
2500 /*
89bddce5 2501 * Initialize the protocols module.
1da177e4
LT
2502 */
2503
2504 init_inodecache();
b3e19d92
NP
2505
2506 err = register_filesystem(&sock_fs_type);
2507 if (err)
2508 goto out_fs;
1da177e4 2509 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2510 if (IS_ERR(sock_mnt)) {
2511 err = PTR_ERR(sock_mnt);
2512 goto out_mount;
2513 }
77d76ea3
AK
2514
2515 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2516 */
2517
2518#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2519 err = netfilter_init();
2520 if (err)
2521 goto out;
1da177e4 2522#endif
cbeb321a 2523
408eccce 2524 ptp_classifier_init();
c1f19b51 2525
b3e19d92
NP
2526out:
2527 return err;
2528
2529out_mount:
2530 unregister_filesystem(&sock_fs_type);
2531out_fs:
2532 goto out;
1da177e4
LT
2533}
2534
77d76ea3
AK
2535core_initcall(sock_init); /* early initcall */
2536
1da177e4
LT
2537#ifdef CONFIG_PROC_FS
2538void socket_seq_show(struct seq_file *seq)
2539{
2540 int cpu;
2541 int counter = 0;
2542
6f912042 2543 for_each_possible_cpu(cpu)
89bddce5 2544 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2545
2546 /* It can be negative, by the way. 8) */
2547 if (counter < 0)
2548 counter = 0;
2549
2550 seq_printf(seq, "sockets: used %d\n", counter);
2551}
89bddce5 2552#endif /* CONFIG_PROC_FS */
1da177e4 2553
89bbfc95 2554#ifdef CONFIG_COMPAT
6b96018b 2555static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2556 unsigned int cmd, void __user *up)
7a229387 2557{
7a229387
AB
2558 mm_segment_t old_fs = get_fs();
2559 struct timeval ktv;
2560 int err;
2561
2562 set_fs(KERNEL_DS);
6b96018b 2563 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2564 set_fs(old_fs);
644595f8 2565 if (!err)
ed6fe9d6 2566 err = compat_put_timeval(&ktv, up);
644595f8 2567
7a229387
AB
2568 return err;
2569}
2570
6b96018b 2571static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2572 unsigned int cmd, void __user *up)
7a229387 2573{
7a229387
AB
2574 mm_segment_t old_fs = get_fs();
2575 struct timespec kts;
2576 int err;
2577
2578 set_fs(KERNEL_DS);
6b96018b 2579 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2580 set_fs(old_fs);
644595f8 2581 if (!err)
ed6fe9d6 2582 err = compat_put_timespec(&kts, up);
644595f8 2583
7a229387
AB
2584 return err;
2585}
2586
6b96018b 2587static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2588{
2589 struct ifreq __user *uifr;
2590 int err;
2591
2592 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2593 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2594 return -EFAULT;
2595
6b96018b 2596 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2597 if (err)
2598 return err;
2599
6b96018b 2600 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2601 return -EFAULT;
2602
2603 return 0;
2604}
2605
6b96018b 2606static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2607{
6b96018b 2608 struct compat_ifconf ifc32;
7a229387
AB
2609 struct ifconf ifc;
2610 struct ifconf __user *uifc;
6b96018b 2611 struct compat_ifreq __user *ifr32;
7a229387
AB
2612 struct ifreq __user *ifr;
2613 unsigned int i, j;
2614 int err;
2615
6b96018b 2616 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2617 return -EFAULT;
2618
43da5f2e 2619 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2620 if (ifc32.ifcbuf == 0) {
2621 ifc32.ifc_len = 0;
2622 ifc.ifc_len = 0;
2623 ifc.ifc_req = NULL;
2624 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2625 } else {
c6d409cf
ED
2626 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2627 sizeof(struct ifreq);
7a229387
AB
2628 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2629 ifc.ifc_len = len;
2630 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2631 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2632 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2633 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2634 return -EFAULT;
2635 ifr++;
2636 ifr32++;
2637 }
2638 }
2639 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2640 return -EFAULT;
2641
6b96018b 2642 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2643 if (err)
2644 return err;
2645
2646 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2647 return -EFAULT;
2648
2649 ifr = ifc.ifc_req;
2650 ifr32 = compat_ptr(ifc32.ifcbuf);
2651 for (i = 0, j = 0;
c6d409cf
ED
2652 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2653 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2654 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2655 return -EFAULT;
2656 ifr32++;
2657 ifr++;
2658 }
2659
2660 if (ifc32.ifcbuf == 0) {
2661 /* Translate from 64-bit structure multiple to
2662 * a 32-bit one.
2663 */
2664 i = ifc.ifc_len;
6b96018b 2665 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2666 ifc32.ifc_len = i;
2667 } else {
2668 ifc32.ifc_len = i;
2669 }
6b96018b 2670 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2671 return -EFAULT;
2672
2673 return 0;
2674}
2675
6b96018b 2676static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2677{
3a7da39d
BH
2678 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2679 bool convert_in = false, convert_out = false;
2680 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2681 struct ethtool_rxnfc __user *rxnfc;
7a229387 2682 struct ifreq __user *ifr;
3a7da39d
BH
2683 u32 rule_cnt = 0, actual_rule_cnt;
2684 u32 ethcmd;
7a229387 2685 u32 data;
3a7da39d 2686 int ret;
7a229387 2687
3a7da39d
BH
2688 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2689 return -EFAULT;
7a229387 2690
3a7da39d
BH
2691 compat_rxnfc = compat_ptr(data);
2692
2693 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2694 return -EFAULT;
2695
3a7da39d
BH
2696 /* Most ethtool structures are defined without padding.
2697 * Unfortunately struct ethtool_rxnfc is an exception.
2698 */
2699 switch (ethcmd) {
2700 default:
2701 break;
2702 case ETHTOOL_GRXCLSRLALL:
2703 /* Buffer size is variable */
2704 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2705 return -EFAULT;
2706 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2707 return -ENOMEM;
2708 buf_size += rule_cnt * sizeof(u32);
2709 /* fall through */
2710 case ETHTOOL_GRXRINGS:
2711 case ETHTOOL_GRXCLSRLCNT:
2712 case ETHTOOL_GRXCLSRULE:
55664f32 2713 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2714 convert_out = true;
2715 /* fall through */
2716 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2717 buf_size += sizeof(struct ethtool_rxnfc);
2718 convert_in = true;
2719 break;
2720 }
2721
2722 ifr = compat_alloc_user_space(buf_size);
954b1244 2723 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2724
2725 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2726 return -EFAULT;
2727
3a7da39d
BH
2728 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2729 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2730 return -EFAULT;
2731
3a7da39d 2732 if (convert_in) {
127fe533 2733 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2734 * fs.ring_cookie and at the end of fs, but nowhere else.
2735 */
127fe533
AD
2736 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2737 sizeof(compat_rxnfc->fs.m_ext) !=
2738 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2739 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2740 BUILD_BUG_ON(
2741 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2742 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2743 offsetof(struct ethtool_rxnfc, fs.location) -
2744 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2745
2746 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2747 (void __user *)(&rxnfc->fs.m_ext + 1) -
2748 (void __user *)rxnfc) ||
3a7da39d
BH
2749 copy_in_user(&rxnfc->fs.ring_cookie,
2750 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2751 (void __user *)(&rxnfc->fs.location + 1) -
2752 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2753 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2754 sizeof(rxnfc->rule_cnt)))
2755 return -EFAULT;
2756 }
2757
2758 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2759 if (ret)
2760 return ret;
2761
2762 if (convert_out) {
2763 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2764 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2765 (const void __user *)rxnfc) ||
3a7da39d
BH
2766 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2767 &rxnfc->fs.ring_cookie,
954b1244
SH
2768 (const void __user *)(&rxnfc->fs.location + 1) -
2769 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2770 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2771 sizeof(rxnfc->rule_cnt)))
2772 return -EFAULT;
2773
2774 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2775 /* As an optimisation, we only copy the actual
2776 * number of rules that the underlying
2777 * function returned. Since Mallory might
2778 * change the rule count in user memory, we
2779 * check that it is less than the rule count
2780 * originally given (as the user buffer size),
2781 * which has been range-checked.
2782 */
2783 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2784 return -EFAULT;
2785 if (actual_rule_cnt < rule_cnt)
2786 rule_cnt = actual_rule_cnt;
2787 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2788 &rxnfc->rule_locs[0],
2789 rule_cnt * sizeof(u32)))
2790 return -EFAULT;
2791 }
2792 }
2793
2794 return 0;
7a229387
AB
2795}
2796
7a50a240
AB
2797static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2798{
2799 void __user *uptr;
2800 compat_uptr_t uptr32;
2801 struct ifreq __user *uifr;
2802
c6d409cf 2803 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2804 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2805 return -EFAULT;
2806
2807 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2808 return -EFAULT;
2809
2810 uptr = compat_ptr(uptr32);
2811
2812 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2813 return -EFAULT;
2814
2815 return dev_ioctl(net, SIOCWANDEV, uifr);
2816}
2817
6b96018b
AB
2818static int bond_ioctl(struct net *net, unsigned int cmd,
2819 struct compat_ifreq __user *ifr32)
7a229387
AB
2820{
2821 struct ifreq kifr;
7a229387
AB
2822 mm_segment_t old_fs;
2823 int err;
7a229387
AB
2824
2825 switch (cmd) {
2826 case SIOCBONDENSLAVE:
2827 case SIOCBONDRELEASE:
2828 case SIOCBONDSETHWADDR:
2829 case SIOCBONDCHANGEACTIVE:
6b96018b 2830 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2831 return -EFAULT;
2832
2833 old_fs = get_fs();
c6d409cf 2834 set_fs(KERNEL_DS);
c3f52ae6 2835 err = dev_ioctl(net, cmd,
2836 (struct ifreq __user __force *) &kifr);
c6d409cf 2837 set_fs(old_fs);
7a229387
AB
2838
2839 return err;
7a229387 2840 default:
07d106d0 2841 return -ENOIOCTLCMD;
ccbd6a5a 2842 }
7a229387
AB
2843}
2844
590d4693
BH
2845/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2846static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2847 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2848{
2849 struct ifreq __user *u_ifreq64;
7a229387
AB
2850 char tmp_buf[IFNAMSIZ];
2851 void __user *data64;
2852 u32 data32;
2853
2854 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2855 IFNAMSIZ))
2856 return -EFAULT;
417c3522 2857 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2858 return -EFAULT;
2859 data64 = compat_ptr(data32);
2860
2861 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2862
7a229387
AB
2863 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2864 IFNAMSIZ))
2865 return -EFAULT;
417c3522 2866 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2867 return -EFAULT;
2868
6b96018b 2869 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2870}
2871
6b96018b
AB
2872static int dev_ifsioc(struct net *net, struct socket *sock,
2873 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2874{
a2116ed2 2875 struct ifreq __user *uifr;
7a229387
AB
2876 int err;
2877
a2116ed2
AB
2878 uifr = compat_alloc_user_space(sizeof(*uifr));
2879 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2880 return -EFAULT;
2881
2882 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2883
7a229387
AB
2884 if (!err) {
2885 switch (cmd) {
2886 case SIOCGIFFLAGS:
2887 case SIOCGIFMETRIC:
2888 case SIOCGIFMTU:
2889 case SIOCGIFMEM:
2890 case SIOCGIFHWADDR:
2891 case SIOCGIFINDEX:
2892 case SIOCGIFADDR:
2893 case SIOCGIFBRDADDR:
2894 case SIOCGIFDSTADDR:
2895 case SIOCGIFNETMASK:
fab2532b 2896 case SIOCGIFPFLAGS:
7a229387 2897 case SIOCGIFTXQLEN:
fab2532b
AB
2898 case SIOCGMIIPHY:
2899 case SIOCGMIIREG:
a2116ed2 2900 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2901 err = -EFAULT;
2902 break;
2903 }
2904 }
2905 return err;
2906}
2907
a2116ed2
AB
2908static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2909 struct compat_ifreq __user *uifr32)
2910{
2911 struct ifreq ifr;
2912 struct compat_ifmap __user *uifmap32;
2913 mm_segment_t old_fs;
2914 int err;
2915
2916 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2917 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2918 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2919 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2920 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2921 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2922 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2923 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2924 if (err)
2925 return -EFAULT;
2926
2927 old_fs = get_fs();
c6d409cf 2928 set_fs(KERNEL_DS);
c3f52ae6 2929 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2930 set_fs(old_fs);
a2116ed2
AB
2931
2932 if (cmd == SIOCGIFMAP && !err) {
2933 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2934 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2935 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2936 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2937 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2938 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2939 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2940 if (err)
2941 err = -EFAULT;
2942 }
2943 return err;
2944}
2945
7a229387 2946struct rtentry32 {
c6d409cf 2947 u32 rt_pad1;
7a229387
AB
2948 struct sockaddr rt_dst; /* target address */
2949 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2950 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2951 unsigned short rt_flags;
2952 short rt_pad2;
2953 u32 rt_pad3;
2954 unsigned char rt_tos;
2955 unsigned char rt_class;
2956 short rt_pad4;
2957 short rt_metric; /* +1 for binary compatibility! */
7a229387 2958 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
2959 u32 rt_mtu; /* per route MTU/Window */
2960 u32 rt_window; /* Window clamping */
7a229387
AB
2961 unsigned short rt_irtt; /* Initial RTT */
2962};
2963
2964struct in6_rtmsg32 {
2965 struct in6_addr rtmsg_dst;
2966 struct in6_addr rtmsg_src;
2967 struct in6_addr rtmsg_gateway;
2968 u32 rtmsg_type;
2969 u16 rtmsg_dst_len;
2970 u16 rtmsg_src_len;
2971 u32 rtmsg_metric;
2972 u32 rtmsg_info;
2973 u32 rtmsg_flags;
2974 s32 rtmsg_ifindex;
2975};
2976
6b96018b
AB
2977static int routing_ioctl(struct net *net, struct socket *sock,
2978 unsigned int cmd, void __user *argp)
7a229387
AB
2979{
2980 int ret;
2981 void *r = NULL;
2982 struct in6_rtmsg r6;
2983 struct rtentry r4;
2984 char devname[16];
2985 u32 rtdev;
2986 mm_segment_t old_fs = get_fs();
2987
6b96018b
AB
2988 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2989 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 2990 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 2991 3 * sizeof(struct in6_addr));
3ddc5b46
MD
2992 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
2993 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
2994 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
2995 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
2996 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
2997 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
2998 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
2999
3000 r = (void *) &r6;
3001 } else { /* ipv4 */
6b96018b 3002 struct rtentry32 __user *ur4 = argp;
c6d409cf 3003 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3004 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3005 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3006 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3007 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3008 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3009 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3010 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3011 if (rtdev) {
c6d409cf 3012 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3013 r4.rt_dev = (char __user __force *)devname;
3014 devname[15] = 0;
7a229387
AB
3015 } else
3016 r4.rt_dev = NULL;
3017
3018 r = (void *) &r4;
3019 }
3020
3021 if (ret) {
3022 ret = -EFAULT;
3023 goto out;
3024 }
3025
c6d409cf 3026 set_fs(KERNEL_DS);
6b96018b 3027 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3028 set_fs(old_fs);
7a229387
AB
3029
3030out:
7a229387
AB
3031 return ret;
3032}
3033
3034/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3035 * for some operations; this forces use of the newer bridge-utils that
25985edc 3036 * use compatible ioctls
7a229387 3037 */
6b96018b 3038static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3039{
6b96018b 3040 compat_ulong_t tmp;
7a229387 3041
6b96018b 3042 if (get_user(tmp, argp))
7a229387
AB
3043 return -EFAULT;
3044 if (tmp == BRCTL_GET_VERSION)
3045 return BRCTL_VERSION + 1;
3046 return -EINVAL;
3047}
3048
6b96018b
AB
3049static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3050 unsigned int cmd, unsigned long arg)
3051{
3052 void __user *argp = compat_ptr(arg);
3053 struct sock *sk = sock->sk;
3054 struct net *net = sock_net(sk);
7a229387 3055
6b96018b 3056 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3057 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3058
3059 switch (cmd) {
3060 case SIOCSIFBR:
3061 case SIOCGIFBR:
3062 return old_bridge_ioctl(argp);
3063 case SIOCGIFNAME:
3064 return dev_ifname32(net, argp);
3065 case SIOCGIFCONF:
3066 return dev_ifconf(net, argp);
3067 case SIOCETHTOOL:
3068 return ethtool_ioctl(net, argp);
7a50a240
AB
3069 case SIOCWANDEV:
3070 return compat_siocwandev(net, argp);
a2116ed2
AB
3071 case SIOCGIFMAP:
3072 case SIOCSIFMAP:
3073 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3074 case SIOCBONDENSLAVE:
3075 case SIOCBONDRELEASE:
3076 case SIOCBONDSETHWADDR:
6b96018b
AB
3077 case SIOCBONDCHANGEACTIVE:
3078 return bond_ioctl(net, cmd, argp);
3079 case SIOCADDRT:
3080 case SIOCDELRT:
3081 return routing_ioctl(net, sock, cmd, argp);
3082 case SIOCGSTAMP:
3083 return do_siocgstamp(net, sock, cmd, argp);
3084 case SIOCGSTAMPNS:
3085 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3086 case SIOCBONDSLAVEINFOQUERY:
3087 case SIOCBONDINFOQUERY:
a2116ed2 3088 case SIOCSHWTSTAMP:
fd468c74 3089 case SIOCGHWTSTAMP:
590d4693 3090 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3091
3092 case FIOSETOWN:
3093 case SIOCSPGRP:
3094 case FIOGETOWN:
3095 case SIOCGPGRP:
3096 case SIOCBRADDBR:
3097 case SIOCBRDELBR:
3098 case SIOCGIFVLAN:
3099 case SIOCSIFVLAN:
3100 case SIOCADDDLCI:
3101 case SIOCDELDLCI:
3102 return sock_ioctl(file, cmd, arg);
3103
3104 case SIOCGIFFLAGS:
3105 case SIOCSIFFLAGS:
3106 case SIOCGIFMETRIC:
3107 case SIOCSIFMETRIC:
3108 case SIOCGIFMTU:
3109 case SIOCSIFMTU:
3110 case SIOCGIFMEM:
3111 case SIOCSIFMEM:
3112 case SIOCGIFHWADDR:
3113 case SIOCSIFHWADDR:
3114 case SIOCADDMULTI:
3115 case SIOCDELMULTI:
3116 case SIOCGIFINDEX:
6b96018b
AB
3117 case SIOCGIFADDR:
3118 case SIOCSIFADDR:
3119 case SIOCSIFHWBROADCAST:
6b96018b 3120 case SIOCDIFADDR:
6b96018b
AB
3121 case SIOCGIFBRDADDR:
3122 case SIOCSIFBRDADDR:
3123 case SIOCGIFDSTADDR:
3124 case SIOCSIFDSTADDR:
3125 case SIOCGIFNETMASK:
3126 case SIOCSIFNETMASK:
3127 case SIOCSIFPFLAGS:
3128 case SIOCGIFPFLAGS:
3129 case SIOCGIFTXQLEN:
3130 case SIOCSIFTXQLEN:
3131 case SIOCBRADDIF:
3132 case SIOCBRDELIF:
9177efd3
AB
3133 case SIOCSIFNAME:
3134 case SIOCGMIIPHY:
3135 case SIOCGMIIREG:
3136 case SIOCSMIIREG:
6b96018b 3137 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3138
6b96018b
AB
3139 case SIOCSARP:
3140 case SIOCGARP:
3141 case SIOCDARP:
6b96018b 3142 case SIOCATMARK:
9177efd3
AB
3143 return sock_do_ioctl(net, sock, cmd, arg);
3144 }
3145
6b96018b
AB
3146 return -ENOIOCTLCMD;
3147}
7a229387 3148
95c96174 3149static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3150 unsigned long arg)
89bbfc95
SP
3151{
3152 struct socket *sock = file->private_data;
3153 int ret = -ENOIOCTLCMD;
87de87d5
DM
3154 struct sock *sk;
3155 struct net *net;
3156
3157 sk = sock->sk;
3158 net = sock_net(sk);
89bbfc95
SP
3159
3160 if (sock->ops->compat_ioctl)
3161 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3162
87de87d5
DM
3163 if (ret == -ENOIOCTLCMD &&
3164 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3165 ret = compat_wext_handle_ioctl(net, cmd, arg);
3166
6b96018b
AB
3167 if (ret == -ENOIOCTLCMD)
3168 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3169
89bbfc95
SP
3170 return ret;
3171}
3172#endif
3173
ac5a488e
SS
3174int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3175{
3176 return sock->ops->bind(sock, addr, addrlen);
3177}
c6d409cf 3178EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3179
3180int kernel_listen(struct socket *sock, int backlog)
3181{
3182 return sock->ops->listen(sock, backlog);
3183}
c6d409cf 3184EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3185
3186int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3187{
3188 struct sock *sk = sock->sk;
3189 int err;
3190
3191 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3192 newsock);
3193 if (err < 0)
3194 goto done;
3195
3196 err = sock->ops->accept(sock, *newsock, flags);
3197 if (err < 0) {
3198 sock_release(*newsock);
fa8705b0 3199 *newsock = NULL;
ac5a488e
SS
3200 goto done;
3201 }
3202
3203 (*newsock)->ops = sock->ops;
1b08534e 3204 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3205
3206done:
3207 return err;
3208}
c6d409cf 3209EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3210
3211int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3212 int flags)
ac5a488e
SS
3213{
3214 return sock->ops->connect(sock, addr, addrlen, flags);
3215}
c6d409cf 3216EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3217
3218int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3219 int *addrlen)
3220{
3221 return sock->ops->getname(sock, addr, addrlen, 0);
3222}
c6d409cf 3223EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3224
3225int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3226 int *addrlen)
3227{
3228 return sock->ops->getname(sock, addr, addrlen, 1);
3229}
c6d409cf 3230EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3231
3232int kernel_getsockopt(struct socket *sock, int level, int optname,
3233 char *optval, int *optlen)
3234{
3235 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3236 char __user *uoptval;
3237 int __user *uoptlen;
ac5a488e
SS
3238 int err;
3239
fb8621bb
NK
3240 uoptval = (char __user __force *) optval;
3241 uoptlen = (int __user __force *) optlen;
3242
ac5a488e
SS
3243 set_fs(KERNEL_DS);
3244 if (level == SOL_SOCKET)
fb8621bb 3245 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3246 else
fb8621bb
NK
3247 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3248 uoptlen);
ac5a488e
SS
3249 set_fs(oldfs);
3250 return err;
3251}
c6d409cf 3252EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3253
3254int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3255 char *optval, unsigned int optlen)
ac5a488e
SS
3256{
3257 mm_segment_t oldfs = get_fs();
fb8621bb 3258 char __user *uoptval;
ac5a488e
SS
3259 int err;
3260
fb8621bb
NK
3261 uoptval = (char __user __force *) optval;
3262
ac5a488e
SS
3263 set_fs(KERNEL_DS);
3264 if (level == SOL_SOCKET)
fb8621bb 3265 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3266 else
fb8621bb 3267 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3268 optlen);
3269 set_fs(oldfs);
3270 return err;
3271}
c6d409cf 3272EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3273
3274int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3275 size_t size, int flags)
3276{
3277 if (sock->ops->sendpage)
3278 return sock->ops->sendpage(sock, page, offset, size, flags);
3279
3280 return sock_no_sendpage(sock, page, offset, size, flags);
3281}
c6d409cf 3282EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3283
3284int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3285{
3286 mm_segment_t oldfs = get_fs();
3287 int err;
3288
3289 set_fs(KERNEL_DS);
3290 err = sock->ops->ioctl(sock, cmd, arg);
3291 set_fs(oldfs);
3292
3293 return err;
3294}
c6d409cf 3295EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3296
91cf45f0
TM
3297int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3298{
3299 return sock->ops->shutdown(sock, how);
3300}
91cf45f0 3301EXPORT_SYMBOL(kernel_sock_shutdown);