Merge branch 'thunderx-perf'
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll tunables; only built when RX busy polling is configured. */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1da177e4
LT
290static int init_inodecache(void)
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1da177e4
LT
299 if (sock_inode_cachep == NULL)
300 return -ENOMEM;
301 return 0;
302}
303
b87221de 304static const struct super_operations sockfs_ops = {
c6d409cf
ED
305 .alloc_inode = sock_alloc_inode,
306 .destroy_inode = sock_destroy_inode,
307 .statfs = simple_statfs,
1da177e4
LT
308};
309
c23fbb6b
ED
310/*
311 * sockfs_dname() is called from d_path().
312 */
313static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
314{
315 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 316 d_inode(dentry)->i_ino);
c23fbb6b
ED
317}
318
3ba13d17 319static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 320 .d_dname = sockfs_dname,
1da177e4
LT
321};
322
c74a1cbb
AV
323static struct dentry *sockfs_mount(struct file_system_type *fs_type,
324 int flags, const char *dev_name, void *data)
325{
326 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
327 &sockfs_dentry_operations, SOCKFS_MAGIC);
328}
329
330static struct vfsmount *sock_mnt __read_mostly;
331
332static struct file_system_type sock_fs_type = {
333 .name = "sockfs",
334 .mount = sockfs_mount,
335 .kill_sb = kill_anon_super,
336};
337
1da177e4
LT
338/*
339 * Obtains the first available file descriptor and sets it up for use.
340 *
39d8c1b6
DM
341 * These functions create file structures and maps them to fd space
342 * of the current process. On success it returns file descriptor
1da177e4
LT
343 * and file struct implicitly stored in sock->file.
344 * Note that another thread may close file descriptor before we return
345 * from this function. We use the fact that now we do not refer
346 * to socket after mapping. If one day we will need it, this
347 * function will increment ref. count on file by 1.
348 *
349 * In any case returned fd MAY BE not valid!
350 * This race condition is unavoidable
351 * with shared fd spaces, we cannot solve it inside kernel,
352 * but we take care of internal coherence yet.
353 */
354
aab174f0 355struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 356{
7cbe66b6 357 struct qstr name = { .name = "" };
2c48b9c4 358 struct path path;
7cbe66b6 359 struct file *file;
1da177e4 360
600e1779
MY
361 if (dname) {
362 name.name = dname;
363 name.len = strlen(name.name);
364 } else if (sock->sk) {
365 name.name = sock->sk->sk_prot_creator->name;
366 name.len = strlen(name.name);
367 }
4b936885 368 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
369 if (unlikely(!path.dentry))
370 return ERR_PTR(-ENOMEM);
2c48b9c4 371 path.mnt = mntget(sock_mnt);
39d8c1b6 372
2c48b9c4 373 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 374
2c48b9c4 375 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 376 &socket_file_ops);
b5ffe634 377 if (IS_ERR(file)) {
cc3808f8 378 /* drop dentry, keep inode */
c5ef6035 379 ihold(d_inode(path.dentry));
2c48b9c4 380 path_put(&path);
39b65252 381 return file;
cc3808f8
AV
382 }
383
384 sock->file = file;
77d27200 385 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 386 file->private_data = sock;
28407630 387 return file;
39d8c1b6 388}
56b31d1c 389EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 390
56b31d1c 391static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
392{
393 struct file *newfile;
28407630
AV
394 int fd = get_unused_fd_flags(flags);
395 if (unlikely(fd < 0))
396 return fd;
39d8c1b6 397
aab174f0 398 newfile = sock_alloc_file(sock, flags, NULL);
28407630 399 if (likely(!IS_ERR(newfile))) {
39d8c1b6 400 fd_install(fd, newfile);
28407630
AV
401 return fd;
402 }
7cbe66b6 403
28407630
AV
404 put_unused_fd(fd);
405 return PTR_ERR(newfile);
1da177e4
LT
406}
407
406a3c63 408struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 409{
6cb153ca
BL
410 if (file->f_op == &socket_file_ops)
411 return file->private_data; /* set in sock_map_fd */
412
23bb80d2
ED
413 *err = -ENOTSOCK;
414 return NULL;
6cb153ca 415}
406a3c63 416EXPORT_SYMBOL(sock_from_file);
6cb153ca 417
1da177e4 418/**
c6d409cf 419 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
420 * @fd: file handle
421 * @err: pointer to an error code return
422 *
423 * The file handle passed in is locked and the socket it is bound
424 * too is returned. If an error occurs the err pointer is overwritten
425 * with a negative errno code and NULL is returned. The function checks
426 * for both invalid handles and passing a handle which is not a socket.
427 *
428 * On a success the socket object pointer is returned.
429 */
430
431struct socket *sockfd_lookup(int fd, int *err)
432{
433 struct file *file;
1da177e4
LT
434 struct socket *sock;
435
89bddce5
SH
436 file = fget(fd);
437 if (!file) {
1da177e4
LT
438 *err = -EBADF;
439 return NULL;
440 }
89bddce5 441
6cb153ca
BL
442 sock = sock_from_file(file, err);
443 if (!sock)
1da177e4 444 fput(file);
6cb153ca
BL
445 return sock;
446}
c6d409cf 447EXPORT_SYMBOL(sockfd_lookup);
1da177e4 448
6cb153ca
BL
449static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
450{
00e188ef 451 struct fd f = fdget(fd);
6cb153ca
BL
452 struct socket *sock;
453
3672558c 454 *err = -EBADF;
00e188ef
AV
455 if (f.file) {
456 sock = sock_from_file(f.file, err);
457 if (likely(sock)) {
458 *fput_needed = f.flags;
6cb153ca 459 return sock;
00e188ef
AV
460 }
461 fdput(f);
1da177e4 462 }
6cb153ca 463 return NULL;
1da177e4
LT
464}
465
600e1779
MY
466#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
467#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
468#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
469static ssize_t sockfs_getxattr(struct dentry *dentry,
470 const char *name, void *value, size_t size)
471{
472 const char *proto_name;
473 size_t proto_size;
474 int error;
475
476 error = -ENODATA;
477 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
478 proto_name = dentry->d_name.name;
479 proto_size = strlen(proto_name);
480
481 if (value) {
482 error = -ERANGE;
483 if (proto_size + 1 > size)
484 goto out;
485
486 strncpy(value, proto_name, proto_size + 1);
487 }
488 error = proto_size + 1;
489 }
490
491out:
492 return error;
493}
494
495static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
496 size_t size)
497{
498 ssize_t len;
499 ssize_t used = 0;
500
c5ef6035 501 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
502 if (len < 0)
503 return len;
504 used += len;
505 if (buffer) {
506 if (size < used)
507 return -ERANGE;
508 buffer += len;
509 }
510
511 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
512 used += len;
513 if (buffer) {
514 if (size < used)
515 return -ERANGE;
516 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
517 buffer += len;
518 }
519
520 return used;
521}
522
523static const struct inode_operations sockfs_inode_ops = {
524 .getxattr = sockfs_getxattr,
525 .listxattr = sockfs_listxattr,
526};
527
1da177e4
LT
528/**
529 * sock_alloc - allocate a socket
89bddce5 530 *
1da177e4
LT
531 * Allocate a new inode and socket object. The two are bound together
532 * and initialised. The socket is then returned. If we are out of inodes
533 * NULL is returned.
534 */
535
f4a00aac 536struct socket *sock_alloc(void)
1da177e4 537{
89bddce5
SH
538 struct inode *inode;
539 struct socket *sock;
1da177e4 540
a209dfc7 541 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
542 if (!inode)
543 return NULL;
544
545 sock = SOCKET_I(inode);
546
29a020d3 547 kmemcheck_annotate_bitfield(sock, type);
85fe4025 548 inode->i_ino = get_next_ino();
89bddce5 549 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
550 inode->i_uid = current_fsuid();
551 inode->i_gid = current_fsgid();
600e1779 552 inode->i_op = &sockfs_inode_ops;
1da177e4 553
19e8d69c 554 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
555 return sock;
556}
f4a00aac 557EXPORT_SYMBOL(sock_alloc);
1da177e4 558
1da177e4
LT
559/**
560 * sock_release - close a socket
561 * @sock: socket to close
562 *
563 * The socket is released from the protocol stack if it has a release
564 * callback, and the inode is then released if the socket is bound to
89bddce5 565 * an inode not a file.
1da177e4 566 */
89bddce5 567
1da177e4
LT
568void sock_release(struct socket *sock)
569{
570 if (sock->ops) {
571 struct module *owner = sock->ops->owner;
572
573 sock->ops->release(sock);
574 sock->ops = NULL;
575 module_put(owner);
576 }
577
eaefd110 578 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 579 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 580
19e8d69c 581 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
582 if (!sock->file) {
583 iput(SOCK_INODE(sock));
584 return;
585 }
89bddce5 586 sock->file = NULL;
1da177e4 587}
c6d409cf 588EXPORT_SYMBOL(sock_release);
1da177e4 589
67cc0d40 590void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
20d49473 591{
140c55d4
ED
592 u8 flags = *tx_flags;
593
b9f40e21 594 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
595 flags |= SKBTX_HW_TSTAMP;
596
b9f40e21 597 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
598 flags |= SKBTX_SW_TSTAMP;
599
e7fd2885 600 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
601 flags |= SKBTX_SCHED_TSTAMP;
602
e1c8a607 603 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
140c55d4 604 flags |= SKBTX_ACK_TSTAMP;
e7fd2885 605
140c55d4 606 *tx_flags = flags;
20d49473 607}
67cc0d40 608EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 609
d8725c86 610static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 611{
01e97e65 612 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
613 BUG_ON(ret == -EIOCBQUEUED);
614 return ret;
1da177e4
LT
615}
616
/*
 * Send @msg on @sock after the security hook has approved it.
 * Returns the hook's error if it is non-zero, otherwise the result of
 * the protocol's sendmsg operation.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
625
626int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
627 struct kvec *vec, size_t num, size_t size)
628{
6aa24814 629 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 630 return sock_sendmsg(sock, msg);
1da177e4 631}
c6d409cf 632EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 633
92f37fd2
ED
634/*
635 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
636 */
637void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
638 struct sk_buff *skb)
639{
20d49473 640 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 641 struct scm_timestamping tss;
20d49473
PO
642 int empty = 1;
643 struct skb_shared_hwtstamps *shhwtstamps =
644 skb_hwtstamps(skb);
645
646 /* Race occurred between timestamp enabling and packet
647 receiving. Fill in the current time for now. */
648 if (need_software_tstamp && skb->tstamp.tv64 == 0)
649 __net_timestamp(skb);
650
651 if (need_software_tstamp) {
652 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
653 struct timeval tv;
654 skb_get_timestamp(skb, &tv);
655 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
656 sizeof(tv), &tv);
657 } else {
f24b9be5
WB
658 struct timespec ts;
659 skb_get_timestampns(skb, &ts);
20d49473 660 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 661 sizeof(ts), &ts);
20d49473
PO
662 }
663 }
664
f24b9be5 665 memset(&tss, 0, sizeof(tss));
c199105d 666 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 667 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 668 empty = 0;
4d276eb6 669 if (shhwtstamps &&
b9f40e21 670 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 671 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 672 empty = 0;
20d49473
PO
673 if (!empty)
674 put_cmsg(msg, SOL_SOCKET,
f24b9be5 675 SCM_TIMESTAMPING, sizeof(tss), &tss);
92f37fd2 676}
7c81fd8b
ACM
677EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
678
6e3e939f
JB
679void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
680 struct sk_buff *skb)
681{
682 int ack;
683
684 if (!sock_flag(sk, SOCK_WIFI_STATUS))
685 return;
686 if (!skb->wifi_acked_valid)
687 return;
688
689 ack = skb->wifi_acked;
690
691 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
692}
693EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
694
11165f14 695static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
696 struct sk_buff *skb)
3b885787 697{
744d5a3e 698 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 699 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 700 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
701}
702
/* Emit the timestamp cmsgs first, then the drop-count cmsg. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 710
1b784140
YX
711static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
712 size_t size, int flags)
1da177e4 713{
1b784140 714 return sock->ops->recvmsg(sock, msg, size, flags);
1da177e4
LT
715}
716
1b784140
YX
717int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
718 int flags)
a2e27255
ACM
719{
720 int err = security_socket_recvmsg(sock, msg, size, flags);
721
1b784140 722 return err ?: sock_recvmsg_nosec(sock, msg, size, flags);
1da177e4 723}
c6d409cf 724EXPORT_SYMBOL(sock_recvmsg);
1da177e4 725
c1249c0a
ML
726/**
727 * kernel_recvmsg - Receive a message from a socket (kernel space)
728 * @sock: The socket to receive the message from
729 * @msg: Received message
730 * @vec: Input s/g array for message data
731 * @num: Size of input s/g array
732 * @size: Number of bytes to read
733 * @flags: Message flags (MSG_DONTWAIT, etc...)
734 *
735 * On return the msg structure contains the scatter/gather array passed in the
736 * vec argument. The array is modified so that it consists of the unfilled
737 * portion of the original array.
738 *
739 * The returned value is the total number of bytes received, or an error.
740 */
89bddce5
SH
741int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
742 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
743{
744 mm_segment_t oldfs = get_fs();
745 int result;
746
6aa24814 747 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 748 set_fs(KERNEL_DS);
1da177e4
LT
749 result = sock_recvmsg(sock, msg, size, flags);
750 set_fs(oldfs);
751 return result;
752}
c6d409cf 753EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 754
ce1d4d3e
CH
755static ssize_t sock_sendpage(struct file *file, struct page *page,
756 int offset, size_t size, loff_t *ppos, int more)
1da177e4 757{
1da177e4
LT
758 struct socket *sock;
759 int flags;
760
ce1d4d3e
CH
761 sock = file->private_data;
762
35f9c09f
ED
763 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
764 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
765 flags |= more;
ce1d4d3e 766
e6949583 767 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 768}
1da177e4 769
9c55e01c 770static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 771 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
772 unsigned int flags)
773{
774 struct socket *sock = file->private_data;
775
997b37da
RDC
776 if (unlikely(!sock->ops->splice_read))
777 return -EINVAL;
778
9c55e01c
JA
779 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
780}
781
8ae5e030 782static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 783{
6d652330
AV
784 struct file *file = iocb->ki_filp;
785 struct socket *sock = file->private_data;
0345f931 786 struct msghdr msg = {.msg_iter = *to,
787 .msg_iocb = iocb};
8ae5e030 788 ssize_t res;
ce1d4d3e 789
8ae5e030
AV
790 if (file->f_flags & O_NONBLOCK)
791 msg.msg_flags = MSG_DONTWAIT;
792
793 if (iocb->ki_pos != 0)
1da177e4 794 return -ESPIPE;
027445c3 795
66ee59af 796 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
797 return 0;
798
237dae88 799 res = sock_recvmsg(sock, &msg, iov_iter_count(to), msg.msg_flags);
8ae5e030
AV
800 *to = msg.msg_iter;
801 return res;
1da177e4
LT
802}
803
8ae5e030 804static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 805{
6d652330
AV
806 struct file *file = iocb->ki_filp;
807 struct socket *sock = file->private_data;
0345f931 808 struct msghdr msg = {.msg_iter = *from,
809 .msg_iocb = iocb};
8ae5e030 810 ssize_t res;
1da177e4 811
8ae5e030 812 if (iocb->ki_pos != 0)
ce1d4d3e 813 return -ESPIPE;
027445c3 814
8ae5e030
AV
815 if (file->f_flags & O_NONBLOCK)
816 msg.msg_flags = MSG_DONTWAIT;
817
6d652330
AV
818 if (sock->type == SOCK_SEQPACKET)
819 msg.msg_flags |= MSG_EOR;
820
d8725c86 821 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
822 *from = msg.msg_iter;
823 return res;
1da177e4
LT
824}
825
1da177e4
LT
826/*
827 * Atomic setting of ioctl hooks to avoid race
828 * with module unload.
829 */
830
4a3e2f71 831static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 832static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 833
881d966b 834void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 835{
4a3e2f71 836 mutex_lock(&br_ioctl_mutex);
1da177e4 837 br_ioctl_hook = hook;
4a3e2f71 838 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
839}
840EXPORT_SYMBOL(brioctl_set);
841
4a3e2f71 842static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 843static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 844
881d966b 845void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 846{
4a3e2f71 847 mutex_lock(&vlan_ioctl_mutex);
1da177e4 848 vlan_ioctl_hook = hook;
4a3e2f71 849 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
850}
851EXPORT_SYMBOL(vlan_ioctl_set);
852
4a3e2f71 853static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 854static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 855
89bddce5 856void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 857{
4a3e2f71 858 mutex_lock(&dlci_ioctl_mutex);
1da177e4 859 dlci_ioctl_hook = hook;
4a3e2f71 860 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
861}
862EXPORT_SYMBOL(dlci_ioctl_set);
863
6b96018b
AB
864static long sock_do_ioctl(struct net *net, struct socket *sock,
865 unsigned int cmd, unsigned long arg)
866{
867 int err;
868 void __user *argp = (void __user *)arg;
869
870 err = sock->ops->ioctl(sock, cmd, arg);
871
872 /*
873 * If this ioctl is unknown try to hand it down
874 * to the NIC driver.
875 */
876 if (err == -ENOIOCTLCMD)
877 err = dev_ioctl(net, cmd, argp);
878
879 return err;
880}
881
1da177e4
LT
882/*
883 * With an ioctl, arg may well be a user mode pointer, but we don't know
884 * what to do with it - that's up to the protocol still.
885 */
886
887static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
888{
889 struct socket *sock;
881d966b 890 struct sock *sk;
1da177e4
LT
891 void __user *argp = (void __user *)arg;
892 int pid, err;
881d966b 893 struct net *net;
1da177e4 894
b69aee04 895 sock = file->private_data;
881d966b 896 sk = sock->sk;
3b1e0a65 897 net = sock_net(sk);
1da177e4 898 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 899 err = dev_ioctl(net, cmd, argp);
1da177e4 900 } else
3d23e349 901#ifdef CONFIG_WEXT_CORE
1da177e4 902 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 903 err = dev_ioctl(net, cmd, argp);
1da177e4 904 } else
3d23e349 905#endif
89bddce5 906 switch (cmd) {
1da177e4
LT
907 case FIOSETOWN:
908 case SIOCSPGRP:
909 err = -EFAULT;
910 if (get_user(pid, (int __user *)argp))
911 break;
e0b93edd
JL
912 f_setown(sock->file, pid, 1);
913 err = 0;
1da177e4
LT
914 break;
915 case FIOGETOWN:
916 case SIOCGPGRP:
609d7fa9 917 err = put_user(f_getown(sock->file),
89bddce5 918 (int __user *)argp);
1da177e4
LT
919 break;
920 case SIOCGIFBR:
921 case SIOCSIFBR:
922 case SIOCBRADDBR:
923 case SIOCBRDELBR:
924 err = -ENOPKG;
925 if (!br_ioctl_hook)
926 request_module("bridge");
927
4a3e2f71 928 mutex_lock(&br_ioctl_mutex);
89bddce5 929 if (br_ioctl_hook)
881d966b 930 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 931 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
932 break;
933 case SIOCGIFVLAN:
934 case SIOCSIFVLAN:
935 err = -ENOPKG;
936 if (!vlan_ioctl_hook)
937 request_module("8021q");
938
4a3e2f71 939 mutex_lock(&vlan_ioctl_mutex);
1da177e4 940 if (vlan_ioctl_hook)
881d966b 941 err = vlan_ioctl_hook(net, argp);
4a3e2f71 942 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 943 break;
1da177e4
LT
944 case SIOCADDDLCI:
945 case SIOCDELDLCI:
946 err = -ENOPKG;
947 if (!dlci_ioctl_hook)
948 request_module("dlci");
949
7512cbf6
PE
950 mutex_lock(&dlci_ioctl_mutex);
951 if (dlci_ioctl_hook)
1da177e4 952 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 953 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
954 break;
955 default:
6b96018b 956 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 957 break;
89bddce5 958 }
1da177e4
LT
959 return err;
960}
961
962int sock_create_lite(int family, int type, int protocol, struct socket **res)
963{
964 int err;
965 struct socket *sock = NULL;
89bddce5 966
1da177e4
LT
967 err = security_socket_create(family, type, protocol, 1);
968 if (err)
969 goto out;
970
971 sock = sock_alloc();
972 if (!sock) {
973 err = -ENOMEM;
974 goto out;
975 }
976
1da177e4 977 sock->type = type;
7420ed23
VY
978 err = security_socket_post_create(sock, family, type, protocol, 1);
979 if (err)
980 goto out_release;
981
1da177e4
LT
982out:
983 *res = sock;
984 return err;
7420ed23
VY
985out_release:
986 sock_release(sock);
987 sock = NULL;
988 goto out;
1da177e4 989}
c6d409cf 990EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
991
992/* No kernel lock held - perfect */
89bddce5 993static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 994{
cbf55001 995 unsigned int busy_flag = 0;
1da177e4
LT
996 struct socket *sock;
997
998 /*
89bddce5 999 * We can't return errors to poll, so it's either yes or no.
1da177e4 1000 */
b69aee04 1001 sock = file->private_data;
2d48d67f 1002
cbf55001 1003 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1004 /* this socket can poll_ll so tell the system call */
cbf55001 1005 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1006
1007 /* once, only if requested by syscall */
cbf55001
ET
1008 if (wait && (wait->_key & POLL_BUSY_LOOP))
1009 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1010 }
1011
cbf55001 1012 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1013}
1014
89bddce5 1015static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1016{
b69aee04 1017 struct socket *sock = file->private_data;
1da177e4
LT
1018
1019 return sock->ops->mmap(file, sock, vma);
1020}
1021
20380731 1022static int sock_close(struct inode *inode, struct file *filp)
1da177e4 1023{
1da177e4
LT
1024 sock_release(SOCKET_I(inode));
1025 return 0;
1026}
1027
1028/*
1029 * Update the socket async list
1030 *
1031 * Fasync_list locking strategy.
1032 *
1033 * 1. fasync_list is modified only under process context socket lock
1034 * i.e. under semaphore.
1035 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1036 * or under socket lock
1da177e4
LT
1037 */
1038
1039static int sock_fasync(int fd, struct file *filp, int on)
1040{
989a2979
ED
1041 struct socket *sock = filp->private_data;
1042 struct sock *sk = sock->sk;
eaefd110 1043 struct socket_wq *wq;
1da177e4 1044
989a2979 1045 if (sk == NULL)
1da177e4 1046 return -EINVAL;
1da177e4
LT
1047
1048 lock_sock(sk);
eaefd110
ED
1049 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1050 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1051
eaefd110 1052 if (!wq->fasync_list)
989a2979
ED
1053 sock_reset_flag(sk, SOCK_FASYNC);
1054 else
bcdce719 1055 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1056
989a2979 1057 release_sock(sk);
1da177e4
LT
1058 return 0;
1059}
1060
ceb5d58b 1061/* This function may be called only under rcu_lock */
1da177e4 1062
ceb5d58b 1063int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1064{
ceb5d58b 1065 if (!wq || !wq->fasync_list)
1da177e4 1066 return -1;
ceb5d58b 1067
89bddce5 1068 switch (how) {
8d8ad9d7 1069 case SOCK_WAKE_WAITD:
ceb5d58b 1070 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1071 break;
1072 goto call_kill;
8d8ad9d7 1073 case SOCK_WAKE_SPACE:
ceb5d58b 1074 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1075 break;
1076 /* fall through */
8d8ad9d7 1077 case SOCK_WAKE_IO:
89bddce5 1078call_kill:
43815482 1079 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1080 break;
8d8ad9d7 1081 case SOCK_WAKE_URG:
43815482 1082 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1083 }
ceb5d58b 1084
1da177e4
LT
1085 return 0;
1086}
c6d409cf 1087EXPORT_SYMBOL(sock_wake_async);
1da177e4 1088
721db93a 1089int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1090 struct socket **res, int kern)
1da177e4
LT
1091{
1092 int err;
1093 struct socket *sock;
55737fda 1094 const struct net_proto_family *pf;
1da177e4
LT
1095
1096 /*
89bddce5 1097 * Check protocol is in range
1da177e4
LT
1098 */
1099 if (family < 0 || family >= NPROTO)
1100 return -EAFNOSUPPORT;
1101 if (type < 0 || type >= SOCK_MAX)
1102 return -EINVAL;
1103
1104 /* Compatibility.
1105
1106 This uglymoron is moved from INET layer to here to avoid
1107 deadlock in module load.
1108 */
1109 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1110 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1111 current->comm);
1da177e4
LT
1112 family = PF_PACKET;
1113 }
1114
1115 err = security_socket_create(family, type, protocol, kern);
1116 if (err)
1117 return err;
89bddce5 1118
55737fda
SH
1119 /*
1120 * Allocate the socket and allow the family to set things up. if
1121 * the protocol is 0, the family is instructed to select an appropriate
1122 * default.
1123 */
1124 sock = sock_alloc();
1125 if (!sock) {
e87cc472 1126 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1127 return -ENFILE; /* Not exactly a match, but its the
1128 closest posix thing */
1129 }
1130
1131 sock->type = type;
1132
95a5afca 1133#ifdef CONFIG_MODULES
89bddce5
SH
1134 /* Attempt to load a protocol module if the find failed.
1135 *
1136 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1137 * requested real, full-featured networking support upon configuration.
1138 * Otherwise module support will break!
1139 */
190683a9 1140 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1141 request_module("net-pf-%d", family);
1da177e4
LT
1142#endif
1143
55737fda
SH
1144 rcu_read_lock();
1145 pf = rcu_dereference(net_families[family]);
1146 err = -EAFNOSUPPORT;
1147 if (!pf)
1148 goto out_release;
1da177e4
LT
1149
1150 /*
1151 * We will call the ->create function, that possibly is in a loadable
1152 * module, so we have to bump that loadable module refcnt first.
1153 */
55737fda 1154 if (!try_module_get(pf->owner))
1da177e4
LT
1155 goto out_release;
1156
55737fda
SH
1157 /* Now protected by module ref count */
1158 rcu_read_unlock();
1159
3f378b68 1160 err = pf->create(net, sock, protocol, kern);
55737fda 1161 if (err < 0)
1da177e4 1162 goto out_module_put;
a79af59e 1163
1da177e4
LT
1164 /*
1165 * Now to bump the refcnt of the [loadable] module that owns this
1166 * socket at sock_release time we decrement its refcnt.
1167 */
55737fda
SH
1168 if (!try_module_get(sock->ops->owner))
1169 goto out_module_busy;
1170
1da177e4
LT
1171 /*
1172 * Now that we're done with the ->create function, the [loadable]
1173 * module can have its refcnt decremented
1174 */
55737fda 1175 module_put(pf->owner);
7420ed23
VY
1176 err = security_socket_post_create(sock, family, type, protocol, kern);
1177 if (err)
3b185525 1178 goto out_sock_release;
55737fda 1179 *res = sock;
1da177e4 1180
55737fda
SH
1181 return 0;
1182
1183out_module_busy:
1184 err = -EAFNOSUPPORT;
1da177e4 1185out_module_put:
55737fda
SH
1186 sock->ops = NULL;
1187 module_put(pf->owner);
1188out_sock_release:
1da177e4 1189 sock_release(sock);
55737fda
SH
1190 return err;
1191
1192out_release:
1193 rcu_read_unlock();
1194 goto out_sock_release;
1da177e4 1195}
721db93a 1196EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1197
1198int sock_create(int family, int type, int protocol, struct socket **res)
1199{
1b8d7ae4 1200 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1201}
c6d409cf 1202EXPORT_SYMBOL(sock_create);
1da177e4 1203
eeb1bd5c 1204int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
1da177e4 1205{
eeb1bd5c 1206 return __sock_create(net, family, type, protocol, res, 1);
1da177e4 1207}
c6d409cf 1208EXPORT_SYMBOL(sock_create_kern);
1da177e4 1209
3e0fa65f 1210SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1211{
1212 int retval;
1213 struct socket *sock;
a677a039
UD
1214 int flags;
1215
e38b36f3
UD
1216 /* Check the SOCK_* constants for consistency. */
1217 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1218 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1219 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1220 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1221
a677a039 1222 flags = type & ~SOCK_TYPE_MASK;
77d27200 1223 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1224 return -EINVAL;
1225 type &= SOCK_TYPE_MASK;
1da177e4 1226
aaca0bdc
UD
1227 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1228 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1229
1da177e4
LT
1230 retval = sock_create(family, type, protocol, &sock);
1231 if (retval < 0)
1232 goto out;
1233
77d27200 1234 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1235 if (retval < 0)
1236 goto out_release;
1237
1238out:
1239 /* It may be already another descriptor 8) Not kernel problem. */
1240 return retval;
1241
1242out_release:
1243 sock_release(sock);
1244 return retval;
1245}
1246
1247/*
1248 * Create a pair of connected sockets.
1249 */
1250
3e0fa65f
HC
1251SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1252 int __user *, usockvec)
1da177e4
LT
1253{
1254 struct socket *sock1, *sock2;
1255 int fd1, fd2, err;
db349509 1256 struct file *newfile1, *newfile2;
a677a039
UD
1257 int flags;
1258
1259 flags = type & ~SOCK_TYPE_MASK;
77d27200 1260 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1261 return -EINVAL;
1262 type &= SOCK_TYPE_MASK;
1da177e4 1263
aaca0bdc
UD
1264 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1265 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1266
1da177e4
LT
1267 /*
1268 * Obtain the first socket and check if the underlying protocol
1269 * supports the socketpair call.
1270 */
1271
1272 err = sock_create(family, type, protocol, &sock1);
1273 if (err < 0)
1274 goto out;
1275
1276 err = sock_create(family, type, protocol, &sock2);
1277 if (err < 0)
1278 goto out_release_1;
1279
1280 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1281 if (err < 0)
1da177e4
LT
1282 goto out_release_both;
1283
28407630 1284 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1285 if (unlikely(fd1 < 0)) {
1286 err = fd1;
db349509 1287 goto out_release_both;
bf3c23d1 1288 }
d73aa286 1289
28407630 1290 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1291 if (unlikely(fd2 < 0)) {
1292 err = fd2;
d73aa286 1293 goto out_put_unused_1;
28407630
AV
1294 }
1295
aab174f0 1296 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1297 if (IS_ERR(newfile1)) {
28407630 1298 err = PTR_ERR(newfile1);
d73aa286 1299 goto out_put_unused_both;
28407630
AV
1300 }
1301
aab174f0 1302 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1303 if (IS_ERR(newfile2)) {
1304 err = PTR_ERR(newfile2);
d73aa286 1305 goto out_fput_1;
db349509
AV
1306 }
1307
d73aa286
YD
1308 err = put_user(fd1, &usockvec[0]);
1309 if (err)
1310 goto out_fput_both;
1311
1312 err = put_user(fd2, &usockvec[1]);
1313 if (err)
1314 goto out_fput_both;
1315
157cf649 1316 audit_fd_pair(fd1, fd2);
d73aa286 1317
db349509
AV
1318 fd_install(fd1, newfile1);
1319 fd_install(fd2, newfile2);
1da177e4
LT
1320 /* fd1 and fd2 may be already another descriptors.
1321 * Not kernel problem.
1322 */
1323
d73aa286 1324 return 0;
1da177e4 1325
d73aa286
YD
1326out_fput_both:
1327 fput(newfile2);
1328 fput(newfile1);
1329 put_unused_fd(fd2);
1330 put_unused_fd(fd1);
1331 goto out;
1332
1333out_fput_1:
1334 fput(newfile1);
1335 put_unused_fd(fd2);
1336 put_unused_fd(fd1);
1337 sock_release(sock2);
1338 goto out;
1da177e4 1339
d73aa286
YD
1340out_put_unused_both:
1341 put_unused_fd(fd2);
1342out_put_unused_1:
1343 put_unused_fd(fd1);
1da177e4 1344out_release_both:
89bddce5 1345 sock_release(sock2);
1da177e4 1346out_release_1:
89bddce5 1347 sock_release(sock1);
1da177e4
LT
1348out:
1349 return err;
1350}
1351
1da177e4
LT
1352/*
1353 * Bind a name to a socket. Nothing much to do here since it's
1354 * the protocol's responsibility to handle the local address.
1355 *
1356 * We move the socket address to kernel space before we call
1357 * the protocol layer (having also checked the address is ok).
1358 */
1359
20f37034 1360SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1361{
1362 struct socket *sock;
230b1839 1363 struct sockaddr_storage address;
6cb153ca 1364 int err, fput_needed;
1da177e4 1365
89bddce5 1366 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1367 if (sock) {
43db362d 1368 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1369 if (err >= 0) {
1370 err = security_socket_bind(sock,
230b1839 1371 (struct sockaddr *)&address,
89bddce5 1372 addrlen);
6cb153ca
BL
1373 if (!err)
1374 err = sock->ops->bind(sock,
89bddce5 1375 (struct sockaddr *)
230b1839 1376 &address, addrlen);
1da177e4 1377 }
6cb153ca 1378 fput_light(sock->file, fput_needed);
89bddce5 1379 }
1da177e4
LT
1380 return err;
1381}
1382
1da177e4
LT
1383/*
1384 * Perform a listen. Basically, we allow the protocol to do anything
1385 * necessary for a listen, and if that works, we mark the socket as
1386 * ready for listening.
1387 */
1388
3e0fa65f 1389SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1390{
1391 struct socket *sock;
6cb153ca 1392 int err, fput_needed;
b8e1f9b5 1393 int somaxconn;
89bddce5
SH
1394
1395 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1396 if (sock) {
8efa6e93 1397 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1398 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1399 backlog = somaxconn;
1da177e4
LT
1400
1401 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1402 if (!err)
1403 err = sock->ops->listen(sock, backlog);
1da177e4 1404
6cb153ca 1405 fput_light(sock->file, fput_needed);
1da177e4
LT
1406 }
1407 return err;
1408}
1409
1da177e4
LT
1410/*
1411 * For accept, we attempt to create a new socket, set up the link
1412 * with the client, wake up the client, then return the new
1413 * connected fd. We collect the address of the connector in kernel
1414 * space and move it to user at the very end. This is unclean because
1415 * we open the socket then return an error.
1416 *
1417 * 1003.1g adds the ability to recvmsg() to query connection pending
1418 * status to recvmsg. We need to add that support in a way that's
1419 * clean when we restructure accept also.
1420 */
1421
20f37034
HC
1422SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1423 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1424{
1425 struct socket *sock, *newsock;
39d8c1b6 1426 struct file *newfile;
6cb153ca 1427 int err, len, newfd, fput_needed;
230b1839 1428 struct sockaddr_storage address;
1da177e4 1429
77d27200 1430 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1431 return -EINVAL;
1432
1433 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1434 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1435
6cb153ca 1436 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1437 if (!sock)
1438 goto out;
1439
1440 err = -ENFILE;
c6d409cf
ED
1441 newsock = sock_alloc();
1442 if (!newsock)
1da177e4
LT
1443 goto out_put;
1444
1445 newsock->type = sock->type;
1446 newsock->ops = sock->ops;
1447
1da177e4
LT
1448 /*
1449 * We don't need try_module_get here, as the listening socket (sock)
1450 * has the protocol module (sock->ops->owner) held.
1451 */
1452 __module_get(newsock->ops->owner);
1453
28407630 1454 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1455 if (unlikely(newfd < 0)) {
1456 err = newfd;
9a1875e6
DM
1457 sock_release(newsock);
1458 goto out_put;
39d8c1b6 1459 }
aab174f0 1460 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1461 if (IS_ERR(newfile)) {
28407630
AV
1462 err = PTR_ERR(newfile);
1463 put_unused_fd(newfd);
1464 sock_release(newsock);
1465 goto out_put;
1466 }
39d8c1b6 1467
a79af59e
FF
1468 err = security_socket_accept(sock, newsock);
1469 if (err)
39d8c1b6 1470 goto out_fd;
a79af59e 1471
1da177e4
LT
1472 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1473 if (err < 0)
39d8c1b6 1474 goto out_fd;
1da177e4
LT
1475
1476 if (upeer_sockaddr) {
230b1839 1477 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1478 &len, 2) < 0) {
1da177e4 1479 err = -ECONNABORTED;
39d8c1b6 1480 goto out_fd;
1da177e4 1481 }
43db362d 1482 err = move_addr_to_user(&address,
230b1839 1483 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1484 if (err < 0)
39d8c1b6 1485 goto out_fd;
1da177e4
LT
1486 }
1487
1488 /* File flags are not inherited via accept() unlike another OSes. */
1489
39d8c1b6
DM
1490 fd_install(newfd, newfile);
1491 err = newfd;
1da177e4 1492
1da177e4 1493out_put:
6cb153ca 1494 fput_light(sock->file, fput_needed);
1da177e4
LT
1495out:
1496 return err;
39d8c1b6 1497out_fd:
9606a216 1498 fput(newfile);
39d8c1b6 1499 put_unused_fd(newfd);
1da177e4
LT
1500 goto out_put;
1501}
1502
20f37034
HC
1503SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1504 int __user *, upeer_addrlen)
aaca0bdc 1505{
de11defe 1506 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1507}
1508
1da177e4
LT
1509/*
1510 * Attempt to connect to a socket with the server address. The address
1511 * is in user space so we verify it is OK and move it to kernel space.
1512 *
1513 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1514 * break bindings
1515 *
1516 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1517 * other SEQPACKET protocols that take time to connect() as it doesn't
1518 * include the -EINPROGRESS status for such sockets.
1519 */
1520
20f37034
HC
1521SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1522 int, addrlen)
1da177e4
LT
1523{
1524 struct socket *sock;
230b1839 1525 struct sockaddr_storage address;
6cb153ca 1526 int err, fput_needed;
1da177e4 1527
6cb153ca 1528 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1529 if (!sock)
1530 goto out;
43db362d 1531 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1532 if (err < 0)
1533 goto out_put;
1534
89bddce5 1535 err =
230b1839 1536 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1537 if (err)
1538 goto out_put;
1539
230b1839 1540 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1541 sock->file->f_flags);
1542out_put:
6cb153ca 1543 fput_light(sock->file, fput_needed);
1da177e4
LT
1544out:
1545 return err;
1546}
1547
1548/*
1549 * Get the local address ('name') of a socket object. Move the obtained
1550 * name to user space.
1551 */
1552
20f37034
HC
1553SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1554 int __user *, usockaddr_len)
1da177e4
LT
1555{
1556 struct socket *sock;
230b1839 1557 struct sockaddr_storage address;
6cb153ca 1558 int len, err, fput_needed;
89bddce5 1559
6cb153ca 1560 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1561 if (!sock)
1562 goto out;
1563
1564 err = security_socket_getsockname(sock);
1565 if (err)
1566 goto out_put;
1567
230b1839 1568 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1569 if (err)
1570 goto out_put;
43db362d 1571 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1572
1573out_put:
6cb153ca 1574 fput_light(sock->file, fput_needed);
1da177e4
LT
1575out:
1576 return err;
1577}
1578
1579/*
1580 * Get the remote address ('name') of a socket object. Move the obtained
1581 * name to user space.
1582 */
1583
20f37034
HC
1584SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1585 int __user *, usockaddr_len)
1da177e4
LT
1586{
1587 struct socket *sock;
230b1839 1588 struct sockaddr_storage address;
6cb153ca 1589 int len, err, fput_needed;
1da177e4 1590
89bddce5
SH
1591 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1592 if (sock != NULL) {
1da177e4
LT
1593 err = security_socket_getpeername(sock);
1594 if (err) {
6cb153ca 1595 fput_light(sock->file, fput_needed);
1da177e4
LT
1596 return err;
1597 }
1598
89bddce5 1599 err =
230b1839 1600 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1601 1);
1da177e4 1602 if (!err)
43db362d 1603 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1604 usockaddr_len);
6cb153ca 1605 fput_light(sock->file, fput_needed);
1da177e4
LT
1606 }
1607 return err;
1608}
1609
1610/*
1611 * Send a datagram to a given address. We move the address into kernel
1612 * space and check the user space data area is readable before invoking
1613 * the protocol.
1614 */
1615
3e0fa65f 1616SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1617 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1618 int, addr_len)
1da177e4
LT
1619{
1620 struct socket *sock;
230b1839 1621 struct sockaddr_storage address;
1da177e4
LT
1622 int err;
1623 struct msghdr msg;
1624 struct iovec iov;
6cb153ca 1625 int fput_needed;
6cb153ca 1626
602bd0e9
AV
1627 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1628 if (unlikely(err))
1629 return err;
de0fa95c
PE
1630 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1631 if (!sock)
4387ff75 1632 goto out;
6cb153ca 1633
89bddce5 1634 msg.msg_name = NULL;
89bddce5
SH
1635 msg.msg_control = NULL;
1636 msg.msg_controllen = 0;
1637 msg.msg_namelen = 0;
6cb153ca 1638 if (addr) {
43db362d 1639 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1640 if (err < 0)
1641 goto out_put;
230b1839 1642 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1643 msg.msg_namelen = addr_len;
1da177e4
LT
1644 }
1645 if (sock->file->f_flags & O_NONBLOCK)
1646 flags |= MSG_DONTWAIT;
1647 msg.msg_flags = flags;
d8725c86 1648 err = sock_sendmsg(sock, &msg);
1da177e4 1649
89bddce5 1650out_put:
de0fa95c 1651 fput_light(sock->file, fput_needed);
4387ff75 1652out:
1da177e4
LT
1653 return err;
1654}
1655
1656/*
89bddce5 1657 * Send a datagram down a socket.
1da177e4
LT
1658 */
1659
3e0fa65f 1660SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1661 unsigned int, flags)
1da177e4
LT
1662{
1663 return sys_sendto(fd, buff, len, flags, NULL, 0);
1664}
1665
1666/*
89bddce5 1667 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1668 * sender. We verify the buffers are writable and if needed move the
1669 * sender address from kernel to user space.
1670 */
1671
3e0fa65f 1672SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1673 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1674 int __user *, addr_len)
1da177e4
LT
1675{
1676 struct socket *sock;
1677 struct iovec iov;
1678 struct msghdr msg;
230b1839 1679 struct sockaddr_storage address;
89bddce5 1680 int err, err2;
6cb153ca
BL
1681 int fput_needed;
1682
602bd0e9
AV
1683 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1684 if (unlikely(err))
1685 return err;
de0fa95c 1686 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1687 if (!sock)
de0fa95c 1688 goto out;
1da177e4 1689
89bddce5
SH
1690 msg.msg_control = NULL;
1691 msg.msg_controllen = 0;
f3d33426
HFS
1692 /* Save some cycles and don't copy the address if not needed */
1693 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1694 /* We assume all kernel code knows the size of sockaddr_storage */
1695 msg.msg_namelen = 0;
130ed5d1 1696 msg.msg_iocb = NULL;
1da177e4
LT
1697 if (sock->file->f_flags & O_NONBLOCK)
1698 flags |= MSG_DONTWAIT;
602bd0e9 1699 err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags);
1da177e4 1700
89bddce5 1701 if (err >= 0 && addr != NULL) {
43db362d 1702 err2 = move_addr_to_user(&address,
230b1839 1703 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1704 if (err2 < 0)
1705 err = err2;
1da177e4 1706 }
de0fa95c
PE
1707
1708 fput_light(sock->file, fput_needed);
4387ff75 1709out:
1da177e4
LT
1710 return err;
1711}
1712
1713/*
89bddce5 1714 * Receive a datagram from a socket.
1da177e4
LT
1715 */
1716
b7c0ddf5
JG
1717SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1718 unsigned int, flags)
1da177e4
LT
1719{
1720 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1721}
1722
1723/*
1724 * Set a socket option. Because we don't know the option lengths we have
1725 * to pass the user mode parameter for the protocols to sort out.
1726 */
1727
20f37034
HC
1728SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1729 char __user *, optval, int, optlen)
1da177e4 1730{
6cb153ca 1731 int err, fput_needed;
1da177e4
LT
1732 struct socket *sock;
1733
1734 if (optlen < 0)
1735 return -EINVAL;
89bddce5
SH
1736
1737 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1738 if (sock != NULL) {
1739 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1740 if (err)
1741 goto out_put;
1da177e4
LT
1742
1743 if (level == SOL_SOCKET)
89bddce5
SH
1744 err =
1745 sock_setsockopt(sock, level, optname, optval,
1746 optlen);
1da177e4 1747 else
89bddce5
SH
1748 err =
1749 sock->ops->setsockopt(sock, level, optname, optval,
1750 optlen);
6cb153ca
BL
1751out_put:
1752 fput_light(sock->file, fput_needed);
1da177e4
LT
1753 }
1754 return err;
1755}
1756
1757/*
1758 * Get a socket option. Because we don't know the option lengths we have
1759 * to pass a user mode parameter for the protocols to sort out.
1760 */
1761
20f37034
HC
1762SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1763 char __user *, optval, int __user *, optlen)
1da177e4 1764{
6cb153ca 1765 int err, fput_needed;
1da177e4
LT
1766 struct socket *sock;
1767
89bddce5
SH
1768 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1769 if (sock != NULL) {
6cb153ca
BL
1770 err = security_socket_getsockopt(sock, level, optname);
1771 if (err)
1772 goto out_put;
1da177e4
LT
1773
1774 if (level == SOL_SOCKET)
89bddce5
SH
1775 err =
1776 sock_getsockopt(sock, level, optname, optval,
1777 optlen);
1da177e4 1778 else
89bddce5
SH
1779 err =
1780 sock->ops->getsockopt(sock, level, optname, optval,
1781 optlen);
6cb153ca
BL
1782out_put:
1783 fput_light(sock->file, fput_needed);
1da177e4
LT
1784 }
1785 return err;
1786}
1787
1da177e4
LT
1788/*
1789 * Shutdown a socket.
1790 */
1791
754fe8d2 1792SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1793{
6cb153ca 1794 int err, fput_needed;
1da177e4
LT
1795 struct socket *sock;
1796
89bddce5
SH
1797 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1798 if (sock != NULL) {
1da177e4 1799 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1800 if (!err)
1801 err = sock->ops->shutdown(sock, how);
1802 fput_light(sock->file, fput_needed);
1da177e4
LT
1803 }
1804 return err;
1805}
1806
89bddce5 1807/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1808 * fields which are the same type (int / unsigned) on our platforms.
1809 */
1810#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
1811#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1812#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1813
c71d8ebe
TH
1814struct used_address {
1815 struct sockaddr_storage name;
1816 unsigned int name_len;
1817};
1818
da184284
AV
1819static int copy_msghdr_from_user(struct msghdr *kmsg,
1820 struct user_msghdr __user *umsg,
1821 struct sockaddr __user **save_addr,
1822 struct iovec **iov)
1661bf36 1823{
08adb7da
AV
1824 struct sockaddr __user *uaddr;
1825 struct iovec __user *uiov;
c0371da6 1826 size_t nr_segs;
08adb7da
AV
1827 ssize_t err;
1828
1829 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1830 __get_user(uaddr, &umsg->msg_name) ||
1831 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1832 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1833 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1834 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1835 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1836 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1837 return -EFAULT;
dbb490b9 1838
08adb7da 1839 if (!uaddr)
6a2a2b3a
AS
1840 kmsg->msg_namelen = 0;
1841
dbb490b9
ML
1842 if (kmsg->msg_namelen < 0)
1843 return -EINVAL;
1844
1661bf36 1845 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1846 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1847
1848 if (save_addr)
1849 *save_addr = uaddr;
1850
1851 if (uaddr && kmsg->msg_namelen) {
1852 if (!save_addr) {
1853 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1854 kmsg->msg_name);
1855 if (err < 0)
1856 return err;
1857 }
1858 } else {
1859 kmsg->msg_name = NULL;
1860 kmsg->msg_namelen = 0;
1861 }
1862
c0371da6 1863 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1864 return -EMSGSIZE;
1865
0345f931 1866 kmsg->msg_iocb = NULL;
1867
da184284
AV
1868 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1869 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1870}
1871
666547ff 1872static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1873 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1874 struct used_address *used_address,
1875 unsigned int allowed_msghdr_flags)
1da177e4 1876{
89bddce5
SH
1877 struct compat_msghdr __user *msg_compat =
1878 (struct compat_msghdr __user *)msg;
230b1839 1879 struct sockaddr_storage address;
1da177e4 1880 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1881 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1882 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1883 /* 20 is size of ipv6_pktinfo */
1da177e4 1884 unsigned char *ctl_buf = ctl;
d8725c86 1885 int ctl_len;
08adb7da 1886 ssize_t err;
89bddce5 1887
08adb7da 1888 msg_sys->msg_name = &address;
1da177e4 1889
08449320 1890 if (MSG_CMSG_COMPAT & flags)
08adb7da 1891 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1892 else
08adb7da 1893 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1894 if (err < 0)
da184284 1895 return err;
1da177e4
LT
1896
1897 err = -ENOBUFS;
1898
228e548e 1899 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1900 goto out_freeiov;
28a94d8f 1901 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 1902 ctl_len = msg_sys->msg_controllen;
1da177e4 1903 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1904 err =
228e548e 1905 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1906 sizeof(ctl));
1da177e4
LT
1907 if (err)
1908 goto out_freeiov;
228e548e
AB
1909 ctl_buf = msg_sys->msg_control;
1910 ctl_len = msg_sys->msg_controllen;
1da177e4 1911 } else if (ctl_len) {
89bddce5 1912 if (ctl_len > sizeof(ctl)) {
1da177e4 1913 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1914 if (ctl_buf == NULL)
1da177e4
LT
1915 goto out_freeiov;
1916 }
1917 err = -EFAULT;
1918 /*
228e548e 1919 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1920 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1921 * checking falls down on this.
1922 */
fb8621bb 1923 if (copy_from_user(ctl_buf,
228e548e 1924 (void __user __force *)msg_sys->msg_control,
89bddce5 1925 ctl_len))
1da177e4 1926 goto out_freectl;
228e548e 1927 msg_sys->msg_control = ctl_buf;
1da177e4 1928 }
228e548e 1929 msg_sys->msg_flags = flags;
1da177e4
LT
1930
1931 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1932 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1933 /*
1934 * If this is sendmmsg() and current destination address is same as
1935 * previously succeeded address, omit asking LSM's decision.
1936 * used_address->name_len is initialized to UINT_MAX so that the first
1937 * destination address never matches.
1938 */
bc909d9d
MD
1939 if (used_address && msg_sys->msg_name &&
1940 used_address->name_len == msg_sys->msg_namelen &&
1941 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1942 used_address->name_len)) {
d8725c86 1943 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1944 goto out_freectl;
1945 }
d8725c86 1946 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1947 /*
1948 * If this is sendmmsg() and sending to current destination address was
1949 * successful, remember it.
1950 */
1951 if (used_address && err >= 0) {
1952 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1953 if (msg_sys->msg_name)
1954 memcpy(&used_address->name, msg_sys->msg_name,
1955 used_address->name_len);
c71d8ebe 1956 }
1da177e4
LT
1957
1958out_freectl:
89bddce5 1959 if (ctl_buf != ctl)
1da177e4
LT
1960 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1961out_freeiov:
da184284 1962 kfree(iov);
228e548e
AB
1963 return err;
1964}
1965
1966/*
1967 * BSD sendmsg interface
1968 */
1969
666547ff 1970long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
1971{
1972 int fput_needed, err;
1973 struct msghdr msg_sys;
1be374a0
AL
1974 struct socket *sock;
1975
1be374a0 1976 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
1977 if (!sock)
1978 goto out;
1979
28a94d8f 1980 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 1981
6cb153ca 1982 fput_light(sock->file, fput_needed);
89bddce5 1983out:
1da177e4
LT
1984 return err;
1985}
1986
666547ff 1987SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
1988{
1989 if (flags & MSG_CMSG_COMPAT)
1990 return -EINVAL;
1991 return __sys_sendmsg(fd, msg, flags);
1992}
1993
228e548e
AB
1994/*
1995 * Linux sendmmsg interface
1996 */
1997
1998int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
1999 unsigned int flags)
2000{
2001 int fput_needed, err, datagrams;
2002 struct socket *sock;
2003 struct mmsghdr __user *entry;
2004 struct compat_mmsghdr __user *compat_entry;
2005 struct msghdr msg_sys;
c71d8ebe 2006 struct used_address used_address;
f092276d 2007 unsigned int oflags = flags;
228e548e 2008
98382f41
AB
2009 if (vlen > UIO_MAXIOV)
2010 vlen = UIO_MAXIOV;
228e548e
AB
2011
2012 datagrams = 0;
2013
2014 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2015 if (!sock)
2016 return err;
2017
c71d8ebe 2018 used_address.name_len = UINT_MAX;
228e548e
AB
2019 entry = mmsg;
2020 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2021 err = 0;
f092276d 2022 flags |= MSG_BATCH;
228e548e
AB
2023
2024 while (datagrams < vlen) {
f092276d
TH
2025 if (datagrams == vlen - 1)
2026 flags = oflags;
2027
228e548e 2028 if (MSG_CMSG_COMPAT & flags) {
666547ff 2029 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2030 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2031 if (err < 0)
2032 break;
2033 err = __put_user(err, &compat_entry->msg_len);
2034 ++compat_entry;
2035 } else {
a7526eb5 2036 err = ___sys_sendmsg(sock,
666547ff 2037 (struct user_msghdr __user *)entry,
28a94d8f 2038 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2039 if (err < 0)
2040 break;
2041 err = put_user(err, &entry->msg_len);
2042 ++entry;
2043 }
2044
2045 if (err)
2046 break;
2047 ++datagrams;
a78cb84c 2048 cond_resched();
228e548e
AB
2049 }
2050
228e548e
AB
2051 fput_light(sock->file, fput_needed);
2052
728ffb86
AB
2053 /* We only return an error if no datagrams were able to be sent */
2054 if (datagrams != 0)
228e548e
AB
2055 return datagrams;
2056
228e548e
AB
2057 return err;
2058}
2059
2060SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2061 unsigned int, vlen, unsigned int, flags)
2062{
1be374a0
AL
2063 if (flags & MSG_CMSG_COMPAT)
2064 return -EINVAL;
228e548e
AB
2065 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2066}
2067
666547ff 2068static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2069 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2070{
89bddce5
SH
2071 struct compat_msghdr __user *msg_compat =
2072 (struct compat_msghdr __user *)msg;
1da177e4 2073 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2074 struct iovec *iov = iovstack;
1da177e4 2075 unsigned long cmsg_ptr;
08adb7da
AV
2076 int total_len, len;
2077 ssize_t err;
1da177e4
LT
2078
2079 /* kernel mode address */
230b1839 2080 struct sockaddr_storage addr;
1da177e4
LT
2081
2082 /* user mode address pointers */
2083 struct sockaddr __user *uaddr;
08adb7da 2084 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2085
08adb7da 2086 msg_sys->msg_name = &addr;
1da177e4 2087
f3d33426 2088 if (MSG_CMSG_COMPAT & flags)
08adb7da 2089 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2090 else
08adb7da 2091 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2092 if (err < 0)
da184284
AV
2093 return err;
2094 total_len = iov_iter_count(&msg_sys->msg_iter);
1da177e4 2095
a2e27255
ACM
2096 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2097 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2098
f3d33426
HFS
2099 /* We assume all kernel code knows the size of sockaddr_storage */
2100 msg_sys->msg_namelen = 0;
2101
1da177e4
LT
2102 if (sock->file->f_flags & O_NONBLOCK)
2103 flags |= MSG_DONTWAIT;
a2e27255
ACM
2104 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2105 total_len, flags);
1da177e4
LT
2106 if (err < 0)
2107 goto out_freeiov;
2108 len = err;
2109
2110 if (uaddr != NULL) {
43db362d 2111 err = move_addr_to_user(&addr,
a2e27255 2112 msg_sys->msg_namelen, uaddr,
89bddce5 2113 uaddr_len);
1da177e4
LT
2114 if (err < 0)
2115 goto out_freeiov;
2116 }
a2e27255 2117 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2118 COMPAT_FLAGS(msg));
1da177e4
LT
2119 if (err)
2120 goto out_freeiov;
2121 if (MSG_CMSG_COMPAT & flags)
a2e27255 2122 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2123 &msg_compat->msg_controllen);
2124 else
a2e27255 2125 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2126 &msg->msg_controllen);
2127 if (err)
2128 goto out_freeiov;
2129 err = len;
2130
2131out_freeiov:
da184284 2132 kfree(iov);
a2e27255
ACM
2133 return err;
2134}
2135
2136/*
2137 * BSD recvmsg interface
2138 */
2139
666547ff 2140long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2141{
2142 int fput_needed, err;
2143 struct msghdr msg_sys;
1be374a0
AL
2144 struct socket *sock;
2145
1be374a0 2146 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2147 if (!sock)
2148 goto out;
2149
a7526eb5 2150 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2151
6cb153ca 2152 fput_light(sock->file, fput_needed);
1da177e4
LT
2153out:
2154 return err;
2155}
2156
666547ff 2157SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2158 unsigned int, flags)
2159{
2160 if (flags & MSG_CMSG_COMPAT)
2161 return -EINVAL;
2162 return __sys_recvmsg(fd, msg, flags);
2163}
2164
a2e27255
ACM
2165/*
2166 * Linux recvmmsg interface
2167 */
2168
2169int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2170 unsigned int flags, struct timespec *timeout)
2171{
2172 int fput_needed, err, datagrams;
2173 struct socket *sock;
2174 struct mmsghdr __user *entry;
d7256d0e 2175 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2176 struct msghdr msg_sys;
2177 struct timespec end_time;
2178
2179 if (timeout &&
2180 poll_select_set_timeout(&end_time, timeout->tv_sec,
2181 timeout->tv_nsec))
2182 return -EINVAL;
2183
2184 datagrams = 0;
2185
2186 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2187 if (!sock)
2188 return err;
2189
2190 err = sock_error(sock->sk);
2191 if (err)
2192 goto out_put;
2193
2194 entry = mmsg;
d7256d0e 2195 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2196
2197 while (datagrams < vlen) {
2198 /*
2199 * No need to ask LSM for more than the first datagram.
2200 */
d7256d0e 2201 if (MSG_CMSG_COMPAT & flags) {
666547ff 2202 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2203 &msg_sys, flags & ~MSG_WAITFORONE,
2204 datagrams);
d7256d0e
JMG
2205 if (err < 0)
2206 break;
2207 err = __put_user(err, &compat_entry->msg_len);
2208 ++compat_entry;
2209 } else {
a7526eb5 2210 err = ___sys_recvmsg(sock,
666547ff 2211 (struct user_msghdr __user *)entry,
a7526eb5
AL
2212 &msg_sys, flags & ~MSG_WAITFORONE,
2213 datagrams);
d7256d0e
JMG
2214 if (err < 0)
2215 break;
2216 err = put_user(err, &entry->msg_len);
2217 ++entry;
2218 }
2219
a2e27255
ACM
2220 if (err)
2221 break;
a2e27255
ACM
2222 ++datagrams;
2223
71c5c159
BB
2224 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2225 if (flags & MSG_WAITFORONE)
2226 flags |= MSG_DONTWAIT;
2227
a2e27255
ACM
2228 if (timeout) {
2229 ktime_get_ts(timeout);
2230 *timeout = timespec_sub(end_time, *timeout);
2231 if (timeout->tv_sec < 0) {
2232 timeout->tv_sec = timeout->tv_nsec = 0;
2233 break;
2234 }
2235
2236 /* Timeout, return less than vlen datagrams */
2237 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2238 break;
2239 }
2240
2241 /* Out of band data, return right away */
2242 if (msg_sys.msg_flags & MSG_OOB)
2243 break;
a78cb84c 2244 cond_resched();
a2e27255
ACM
2245 }
2246
2247out_put:
2248 fput_light(sock->file, fput_needed);
1da177e4 2249
a2e27255
ACM
2250 if (err == 0)
2251 return datagrams;
2252
2253 if (datagrams != 0) {
2254 /*
2255 * We may return less entries than requested (vlen) if the
2256 * sock is non block and there aren't enough datagrams...
2257 */
2258 if (err != -EAGAIN) {
2259 /*
2260 * ... or if recvmsg returns an error after we
2261 * received some datagrams, where we record the
2262 * error to return on the next call or if the
2263 * app asks about it using getsockopt(SO_ERROR).
2264 */
2265 sock->sk->sk_err = -err;
2266 }
2267
2268 return datagrams;
2269 }
2270
2271 return err;
2272}
2273
2274SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2275 unsigned int, vlen, unsigned int, flags,
2276 struct timespec __user *, timeout)
2277{
2278 int datagrams;
2279 struct timespec timeout_sys;
2280
1be374a0
AL
2281 if (flags & MSG_CMSG_COMPAT)
2282 return -EINVAL;
2283
a2e27255
ACM
2284 if (!timeout)
2285 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2286
2287 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2288 return -EFAULT;
2289
2290 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2291
2292 if (datagrams > 0 &&
2293 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2294 datagrams = -EFAULT;
2295
2296 return datagrams;
2297}
2298
2299#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Size in bytes of the argument vector of each sys_socketcall() sub-call,
 * indexed by the call number.  AL(n) stands for "n unsigned longs".
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3),
	AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2),
	AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2310
2311/*
89bddce5 2312 * System call vectors.
1da177e4
LT
2313 *
2314 * Argument checking cleaned up. Saved 20% in size.
2315 * This function doesn't need to set the kernel lock because
89bddce5 2316 * it is set by the callees.
1da177e4
LT
2317 */
2318
3e0fa65f 2319SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2320{
2950fa9d 2321 unsigned long a[AUDITSC_ARGS];
89bddce5 2322 unsigned long a0, a1;
1da177e4 2323 int err;
47379052 2324 unsigned int len;
1da177e4 2325
228e548e 2326 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2327 return -EINVAL;
2328
47379052
AV
2329 len = nargs[call];
2330 if (len > sizeof(a))
2331 return -EINVAL;
2332
1da177e4 2333 /* copy_from_user should be SMP safe. */
47379052 2334 if (copy_from_user(a, args, len))
1da177e4 2335 return -EFAULT;
3ec3b2fb 2336
2950fa9d
CG
2337 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2338 if (err)
2339 return err;
3ec3b2fb 2340
89bddce5
SH
2341 a0 = a[0];
2342 a1 = a[1];
2343
2344 switch (call) {
2345 case SYS_SOCKET:
2346 err = sys_socket(a0, a1, a[2]);
2347 break;
2348 case SYS_BIND:
2349 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2350 break;
2351 case SYS_CONNECT:
2352 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2353 break;
2354 case SYS_LISTEN:
2355 err = sys_listen(a0, a1);
2356 break;
2357 case SYS_ACCEPT:
de11defe
UD
2358 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2359 (int __user *)a[2], 0);
89bddce5
SH
2360 break;
2361 case SYS_GETSOCKNAME:
2362 err =
2363 sys_getsockname(a0, (struct sockaddr __user *)a1,
2364 (int __user *)a[2]);
2365 break;
2366 case SYS_GETPEERNAME:
2367 err =
2368 sys_getpeername(a0, (struct sockaddr __user *)a1,
2369 (int __user *)a[2]);
2370 break;
2371 case SYS_SOCKETPAIR:
2372 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2373 break;
2374 case SYS_SEND:
2375 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2376 break;
2377 case SYS_SENDTO:
2378 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2379 (struct sockaddr __user *)a[4], a[5]);
2380 break;
2381 case SYS_RECV:
2382 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2383 break;
2384 case SYS_RECVFROM:
2385 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2386 (struct sockaddr __user *)a[4],
2387 (int __user *)a[5]);
2388 break;
2389 case SYS_SHUTDOWN:
2390 err = sys_shutdown(a0, a1);
2391 break;
2392 case SYS_SETSOCKOPT:
2393 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2394 break;
2395 case SYS_GETSOCKOPT:
2396 err =
2397 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2398 (int __user *)a[4]);
2399 break;
2400 case SYS_SENDMSG:
666547ff 2401 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2402 break;
228e548e
AB
2403 case SYS_SENDMMSG:
2404 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2405 break;
89bddce5 2406 case SYS_RECVMSG:
666547ff 2407 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2408 break;
a2e27255
ACM
2409 case SYS_RECVMMSG:
2410 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2411 (struct timespec __user *)a[4]);
2412 break;
de11defe
UD
2413 case SYS_ACCEPT4:
2414 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2415 (int __user *)a[2], a[3]);
aaca0bdc 2416 break;
89bddce5
SH
2417 default:
2418 err = -EINVAL;
2419 break;
1da177e4
LT
2420 }
2421 return err;
2422}
2423
89bddce5 2424#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2425
55737fda
SH
2426/**
2427 * sock_register - add a socket protocol handler
2428 * @ops: description of protocol
2429 *
1da177e4
LT
2430 * This function is called by a protocol handler that wants to
2431 * advertise its address family, and have it linked into the
e793c0f7 2432 * socket interface. The value ops->family corresponds to the
55737fda 2433 * socket system call protocol family.
1da177e4 2434 */
f0fd27d4 2435int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2436{
2437 int err;
2438
2439 if (ops->family >= NPROTO) {
3410f22e 2440 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2441 return -ENOBUFS;
2442 }
55737fda
SH
2443
2444 spin_lock(&net_family_lock);
190683a9
ED
2445 if (rcu_dereference_protected(net_families[ops->family],
2446 lockdep_is_held(&net_family_lock)))
55737fda
SH
2447 err = -EEXIST;
2448 else {
cf778b00 2449 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2450 err = 0;
2451 }
55737fda
SH
2452 spin_unlock(&net_family_lock);
2453
3410f22e 2454 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2455 return err;
2456}
c6d409cf 2457EXPORT_SYMBOL(sock_register);
1da177e4 2458
55737fda
SH
2459/**
2460 * sock_unregister - remove a protocol handler
2461 * @family: protocol family to remove
2462 *
1da177e4
LT
2463 * This function is called by a protocol handler that wants to
2464 * remove its address family, and have it unlinked from the
55737fda
SH
2465 * new socket creation.
2466 *
2467 * If protocol handler is a module, then it can use module reference
2468 * counts to protect against new references. If protocol handler is not
2469 * a module then it needs to provide its own protection in
2470 * the ops->create routine.
1da177e4 2471 */
f0fd27d4 2472void sock_unregister(int family)
1da177e4 2473{
f0fd27d4 2474 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2475
55737fda 2476 spin_lock(&net_family_lock);
a9b3cd7f 2477 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2478 spin_unlock(&net_family_lock);
2479
2480 synchronize_rcu();
2481
3410f22e 2482 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2483}
c6d409cf 2484EXPORT_SYMBOL(sock_unregister);
1da177e4 2485
77d76ea3 2486static int __init sock_init(void)
1da177e4 2487{
b3e19d92 2488 int err;
2ca794e5
EB
2489 /*
2490 * Initialize the network sysctl infrastructure.
2491 */
2492 err = net_sysctl_init();
2493 if (err)
2494 goto out;
b3e19d92 2495
1da177e4 2496 /*
89bddce5 2497 * Initialize skbuff SLAB cache
1da177e4
LT
2498 */
2499 skb_init();
1da177e4
LT
2500
2501 /*
89bddce5 2502 * Initialize the protocols module.
1da177e4
LT
2503 */
2504
2505 init_inodecache();
b3e19d92
NP
2506
2507 err = register_filesystem(&sock_fs_type);
2508 if (err)
2509 goto out_fs;
1da177e4 2510 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2511 if (IS_ERR(sock_mnt)) {
2512 err = PTR_ERR(sock_mnt);
2513 goto out_mount;
2514 }
77d76ea3
AK
2515
2516 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2517 */
2518
2519#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2520 err = netfilter_init();
2521 if (err)
2522 goto out;
1da177e4 2523#endif
cbeb321a 2524
408eccce 2525 ptp_classifier_init();
c1f19b51 2526
b3e19d92
NP
2527out:
2528 return err;
2529
2530out_mount:
2531 unregister_filesystem(&sock_fs_type);
2532out_fs:
2533 goto out;
1da177e4
LT
2534}
2535
77d76ea3
AK
2536core_initcall(sock_init); /* early initcall */
2537
1da177e4
LT
2538#ifdef CONFIG_PROC_FS
2539void socket_seq_show(struct seq_file *seq)
2540{
2541 int cpu;
2542 int counter = 0;
2543
6f912042 2544 for_each_possible_cpu(cpu)
89bddce5 2545 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2546
2547 /* It can be negative, by the way. 8) */
2548 if (counter < 0)
2549 counter = 0;
2550
2551 seq_printf(seq, "sockets: used %d\n", counter);
2552}
89bddce5 2553#endif /* CONFIG_PROC_FS */
1da177e4 2554
89bbfc95 2555#ifdef CONFIG_COMPAT
6b96018b 2556static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2557 unsigned int cmd, void __user *up)
7a229387 2558{
7a229387
AB
2559 mm_segment_t old_fs = get_fs();
2560 struct timeval ktv;
2561 int err;
2562
2563 set_fs(KERNEL_DS);
6b96018b 2564 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2565 set_fs(old_fs);
644595f8 2566 if (!err)
ed6fe9d6 2567 err = compat_put_timeval(&ktv, up);
644595f8 2568
7a229387
AB
2569 return err;
2570}
2571
6b96018b 2572static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2573 unsigned int cmd, void __user *up)
7a229387 2574{
7a229387
AB
2575 mm_segment_t old_fs = get_fs();
2576 struct timespec kts;
2577 int err;
2578
2579 set_fs(KERNEL_DS);
6b96018b 2580 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2581 set_fs(old_fs);
644595f8 2582 if (!err)
ed6fe9d6 2583 err = compat_put_timespec(&kts, up);
644595f8 2584
7a229387
AB
2585 return err;
2586}
2587
6b96018b 2588static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2589{
2590 struct ifreq __user *uifr;
2591 int err;
2592
2593 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2594 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2595 return -EFAULT;
2596
6b96018b 2597 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2598 if (err)
2599 return err;
2600
6b96018b 2601 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2602 return -EFAULT;
2603
2604 return 0;
2605}
2606
6b96018b 2607static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2608{
6b96018b 2609 struct compat_ifconf ifc32;
7a229387
AB
2610 struct ifconf ifc;
2611 struct ifconf __user *uifc;
6b96018b 2612 struct compat_ifreq __user *ifr32;
7a229387
AB
2613 struct ifreq __user *ifr;
2614 unsigned int i, j;
2615 int err;
2616
6b96018b 2617 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2618 return -EFAULT;
2619
43da5f2e 2620 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2621 if (ifc32.ifcbuf == 0) {
2622 ifc32.ifc_len = 0;
2623 ifc.ifc_len = 0;
2624 ifc.ifc_req = NULL;
2625 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2626 } else {
c6d409cf
ED
2627 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2628 sizeof(struct ifreq);
7a229387
AB
2629 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2630 ifc.ifc_len = len;
2631 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2632 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2633 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2634 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2635 return -EFAULT;
2636 ifr++;
2637 ifr32++;
2638 }
2639 }
2640 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2641 return -EFAULT;
2642
6b96018b 2643 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2644 if (err)
2645 return err;
2646
2647 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2648 return -EFAULT;
2649
2650 ifr = ifc.ifc_req;
2651 ifr32 = compat_ptr(ifc32.ifcbuf);
2652 for (i = 0, j = 0;
c6d409cf
ED
2653 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2654 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2655 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2656 return -EFAULT;
2657 ifr32++;
2658 ifr++;
2659 }
2660
2661 if (ifc32.ifcbuf == 0) {
2662 /* Translate from 64-bit structure multiple to
2663 * a 32-bit one.
2664 */
2665 i = ifc.ifc_len;
6b96018b 2666 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2667 ifc32.ifc_len = i;
2668 } else {
2669 ifc32.ifc_len = i;
2670 }
6b96018b 2671 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2672 return -EFAULT;
2673
2674 return 0;
2675}
2676
6b96018b 2677static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2678{
3a7da39d
BH
2679 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2680 bool convert_in = false, convert_out = false;
2681 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2682 struct ethtool_rxnfc __user *rxnfc;
7a229387 2683 struct ifreq __user *ifr;
3a7da39d
BH
2684 u32 rule_cnt = 0, actual_rule_cnt;
2685 u32 ethcmd;
7a229387 2686 u32 data;
3a7da39d 2687 int ret;
7a229387 2688
3a7da39d
BH
2689 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2690 return -EFAULT;
7a229387 2691
3a7da39d
BH
2692 compat_rxnfc = compat_ptr(data);
2693
2694 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2695 return -EFAULT;
2696
3a7da39d
BH
2697 /* Most ethtool structures are defined without padding.
2698 * Unfortunately struct ethtool_rxnfc is an exception.
2699 */
2700 switch (ethcmd) {
2701 default:
2702 break;
2703 case ETHTOOL_GRXCLSRLALL:
2704 /* Buffer size is variable */
2705 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2706 return -EFAULT;
2707 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2708 return -ENOMEM;
2709 buf_size += rule_cnt * sizeof(u32);
2710 /* fall through */
2711 case ETHTOOL_GRXRINGS:
2712 case ETHTOOL_GRXCLSRLCNT:
2713 case ETHTOOL_GRXCLSRULE:
55664f32 2714 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2715 convert_out = true;
2716 /* fall through */
2717 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2718 buf_size += sizeof(struct ethtool_rxnfc);
2719 convert_in = true;
2720 break;
2721 }
2722
2723 ifr = compat_alloc_user_space(buf_size);
954b1244 2724 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2725
2726 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2727 return -EFAULT;
2728
3a7da39d
BH
2729 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2730 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2731 return -EFAULT;
2732
3a7da39d 2733 if (convert_in) {
127fe533 2734 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2735 * fs.ring_cookie and at the end of fs, but nowhere else.
2736 */
127fe533
AD
2737 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2738 sizeof(compat_rxnfc->fs.m_ext) !=
2739 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2740 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2741 BUILD_BUG_ON(
2742 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2743 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2744 offsetof(struct ethtool_rxnfc, fs.location) -
2745 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2746
2747 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2748 (void __user *)(&rxnfc->fs.m_ext + 1) -
2749 (void __user *)rxnfc) ||
3a7da39d
BH
2750 copy_in_user(&rxnfc->fs.ring_cookie,
2751 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2752 (void __user *)(&rxnfc->fs.location + 1) -
2753 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2754 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2755 sizeof(rxnfc->rule_cnt)))
2756 return -EFAULT;
2757 }
2758
2759 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2760 if (ret)
2761 return ret;
2762
2763 if (convert_out) {
2764 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2765 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2766 (const void __user *)rxnfc) ||
3a7da39d
BH
2767 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2768 &rxnfc->fs.ring_cookie,
954b1244
SH
2769 (const void __user *)(&rxnfc->fs.location + 1) -
2770 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2771 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2772 sizeof(rxnfc->rule_cnt)))
2773 return -EFAULT;
2774
2775 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2776 /* As an optimisation, we only copy the actual
2777 * number of rules that the underlying
2778 * function returned. Since Mallory might
2779 * change the rule count in user memory, we
2780 * check that it is less than the rule count
2781 * originally given (as the user buffer size),
2782 * which has been range-checked.
2783 */
2784 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2785 return -EFAULT;
2786 if (actual_rule_cnt < rule_cnt)
2787 rule_cnt = actual_rule_cnt;
2788 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2789 &rxnfc->rule_locs[0],
2790 rule_cnt * sizeof(u32)))
2791 return -EFAULT;
2792 }
2793 }
2794
2795 return 0;
7a229387
AB
2796}
2797
7a50a240
AB
2798static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2799{
2800 void __user *uptr;
2801 compat_uptr_t uptr32;
2802 struct ifreq __user *uifr;
2803
c6d409cf 2804 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2805 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2806 return -EFAULT;
2807
2808 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2809 return -EFAULT;
2810
2811 uptr = compat_ptr(uptr32);
2812
2813 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2814 return -EFAULT;
2815
2816 return dev_ioctl(net, SIOCWANDEV, uifr);
2817}
2818
6b96018b
AB
2819static int bond_ioctl(struct net *net, unsigned int cmd,
2820 struct compat_ifreq __user *ifr32)
7a229387
AB
2821{
2822 struct ifreq kifr;
7a229387
AB
2823 mm_segment_t old_fs;
2824 int err;
7a229387
AB
2825
2826 switch (cmd) {
2827 case SIOCBONDENSLAVE:
2828 case SIOCBONDRELEASE:
2829 case SIOCBONDSETHWADDR:
2830 case SIOCBONDCHANGEACTIVE:
6b96018b 2831 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2832 return -EFAULT;
2833
2834 old_fs = get_fs();
c6d409cf 2835 set_fs(KERNEL_DS);
c3f52ae6 2836 err = dev_ioctl(net, cmd,
2837 (struct ifreq __user __force *) &kifr);
c6d409cf 2838 set_fs(old_fs);
7a229387
AB
2839
2840 return err;
7a229387 2841 default:
07d106d0 2842 return -ENOIOCTLCMD;
ccbd6a5a 2843 }
7a229387
AB
2844}
2845
590d4693
BH
2846/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2847static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2848 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2849{
2850 struct ifreq __user *u_ifreq64;
7a229387
AB
2851 char tmp_buf[IFNAMSIZ];
2852 void __user *data64;
2853 u32 data32;
2854
2855 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2856 IFNAMSIZ))
2857 return -EFAULT;
417c3522 2858 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2859 return -EFAULT;
2860 data64 = compat_ptr(data32);
2861
2862 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2863
7a229387
AB
2864 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2865 IFNAMSIZ))
2866 return -EFAULT;
417c3522 2867 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2868 return -EFAULT;
2869
6b96018b 2870 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2871}
2872
6b96018b
AB
2873static int dev_ifsioc(struct net *net, struct socket *sock,
2874 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2875{
a2116ed2 2876 struct ifreq __user *uifr;
7a229387
AB
2877 int err;
2878
a2116ed2
AB
2879 uifr = compat_alloc_user_space(sizeof(*uifr));
2880 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2881 return -EFAULT;
2882
2883 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2884
7a229387
AB
2885 if (!err) {
2886 switch (cmd) {
2887 case SIOCGIFFLAGS:
2888 case SIOCGIFMETRIC:
2889 case SIOCGIFMTU:
2890 case SIOCGIFMEM:
2891 case SIOCGIFHWADDR:
2892 case SIOCGIFINDEX:
2893 case SIOCGIFADDR:
2894 case SIOCGIFBRDADDR:
2895 case SIOCGIFDSTADDR:
2896 case SIOCGIFNETMASK:
fab2532b 2897 case SIOCGIFPFLAGS:
7a229387 2898 case SIOCGIFTXQLEN:
fab2532b
AB
2899 case SIOCGMIIPHY:
2900 case SIOCGMIIREG:
a2116ed2 2901 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2902 err = -EFAULT;
2903 break;
2904 }
2905 }
2906 return err;
2907}
2908
a2116ed2
AB
2909static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2910 struct compat_ifreq __user *uifr32)
2911{
2912 struct ifreq ifr;
2913 struct compat_ifmap __user *uifmap32;
2914 mm_segment_t old_fs;
2915 int err;
2916
2917 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2918 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2919 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2920 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2921 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2922 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2923 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2924 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2925 if (err)
2926 return -EFAULT;
2927
2928 old_fs = get_fs();
c6d409cf 2929 set_fs(KERNEL_DS);
c3f52ae6 2930 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2931 set_fs(old_fs);
a2116ed2
AB
2932
2933 if (cmd == SIOCGIFMAP && !err) {
2934 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2935 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2936 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2937 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2938 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2939 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2940 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2941 if (err)
2942 err = -EFAULT;
2943 }
2944 return err;
2945}
2946
7a229387 2947struct rtentry32 {
c6d409cf 2948 u32 rt_pad1;
7a229387
AB
2949 struct sockaddr rt_dst; /* target address */
2950 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2951 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2952 unsigned short rt_flags;
2953 short rt_pad2;
2954 u32 rt_pad3;
2955 unsigned char rt_tos;
2956 unsigned char rt_class;
2957 short rt_pad4;
2958 short rt_metric; /* +1 for binary compatibility! */
7a229387 2959 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
2960 u32 rt_mtu; /* per route MTU/Window */
2961 u32 rt_window; /* Window clamping */
7a229387
AB
2962 unsigned short rt_irtt; /* Initial RTT */
2963};
2964
2965struct in6_rtmsg32 {
2966 struct in6_addr rtmsg_dst;
2967 struct in6_addr rtmsg_src;
2968 struct in6_addr rtmsg_gateway;
2969 u32 rtmsg_type;
2970 u16 rtmsg_dst_len;
2971 u16 rtmsg_src_len;
2972 u32 rtmsg_metric;
2973 u32 rtmsg_info;
2974 u32 rtmsg_flags;
2975 s32 rtmsg_ifindex;
2976};
2977
6b96018b
AB
2978static int routing_ioctl(struct net *net, struct socket *sock,
2979 unsigned int cmd, void __user *argp)
7a229387
AB
2980{
2981 int ret;
2982 void *r = NULL;
2983 struct in6_rtmsg r6;
2984 struct rtentry r4;
2985 char devname[16];
2986 u32 rtdev;
2987 mm_segment_t old_fs = get_fs();
2988
6b96018b
AB
2989 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2990 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 2991 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 2992 3 * sizeof(struct in6_addr));
3ddc5b46
MD
2993 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
2994 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
2995 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
2996 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
2997 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
2998 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
2999 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3000
3001 r = (void *) &r6;
3002 } else { /* ipv4 */
6b96018b 3003 struct rtentry32 __user *ur4 = argp;
c6d409cf 3004 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3005 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3006 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3007 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3008 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3009 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3010 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3011 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3012 if (rtdev) {
c6d409cf 3013 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3014 r4.rt_dev = (char __user __force *)devname;
3015 devname[15] = 0;
7a229387
AB
3016 } else
3017 r4.rt_dev = NULL;
3018
3019 r = (void *) &r4;
3020 }
3021
3022 if (ret) {
3023 ret = -EFAULT;
3024 goto out;
3025 }
3026
c6d409cf 3027 set_fs(KERNEL_DS);
6b96018b 3028 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3029 set_fs(old_fs);
7a229387
AB
3030
3031out:
7a229387
AB
3032 return ret;
3033}
3034
3035/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3036 * for some operations; this forces use of the newer bridge-utils that
25985edc 3037 * use compatible ioctls
7a229387 3038 */
6b96018b 3039static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3040{
6b96018b 3041 compat_ulong_t tmp;
7a229387 3042
6b96018b 3043 if (get_user(tmp, argp))
7a229387
AB
3044 return -EFAULT;
3045 if (tmp == BRCTL_GET_VERSION)
3046 return BRCTL_VERSION + 1;
3047 return -EINVAL;
3048}
3049
6b96018b
AB
3050static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3051 unsigned int cmd, unsigned long arg)
3052{
3053 void __user *argp = compat_ptr(arg);
3054 struct sock *sk = sock->sk;
3055 struct net *net = sock_net(sk);
7a229387 3056
6b96018b 3057 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3058 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3059
3060 switch (cmd) {
3061 case SIOCSIFBR:
3062 case SIOCGIFBR:
3063 return old_bridge_ioctl(argp);
3064 case SIOCGIFNAME:
3065 return dev_ifname32(net, argp);
3066 case SIOCGIFCONF:
3067 return dev_ifconf(net, argp);
3068 case SIOCETHTOOL:
3069 return ethtool_ioctl(net, argp);
7a50a240
AB
3070 case SIOCWANDEV:
3071 return compat_siocwandev(net, argp);
a2116ed2
AB
3072 case SIOCGIFMAP:
3073 case SIOCSIFMAP:
3074 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3075 case SIOCBONDENSLAVE:
3076 case SIOCBONDRELEASE:
3077 case SIOCBONDSETHWADDR:
6b96018b
AB
3078 case SIOCBONDCHANGEACTIVE:
3079 return bond_ioctl(net, cmd, argp);
3080 case SIOCADDRT:
3081 case SIOCDELRT:
3082 return routing_ioctl(net, sock, cmd, argp);
3083 case SIOCGSTAMP:
3084 return do_siocgstamp(net, sock, cmd, argp);
3085 case SIOCGSTAMPNS:
3086 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3087 case SIOCBONDSLAVEINFOQUERY:
3088 case SIOCBONDINFOQUERY:
a2116ed2 3089 case SIOCSHWTSTAMP:
fd468c74 3090 case SIOCGHWTSTAMP:
590d4693 3091 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3092
3093 case FIOSETOWN:
3094 case SIOCSPGRP:
3095 case FIOGETOWN:
3096 case SIOCGPGRP:
3097 case SIOCBRADDBR:
3098 case SIOCBRDELBR:
3099 case SIOCGIFVLAN:
3100 case SIOCSIFVLAN:
3101 case SIOCADDDLCI:
3102 case SIOCDELDLCI:
3103 return sock_ioctl(file, cmd, arg);
3104
3105 case SIOCGIFFLAGS:
3106 case SIOCSIFFLAGS:
3107 case SIOCGIFMETRIC:
3108 case SIOCSIFMETRIC:
3109 case SIOCGIFMTU:
3110 case SIOCSIFMTU:
3111 case SIOCGIFMEM:
3112 case SIOCSIFMEM:
3113 case SIOCGIFHWADDR:
3114 case SIOCSIFHWADDR:
3115 case SIOCADDMULTI:
3116 case SIOCDELMULTI:
3117 case SIOCGIFINDEX:
6b96018b
AB
3118 case SIOCGIFADDR:
3119 case SIOCSIFADDR:
3120 case SIOCSIFHWBROADCAST:
6b96018b 3121 case SIOCDIFADDR:
6b96018b
AB
3122 case SIOCGIFBRDADDR:
3123 case SIOCSIFBRDADDR:
3124 case SIOCGIFDSTADDR:
3125 case SIOCSIFDSTADDR:
3126 case SIOCGIFNETMASK:
3127 case SIOCSIFNETMASK:
3128 case SIOCSIFPFLAGS:
3129 case SIOCGIFPFLAGS:
3130 case SIOCGIFTXQLEN:
3131 case SIOCSIFTXQLEN:
3132 case SIOCBRADDIF:
3133 case SIOCBRDELIF:
9177efd3
AB
3134 case SIOCSIFNAME:
3135 case SIOCGMIIPHY:
3136 case SIOCGMIIREG:
3137 case SIOCSMIIREG:
6b96018b 3138 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3139
6b96018b
AB
3140 case SIOCSARP:
3141 case SIOCGARP:
3142 case SIOCDARP:
6b96018b 3143 case SIOCATMARK:
9177efd3
AB
3144 return sock_do_ioctl(net, sock, cmd, arg);
3145 }
3146
6b96018b
AB
3147 return -ENOIOCTLCMD;
3148}
7a229387 3149
95c96174 3150static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3151 unsigned long arg)
89bbfc95
SP
3152{
3153 struct socket *sock = file->private_data;
3154 int ret = -ENOIOCTLCMD;
87de87d5
DM
3155 struct sock *sk;
3156 struct net *net;
3157
3158 sk = sock->sk;
3159 net = sock_net(sk);
89bbfc95
SP
3160
3161 if (sock->ops->compat_ioctl)
3162 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3163
87de87d5
DM
3164 if (ret == -ENOIOCTLCMD &&
3165 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3166 ret = compat_wext_handle_ioctl(net, cmd, arg);
3167
6b96018b
AB
3168 if (ret == -ENOIOCTLCMD)
3169 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3170
89bbfc95
SP
3171 return ret;
3172}
3173#endif
3174
ac5a488e
SS
3175int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3176{
3177 return sock->ops->bind(sock, addr, addrlen);
3178}
c6d409cf 3179EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3180
3181int kernel_listen(struct socket *sock, int backlog)
3182{
3183 return sock->ops->listen(sock, backlog);
3184}
c6d409cf 3185EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3186
3187int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3188{
3189 struct sock *sk = sock->sk;
3190 int err;
3191
3192 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3193 newsock);
3194 if (err < 0)
3195 goto done;
3196
3197 err = sock->ops->accept(sock, *newsock, flags);
3198 if (err < 0) {
3199 sock_release(*newsock);
fa8705b0 3200 *newsock = NULL;
ac5a488e
SS
3201 goto done;
3202 }
3203
3204 (*newsock)->ops = sock->ops;
1b08534e 3205 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3206
3207done:
3208 return err;
3209}
c6d409cf 3210EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3211
3212int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3213 int flags)
ac5a488e
SS
3214{
3215 return sock->ops->connect(sock, addr, addrlen, flags);
3216}
c6d409cf 3217EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3218
3219int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3220 int *addrlen)
3221{
3222 return sock->ops->getname(sock, addr, addrlen, 0);
3223}
c6d409cf 3224EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3225
3226int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3227 int *addrlen)
3228{
3229 return sock->ops->getname(sock, addr, addrlen, 1);
3230}
c6d409cf 3231EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3232
3233int kernel_getsockopt(struct socket *sock, int level, int optname,
3234 char *optval, int *optlen)
3235{
3236 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3237 char __user *uoptval;
3238 int __user *uoptlen;
ac5a488e
SS
3239 int err;
3240
fb8621bb
NK
3241 uoptval = (char __user __force *) optval;
3242 uoptlen = (int __user __force *) optlen;
3243
ac5a488e
SS
3244 set_fs(KERNEL_DS);
3245 if (level == SOL_SOCKET)
fb8621bb 3246 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3247 else
fb8621bb
NK
3248 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3249 uoptlen);
ac5a488e
SS
3250 set_fs(oldfs);
3251 return err;
3252}
c6d409cf 3253EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3254
3255int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3256 char *optval, unsigned int optlen)
ac5a488e
SS
3257{
3258 mm_segment_t oldfs = get_fs();
fb8621bb 3259 char __user *uoptval;
ac5a488e
SS
3260 int err;
3261
fb8621bb
NK
3262 uoptval = (char __user __force *) optval;
3263
ac5a488e
SS
3264 set_fs(KERNEL_DS);
3265 if (level == SOL_SOCKET)
fb8621bb 3266 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3267 else
fb8621bb 3268 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3269 optlen);
3270 set_fs(oldfs);
3271 return err;
3272}
c6d409cf 3273EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3274
3275int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3276 size_t size, int flags)
3277{
3278 if (sock->ops->sendpage)
3279 return sock->ops->sendpage(sock, page, offset, size, flags);
3280
3281 return sock_no_sendpage(sock, page, offset, size, flags);
3282}
c6d409cf 3283EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3284
3285int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3286{
3287 mm_segment_t oldfs = get_fs();
3288 int err;
3289
3290 set_fs(KERNEL_DS);
3291 err = sock->ops->ioctl(sock, cmd, arg);
3292 set_fs(oldfs);
3293
3294 return err;
3295}
c6d409cf 3296EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3297
91cf45f0
TM
3298int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3299{
3300 return sock->ops->shutdown(sock, how);
3301}
91cf45f0 3302EXPORT_SYMBOL(kernel_sock_shutdown);