Merge branch 'for-v3.7' of git://git.linaro.org/people/mszyprowski/linux-dma-mapping
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4
LT
72#include <linux/wanrouter.h>
73#include <linux/if_bridge.h>
20380731
ACM
74#include <linux/if_frad.h>
75#include <linux/if_vlan.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
108
1da177e4 109static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
110static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
111 unsigned long nr_segs, loff_t pos);
112static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
113 unsigned long nr_segs, loff_t pos);
89bddce5 114static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
115
116static int sock_close(struct inode *inode, struct file *file);
117static unsigned int sock_poll(struct file *file,
118 struct poll_table_struct *wait);
89bddce5 119static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
120#ifdef CONFIG_COMPAT
121static long compat_sock_ioctl(struct file *file,
89bddce5 122 unsigned int cmd, unsigned long arg);
89bbfc95 123#endif
1da177e4 124static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
125static ssize_t sock_sendpage(struct file *file, struct page *page,
126 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 127static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 128 struct pipe_inode_info *pipe, size_t len,
9c55e01c 129 unsigned int flags);
1da177e4 130
1da177e4
LT
131/*
132 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
133 * in the operation structures but are done directly via the socketcall() multiplexor.
134 */
135
da7071d7 136static const struct file_operations socket_file_ops = {
1da177e4
LT
137 .owner = THIS_MODULE,
138 .llseek = no_llseek,
139 .aio_read = sock_aio_read,
140 .aio_write = sock_aio_write,
141 .poll = sock_poll,
142 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
143#ifdef CONFIG_COMPAT
144 .compat_ioctl = compat_sock_ioctl,
145#endif
1da177e4
LT
146 .mmap = sock_mmap,
147 .open = sock_no_open, /* special open code to disallow open via /proc */
148 .release = sock_close,
149 .fasync = sock_fasync,
5274f052
JA
150 .sendpage = sock_sendpage,
151 .splice_write = generic_splice_sendpage,
9c55e01c 152 .splice_read = sock_splice_read,
1da177e4
LT
153};
154
155/*
156 * The protocol list. Each protocol is registered in here.
157 */
158
1da177e4 159static DEFINE_SPINLOCK(net_family_lock);
190683a9 160static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 161
1da177e4
LT
162/*
163 * Statistics counters of the socket lists
164 */
165
c6d409cf 166static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
167
168/*
89bddce5
SH
169 * Support routines.
170 * Move socket addresses back and forth across the kernel/user
171 * divide and look after the messy bits.
1da177e4
LT
172 */
173
1da177e4
LT
174/**
175 * move_addr_to_kernel - copy a socket address into kernel space
176 * @uaddr: Address in user space
177 * @kaddr: Address in kernel space
178 * @ulen: Length in user space
179 *
180 * The address is copied into kernel space. If the provided address is
181 * too long an error code of -EINVAL is returned. If the copy gives
182 * invalid addresses -EFAULT is returned. On a success 0 is returned.
183 */
184
43db362d 185int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 186{
230b1839 187 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 188 return -EINVAL;
89bddce5 189 if (ulen == 0)
1da177e4 190 return 0;
89bddce5 191 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 192 return -EFAULT;
3ec3b2fb 193 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
194}
195
196/**
197 * move_addr_to_user - copy an address to user space
198 * @kaddr: kernel space address
199 * @klen: length of address in kernel
200 * @uaddr: user space address
201 * @ulen: pointer to user length field
202 *
203 * The value pointed to by ulen on entry is the buffer length available.
204 * This is overwritten with the buffer space used. -EINVAL is returned
205 * if an overlong buffer is specified or a negative buffer size. -EFAULT
206 * is returned if either the buffer or the length field are not
207 * accessible.
208 * After copying the data up to the limit the user specifies, the true
209 * length of the data is written over the length limit the user
210 * specified. Zero is returned for a success.
211 */
89bddce5 212
43db362d 213static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 214 void __user *uaddr, int __user *ulen)
1da177e4
LT
215{
216 int err;
217 int len;
218
89bddce5
SH
219 err = get_user(len, ulen);
220 if (err)
1da177e4 221 return err;
89bddce5
SH
222 if (len > klen)
223 len = klen;
230b1839 224 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 225 return -EINVAL;
89bddce5 226 if (len) {
d6fe3945
SG
227 if (audit_sockaddr(klen, kaddr))
228 return -ENOMEM;
89bddce5 229 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
230 return -EFAULT;
231 }
232 /*
89bddce5
SH
233 * "fromlen shall refer to the value before truncation.."
234 * 1003.1g
1da177e4
LT
235 */
236 return __put_user(klen, ulen);
237}
238
e18b890b 239static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
240
241static struct inode *sock_alloc_inode(struct super_block *sb)
242{
243 struct socket_alloc *ei;
eaefd110 244 struct socket_wq *wq;
89bddce5 245
e94b1766 246 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
247 if (!ei)
248 return NULL;
eaefd110
ED
249 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
250 if (!wq) {
43815482
ED
251 kmem_cache_free(sock_inode_cachep, ei);
252 return NULL;
253 }
eaefd110
ED
254 init_waitqueue_head(&wq->wait);
255 wq->fasync_list = NULL;
256 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 257
1da177e4
LT
258 ei->socket.state = SS_UNCONNECTED;
259 ei->socket.flags = 0;
260 ei->socket.ops = NULL;
261 ei->socket.sk = NULL;
262 ei->socket.file = NULL;
1da177e4
LT
263
264 return &ei->vfs_inode;
265}
266
267static void sock_destroy_inode(struct inode *inode)
268{
43815482 269 struct socket_alloc *ei;
eaefd110 270 struct socket_wq *wq;
43815482
ED
271
272 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 273 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 274 kfree_rcu(wq, rcu);
43815482 275 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
276}
277
51cc5068 278static void init_once(void *foo)
1da177e4 279{
89bddce5 280 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 281
a35afb83 282 inode_init_once(&ei->vfs_inode);
1da177e4 283}
89bddce5 284
1da177e4
LT
285static int init_inodecache(void)
286{
287 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
288 sizeof(struct socket_alloc),
289 0,
290 (SLAB_HWCACHE_ALIGN |
291 SLAB_RECLAIM_ACCOUNT |
292 SLAB_MEM_SPREAD),
20c2df83 293 init_once);
1da177e4
LT
294 if (sock_inode_cachep == NULL)
295 return -ENOMEM;
296 return 0;
297}
298
b87221de 299static const struct super_operations sockfs_ops = {
c6d409cf
ED
300 .alloc_inode = sock_alloc_inode,
301 .destroy_inode = sock_destroy_inode,
302 .statfs = simple_statfs,
1da177e4
LT
303};
304
c23fbb6b
ED
305/*
306 * sockfs_dname() is called from d_path().
307 */
308static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
309{
310 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
311 dentry->d_inode->i_ino);
312}
313
3ba13d17 314static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 315 .d_dname = sockfs_dname,
1da177e4
LT
316};
317
c74a1cbb
AV
318static struct dentry *sockfs_mount(struct file_system_type *fs_type,
319 int flags, const char *dev_name, void *data)
320{
321 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
322 &sockfs_dentry_operations, SOCKFS_MAGIC);
323}
324
325static struct vfsmount *sock_mnt __read_mostly;
326
327static struct file_system_type sock_fs_type = {
328 .name = "sockfs",
329 .mount = sockfs_mount,
330 .kill_sb = kill_anon_super,
331};
332
1da177e4
LT
333/*
334 * Obtains the first available file descriptor and sets it up for use.
335 *
39d8c1b6
DM
336 * These functions create file structures and maps them to fd space
337 * of the current process. On success it returns file descriptor
1da177e4
LT
338 * and file struct implicitly stored in sock->file.
339 * Note that another thread may close file descriptor before we return
340 * from this function. We use the fact that now we do not refer
341 * to socket after mapping. If one day we will need it, this
342 * function will increment ref. count on file by 1.
343 *
344 * In any case returned fd MAY BE not valid!
345 * This race condition is unavoidable
346 * with shared fd spaces, we cannot solve it inside kernel,
347 * but we take care of internal coherence yet.
348 */
349
600e1779
MY
350static int sock_alloc_file(struct socket *sock, struct file **f, int flags,
351 const char *dname)
1da177e4 352{
7cbe66b6 353 struct qstr name = { .name = "" };
2c48b9c4 354 struct path path;
7cbe66b6 355 struct file *file;
1da177e4 356 int fd;
1da177e4 357
a677a039 358 fd = get_unused_fd_flags(flags);
7cbe66b6
AV
359 if (unlikely(fd < 0))
360 return fd;
1da177e4 361
600e1779
MY
362 if (dname) {
363 name.name = dname;
364 name.len = strlen(name.name);
365 } else if (sock->sk) {
366 name.name = sock->sk->sk_prot_creator->name;
367 name.len = strlen(name.name);
368 }
4b936885 369 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
2c48b9c4 370 if (unlikely(!path.dentry)) {
7cbe66b6 371 put_unused_fd(fd);
39d8c1b6 372 return -ENOMEM;
7cbe66b6 373 }
2c48b9c4 374 path.mnt = mntget(sock_mnt);
39d8c1b6 375
2c48b9c4 376 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 377 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 378
2c48b9c4 379 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 380 &socket_file_ops);
cc3808f8
AV
381 if (unlikely(!file)) {
382 /* drop dentry, keep inode */
7de9c6ee 383 ihold(path.dentry->d_inode);
2c48b9c4 384 path_put(&path);
cc3808f8
AV
385 put_unused_fd(fd);
386 return -ENFILE;
387 }
388
389 sock->file = file;
77d27200 390 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6
DM
391 file->f_pos = 0;
392 file->private_data = sock;
1da177e4 393
7cbe66b6
AV
394 *f = file;
395 return fd;
39d8c1b6
DM
396}
397
a677a039 398int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
399{
400 struct file *newfile;
600e1779 401 int fd = sock_alloc_file(sock, &newfile, flags, NULL);
39d8c1b6 402
7cbe66b6 403 if (likely(fd >= 0))
39d8c1b6 404 fd_install(fd, newfile);
7cbe66b6 405
1da177e4
LT
406 return fd;
407}
c6d409cf 408EXPORT_SYMBOL(sock_map_fd);
1da177e4 409
406a3c63 410struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 411{
6cb153ca
BL
412 if (file->f_op == &socket_file_ops)
413 return file->private_data; /* set in sock_map_fd */
414
23bb80d2
ED
415 *err = -ENOTSOCK;
416 return NULL;
6cb153ca 417}
406a3c63 418EXPORT_SYMBOL(sock_from_file);
6cb153ca 419
1da177e4 420/**
c6d409cf 421 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
422 * @fd: file handle
423 * @err: pointer to an error code return
424 *
425 * The file handle passed in is locked and the socket it is bound
426 * too is returned. If an error occurs the err pointer is overwritten
427 * with a negative errno code and NULL is returned. The function checks
428 * for both invalid handles and passing a handle which is not a socket.
429 *
430 * On a success the socket object pointer is returned.
431 */
432
433struct socket *sockfd_lookup(int fd, int *err)
434{
435 struct file *file;
1da177e4
LT
436 struct socket *sock;
437
89bddce5
SH
438 file = fget(fd);
439 if (!file) {
1da177e4
LT
440 *err = -EBADF;
441 return NULL;
442 }
89bddce5 443
6cb153ca
BL
444 sock = sock_from_file(file, err);
445 if (!sock)
1da177e4 446 fput(file);
6cb153ca
BL
447 return sock;
448}
c6d409cf 449EXPORT_SYMBOL(sockfd_lookup);
1da177e4 450
6cb153ca
BL
451static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
452{
453 struct file *file;
454 struct socket *sock;
455
3672558c 456 *err = -EBADF;
6cb153ca
BL
457 file = fget_light(fd, fput_needed);
458 if (file) {
459 sock = sock_from_file(file, err);
460 if (sock)
461 return sock;
462 fput_light(file, *fput_needed);
1da177e4 463 }
6cb153ca 464 return NULL;
1da177e4
LT
465}
466
600e1779
MY
467#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
468#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
469#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
470static ssize_t sockfs_getxattr(struct dentry *dentry,
471 const char *name, void *value, size_t size)
472{
473 const char *proto_name;
474 size_t proto_size;
475 int error;
476
477 error = -ENODATA;
478 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
479 proto_name = dentry->d_name.name;
480 proto_size = strlen(proto_name);
481
482 if (value) {
483 error = -ERANGE;
484 if (proto_size + 1 > size)
485 goto out;
486
487 strncpy(value, proto_name, proto_size + 1);
488 }
489 error = proto_size + 1;
490 }
491
492out:
493 return error;
494}
495
496static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
497 size_t size)
498{
499 ssize_t len;
500 ssize_t used = 0;
501
502 len = security_inode_listsecurity(dentry->d_inode, buffer, size);
503 if (len < 0)
504 return len;
505 used += len;
506 if (buffer) {
507 if (size < used)
508 return -ERANGE;
509 buffer += len;
510 }
511
512 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
513 used += len;
514 if (buffer) {
515 if (size < used)
516 return -ERANGE;
517 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
518 buffer += len;
519 }
520
521 return used;
522}
523
524static const struct inode_operations sockfs_inode_ops = {
525 .getxattr = sockfs_getxattr,
526 .listxattr = sockfs_listxattr,
527};
528
1da177e4
LT
529/**
530 * sock_alloc - allocate a socket
89bddce5 531 *
1da177e4
LT
532 * Allocate a new inode and socket object. The two are bound together
533 * and initialised. The socket is then returned. If we are out of inodes
534 * NULL is returned.
535 */
536
537static struct socket *sock_alloc(void)
538{
89bddce5
SH
539 struct inode *inode;
540 struct socket *sock;
1da177e4 541
a209dfc7 542 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
543 if (!inode)
544 return NULL;
545
546 sock = SOCKET_I(inode);
547
29a020d3 548 kmemcheck_annotate_bitfield(sock, type);
85fe4025 549 inode->i_ino = get_next_ino();
89bddce5 550 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
551 inode->i_uid = current_fsuid();
552 inode->i_gid = current_fsgid();
600e1779 553 inode->i_op = &sockfs_inode_ops;
1da177e4 554
19e8d69c 555 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
556 return sock;
557}
558
/*
 *	A socket inode is not supposed to be reachable through open(), but
 *	/proc offers a back door to it. Keep that door shut: every open
 *	attempt is refused with -ENXIO, whatever inode or file is passed.
 */

static int sock_no_open(struct inode *inode, struct file *filp)
{
	const int refused = -ENXIO;

	return refused;
}
569
4b6f5d20 570const struct file_operations bad_sock_fops = {
1da177e4
LT
571 .owner = THIS_MODULE,
572 .open = sock_no_open,
6038f373 573 .llseek = noop_llseek,
1da177e4
LT
574};
575
576/**
577 * sock_release - close a socket
578 * @sock: socket to close
579 *
580 * The socket is released from the protocol stack if it has a release
581 * callback, and the inode is then released if the socket is bound to
89bddce5 582 * an inode not a file.
1da177e4 583 */
89bddce5 584
1da177e4
LT
585void sock_release(struct socket *sock)
586{
587 if (sock->ops) {
588 struct module *owner = sock->ops->owner;
589
590 sock->ops->release(sock);
591 sock->ops = NULL;
592 module_put(owner);
593 }
594
eaefd110 595 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
1da177e4
LT
596 printk(KERN_ERR "sock_release: fasync list not empty!\n");
597
b09e786b
MP
598 if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
599 return;
600
19e8d69c 601 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
602 if (!sock->file) {
603 iput(SOCK_INODE(sock));
604 return;
605 }
89bddce5 606 sock->file = NULL;
1da177e4 607}
c6d409cf 608EXPORT_SYMBOL(sock_release);
1da177e4 609
2244d07b 610int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
20d49473 611{
2244d07b 612 *tx_flags = 0;
20d49473 613 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
2244d07b 614 *tx_flags |= SKBTX_HW_TSTAMP;
20d49473 615 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
2244d07b 616 *tx_flags |= SKBTX_SW_TSTAMP;
6e3e939f
JB
617 if (sock_flag(sk, SOCK_WIFI_STATUS))
618 *tx_flags |= SKBTX_WIFI_STATUS;
20d49473
PO
619 return 0;
620}
621EXPORT_SYMBOL(sock_tx_timestamp);
622
228e548e
AB
623static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
624 struct msghdr *msg, size_t size)
1da177e4
LT
625{
626 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4 627
f8451725
HX
628 sock_update_classid(sock->sk);
629
1da177e4
LT
630 si->sock = sock;
631 si->scm = NULL;
632 si->msg = msg;
633 si->size = size;
634
1da177e4
LT
635 return sock->ops->sendmsg(iocb, sock, msg, size);
636}
637
228e548e
AB
638static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
639 struct msghdr *msg, size_t size)
640{
641 int err = security_socket_sendmsg(sock, msg, size);
642
643 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
644}
645
1da177e4
LT
646int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
647{
648 struct kiocb iocb;
649 struct sock_iocb siocb;
650 int ret;
651
652 init_sync_kiocb(&iocb, NULL);
653 iocb.private = &siocb;
654 ret = __sock_sendmsg(&iocb, sock, msg, size);
655 if (-EIOCBQUEUED == ret)
656 ret = wait_on_sync_kiocb(&iocb);
657 return ret;
658}
c6d409cf 659EXPORT_SYMBOL(sock_sendmsg);
1da177e4 660
894dc24c 661static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e
AB
662{
663 struct kiocb iocb;
664 struct sock_iocb siocb;
665 int ret;
666
667 init_sync_kiocb(&iocb, NULL);
668 iocb.private = &siocb;
669 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
670 if (-EIOCBQUEUED == ret)
671 ret = wait_on_sync_kiocb(&iocb);
672 return ret;
673}
674
1da177e4
LT
675int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
676 struct kvec *vec, size_t num, size_t size)
677{
678 mm_segment_t oldfs = get_fs();
679 int result;
680
681 set_fs(KERNEL_DS);
682 /*
683 * the following is safe, since for compiler definitions of kvec and
684 * iovec are identical, yielding the same in-core layout and alignment
685 */
89bddce5 686 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
687 msg->msg_iovlen = num;
688 result = sock_sendmsg(sock, msg, size);
689 set_fs(oldfs);
690 return result;
691}
c6d409cf 692EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 693
20d49473
PO
694static int ktime2ts(ktime_t kt, struct timespec *ts)
695{
696 if (kt.tv64) {
697 *ts = ktime_to_timespec(kt);
698 return 1;
699 } else {
700 return 0;
701 }
702}
703
92f37fd2
ED
704/*
705 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
706 */
707void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
708 struct sk_buff *skb)
709{
20d49473
PO
710 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
711 struct timespec ts[3];
712 int empty = 1;
713 struct skb_shared_hwtstamps *shhwtstamps =
714 skb_hwtstamps(skb);
715
716 /* Race occurred between timestamp enabling and packet
717 receiving. Fill in the current time for now. */
718 if (need_software_tstamp && skb->tstamp.tv64 == 0)
719 __net_timestamp(skb);
720
721 if (need_software_tstamp) {
722 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
723 struct timeval tv;
724 skb_get_timestamp(skb, &tv);
725 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
726 sizeof(tv), &tv);
727 } else {
842509b8 728 skb_get_timestampns(skb, &ts[0]);
20d49473 729 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 730 sizeof(ts[0]), &ts[0]);
20d49473
PO
731 }
732 }
733
734
735 memset(ts, 0, sizeof(ts));
736 if (skb->tstamp.tv64 &&
737 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
738 skb_get_timestampns(skb, ts + 0);
739 empty = 0;
740 }
741 if (shhwtstamps) {
742 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
743 ktime2ts(shhwtstamps->syststamp, ts + 1))
744 empty = 0;
745 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
746 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
747 empty = 0;
92f37fd2 748 }
20d49473
PO
749 if (!empty)
750 put_cmsg(msg, SOL_SOCKET,
751 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 752}
7c81fd8b
ACM
753EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
754
6e3e939f
JB
755void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
756 struct sk_buff *skb)
757{
758 int ack;
759
760 if (!sock_flag(sk, SOCK_WIFI_STATUS))
761 return;
762 if (!skb->wifi_acked_valid)
763 return;
764
765 ack = skb->wifi_acked;
766
767 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
768}
769EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
770
11165f14 771static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
772 struct sk_buff *skb)
3b885787
NH
773{
774 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
775 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
776 sizeof(__u32), &skb->dropcount);
777}
778
/*
 *	Convenience wrapper: emit the receive timestamp control message(s)
 *	first, then the drop-count control message.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 786
a2e27255
ACM
787static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
788 struct msghdr *msg, size_t size, int flags)
1da177e4 789{
1da177e4
LT
790 struct sock_iocb *si = kiocb_to_siocb(iocb);
791
f8451725
HX
792 sock_update_classid(sock->sk);
793
1da177e4
LT
794 si->sock = sock;
795 si->scm = NULL;
796 si->msg = msg;
797 si->size = size;
798 si->flags = flags;
799
1da177e4
LT
800 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
801}
802
a2e27255
ACM
803static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
804 struct msghdr *msg, size_t size, int flags)
805{
806 int err = security_socket_recvmsg(sock, msg, size, flags);
807
808 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
809}
810
89bddce5 811int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
812 size_t size, int flags)
813{
814 struct kiocb iocb;
815 struct sock_iocb siocb;
816 int ret;
817
89bddce5 818 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
819 iocb.private = &siocb;
820 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
821 if (-EIOCBQUEUED == ret)
822 ret = wait_on_sync_kiocb(&iocb);
823 return ret;
824}
c6d409cf 825EXPORT_SYMBOL(sock_recvmsg);
1da177e4 826
a2e27255
ACM
827static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
828 size_t size, int flags)
829{
830 struct kiocb iocb;
831 struct sock_iocb siocb;
832 int ret;
833
834 init_sync_kiocb(&iocb, NULL);
835 iocb.private = &siocb;
836 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
837 if (-EIOCBQUEUED == ret)
838 ret = wait_on_sync_kiocb(&iocb);
839 return ret;
840}
841
c1249c0a
ML
842/**
843 * kernel_recvmsg - Receive a message from a socket (kernel space)
844 * @sock: The socket to receive the message from
845 * @msg: Received message
846 * @vec: Input s/g array for message data
847 * @num: Size of input s/g array
848 * @size: Number of bytes to read
849 * @flags: Message flags (MSG_DONTWAIT, etc...)
850 *
851 * On return the msg structure contains the scatter/gather array passed in the
852 * vec argument. The array is modified so that it consists of the unfilled
853 * portion of the original array.
854 *
855 * The returned value is the total number of bytes received, or an error.
856 */
89bddce5
SH
857int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
858 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
859{
860 mm_segment_t oldfs = get_fs();
861 int result;
862
863 set_fs(KERNEL_DS);
864 /*
865 * the following is safe, since for compiler definitions of kvec and
866 * iovec are identical, yielding the same in-core layout and alignment
867 */
89bddce5 868 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
869 result = sock_recvmsg(sock, msg, size, flags);
870 set_fs(oldfs);
871 return result;
872}
c6d409cf 873EXPORT_SYMBOL(kernel_recvmsg);
1da177e4
LT
874
875static void sock_aio_dtor(struct kiocb *iocb)
876{
877 kfree(iocb->private);
878}
879
ce1d4d3e
CH
880static ssize_t sock_sendpage(struct file *file, struct page *page,
881 int offset, size_t size, loff_t *ppos, int more)
1da177e4 882{
1da177e4
LT
883 struct socket *sock;
884 int flags;
885
ce1d4d3e
CH
886 sock = file->private_data;
887
35f9c09f
ED
888 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
889 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
890 flags |= more;
ce1d4d3e 891
e6949583 892 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 893}
1da177e4 894
9c55e01c 895static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 896 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
897 unsigned int flags)
898{
899 struct socket *sock = file->private_data;
900
997b37da
RDC
901 if (unlikely(!sock->ops->splice_read))
902 return -EINVAL;
903
f8451725
HX
904 sock_update_classid(sock->sk);
905
9c55e01c
JA
906 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
907}
908
ce1d4d3e 909static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 910 struct sock_iocb *siocb)
ce1d4d3e
CH
911{
912 if (!is_sync_kiocb(iocb)) {
913 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
914 if (!siocb)
915 return NULL;
1da177e4
LT
916 iocb->ki_dtor = sock_aio_dtor;
917 }
1da177e4 918
ce1d4d3e 919 siocb->kiocb = iocb;
ce1d4d3e
CH
920 iocb->private = siocb;
921 return siocb;
1da177e4
LT
922}
923
ce1d4d3e 924static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
925 struct file *file, const struct iovec *iov,
926 unsigned long nr_segs)
ce1d4d3e
CH
927{
928 struct socket *sock = file->private_data;
929 size_t size = 0;
930 int i;
1da177e4 931
89bddce5
SH
932 for (i = 0; i < nr_segs; i++)
933 size += iov[i].iov_len;
1da177e4 934
ce1d4d3e
CH
935 msg->msg_name = NULL;
936 msg->msg_namelen = 0;
937 msg->msg_control = NULL;
938 msg->msg_controllen = 0;
89bddce5 939 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
940 msg->msg_iovlen = nr_segs;
941 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
942
943 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
944}
945
027445c3
BP
946static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
947 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
948{
949 struct sock_iocb siocb, *x;
950
1da177e4
LT
951 if (pos != 0)
952 return -ESPIPE;
027445c3
BP
953
954 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
955 return 0;
956
027445c3
BP
957
958 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
959 if (!x)
960 return -ENOMEM;
027445c3 961 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
962}
963
ce1d4d3e 964static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
965 struct file *file, const struct iovec *iov,
966 unsigned long nr_segs)
1da177e4 967{
ce1d4d3e
CH
968 struct socket *sock = file->private_data;
969 size_t size = 0;
970 int i;
1da177e4 971
89bddce5
SH
972 for (i = 0; i < nr_segs; i++)
973 size += iov[i].iov_len;
1da177e4 974
ce1d4d3e
CH
975 msg->msg_name = NULL;
976 msg->msg_namelen = 0;
977 msg->msg_control = NULL;
978 msg->msg_controllen = 0;
89bddce5 979 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
980 msg->msg_iovlen = nr_segs;
981 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
982 if (sock->type == SOCK_SEQPACKET)
983 msg->msg_flags |= MSG_EOR;
1da177e4 984
ce1d4d3e 985 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
986}
987
027445c3
BP
988static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
989 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
990{
991 struct sock_iocb siocb, *x;
1da177e4 992
ce1d4d3e
CH
993 if (pos != 0)
994 return -ESPIPE;
027445c3 995
027445c3 996 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
997 if (!x)
998 return -ENOMEM;
1da177e4 999
027445c3 1000 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
1001}
1002
1da177e4
LT
1003/*
1004 * Atomic setting of ioctl hooks to avoid race
1005 * with module unload.
1006 */
1007
4a3e2f71 1008static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1009static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1010
881d966b 1011void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1012{
4a3e2f71 1013 mutex_lock(&br_ioctl_mutex);
1da177e4 1014 br_ioctl_hook = hook;
4a3e2f71 1015 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1016}
1017EXPORT_SYMBOL(brioctl_set);
1018
4a3e2f71 1019static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1020static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1021
881d966b 1022void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1023{
4a3e2f71 1024 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1025 vlan_ioctl_hook = hook;
4a3e2f71 1026 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1027}
1028EXPORT_SYMBOL(vlan_ioctl_set);
1029
4a3e2f71 1030static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1031static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1032
89bddce5 1033void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1034{
4a3e2f71 1035 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1036 dlci_ioctl_hook = hook;
4a3e2f71 1037 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1038}
1039EXPORT_SYMBOL(dlci_ioctl_set);
1040
6b96018b
AB
1041static long sock_do_ioctl(struct net *net, struct socket *sock,
1042 unsigned int cmd, unsigned long arg)
1043{
1044 int err;
1045 void __user *argp = (void __user *)arg;
1046
1047 err = sock->ops->ioctl(sock, cmd, arg);
1048
1049 /*
1050 * If this ioctl is unknown try to hand it down
1051 * to the NIC driver.
1052 */
1053 if (err == -ENOIOCTLCMD)
1054 err = dev_ioctl(net, cmd, argp);
1055
1056 return err;
1057}
1058
1da177e4
LT
1059/*
1060 * With an ioctl, arg may well be a user mode pointer, but we don't know
1061 * what to do with it - that's up to the protocol still.
1062 */
1063
1064static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1065{
1066 struct socket *sock;
881d966b 1067 struct sock *sk;
1da177e4
LT
1068 void __user *argp = (void __user *)arg;
1069 int pid, err;
881d966b 1070 struct net *net;
1da177e4 1071
b69aee04 1072 sock = file->private_data;
881d966b 1073 sk = sock->sk;
3b1e0a65 1074 net = sock_net(sk);
1da177e4 1075 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1076 err = dev_ioctl(net, cmd, argp);
1da177e4 1077 } else
3d23e349 1078#ifdef CONFIG_WEXT_CORE
1da177e4 1079 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1080 err = dev_ioctl(net, cmd, argp);
1da177e4 1081 } else
3d23e349 1082#endif
89bddce5 1083 switch (cmd) {
1da177e4
LT
1084 case FIOSETOWN:
1085 case SIOCSPGRP:
1086 err = -EFAULT;
1087 if (get_user(pid, (int __user *)argp))
1088 break;
1089 err = f_setown(sock->file, pid, 1);
1090 break;
1091 case FIOGETOWN:
1092 case SIOCGPGRP:
609d7fa9 1093 err = put_user(f_getown(sock->file),
89bddce5 1094 (int __user *)argp);
1da177e4
LT
1095 break;
1096 case SIOCGIFBR:
1097 case SIOCSIFBR:
1098 case SIOCBRADDBR:
1099 case SIOCBRDELBR:
1100 err = -ENOPKG;
1101 if (!br_ioctl_hook)
1102 request_module("bridge");
1103
4a3e2f71 1104 mutex_lock(&br_ioctl_mutex);
89bddce5 1105 if (br_ioctl_hook)
881d966b 1106 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1107 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1108 break;
1109 case SIOCGIFVLAN:
1110 case SIOCSIFVLAN:
1111 err = -ENOPKG;
1112 if (!vlan_ioctl_hook)
1113 request_module("8021q");
1114
4a3e2f71 1115 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1116 if (vlan_ioctl_hook)
881d966b 1117 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1118 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1119 break;
1da177e4
LT
1120 case SIOCADDDLCI:
1121 case SIOCDELDLCI:
1122 err = -ENOPKG;
1123 if (!dlci_ioctl_hook)
1124 request_module("dlci");
1125
7512cbf6
PE
1126 mutex_lock(&dlci_ioctl_mutex);
1127 if (dlci_ioctl_hook)
1da177e4 1128 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1129 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1130 break;
1131 default:
6b96018b 1132 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1133 break;
89bddce5 1134 }
1da177e4
LT
1135 return err;
1136}
1137
1138int sock_create_lite(int family, int type, int protocol, struct socket **res)
1139{
1140 int err;
1141 struct socket *sock = NULL;
89bddce5 1142
1da177e4
LT
1143 err = security_socket_create(family, type, protocol, 1);
1144 if (err)
1145 goto out;
1146
1147 sock = sock_alloc();
1148 if (!sock) {
1149 err = -ENOMEM;
1150 goto out;
1151 }
1152
1da177e4 1153 sock->type = type;
7420ed23
VY
1154 err = security_socket_post_create(sock, family, type, protocol, 1);
1155 if (err)
1156 goto out_release;
1157
1da177e4
LT
1158out:
1159 *res = sock;
1160 return err;
7420ed23
VY
1161out_release:
1162 sock_release(sock);
1163 sock = NULL;
1164 goto out;
1da177e4 1165}
c6d409cf 1166EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1167
1168/* No kernel lock held - perfect */
89bddce5 1169static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1170{
1171 struct socket *sock;
1172
1173 /*
89bddce5 1174 * We can't return errors to poll, so it's either yes or no.
1da177e4 1175 */
b69aee04 1176 sock = file->private_data;
1da177e4
LT
1177 return sock->ops->poll(file, sock, wait);
1178}
1179
89bddce5 1180static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1181{
b69aee04 1182 struct socket *sock = file->private_data;
1da177e4
LT
1183
1184 return sock->ops->mmap(file, sock, vma);
1185}
1186
20380731 1187static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1188{
1189 /*
89bddce5
SH
1190 * It was possible the inode is NULL we were
1191 * closing an unfinished socket.
1da177e4
LT
1192 */
1193
89bddce5 1194 if (!inode) {
1da177e4
LT
1195 printk(KERN_DEBUG "sock_close: NULL inode\n");
1196 return 0;
1197 }
1da177e4
LT
1198 sock_release(SOCKET_I(inode));
1199 return 0;
1200}
1201
1202/*
1203 * Update the socket async list
1204 *
1205 * Fasync_list locking strategy.
1206 *
1207 * 1. fasync_list is modified only under process context socket lock
1208 * i.e. under semaphore.
1209 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1210 * or under socket lock
1da177e4
LT
1211 */
1212
1213static int sock_fasync(int fd, struct file *filp, int on)
1214{
989a2979
ED
1215 struct socket *sock = filp->private_data;
1216 struct sock *sk = sock->sk;
eaefd110 1217 struct socket_wq *wq;
1da177e4 1218
989a2979 1219 if (sk == NULL)
1da177e4 1220 return -EINVAL;
1da177e4
LT
1221
1222 lock_sock(sk);
eaefd110
ED
1223 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1224 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1225
eaefd110 1226 if (!wq->fasync_list)
989a2979
ED
1227 sock_reset_flag(sk, SOCK_FASYNC);
1228 else
bcdce719 1229 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1230
989a2979 1231 release_sock(sk);
1da177e4
LT
1232 return 0;
1233}
1234
43815482 1235/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1236
1237int sock_wake_async(struct socket *sock, int how, int band)
1238{
43815482
ED
1239 struct socket_wq *wq;
1240
1241 if (!sock)
1242 return -1;
1243 rcu_read_lock();
1244 wq = rcu_dereference(sock->wq);
1245 if (!wq || !wq->fasync_list) {
1246 rcu_read_unlock();
1da177e4 1247 return -1;
43815482 1248 }
89bddce5 1249 switch (how) {
8d8ad9d7 1250 case SOCK_WAKE_WAITD:
1da177e4
LT
1251 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1252 break;
1253 goto call_kill;
8d8ad9d7 1254 case SOCK_WAKE_SPACE:
1da177e4
LT
1255 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1256 break;
1257 /* fall through */
8d8ad9d7 1258 case SOCK_WAKE_IO:
89bddce5 1259call_kill:
43815482 1260 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1261 break;
8d8ad9d7 1262 case SOCK_WAKE_URG:
43815482 1263 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1264 }
43815482 1265 rcu_read_unlock();
1da177e4
LT
1266 return 0;
1267}
c6d409cf 1268EXPORT_SYMBOL(sock_wake_async);
1da177e4 1269
721db93a 1270int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1271 struct socket **res, int kern)
1da177e4
LT
1272{
1273 int err;
1274 struct socket *sock;
55737fda 1275 const struct net_proto_family *pf;
1da177e4
LT
1276
1277 /*
89bddce5 1278 * Check protocol is in range
1da177e4
LT
1279 */
1280 if (family < 0 || family >= NPROTO)
1281 return -EAFNOSUPPORT;
1282 if (type < 0 || type >= SOCK_MAX)
1283 return -EINVAL;
1284
1285 /* Compatibility.
1286
1287 This uglymoron is moved from INET layer to here to avoid
1288 deadlock in module load.
1289 */
1290 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1291 static int warned;
1da177e4
LT
1292 if (!warned) {
1293 warned = 1;
89bddce5
SH
1294 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1295 current->comm);
1da177e4
LT
1296 }
1297 family = PF_PACKET;
1298 }
1299
1300 err = security_socket_create(family, type, protocol, kern);
1301 if (err)
1302 return err;
89bddce5 1303
55737fda
SH
1304 /*
1305 * Allocate the socket and allow the family to set things up. if
1306 * the protocol is 0, the family is instructed to select an appropriate
1307 * default.
1308 */
1309 sock = sock_alloc();
1310 if (!sock) {
e87cc472 1311 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1312 return -ENFILE; /* Not exactly a match, but its the
1313 closest posix thing */
1314 }
1315
1316 sock->type = type;
1317
95a5afca 1318#ifdef CONFIG_MODULES
89bddce5
SH
1319 /* Attempt to load a protocol module if the find failed.
1320 *
1321 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1322 * requested real, full-featured networking support upon configuration.
1323 * Otherwise module support will break!
1324 */
190683a9 1325 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1326 request_module("net-pf-%d", family);
1da177e4
LT
1327#endif
1328
55737fda
SH
1329 rcu_read_lock();
1330 pf = rcu_dereference(net_families[family]);
1331 err = -EAFNOSUPPORT;
1332 if (!pf)
1333 goto out_release;
1da177e4
LT
1334
1335 /*
1336 * We will call the ->create function, that possibly is in a loadable
1337 * module, so we have to bump that loadable module refcnt first.
1338 */
55737fda 1339 if (!try_module_get(pf->owner))
1da177e4
LT
1340 goto out_release;
1341
55737fda
SH
1342 /* Now protected by module ref count */
1343 rcu_read_unlock();
1344
3f378b68 1345 err = pf->create(net, sock, protocol, kern);
55737fda 1346 if (err < 0)
1da177e4 1347 goto out_module_put;
a79af59e 1348
1da177e4
LT
1349 /*
1350 * Now to bump the refcnt of the [loadable] module that owns this
1351 * socket at sock_release time we decrement its refcnt.
1352 */
55737fda
SH
1353 if (!try_module_get(sock->ops->owner))
1354 goto out_module_busy;
1355
1da177e4
LT
1356 /*
1357 * Now that we're done with the ->create function, the [loadable]
1358 * module can have its refcnt decremented
1359 */
55737fda 1360 module_put(pf->owner);
7420ed23
VY
1361 err = security_socket_post_create(sock, family, type, protocol, kern);
1362 if (err)
3b185525 1363 goto out_sock_release;
55737fda 1364 *res = sock;
1da177e4 1365
55737fda
SH
1366 return 0;
1367
1368out_module_busy:
1369 err = -EAFNOSUPPORT;
1da177e4 1370out_module_put:
55737fda
SH
1371 sock->ops = NULL;
1372 module_put(pf->owner);
1373out_sock_release:
1da177e4 1374 sock_release(sock);
55737fda
SH
1375 return err;
1376
1377out_release:
1378 rcu_read_unlock();
1379 goto out_sock_release;
1da177e4 1380}
721db93a 1381EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1382
1383int sock_create(int family, int type, int protocol, struct socket **res)
1384{
1b8d7ae4 1385 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1386}
c6d409cf 1387EXPORT_SYMBOL(sock_create);
1da177e4
LT
1388
1389int sock_create_kern(int family, int type, int protocol, struct socket **res)
1390{
1b8d7ae4 1391 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1392}
c6d409cf 1393EXPORT_SYMBOL(sock_create_kern);
1da177e4 1394
3e0fa65f 1395SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1396{
1397 int retval;
1398 struct socket *sock;
a677a039
UD
1399 int flags;
1400
e38b36f3
UD
1401 /* Check the SOCK_* constants for consistency. */
1402 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1403 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1404 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1405 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1406
a677a039 1407 flags = type & ~SOCK_TYPE_MASK;
77d27200 1408 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1409 return -EINVAL;
1410 type &= SOCK_TYPE_MASK;
1da177e4 1411
aaca0bdc
UD
1412 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1413 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1414
1da177e4
LT
1415 retval = sock_create(family, type, protocol, &sock);
1416 if (retval < 0)
1417 goto out;
1418
77d27200 1419 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1420 if (retval < 0)
1421 goto out_release;
1422
1423out:
1424 /* It may be already another descriptor 8) Not kernel problem. */
1425 return retval;
1426
1427out_release:
1428 sock_release(sock);
1429 return retval;
1430}
1431
1432/*
1433 * Create a pair of connected sockets.
1434 */
1435
3e0fa65f
HC
1436SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1437 int __user *, usockvec)
1da177e4
LT
1438{
1439 struct socket *sock1, *sock2;
1440 int fd1, fd2, err;
db349509 1441 struct file *newfile1, *newfile2;
a677a039
UD
1442 int flags;
1443
1444 flags = type & ~SOCK_TYPE_MASK;
77d27200 1445 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1446 return -EINVAL;
1447 type &= SOCK_TYPE_MASK;
1da177e4 1448
aaca0bdc
UD
1449 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1450 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1451
1da177e4
LT
1452 /*
1453 * Obtain the first socket and check if the underlying protocol
1454 * supports the socketpair call.
1455 */
1456
1457 err = sock_create(family, type, protocol, &sock1);
1458 if (err < 0)
1459 goto out;
1460
1461 err = sock_create(family, type, protocol, &sock2);
1462 if (err < 0)
1463 goto out_release_1;
1464
1465 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1466 if (err < 0)
1da177e4
LT
1467 goto out_release_both;
1468
600e1779 1469 fd1 = sock_alloc_file(sock1, &newfile1, flags, NULL);
bf3c23d1
DM
1470 if (unlikely(fd1 < 0)) {
1471 err = fd1;
db349509 1472 goto out_release_both;
bf3c23d1 1473 }
1da177e4 1474
600e1779 1475 fd2 = sock_alloc_file(sock2, &newfile2, flags, NULL);
198de4d7
AV
1476 if (unlikely(fd2 < 0)) {
1477 err = fd2;
1478 fput(newfile1);
1479 put_unused_fd(fd1);
1480 sock_release(sock2);
1481 goto out;
db349509
AV
1482 }
1483
157cf649 1484 audit_fd_pair(fd1, fd2);
db349509
AV
1485 fd_install(fd1, newfile1);
1486 fd_install(fd2, newfile2);
1da177e4
LT
1487 /* fd1 and fd2 may be already another descriptors.
1488 * Not kernel problem.
1489 */
1490
89bddce5 1491 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1492 if (!err)
1493 err = put_user(fd2, &usockvec[1]);
1494 if (!err)
1495 return 0;
1496
1497 sys_close(fd2);
1498 sys_close(fd1);
1499 return err;
1500
1da177e4 1501out_release_both:
89bddce5 1502 sock_release(sock2);
1da177e4 1503out_release_1:
89bddce5 1504 sock_release(sock1);
1da177e4
LT
1505out:
1506 return err;
1507}
1508
1da177e4
LT
1509/*
1510 * Bind a name to a socket. Nothing much to do here since it's
1511 * the protocol's responsibility to handle the local address.
1512 *
1513 * We move the socket address to kernel space before we call
1514 * the protocol layer (having also checked the address is ok).
1515 */
1516
20f37034 1517SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1518{
1519 struct socket *sock;
230b1839 1520 struct sockaddr_storage address;
6cb153ca 1521 int err, fput_needed;
1da177e4 1522
89bddce5 1523 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1524 if (sock) {
43db362d 1525 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1526 if (err >= 0) {
1527 err = security_socket_bind(sock,
230b1839 1528 (struct sockaddr *)&address,
89bddce5 1529 addrlen);
6cb153ca
BL
1530 if (!err)
1531 err = sock->ops->bind(sock,
89bddce5 1532 (struct sockaddr *)
230b1839 1533 &address, addrlen);
1da177e4 1534 }
6cb153ca 1535 fput_light(sock->file, fput_needed);
89bddce5 1536 }
1da177e4
LT
1537 return err;
1538}
1539
1da177e4
LT
1540/*
1541 * Perform a listen. Basically, we allow the protocol to do anything
1542 * necessary for a listen, and if that works, we mark the socket as
1543 * ready for listening.
1544 */
1545
3e0fa65f 1546SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1547{
1548 struct socket *sock;
6cb153ca 1549 int err, fput_needed;
b8e1f9b5 1550 int somaxconn;
89bddce5
SH
1551
1552 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1553 if (sock) {
8efa6e93 1554 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1555 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1556 backlog = somaxconn;
1da177e4
LT
1557
1558 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1559 if (!err)
1560 err = sock->ops->listen(sock, backlog);
1da177e4 1561
6cb153ca 1562 fput_light(sock->file, fput_needed);
1da177e4
LT
1563 }
1564 return err;
1565}
1566
1da177e4
LT
1567/*
1568 * For accept, we attempt to create a new socket, set up the link
1569 * with the client, wake up the client, then return the new
1570 * connected fd. We collect the address of the connector in kernel
1571 * space and move it to user at the very end. This is unclean because
1572 * we open the socket then return an error.
1573 *
1574 * 1003.1g adds the ability to recvmsg() to query connection pending
1575 * status to recvmsg. We need to add that support in a way thats
1576 * clean when we restucture accept also.
1577 */
1578
20f37034
HC
1579SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1580 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1581{
1582 struct socket *sock, *newsock;
39d8c1b6 1583 struct file *newfile;
6cb153ca 1584 int err, len, newfd, fput_needed;
230b1839 1585 struct sockaddr_storage address;
1da177e4 1586
77d27200 1587 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1588 return -EINVAL;
1589
1590 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1591 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1592
6cb153ca 1593 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1594 if (!sock)
1595 goto out;
1596
1597 err = -ENFILE;
c6d409cf
ED
1598 newsock = sock_alloc();
1599 if (!newsock)
1da177e4
LT
1600 goto out_put;
1601
1602 newsock->type = sock->type;
1603 newsock->ops = sock->ops;
1604
1da177e4
LT
1605 /*
1606 * We don't need try_module_get here, as the listening socket (sock)
1607 * has the protocol module (sock->ops->owner) held.
1608 */
1609 __module_get(newsock->ops->owner);
1610
600e1779
MY
1611 newfd = sock_alloc_file(newsock, &newfile, flags,
1612 sock->sk->sk_prot_creator->name);
39d8c1b6
DM
1613 if (unlikely(newfd < 0)) {
1614 err = newfd;
9a1875e6
DM
1615 sock_release(newsock);
1616 goto out_put;
39d8c1b6
DM
1617 }
1618
a79af59e
FF
1619 err = security_socket_accept(sock, newsock);
1620 if (err)
39d8c1b6 1621 goto out_fd;
a79af59e 1622
1da177e4
LT
1623 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1624 if (err < 0)
39d8c1b6 1625 goto out_fd;
1da177e4
LT
1626
1627 if (upeer_sockaddr) {
230b1839 1628 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1629 &len, 2) < 0) {
1da177e4 1630 err = -ECONNABORTED;
39d8c1b6 1631 goto out_fd;
1da177e4 1632 }
43db362d 1633 err = move_addr_to_user(&address,
230b1839 1634 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1635 if (err < 0)
39d8c1b6 1636 goto out_fd;
1da177e4
LT
1637 }
1638
1639 /* File flags are not inherited via accept() unlike another OSes. */
1640
39d8c1b6
DM
1641 fd_install(newfd, newfile);
1642 err = newfd;
1da177e4 1643
1da177e4 1644out_put:
6cb153ca 1645 fput_light(sock->file, fput_needed);
1da177e4
LT
1646out:
1647 return err;
39d8c1b6 1648out_fd:
9606a216 1649 fput(newfile);
39d8c1b6 1650 put_unused_fd(newfd);
1da177e4
LT
1651 goto out_put;
1652}
1653
20f37034
HC
1654SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1655 int __user *, upeer_addrlen)
aaca0bdc 1656{
de11defe 1657 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1658}
1659
1da177e4
LT
1660/*
1661 * Attempt to connect to a socket with the server address. The address
1662 * is in user space so we verify it is OK and move it to kernel space.
1663 *
1664 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1665 * break bindings
1666 *
1667 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1668 * other SEQPACKET protocols that take time to connect() as it doesn't
1669 * include the -EINPROGRESS status for such sockets.
1670 */
1671
20f37034
HC
1672SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1673 int, addrlen)
1da177e4
LT
1674{
1675 struct socket *sock;
230b1839 1676 struct sockaddr_storage address;
6cb153ca 1677 int err, fput_needed;
1da177e4 1678
6cb153ca 1679 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1680 if (!sock)
1681 goto out;
43db362d 1682 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1683 if (err < 0)
1684 goto out_put;
1685
89bddce5 1686 err =
230b1839 1687 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1688 if (err)
1689 goto out_put;
1690
230b1839 1691 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1692 sock->file->f_flags);
1693out_put:
6cb153ca 1694 fput_light(sock->file, fput_needed);
1da177e4
LT
1695out:
1696 return err;
1697}
1698
1699/*
1700 * Get the local address ('name') of a socket object. Move the obtained
1701 * name to user space.
1702 */
1703
20f37034
HC
1704SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1705 int __user *, usockaddr_len)
1da177e4
LT
1706{
1707 struct socket *sock;
230b1839 1708 struct sockaddr_storage address;
6cb153ca 1709 int len, err, fput_needed;
89bddce5 1710
6cb153ca 1711 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1712 if (!sock)
1713 goto out;
1714
1715 err = security_socket_getsockname(sock);
1716 if (err)
1717 goto out_put;
1718
230b1839 1719 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1720 if (err)
1721 goto out_put;
43db362d 1722 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1723
1724out_put:
6cb153ca 1725 fput_light(sock->file, fput_needed);
1da177e4
LT
1726out:
1727 return err;
1728}
1729
1730/*
1731 * Get the remote address ('name') of a socket object. Move the obtained
1732 * name to user space.
1733 */
1734
20f37034
HC
1735SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1736 int __user *, usockaddr_len)
1da177e4
LT
1737{
1738 struct socket *sock;
230b1839 1739 struct sockaddr_storage address;
6cb153ca 1740 int len, err, fput_needed;
1da177e4 1741
89bddce5
SH
1742 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1743 if (sock != NULL) {
1da177e4
LT
1744 err = security_socket_getpeername(sock);
1745 if (err) {
6cb153ca 1746 fput_light(sock->file, fput_needed);
1da177e4
LT
1747 return err;
1748 }
1749
89bddce5 1750 err =
230b1839 1751 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1752 1);
1da177e4 1753 if (!err)
43db362d 1754 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1755 usockaddr_len);
6cb153ca 1756 fput_light(sock->file, fput_needed);
1da177e4
LT
1757 }
1758 return err;
1759}
1760
1761/*
1762 * Send a datagram to a given address. We move the address into kernel
1763 * space and check the user space data area is readable before invoking
1764 * the protocol.
1765 */
1766
3e0fa65f 1767SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1768 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1769 int, addr_len)
1da177e4
LT
1770{
1771 struct socket *sock;
230b1839 1772 struct sockaddr_storage address;
1da177e4
LT
1773 int err;
1774 struct msghdr msg;
1775 struct iovec iov;
6cb153ca 1776 int fput_needed;
6cb153ca 1777
253eacc0
LT
1778 if (len > INT_MAX)
1779 len = INT_MAX;
de0fa95c
PE
1780 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1781 if (!sock)
4387ff75 1782 goto out;
6cb153ca 1783
89bddce5
SH
1784 iov.iov_base = buff;
1785 iov.iov_len = len;
1786 msg.msg_name = NULL;
1787 msg.msg_iov = &iov;
1788 msg.msg_iovlen = 1;
1789 msg.msg_control = NULL;
1790 msg.msg_controllen = 0;
1791 msg.msg_namelen = 0;
6cb153ca 1792 if (addr) {
43db362d 1793 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1794 if (err < 0)
1795 goto out_put;
230b1839 1796 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1797 msg.msg_namelen = addr_len;
1da177e4
LT
1798 }
1799 if (sock->file->f_flags & O_NONBLOCK)
1800 flags |= MSG_DONTWAIT;
1801 msg.msg_flags = flags;
1802 err = sock_sendmsg(sock, &msg, len);
1803
89bddce5 1804out_put:
de0fa95c 1805 fput_light(sock->file, fput_needed);
4387ff75 1806out:
1da177e4
LT
1807 return err;
1808}
1809
1810/*
89bddce5 1811 * Send a datagram down a socket.
1da177e4
LT
1812 */
1813
3e0fa65f 1814SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1815 unsigned int, flags)
1da177e4
LT
1816{
1817 return sys_sendto(fd, buff, len, flags, NULL, 0);
1818}
1819
1820/*
89bddce5 1821 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1822 * sender. We verify the buffers are writable and if needed move the
1823 * sender address from kernel to user space.
1824 */
1825
3e0fa65f 1826SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1827 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1828 int __user *, addr_len)
1da177e4
LT
1829{
1830 struct socket *sock;
1831 struct iovec iov;
1832 struct msghdr msg;
230b1839 1833 struct sockaddr_storage address;
89bddce5 1834 int err, err2;
6cb153ca
BL
1835 int fput_needed;
1836
253eacc0
LT
1837 if (size > INT_MAX)
1838 size = INT_MAX;
de0fa95c 1839 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1840 if (!sock)
de0fa95c 1841 goto out;
1da177e4 1842
89bddce5
SH
1843 msg.msg_control = NULL;
1844 msg.msg_controllen = 0;
1845 msg.msg_iovlen = 1;
1846 msg.msg_iov = &iov;
1847 iov.iov_len = size;
1848 iov.iov_base = ubuf;
230b1839
YH
1849 msg.msg_name = (struct sockaddr *)&address;
1850 msg.msg_namelen = sizeof(address);
1da177e4
LT
1851 if (sock->file->f_flags & O_NONBLOCK)
1852 flags |= MSG_DONTWAIT;
89bddce5 1853 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1854
89bddce5 1855 if (err >= 0 && addr != NULL) {
43db362d 1856 err2 = move_addr_to_user(&address,
230b1839 1857 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1858 if (err2 < 0)
1859 err = err2;
1da177e4 1860 }
de0fa95c
PE
1861
1862 fput_light(sock->file, fput_needed);
4387ff75 1863out:
1da177e4
LT
1864 return err;
1865}
1866
1867/*
89bddce5 1868 * Receive a datagram from a socket.
1da177e4
LT
1869 */
1870
89bddce5 1871asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
95c96174 1872 unsigned int flags)
1da177e4
LT
1873{
1874 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1875}
1876
1877/*
1878 * Set a socket option. Because we don't know the option lengths we have
1879 * to pass the user mode parameter for the protocols to sort out.
1880 */
1881
20f37034
HC
1882SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1883 char __user *, optval, int, optlen)
1da177e4 1884{
6cb153ca 1885 int err, fput_needed;
1da177e4
LT
1886 struct socket *sock;
1887
1888 if (optlen < 0)
1889 return -EINVAL;
89bddce5
SH
1890
1891 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1892 if (sock != NULL) {
1893 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1894 if (err)
1895 goto out_put;
1da177e4
LT
1896
1897 if (level == SOL_SOCKET)
89bddce5
SH
1898 err =
1899 sock_setsockopt(sock, level, optname, optval,
1900 optlen);
1da177e4 1901 else
89bddce5
SH
1902 err =
1903 sock->ops->setsockopt(sock, level, optname, optval,
1904 optlen);
6cb153ca
BL
1905out_put:
1906 fput_light(sock->file, fput_needed);
1da177e4
LT
1907 }
1908 return err;
1909}
1910
1911/*
1912 * Get a socket option. Because we don't know the option lengths we have
1913 * to pass a user mode parameter for the protocols to sort out.
1914 */
1915
20f37034
HC
1916SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1917 char __user *, optval, int __user *, optlen)
1da177e4 1918{
6cb153ca 1919 int err, fput_needed;
1da177e4
LT
1920 struct socket *sock;
1921
89bddce5
SH
1922 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1923 if (sock != NULL) {
6cb153ca
BL
1924 err = security_socket_getsockopt(sock, level, optname);
1925 if (err)
1926 goto out_put;
1da177e4
LT
1927
1928 if (level == SOL_SOCKET)
89bddce5
SH
1929 err =
1930 sock_getsockopt(sock, level, optname, optval,
1931 optlen);
1da177e4 1932 else
89bddce5
SH
1933 err =
1934 sock->ops->getsockopt(sock, level, optname, optval,
1935 optlen);
6cb153ca
BL
1936out_put:
1937 fput_light(sock->file, fput_needed);
1da177e4
LT
1938 }
1939 return err;
1940}
1941
1da177e4
LT
1942/*
1943 * Shutdown a socket.
1944 */
1945
754fe8d2 1946SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1947{
6cb153ca 1948 int err, fput_needed;
1da177e4
LT
1949 struct socket *sock;
1950
89bddce5
SH
1951 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1952 if (sock != NULL) {
1da177e4 1953 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1954 if (!err)
1955 err = sock->ops->shutdown(sock, how);
1956 fput_light(sock->file, fput_needed);
1da177e4
LT
1957 }
1958 return err;
1959}
1960
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * They expand to &<msg>_compat->member when MSG_CMSG_COMPAT is set in
 * `flags` and to &<msg>->member otherwise, so the caller must have both
 * `flags` and a `<msg>_compat` pointer in scope.
 */
#define COMPAT_MSG(msg, member)						\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1967
c71d8ebe
TH
1968struct used_address {
1969 struct sockaddr_storage name;
1970 unsigned int name_len;
1971};
1972
228e548e 1973static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 1974 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1975 struct used_address *used_address)
1da177e4 1976{
89bddce5
SH
1977 struct compat_msghdr __user *msg_compat =
1978 (struct compat_msghdr __user *)msg;
230b1839 1979 struct sockaddr_storage address;
1da177e4 1980 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1981 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1982 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1983 /* 20 is size of ipv6_pktinfo */
1da177e4 1984 unsigned char *ctl_buf = ctl;
a74e9106 1985 int err, ctl_len, total_len;
89bddce5 1986
1da177e4
LT
1987 err = -EFAULT;
1988 if (MSG_CMSG_COMPAT & flags) {
228e548e 1989 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 1990 return -EFAULT;
228e548e 1991 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1992 return -EFAULT;
1993
228e548e 1994 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
1995 err = -EMSGSIZE;
1996 if (msg_sys->msg_iovlen > UIO_MAXIOV)
1997 goto out;
1998 err = -ENOMEM;
1999 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2000 GFP_KERNEL);
1da177e4 2001 if (!iov)
228e548e 2002 goto out;
1da177e4
LT
2003 }
2004
2005 /* This will also move the address data into kernel space */
2006 if (MSG_CMSG_COMPAT & flags) {
43db362d 2007 err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
1da177e4 2008 } else
43db362d 2009 err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
89bddce5 2010 if (err < 0)
1da177e4
LT
2011 goto out_freeiov;
2012 total_len = err;
2013
2014 err = -ENOBUFS;
2015
228e548e 2016 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2017 goto out_freeiov;
228e548e 2018 ctl_len = msg_sys->msg_controllen;
1da177e4 2019 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2020 err =
228e548e 2021 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2022 sizeof(ctl));
1da177e4
LT
2023 if (err)
2024 goto out_freeiov;
228e548e
AB
2025 ctl_buf = msg_sys->msg_control;
2026 ctl_len = msg_sys->msg_controllen;
1da177e4 2027 } else if (ctl_len) {
89bddce5 2028 if (ctl_len > sizeof(ctl)) {
1da177e4 2029 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2030 if (ctl_buf == NULL)
1da177e4
LT
2031 goto out_freeiov;
2032 }
2033 err = -EFAULT;
2034 /*
228e548e 2035 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2036 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2037 * checking falls down on this.
2038 */
fb8621bb 2039 if (copy_from_user(ctl_buf,
228e548e 2040 (void __user __force *)msg_sys->msg_control,
89bddce5 2041 ctl_len))
1da177e4 2042 goto out_freectl;
228e548e 2043 msg_sys->msg_control = ctl_buf;
1da177e4 2044 }
228e548e 2045 msg_sys->msg_flags = flags;
1da177e4
LT
2046
2047 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2048 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2049 /*
2050 * If this is sendmmsg() and current destination address is same as
2051 * previously succeeded address, omit asking LSM's decision.
2052 * used_address->name_len is initialized to UINT_MAX so that the first
2053 * destination address never matches.
2054 */
bc909d9d
MD
2055 if (used_address && msg_sys->msg_name &&
2056 used_address->name_len == msg_sys->msg_namelen &&
2057 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
2058 used_address->name_len)) {
2059 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
2060 goto out_freectl;
2061 }
2062 err = sock_sendmsg(sock, msg_sys, total_len);
2063 /*
2064 * If this is sendmmsg() and sending to current destination address was
2065 * successful, remember it.
2066 */
2067 if (used_address && err >= 0) {
2068 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2069 if (msg_sys->msg_name)
2070 memcpy(&used_address->name, msg_sys->msg_name,
2071 used_address->name_len);
c71d8ebe 2072 }
1da177e4
LT
2073
2074out_freectl:
89bddce5 2075 if (ctl_buf != ctl)
1da177e4
LT
2076 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2077out_freeiov:
2078 if (iov != iovstack)
a74e9106 2079 kfree(iov);
228e548e
AB
2080out:
2081 return err;
2082}
2083
2084/*
2085 * BSD sendmsg interface
2086 */
2087
95c96174 2088SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
228e548e
AB
2089{
2090 int fput_needed, err;
2091 struct msghdr msg_sys;
2092 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2093
2094 if (!sock)
2095 goto out;
2096
c71d8ebe 2097 err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2098
6cb153ca 2099 fput_light(sock->file, fput_needed);
89bddce5 2100out:
1da177e4
LT
2101 return err;
2102}
2103
228e548e
AB
2104/*
2105 * Linux sendmmsg interface
2106 */
2107
2108int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2109 unsigned int flags)
2110{
2111 int fput_needed, err, datagrams;
2112 struct socket *sock;
2113 struct mmsghdr __user *entry;
2114 struct compat_mmsghdr __user *compat_entry;
2115 struct msghdr msg_sys;
c71d8ebe 2116 struct used_address used_address;
228e548e 2117
98382f41
AB
2118 if (vlen > UIO_MAXIOV)
2119 vlen = UIO_MAXIOV;
228e548e
AB
2120
2121 datagrams = 0;
2122
2123 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2124 if (!sock)
2125 return err;
2126
c71d8ebe 2127 used_address.name_len = UINT_MAX;
228e548e
AB
2128 entry = mmsg;
2129 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2130 err = 0;
228e548e
AB
2131
2132 while (datagrams < vlen) {
228e548e
AB
2133 if (MSG_CMSG_COMPAT & flags) {
2134 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
c71d8ebe 2135 &msg_sys, flags, &used_address);
228e548e
AB
2136 if (err < 0)
2137 break;
2138 err = __put_user(err, &compat_entry->msg_len);
2139 ++compat_entry;
2140 } else {
2141 err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
c71d8ebe 2142 &msg_sys, flags, &used_address);
228e548e
AB
2143 if (err < 0)
2144 break;
2145 err = put_user(err, &entry->msg_len);
2146 ++entry;
2147 }
2148
2149 if (err)
2150 break;
2151 ++datagrams;
2152 }
2153
228e548e
AB
2154 fput_light(sock->file, fput_needed);
2155
728ffb86
AB
2156 /* We only return an error if no datagrams were able to be sent */
2157 if (datagrams != 0)
228e548e
AB
2158 return datagrams;
2159
228e548e
AB
2160 return err;
2161}
2162
2163SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2164 unsigned int, vlen, unsigned int, flags)
2165{
2166 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2167}
2168
a2e27255 2169static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 2170 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2171{
89bddce5
SH
2172 struct compat_msghdr __user *msg_compat =
2173 (struct compat_msghdr __user *)msg;
1da177e4 2174 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2175 struct iovec *iov = iovstack;
1da177e4 2176 unsigned long cmsg_ptr;
a74e9106 2177 int err, total_len, len;
1da177e4
LT
2178
2179 /* kernel mode address */
230b1839 2180 struct sockaddr_storage addr;
1da177e4
LT
2181
2182 /* user mode address pointers */
2183 struct sockaddr __user *uaddr;
2184 int __user *uaddr_len;
89bddce5 2185
1da177e4 2186 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2187 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2188 return -EFAULT;
c6d409cf 2189 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
89bddce5 2190 return -EFAULT;
1da177e4 2191
a2e27255 2192 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2193 err = -EMSGSIZE;
2194 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2195 goto out;
2196 err = -ENOMEM;
2197 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2198 GFP_KERNEL);
1da177e4 2199 if (!iov)
a2e27255 2200 goto out;
1da177e4
LT
2201 }
2202
2203 /*
89bddce5
SH
2204 * Save the user-mode address (verify_iovec will change the
2205 * kernel msghdr to use the kernel address space)
1da177e4 2206 */
89bddce5 2207
a2e27255 2208 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4
LT
2209 uaddr_len = COMPAT_NAMELEN(msg);
2210 if (MSG_CMSG_COMPAT & flags) {
43db362d 2211 err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4 2212 } else
43db362d 2213 err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4
LT
2214 if (err < 0)
2215 goto out_freeiov;
89bddce5 2216 total_len = err;
1da177e4 2217
a2e27255
ACM
2218 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2219 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2220
1da177e4
LT
2221 if (sock->file->f_flags & O_NONBLOCK)
2222 flags |= MSG_DONTWAIT;
a2e27255
ACM
2223 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2224 total_len, flags);
1da177e4
LT
2225 if (err < 0)
2226 goto out_freeiov;
2227 len = err;
2228
2229 if (uaddr != NULL) {
43db362d 2230 err = move_addr_to_user(&addr,
a2e27255 2231 msg_sys->msg_namelen, uaddr,
89bddce5 2232 uaddr_len);
1da177e4
LT
2233 if (err < 0)
2234 goto out_freeiov;
2235 }
a2e27255 2236 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2237 COMPAT_FLAGS(msg));
1da177e4
LT
2238 if (err)
2239 goto out_freeiov;
2240 if (MSG_CMSG_COMPAT & flags)
a2e27255 2241 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2242 &msg_compat->msg_controllen);
2243 else
a2e27255 2244 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2245 &msg->msg_controllen);
2246 if (err)
2247 goto out_freeiov;
2248 err = len;
2249
2250out_freeiov:
2251 if (iov != iovstack)
a74e9106 2252 kfree(iov);
a2e27255
ACM
2253out:
2254 return err;
2255}
2256
2257/*
2258 * BSD recvmsg interface
2259 */
2260
2261SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2262 unsigned int, flags)
2263{
2264 int fput_needed, err;
2265 struct msghdr msg_sys;
2266 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2267
2268 if (!sock)
2269 goto out;
2270
2271 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2272
6cb153ca 2273 fput_light(sock->file, fput_needed);
1da177e4
LT
2274out:
2275 return err;
2276}
2277
a2e27255
ACM
2278/*
2279 * Linux recvmmsg interface
2280 */
2281
2282int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2283 unsigned int flags, struct timespec *timeout)
2284{
2285 int fput_needed, err, datagrams;
2286 struct socket *sock;
2287 struct mmsghdr __user *entry;
d7256d0e 2288 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2289 struct msghdr msg_sys;
2290 struct timespec end_time;
2291
2292 if (timeout &&
2293 poll_select_set_timeout(&end_time, timeout->tv_sec,
2294 timeout->tv_nsec))
2295 return -EINVAL;
2296
2297 datagrams = 0;
2298
2299 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2300 if (!sock)
2301 return err;
2302
2303 err = sock_error(sock->sk);
2304 if (err)
2305 goto out_put;
2306
2307 entry = mmsg;
d7256d0e 2308 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2309
2310 while (datagrams < vlen) {
2311 /*
2312 * No need to ask LSM for more than the first datagram.
2313 */
d7256d0e
JMG
2314 if (MSG_CMSG_COMPAT & flags) {
2315 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
b9eb8b87
AB
2316 &msg_sys, flags & ~MSG_WAITFORONE,
2317 datagrams);
d7256d0e
JMG
2318 if (err < 0)
2319 break;
2320 err = __put_user(err, &compat_entry->msg_len);
2321 ++compat_entry;
2322 } else {
2323 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
b9eb8b87
AB
2324 &msg_sys, flags & ~MSG_WAITFORONE,
2325 datagrams);
d7256d0e
JMG
2326 if (err < 0)
2327 break;
2328 err = put_user(err, &entry->msg_len);
2329 ++entry;
2330 }
2331
a2e27255
ACM
2332 if (err)
2333 break;
a2e27255
ACM
2334 ++datagrams;
2335
71c5c159
BB
2336 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2337 if (flags & MSG_WAITFORONE)
2338 flags |= MSG_DONTWAIT;
2339
a2e27255
ACM
2340 if (timeout) {
2341 ktime_get_ts(timeout);
2342 *timeout = timespec_sub(end_time, *timeout);
2343 if (timeout->tv_sec < 0) {
2344 timeout->tv_sec = timeout->tv_nsec = 0;
2345 break;
2346 }
2347
2348 /* Timeout, return less than vlen datagrams */
2349 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2350 break;
2351 }
2352
2353 /* Out of band data, return right away */
2354 if (msg_sys.msg_flags & MSG_OOB)
2355 break;
2356 }
2357
2358out_put:
2359 fput_light(sock->file, fput_needed);
1da177e4 2360
a2e27255
ACM
2361 if (err == 0)
2362 return datagrams;
2363
2364 if (datagrams != 0) {
2365 /*
2366 * We may return less entries than requested (vlen) if the
2367 * sock is non block and there aren't enough datagrams...
2368 */
2369 if (err != -EAGAIN) {
2370 /*
2371 * ... or if recvmsg returns an error after we
2372 * received some datagrams, where we record the
2373 * error to return on the next call or if the
2374 * app asks about it using getsockopt(SO_ERROR).
2375 */
2376 sock->sk->sk_err = -err;
2377 }
2378
2379 return datagrams;
2380 }
2381
2382 return err;
2383}
2384
2385SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2386 unsigned int, vlen, unsigned int, flags,
2387 struct timespec __user *, timeout)
2388{
2389 int datagrams;
2390 struct timespec timeout_sys;
2391
2392 if (!timeout)
2393 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2394
2395 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2396 return -EFAULT;
2397
2398 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2399
2400 if (datagrams > 0 &&
2401 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2402 datagrams = -EFAULT;
2403
2404 return datagrams;
2405}
2406
2407#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall: nargs[call] is the size in
 * bytes of the argument block for that call number (AL(n) stands for n
 * unsigned longs).  Call number 0 is not a valid call.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),					/* 0 */
	AL(3), AL(3), AL(3), AL(2), AL(3),	/* 1 - 5 */
	AL(3), AL(3), AL(4), AL(4), AL(4),	/* 6 - 10 */
	AL(6), AL(6), AL(2), AL(5), AL(5),	/* 11 - 15 */
	AL(3), AL(3), AL(4), AL(5), AL(4)	/* 16 - 20 */
};

#undef AL
2418
2419/*
89bddce5 2420 * System call vectors.
1da177e4
LT
2421 *
2422 * Argument checking cleaned up. Saved 20% in size.
2423 * This function doesn't need to set the kernel lock because
89bddce5 2424 * it is set by the callees.
1da177e4
LT
2425 */
2426
3e0fa65f 2427SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4
LT
2428{
2429 unsigned long a[6];
89bddce5 2430 unsigned long a0, a1;
1da177e4 2431 int err;
47379052 2432 unsigned int len;
1da177e4 2433
228e548e 2434 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2435 return -EINVAL;
2436
47379052
AV
2437 len = nargs[call];
2438 if (len > sizeof(a))
2439 return -EINVAL;
2440
1da177e4 2441 /* copy_from_user should be SMP safe. */
47379052 2442 if (copy_from_user(a, args, len))
1da177e4 2443 return -EFAULT;
3ec3b2fb 2444
f3298dc4 2445 audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb 2446
89bddce5
SH
2447 a0 = a[0];
2448 a1 = a[1];
2449
2450 switch (call) {
2451 case SYS_SOCKET:
2452 err = sys_socket(a0, a1, a[2]);
2453 break;
2454 case SYS_BIND:
2455 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2456 break;
2457 case SYS_CONNECT:
2458 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2459 break;
2460 case SYS_LISTEN:
2461 err = sys_listen(a0, a1);
2462 break;
2463 case SYS_ACCEPT:
de11defe
UD
2464 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2465 (int __user *)a[2], 0);
89bddce5
SH
2466 break;
2467 case SYS_GETSOCKNAME:
2468 err =
2469 sys_getsockname(a0, (struct sockaddr __user *)a1,
2470 (int __user *)a[2]);
2471 break;
2472 case SYS_GETPEERNAME:
2473 err =
2474 sys_getpeername(a0, (struct sockaddr __user *)a1,
2475 (int __user *)a[2]);
2476 break;
2477 case SYS_SOCKETPAIR:
2478 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2479 break;
2480 case SYS_SEND:
2481 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2482 break;
2483 case SYS_SENDTO:
2484 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2485 (struct sockaddr __user *)a[4], a[5]);
2486 break;
2487 case SYS_RECV:
2488 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2489 break;
2490 case SYS_RECVFROM:
2491 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2492 (struct sockaddr __user *)a[4],
2493 (int __user *)a[5]);
2494 break;
2495 case SYS_SHUTDOWN:
2496 err = sys_shutdown(a0, a1);
2497 break;
2498 case SYS_SETSOCKOPT:
2499 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2500 break;
2501 case SYS_GETSOCKOPT:
2502 err =
2503 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2504 (int __user *)a[4]);
2505 break;
2506 case SYS_SENDMSG:
2507 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2508 break;
228e548e
AB
2509 case SYS_SENDMMSG:
2510 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2511 break;
89bddce5
SH
2512 case SYS_RECVMSG:
2513 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2514 break;
a2e27255
ACM
2515 case SYS_RECVMMSG:
2516 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2517 (struct timespec __user *)a[4]);
2518 break;
de11defe
UD
2519 case SYS_ACCEPT4:
2520 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2521 (int __user *)a[2], a[3]);
aaca0bdc 2522 break;
89bddce5
SH
2523 default:
2524 err = -EINVAL;
2525 break;
1da177e4
LT
2526 }
2527 return err;
2528}
2529
89bddce5 2530#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2531
55737fda
SH
2532/**
2533 * sock_register - add a socket protocol handler
2534 * @ops: description of protocol
2535 *
1da177e4
LT
2536 * This function is called by a protocol handler that wants to
2537 * advertise its address family, and have it linked into the
55737fda
SH
2538 * socket interface. The value ops->family coresponds to the
2539 * socket system call protocol family.
1da177e4 2540 */
f0fd27d4 2541int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2542{
2543 int err;
2544
2545 if (ops->family >= NPROTO) {
89bddce5
SH
2546 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2547 NPROTO);
1da177e4
LT
2548 return -ENOBUFS;
2549 }
55737fda
SH
2550
2551 spin_lock(&net_family_lock);
190683a9
ED
2552 if (rcu_dereference_protected(net_families[ops->family],
2553 lockdep_is_held(&net_family_lock)))
55737fda
SH
2554 err = -EEXIST;
2555 else {
cf778b00 2556 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2557 err = 0;
2558 }
55737fda
SH
2559 spin_unlock(&net_family_lock);
2560
89bddce5 2561 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2562 return err;
2563}
c6d409cf 2564EXPORT_SYMBOL(sock_register);
1da177e4 2565
55737fda
SH
2566/**
2567 * sock_unregister - remove a protocol handler
2568 * @family: protocol family to remove
2569 *
1da177e4
LT
2570 * This function is called by a protocol handler that wants to
2571 * remove its address family, and have it unlinked from the
55737fda
SH
2572 * new socket creation.
2573 *
2574 * If protocol handler is a module, then it can use module reference
2575 * counts to protect against new references. If protocol handler is not
2576 * a module then it needs to provide its own protection in
2577 * the ops->create routine.
1da177e4 2578 */
f0fd27d4 2579void sock_unregister(int family)
1da177e4 2580{
f0fd27d4 2581 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2582
55737fda 2583 spin_lock(&net_family_lock);
a9b3cd7f 2584 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2585 spin_unlock(&net_family_lock);
2586
2587 synchronize_rcu();
2588
89bddce5 2589 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4 2590}
c6d409cf 2591EXPORT_SYMBOL(sock_unregister);
1da177e4 2592
77d76ea3 2593static int __init sock_init(void)
1da177e4 2594{
b3e19d92 2595 int err;
2ca794e5
EB
2596 /*
2597 * Initialize the network sysctl infrastructure.
2598 */
2599 err = net_sysctl_init();
2600 if (err)
2601 goto out;
b3e19d92 2602
1da177e4 2603 /*
89bddce5 2604 * Initialize skbuff SLAB cache
1da177e4
LT
2605 */
2606 skb_init();
1da177e4
LT
2607
2608 /*
89bddce5 2609 * Initialize the protocols module.
1da177e4
LT
2610 */
2611
2612 init_inodecache();
b3e19d92
NP
2613
2614 err = register_filesystem(&sock_fs_type);
2615 if (err)
2616 goto out_fs;
1da177e4 2617 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2618 if (IS_ERR(sock_mnt)) {
2619 err = PTR_ERR(sock_mnt);
2620 goto out_mount;
2621 }
77d76ea3
AK
2622
2623 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2624 */
2625
2626#ifdef CONFIG_NETFILTER
2627 netfilter_init();
2628#endif
cbeb321a 2629
c1f19b51
RC
2630#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2631 skb_timestamping_init();
2632#endif
2633
b3e19d92
NP
2634out:
2635 return err;
2636
2637out_mount:
2638 unregister_filesystem(&sock_fs_type);
2639out_fs:
2640 goto out;
1da177e4
LT
2641}
2642
77d76ea3
AK
2643core_initcall(sock_init); /* early initcall */
2644
1da177e4
LT
2645#ifdef CONFIG_PROC_FS
2646void socket_seq_show(struct seq_file *seq)
2647{
2648 int cpu;
2649 int counter = 0;
2650
6f912042 2651 for_each_possible_cpu(cpu)
89bddce5 2652 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2653
2654 /* It can be negative, by the way. 8) */
2655 if (counter < 0)
2656 counter = 0;
2657
2658 seq_printf(seq, "sockets: used %d\n", counter);
2659}
89bddce5 2660#endif /* CONFIG_PROC_FS */
1da177e4 2661
89bbfc95 2662#ifdef CONFIG_COMPAT
6b96018b 2663static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2664 unsigned int cmd, void __user *up)
7a229387 2665{
7a229387
AB
2666 mm_segment_t old_fs = get_fs();
2667 struct timeval ktv;
2668 int err;
2669
2670 set_fs(KERNEL_DS);
6b96018b 2671 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2672 set_fs(old_fs);
644595f8 2673 if (!err)
ed6fe9d6 2674 err = compat_put_timeval(&ktv, up);
644595f8 2675
7a229387
AB
2676 return err;
2677}
2678
6b96018b 2679static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2680 unsigned int cmd, void __user *up)
7a229387 2681{
7a229387
AB
2682 mm_segment_t old_fs = get_fs();
2683 struct timespec kts;
2684 int err;
2685
2686 set_fs(KERNEL_DS);
6b96018b 2687 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2688 set_fs(old_fs);
644595f8 2689 if (!err)
ed6fe9d6 2690 err = compat_put_timespec(&kts, up);
644595f8 2691
7a229387
AB
2692 return err;
2693}
2694
6b96018b 2695static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2696{
2697 struct ifreq __user *uifr;
2698 int err;
2699
2700 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2701 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2702 return -EFAULT;
2703
6b96018b 2704 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2705 if (err)
2706 return err;
2707
6b96018b 2708 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2709 return -EFAULT;
2710
2711 return 0;
2712}
2713
6b96018b 2714static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2715{
6b96018b 2716 struct compat_ifconf ifc32;
7a229387
AB
2717 struct ifconf ifc;
2718 struct ifconf __user *uifc;
6b96018b 2719 struct compat_ifreq __user *ifr32;
7a229387
AB
2720 struct ifreq __user *ifr;
2721 unsigned int i, j;
2722 int err;
2723
6b96018b 2724 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2725 return -EFAULT;
2726
43da5f2e 2727 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2728 if (ifc32.ifcbuf == 0) {
2729 ifc32.ifc_len = 0;
2730 ifc.ifc_len = 0;
2731 ifc.ifc_req = NULL;
2732 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2733 } else {
c6d409cf
ED
2734 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2735 sizeof(struct ifreq);
7a229387
AB
2736 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2737 ifc.ifc_len = len;
2738 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2739 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2740 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2741 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2742 return -EFAULT;
2743 ifr++;
2744 ifr32++;
2745 }
2746 }
2747 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2748 return -EFAULT;
2749
6b96018b 2750 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2751 if (err)
2752 return err;
2753
2754 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2755 return -EFAULT;
2756
2757 ifr = ifc.ifc_req;
2758 ifr32 = compat_ptr(ifc32.ifcbuf);
2759 for (i = 0, j = 0;
c6d409cf
ED
2760 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2761 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2762 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2763 return -EFAULT;
2764 ifr32++;
2765 ifr++;
2766 }
2767
2768 if (ifc32.ifcbuf == 0) {
2769 /* Translate from 64-bit structure multiple to
2770 * a 32-bit one.
2771 */
2772 i = ifc.ifc_len;
6b96018b 2773 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2774 ifc32.ifc_len = i;
2775 } else {
2776 ifc32.ifc_len = i;
2777 }
6b96018b 2778 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2779 return -EFAULT;
2780
2781 return 0;
2782}
2783
6b96018b 2784static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2785{
3a7da39d
BH
2786 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2787 bool convert_in = false, convert_out = false;
2788 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2789 struct ethtool_rxnfc __user *rxnfc;
7a229387 2790 struct ifreq __user *ifr;
3a7da39d
BH
2791 u32 rule_cnt = 0, actual_rule_cnt;
2792 u32 ethcmd;
7a229387 2793 u32 data;
3a7da39d 2794 int ret;
7a229387 2795
3a7da39d
BH
2796 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2797 return -EFAULT;
7a229387 2798
3a7da39d
BH
2799 compat_rxnfc = compat_ptr(data);
2800
2801 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2802 return -EFAULT;
2803
3a7da39d
BH
2804 /* Most ethtool structures are defined without padding.
2805 * Unfortunately struct ethtool_rxnfc is an exception.
2806 */
2807 switch (ethcmd) {
2808 default:
2809 break;
2810 case ETHTOOL_GRXCLSRLALL:
2811 /* Buffer size is variable */
2812 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2813 return -EFAULT;
2814 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2815 return -ENOMEM;
2816 buf_size += rule_cnt * sizeof(u32);
2817 /* fall through */
2818 case ETHTOOL_GRXRINGS:
2819 case ETHTOOL_GRXCLSRLCNT:
2820 case ETHTOOL_GRXCLSRULE:
55664f32 2821 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2822 convert_out = true;
2823 /* fall through */
2824 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2825 buf_size += sizeof(struct ethtool_rxnfc);
2826 convert_in = true;
2827 break;
2828 }
2829
2830 ifr = compat_alloc_user_space(buf_size);
2831 rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8);
2832
2833 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2834 return -EFAULT;
2835
3a7da39d
BH
2836 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2837 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2838 return -EFAULT;
2839
3a7da39d 2840 if (convert_in) {
127fe533 2841 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2842 * fs.ring_cookie and at the end of fs, but nowhere else.
2843 */
127fe533
AD
2844 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2845 sizeof(compat_rxnfc->fs.m_ext) !=
2846 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2847 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2848 BUILD_BUG_ON(
2849 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2850 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2851 offsetof(struct ethtool_rxnfc, fs.location) -
2852 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2853
2854 if (copy_in_user(rxnfc, compat_rxnfc,
127fe533 2855 (void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2856 (void *)rxnfc) ||
2857 copy_in_user(&rxnfc->fs.ring_cookie,
2858 &compat_rxnfc->fs.ring_cookie,
2859 (void *)(&rxnfc->fs.location + 1) -
2860 (void *)&rxnfc->fs.ring_cookie) ||
2861 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2862 sizeof(rxnfc->rule_cnt)))
2863 return -EFAULT;
2864 }
2865
2866 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2867 if (ret)
2868 return ret;
2869
2870 if (convert_out) {
2871 if (copy_in_user(compat_rxnfc, rxnfc,
127fe533 2872 (const void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2873 (const void *)rxnfc) ||
2874 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2875 &rxnfc->fs.ring_cookie,
2876 (const void *)(&rxnfc->fs.location + 1) -
2877 (const void *)&rxnfc->fs.ring_cookie) ||
2878 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2879 sizeof(rxnfc->rule_cnt)))
2880 return -EFAULT;
2881
2882 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2883 /* As an optimisation, we only copy the actual
2884 * number of rules that the underlying
2885 * function returned. Since Mallory might
2886 * change the rule count in user memory, we
2887 * check that it is less than the rule count
2888 * originally given (as the user buffer size),
2889 * which has been range-checked.
2890 */
2891 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2892 return -EFAULT;
2893 if (actual_rule_cnt < rule_cnt)
2894 rule_cnt = actual_rule_cnt;
2895 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2896 &rxnfc->rule_locs[0],
2897 rule_cnt * sizeof(u32)))
2898 return -EFAULT;
2899 }
2900 }
2901
2902 return 0;
7a229387
AB
2903}
2904
7a50a240
AB
2905static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2906{
2907 void __user *uptr;
2908 compat_uptr_t uptr32;
2909 struct ifreq __user *uifr;
2910
c6d409cf 2911 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2912 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2913 return -EFAULT;
2914
2915 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2916 return -EFAULT;
2917
2918 uptr = compat_ptr(uptr32);
2919
2920 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2921 return -EFAULT;
2922
2923 return dev_ioctl(net, SIOCWANDEV, uifr);
2924}
2925
6b96018b
AB
2926static int bond_ioctl(struct net *net, unsigned int cmd,
2927 struct compat_ifreq __user *ifr32)
7a229387
AB
2928{
2929 struct ifreq kifr;
2930 struct ifreq __user *uifr;
7a229387
AB
2931 mm_segment_t old_fs;
2932 int err;
2933 u32 data;
2934 void __user *datap;
2935
2936 switch (cmd) {
2937 case SIOCBONDENSLAVE:
2938 case SIOCBONDRELEASE:
2939 case SIOCBONDSETHWADDR:
2940 case SIOCBONDCHANGEACTIVE:
6b96018b 2941 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2942 return -EFAULT;
2943
2944 old_fs = get_fs();
c6d409cf 2945 set_fs(KERNEL_DS);
c3f52ae6 2946 err = dev_ioctl(net, cmd,
2947 (struct ifreq __user __force *) &kifr);
c6d409cf 2948 set_fs(old_fs);
7a229387
AB
2949
2950 return err;
2951 case SIOCBONDSLAVEINFOQUERY:
2952 case SIOCBONDINFOQUERY:
2953 uifr = compat_alloc_user_space(sizeof(*uifr));
2954 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2955 return -EFAULT;
2956
2957 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2958 return -EFAULT;
2959
2960 datap = compat_ptr(data);
2961 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2962 return -EFAULT;
2963
6b96018b 2964 return dev_ioctl(net, cmd, uifr);
7a229387 2965 default:
07d106d0 2966 return -ENOIOCTLCMD;
ccbd6a5a 2967 }
7a229387
AB
2968}
2969
6b96018b
AB
2970static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2971 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2972{
2973 struct ifreq __user *u_ifreq64;
7a229387
AB
2974 char tmp_buf[IFNAMSIZ];
2975 void __user *data64;
2976 u32 data32;
2977
2978 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2979 IFNAMSIZ))
2980 return -EFAULT;
2981 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2982 return -EFAULT;
2983 data64 = compat_ptr(data32);
2984
2985 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2986
2987 /* Don't check these user accesses, just let that get trapped
2988 * in the ioctl handler instead.
2989 */
2990 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2991 IFNAMSIZ))
2992 return -EFAULT;
2993 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
2994 return -EFAULT;
2995
6b96018b 2996 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2997}
2998
6b96018b
AB
2999static int dev_ifsioc(struct net *net, struct socket *sock,
3000 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 3001{
a2116ed2 3002 struct ifreq __user *uifr;
7a229387
AB
3003 int err;
3004
a2116ed2
AB
3005 uifr = compat_alloc_user_space(sizeof(*uifr));
3006 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3007 return -EFAULT;
3008
3009 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3010
7a229387
AB
3011 if (!err) {
3012 switch (cmd) {
3013 case SIOCGIFFLAGS:
3014 case SIOCGIFMETRIC:
3015 case SIOCGIFMTU:
3016 case SIOCGIFMEM:
3017 case SIOCGIFHWADDR:
3018 case SIOCGIFINDEX:
3019 case SIOCGIFADDR:
3020 case SIOCGIFBRDADDR:
3021 case SIOCGIFDSTADDR:
3022 case SIOCGIFNETMASK:
fab2532b 3023 case SIOCGIFPFLAGS:
7a229387 3024 case SIOCGIFTXQLEN:
fab2532b
AB
3025 case SIOCGMIIPHY:
3026 case SIOCGMIIREG:
a2116ed2 3027 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
3028 err = -EFAULT;
3029 break;
3030 }
3031 }
3032 return err;
3033}
3034
a2116ed2
AB
3035static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3036 struct compat_ifreq __user *uifr32)
3037{
3038 struct ifreq ifr;
3039 struct compat_ifmap __user *uifmap32;
3040 mm_segment_t old_fs;
3041 int err;
3042
3043 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3044 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3045 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3046 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3047 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3048 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
3049 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
3050 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
3051 if (err)
3052 return -EFAULT;
3053
3054 old_fs = get_fs();
c6d409cf 3055 set_fs(KERNEL_DS);
c3f52ae6 3056 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 3057 set_fs(old_fs);
a2116ed2
AB
3058
3059 if (cmd == SIOCGIFMAP && !err) {
3060 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3061 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3062 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3063 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3064 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
3065 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
3066 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
3067 if (err)
3068 err = -EFAULT;
3069 }
3070 return err;
3071}
3072
3073static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
3074{
3075 void __user *uptr;
3076 compat_uptr_t uptr32;
3077 struct ifreq __user *uifr;
3078
c6d409cf 3079 uifr = compat_alloc_user_space(sizeof(*uifr));
a2116ed2
AB
3080 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
3081 return -EFAULT;
3082
3083 if (get_user(uptr32, &uifr32->ifr_data))
3084 return -EFAULT;
3085
3086 uptr = compat_ptr(uptr32);
3087
3088 if (put_user(uptr, &uifr->ifr_data))
3089 return -EFAULT;
3090
3091 return dev_ioctl(net, SIOCSHWTSTAMP, uifr);
3092}
3093
7a229387 3094struct rtentry32 {
c6d409cf 3095 u32 rt_pad1;
7a229387
AB
3096 struct sockaddr rt_dst; /* target address */
3097 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3098 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3099 unsigned short rt_flags;
3100 short rt_pad2;
3101 u32 rt_pad3;
3102 unsigned char rt_tos;
3103 unsigned char rt_class;
3104 short rt_pad4;
3105 short rt_metric; /* +1 for binary compatibility! */
7a229387 3106 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3107 u32 rt_mtu; /* per route MTU/Window */
3108 u32 rt_window; /* Window clamping */
7a229387
AB
3109 unsigned short rt_irtt; /* Initial RTT */
3110};
3111
3112struct in6_rtmsg32 {
3113 struct in6_addr rtmsg_dst;
3114 struct in6_addr rtmsg_src;
3115 struct in6_addr rtmsg_gateway;
3116 u32 rtmsg_type;
3117 u16 rtmsg_dst_len;
3118 u16 rtmsg_src_len;
3119 u32 rtmsg_metric;
3120 u32 rtmsg_info;
3121 u32 rtmsg_flags;
3122 s32 rtmsg_ifindex;
3123};
3124
6b96018b
AB
3125static int routing_ioctl(struct net *net, struct socket *sock,
3126 unsigned int cmd, void __user *argp)
7a229387
AB
3127{
3128 int ret;
3129 void *r = NULL;
3130 struct in6_rtmsg r6;
3131 struct rtentry r4;
3132 char devname[16];
3133 u32 rtdev;
3134 mm_segment_t old_fs = get_fs();
3135
6b96018b
AB
3136 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3137 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3138 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3139 3 * sizeof(struct in6_addr));
c6d409cf
ED
3140 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3141 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3142 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3143 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3144 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3145 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3146 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3147
3148 r = (void *) &r6;
3149 } else { /* ipv4 */
6b96018b 3150 struct rtentry32 __user *ur4 = argp;
c6d409cf 3151 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3152 3 * sizeof(struct sockaddr));
c6d409cf
ED
3153 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
3154 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
3155 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
3156 ret |= __get_user(r4.rt_window, &(ur4->rt_window));
3157 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
3158 ret |= __get_user(rtdev, &(ur4->rt_dev));
7a229387 3159 if (rtdev) {
c6d409cf 3160 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3161 r4.rt_dev = (char __user __force *)devname;
3162 devname[15] = 0;
7a229387
AB
3163 } else
3164 r4.rt_dev = NULL;
3165
3166 r = (void *) &r4;
3167 }
3168
3169 if (ret) {
3170 ret = -EFAULT;
3171 goto out;
3172 }
3173
c6d409cf 3174 set_fs(KERNEL_DS);
6b96018b 3175 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3176 set_fs(old_fs);
7a229387
AB
3177
3178out:
7a229387
AB
3179 return ret;
3180}
3181
3182/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3183 * for some operations; this forces use of the newer bridge-utils that
25985edc 3184 * use compatible ioctls
7a229387 3185 */
6b96018b 3186static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3187{
6b96018b 3188 compat_ulong_t tmp;
7a229387 3189
6b96018b 3190 if (get_user(tmp, argp))
7a229387
AB
3191 return -EFAULT;
3192 if (tmp == BRCTL_GET_VERSION)
3193 return BRCTL_VERSION + 1;
3194 return -EINVAL;
3195}
3196
6b96018b
AB
3197static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3198 unsigned int cmd, unsigned long arg)
3199{
3200 void __user *argp = compat_ptr(arg);
3201 struct sock *sk = sock->sk;
3202 struct net *net = sock_net(sk);
7a229387 3203
6b96018b
AB
3204 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3205 return siocdevprivate_ioctl(net, cmd, argp);
3206
3207 switch (cmd) {
3208 case SIOCSIFBR:
3209 case SIOCGIFBR:
3210 return old_bridge_ioctl(argp);
3211 case SIOCGIFNAME:
3212 return dev_ifname32(net, argp);
3213 case SIOCGIFCONF:
3214 return dev_ifconf(net, argp);
3215 case SIOCETHTOOL:
3216 return ethtool_ioctl(net, argp);
7a50a240
AB
3217 case SIOCWANDEV:
3218 return compat_siocwandev(net, argp);
a2116ed2
AB
3219 case SIOCGIFMAP:
3220 case SIOCSIFMAP:
3221 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3222 case SIOCBONDENSLAVE:
3223 case SIOCBONDRELEASE:
3224 case SIOCBONDSETHWADDR:
3225 case SIOCBONDSLAVEINFOQUERY:
3226 case SIOCBONDINFOQUERY:
3227 case SIOCBONDCHANGEACTIVE:
3228 return bond_ioctl(net, cmd, argp);
3229 case SIOCADDRT:
3230 case SIOCDELRT:
3231 return routing_ioctl(net, sock, cmd, argp);
3232 case SIOCGSTAMP:
3233 return do_siocgstamp(net, sock, cmd, argp);
3234 case SIOCGSTAMPNS:
3235 return do_siocgstampns(net, sock, cmd, argp);
a2116ed2
AB
3236 case SIOCSHWTSTAMP:
3237 return compat_siocshwtstamp(net, argp);
6b96018b
AB
3238
3239 case FIOSETOWN:
3240 case SIOCSPGRP:
3241 case FIOGETOWN:
3242 case SIOCGPGRP:
3243 case SIOCBRADDBR:
3244 case SIOCBRDELBR:
3245 case SIOCGIFVLAN:
3246 case SIOCSIFVLAN:
3247 case SIOCADDDLCI:
3248 case SIOCDELDLCI:
3249 return sock_ioctl(file, cmd, arg);
3250
3251 case SIOCGIFFLAGS:
3252 case SIOCSIFFLAGS:
3253 case SIOCGIFMETRIC:
3254 case SIOCSIFMETRIC:
3255 case SIOCGIFMTU:
3256 case SIOCSIFMTU:
3257 case SIOCGIFMEM:
3258 case SIOCSIFMEM:
3259 case SIOCGIFHWADDR:
3260 case SIOCSIFHWADDR:
3261 case SIOCADDMULTI:
3262 case SIOCDELMULTI:
3263 case SIOCGIFINDEX:
6b96018b
AB
3264 case SIOCGIFADDR:
3265 case SIOCSIFADDR:
3266 case SIOCSIFHWBROADCAST:
6b96018b 3267 case SIOCDIFADDR:
6b96018b
AB
3268 case SIOCGIFBRDADDR:
3269 case SIOCSIFBRDADDR:
3270 case SIOCGIFDSTADDR:
3271 case SIOCSIFDSTADDR:
3272 case SIOCGIFNETMASK:
3273 case SIOCSIFNETMASK:
3274 case SIOCSIFPFLAGS:
3275 case SIOCGIFPFLAGS:
3276 case SIOCGIFTXQLEN:
3277 case SIOCSIFTXQLEN:
3278 case SIOCBRADDIF:
3279 case SIOCBRDELIF:
9177efd3
AB
3280 case SIOCSIFNAME:
3281 case SIOCGMIIPHY:
3282 case SIOCGMIIREG:
3283 case SIOCSMIIREG:
6b96018b 3284 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3285
6b96018b
AB
3286 case SIOCSARP:
3287 case SIOCGARP:
3288 case SIOCDARP:
6b96018b 3289 case SIOCATMARK:
9177efd3
AB
3290 return sock_do_ioctl(net, sock, cmd, arg);
3291 }
3292
6b96018b
AB
3293 return -ENOIOCTLCMD;
3294}
7a229387 3295
95c96174 3296static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3297 unsigned long arg)
89bbfc95
SP
3298{
3299 struct socket *sock = file->private_data;
3300 int ret = -ENOIOCTLCMD;
87de87d5
DM
3301 struct sock *sk;
3302 struct net *net;
3303
3304 sk = sock->sk;
3305 net = sock_net(sk);
89bbfc95
SP
3306
3307 if (sock->ops->compat_ioctl)
3308 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3309
87de87d5
DM
3310 if (ret == -ENOIOCTLCMD &&
3311 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3312 ret = compat_wext_handle_ioctl(net, cmd, arg);
3313
6b96018b
AB
3314 if (ret == -ENOIOCTLCMD)
3315 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3316
89bbfc95
SP
3317 return ret;
3318}
3319#endif
3320
ac5a488e
SS
3321int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3322{
3323 return sock->ops->bind(sock, addr, addrlen);
3324}
c6d409cf 3325EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3326
3327int kernel_listen(struct socket *sock, int backlog)
3328{
3329 return sock->ops->listen(sock, backlog);
3330}
c6d409cf 3331EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3332
3333int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3334{
3335 struct sock *sk = sock->sk;
3336 int err;
3337
3338 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3339 newsock);
3340 if (err < 0)
3341 goto done;
3342
3343 err = sock->ops->accept(sock, *newsock, flags);
3344 if (err < 0) {
3345 sock_release(*newsock);
fa8705b0 3346 *newsock = NULL;
ac5a488e
SS
3347 goto done;
3348 }
3349
3350 (*newsock)->ops = sock->ops;
1b08534e 3351 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3352
3353done:
3354 return err;
3355}
c6d409cf 3356EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3357
3358int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3359 int flags)
ac5a488e
SS
3360{
3361 return sock->ops->connect(sock, addr, addrlen, flags);
3362}
c6d409cf 3363EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3364
3365int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3366 int *addrlen)
3367{
3368 return sock->ops->getname(sock, addr, addrlen, 0);
3369}
c6d409cf 3370EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3371
3372int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3373 int *addrlen)
3374{
3375 return sock->ops->getname(sock, addr, addrlen, 1);
3376}
c6d409cf 3377EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3378
3379int kernel_getsockopt(struct socket *sock, int level, int optname,
3380 char *optval, int *optlen)
3381{
3382 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3383 char __user *uoptval;
3384 int __user *uoptlen;
ac5a488e
SS
3385 int err;
3386
fb8621bb
NK
3387 uoptval = (char __user __force *) optval;
3388 uoptlen = (int __user __force *) optlen;
3389
ac5a488e
SS
3390 set_fs(KERNEL_DS);
3391 if (level == SOL_SOCKET)
fb8621bb 3392 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3393 else
fb8621bb
NK
3394 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3395 uoptlen);
ac5a488e
SS
3396 set_fs(oldfs);
3397 return err;
3398}
c6d409cf 3399EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3400
3401int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3402 char *optval, unsigned int optlen)
ac5a488e
SS
3403{
3404 mm_segment_t oldfs = get_fs();
fb8621bb 3405 char __user *uoptval;
ac5a488e
SS
3406 int err;
3407
fb8621bb
NK
3408 uoptval = (char __user __force *) optval;
3409
ac5a488e
SS
3410 set_fs(KERNEL_DS);
3411 if (level == SOL_SOCKET)
fb8621bb 3412 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3413 else
fb8621bb 3414 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3415 optlen);
3416 set_fs(oldfs);
3417 return err;
3418}
c6d409cf 3419EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3420
3421int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3422 size_t size, int flags)
3423{
f8451725
HX
3424 sock_update_classid(sock->sk);
3425
ac5a488e
SS
3426 if (sock->ops->sendpage)
3427 return sock->ops->sendpage(sock, page, offset, size, flags);
3428
3429 return sock_no_sendpage(sock, page, offset, size, flags);
3430}
c6d409cf 3431EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3432
3433int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3434{
3435 mm_segment_t oldfs = get_fs();
3436 int err;
3437
3438 set_fs(KERNEL_DS);
3439 err = sock->ops->ioctl(sock, cmd, arg);
3440 set_fs(oldfs);
3441
3442 return err;
3443}
c6d409cf 3444EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3445
91cf45f0
TM
3446int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3447{
3448 return sock->ops->shutdown(sock, how);
3449}
91cf45f0 3450EXPORT_SYMBOL(kernel_sock_shutdown);