Merge remote branch 'origin' into for-next
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4
LT
72#include <linux/wanrouter.h>
73#include <linux/if_bridge.h>
20380731
ACM
74#include <linux/if_frad.h>
75#include <linux/if_vlan.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
1da177e4
LT
91
92#include <asm/uaccess.h>
93#include <asm/unistd.h>
94
95#include <net/compat.h>
87de87d5 96#include <net/wext.h>
f8451725 97#include <net/cls_cgroup.h>
1da177e4
LT
98
99#include <net/sock.h>
100#include <linux/netfilter.h>
101
6b96018b
AB
102#include <linux/if_tun.h>
103#include <linux/ipv6_route.h>
104#include <linux/route.h>
6b96018b
AB
105#include <linux/sockios.h>
106#include <linux/atalk.h>
107
1da177e4 108static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
109static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
110 unsigned long nr_segs, loff_t pos);
111static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
112 unsigned long nr_segs, loff_t pos);
89bddce5 113static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
114
115static int sock_close(struct inode *inode, struct file *file);
116static unsigned int sock_poll(struct file *file,
117 struct poll_table_struct *wait);
89bddce5 118static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
119#ifdef CONFIG_COMPAT
120static long compat_sock_ioctl(struct file *file,
89bddce5 121 unsigned int cmd, unsigned long arg);
89bbfc95 122#endif
1da177e4 123static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
124static ssize_t sock_sendpage(struct file *file, struct page *page,
125 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 126static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 127 struct pipe_inode_info *pipe, size_t len,
9c55e01c 128 unsigned int flags);
1da177e4 129
1da177e4
LT
130/*
131 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
132 * in the operation structures but are done directly via the socketcall() multiplexor.
133 */
134
da7071d7 135static const struct file_operations socket_file_ops = {
1da177e4
LT
136 .owner = THIS_MODULE,
137 .llseek = no_llseek,
138 .aio_read = sock_aio_read,
139 .aio_write = sock_aio_write,
140 .poll = sock_poll,
141 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
142#ifdef CONFIG_COMPAT
143 .compat_ioctl = compat_sock_ioctl,
144#endif
1da177e4
LT
145 .mmap = sock_mmap,
146 .open = sock_no_open, /* special open code to disallow open via /proc */
147 .release = sock_close,
148 .fasync = sock_fasync,
5274f052
JA
149 .sendpage = sock_sendpage,
150 .splice_write = generic_splice_sendpage,
9c55e01c 151 .splice_read = sock_splice_read,
1da177e4
LT
152};
153
154/*
155 * The protocol list. Each protocol is registered in here.
156 */
157
1da177e4 158static DEFINE_SPINLOCK(net_family_lock);
190683a9 159static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 160
1da177e4
LT
161/*
162 * Statistics counters of the socket lists
163 */
164
c6d409cf 165static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
166
167/*
89bddce5
SH
168 * Support routines.
169 * Move socket addresses back and forth across the kernel/user
170 * divide and look after the messy bits.
1da177e4
LT
171 */
172
1da177e4
LT
173/**
174 * move_addr_to_kernel - copy a socket address into kernel space
175 * @uaddr: Address in user space
176 * @kaddr: Address in kernel space
177 * @ulen: Length in user space
178 *
179 * The address is copied into kernel space. If the provided address is
180 * too long an error code of -EINVAL is returned. If the copy gives
181 * invalid addresses -EFAULT is returned. On a success 0 is returned.
182 */
183
43db362d 184int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 185{
230b1839 186 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 187 return -EINVAL;
89bddce5 188 if (ulen == 0)
1da177e4 189 return 0;
89bddce5 190 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 191 return -EFAULT;
3ec3b2fb 192 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
193}
194
195/**
196 * move_addr_to_user - copy an address to user space
197 * @kaddr: kernel space address
198 * @klen: length of address in kernel
199 * @uaddr: user space address
200 * @ulen: pointer to user length field
201 *
202 * The value pointed to by ulen on entry is the buffer length available.
203 * This is overwritten with the buffer space used. -EINVAL is returned
204 * if an overlong buffer is specified or a negative buffer size. -EFAULT
205 * is returned if either the buffer or the length field are not
206 * accessible.
207 * After copying the data up to the limit the user specifies, the true
208 * length of the data is written over the length limit the user
209 * specified. Zero is returned for a success.
210 */
89bddce5 211
43db362d 212static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 213 void __user *uaddr, int __user *ulen)
1da177e4
LT
214{
215 int err;
216 int len;
217
89bddce5
SH
218 err = get_user(len, ulen);
219 if (err)
1da177e4 220 return err;
89bddce5
SH
221 if (len > klen)
222 len = klen;
230b1839 223 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 224 return -EINVAL;
89bddce5 225 if (len) {
d6fe3945
SG
226 if (audit_sockaddr(klen, kaddr))
227 return -ENOMEM;
89bddce5 228 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
229 return -EFAULT;
230 }
231 /*
89bddce5
SH
232 * "fromlen shall refer to the value before truncation.."
233 * 1003.1g
1da177e4
LT
234 */
235 return __put_user(klen, ulen);
236}
237
e18b890b 238static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
239
240static struct inode *sock_alloc_inode(struct super_block *sb)
241{
242 struct socket_alloc *ei;
eaefd110 243 struct socket_wq *wq;
89bddce5 244
e94b1766 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
246 if (!ei)
247 return NULL;
eaefd110
ED
248 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
249 if (!wq) {
43815482
ED
250 kmem_cache_free(sock_inode_cachep, ei);
251 return NULL;
252 }
eaefd110
ED
253 init_waitqueue_head(&wq->wait);
254 wq->fasync_list = NULL;
255 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482 268 struct socket_alloc *ei;
eaefd110 269 struct socket_wq *wq;
43815482
ED
270
271 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 272 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 273 kfree_rcu(wq, rcu);
43815482 274 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
275}
276
51cc5068 277static void init_once(void *foo)
1da177e4 278{
89bddce5 279 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 280
a35afb83 281 inode_init_once(&ei->vfs_inode);
1da177e4 282}
89bddce5 283
1da177e4
LT
284static int init_inodecache(void)
285{
286 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
287 sizeof(struct socket_alloc),
288 0,
289 (SLAB_HWCACHE_ALIGN |
290 SLAB_RECLAIM_ACCOUNT |
291 SLAB_MEM_SPREAD),
20c2df83 292 init_once);
1da177e4
LT
293 if (sock_inode_cachep == NULL)
294 return -ENOMEM;
295 return 0;
296}
297
b87221de 298static const struct super_operations sockfs_ops = {
c6d409cf
ED
299 .alloc_inode = sock_alloc_inode,
300 .destroy_inode = sock_destroy_inode,
301 .statfs = simple_statfs,
1da177e4
LT
302};
303
c23fbb6b
ED
304/*
305 * sockfs_dname() is called from d_path().
306 */
307static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
308{
309 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
310 dentry->d_inode->i_ino);
311}
312
3ba13d17 313static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 314 .d_dname = sockfs_dname,
1da177e4
LT
315};
316
c74a1cbb
AV
317static struct dentry *sockfs_mount(struct file_system_type *fs_type,
318 int flags, const char *dev_name, void *data)
319{
320 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
321 &sockfs_dentry_operations, SOCKFS_MAGIC);
322}
323
324static struct vfsmount *sock_mnt __read_mostly;
325
326static struct file_system_type sock_fs_type = {
327 .name = "sockfs",
328 .mount = sockfs_mount,
329 .kill_sb = kill_anon_super,
330};
331
1da177e4
LT
332/*
333 * Obtains the first available file descriptor and sets it up for use.
334 *
39d8c1b6
DM
335 * These functions create file structures and maps them to fd space
336 * of the current process. On success it returns file descriptor
1da177e4
LT
337 * and file struct implicitly stored in sock->file.
338 * Note that another thread may close file descriptor before we return
339 * from this function. We use the fact that now we do not refer
340 * to socket after mapping. If one day we will need it, this
341 * function will increment ref. count on file by 1.
342 *
343 * In any case returned fd MAY BE not valid!
344 * This race condition is unavoidable
345 * with shared fd spaces, we cannot solve it inside kernel,
346 * but we take care of internal coherence yet.
347 */
348
7cbe66b6 349static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
1da177e4 350{
7cbe66b6 351 struct qstr name = { .name = "" };
2c48b9c4 352 struct path path;
7cbe66b6 353 struct file *file;
1da177e4 354 int fd;
1da177e4 355
a677a039 356 fd = get_unused_fd_flags(flags);
7cbe66b6
AV
357 if (unlikely(fd < 0))
358 return fd;
1da177e4 359
4b936885 360 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
2c48b9c4 361 if (unlikely(!path.dentry)) {
7cbe66b6 362 put_unused_fd(fd);
39d8c1b6 363 return -ENOMEM;
7cbe66b6 364 }
2c48b9c4 365 path.mnt = mntget(sock_mnt);
39d8c1b6 366
2c48b9c4 367 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 368 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 369
2c48b9c4 370 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 371 &socket_file_ops);
cc3808f8
AV
372 if (unlikely(!file)) {
373 /* drop dentry, keep inode */
7de9c6ee 374 ihold(path.dentry->d_inode);
2c48b9c4 375 path_put(&path);
cc3808f8
AV
376 put_unused_fd(fd);
377 return -ENFILE;
378 }
379
380 sock->file = file;
77d27200 381 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6
DM
382 file->f_pos = 0;
383 file->private_data = sock;
1da177e4 384
7cbe66b6
AV
385 *f = file;
386 return fd;
39d8c1b6
DM
387}
388
/*
 * Allocate a file for @sock via sock_alloc_file() and install it in the
 * descriptor table.  Returns the new descriptor, or the negative errno
 * from sock_alloc_file(), in which case nothing is installed.
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int fd;

	fd = sock_alloc_file(sock, &newfile, flags);
	if (unlikely(fd < 0))
		return fd;

	fd_install(fd, newfile);
	return fd;
}
EXPORT_SYMBOL(sock_map_fd);
1da177e4 400
406a3c63 401struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 402{
6cb153ca
BL
403 if (file->f_op == &socket_file_ops)
404 return file->private_data; /* set in sock_map_fd */
405
23bb80d2
ED
406 *err = -ENOTSOCK;
407 return NULL;
6cb153ca 408}
406a3c63 409EXPORT_SYMBOL(sock_from_file);
6cb153ca 410
1da177e4 411/**
c6d409cf 412 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
413 * @fd: file handle
414 * @err: pointer to an error code return
415 *
416 * The file handle passed in is locked and the socket it is bound
417 * too is returned. If an error occurs the err pointer is overwritten
418 * with a negative errno code and NULL is returned. The function checks
419 * for both invalid handles and passing a handle which is not a socket.
420 *
421 * On a success the socket object pointer is returned.
422 */
423
424struct socket *sockfd_lookup(int fd, int *err)
425{
426 struct file *file;
1da177e4
LT
427 struct socket *sock;
428
89bddce5
SH
429 file = fget(fd);
430 if (!file) {
1da177e4
LT
431 *err = -EBADF;
432 return NULL;
433 }
89bddce5 434
6cb153ca
BL
435 sock = sock_from_file(file, err);
436 if (!sock)
1da177e4 437 fput(file);
6cb153ca
BL
438 return sock;
439}
c6d409cf 440EXPORT_SYMBOL(sockfd_lookup);
1da177e4 441
6cb153ca
BL
442static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
443{
444 struct file *file;
445 struct socket *sock;
446
3672558c 447 *err = -EBADF;
6cb153ca
BL
448 file = fget_light(fd, fput_needed);
449 if (file) {
450 sock = sock_from_file(file, err);
451 if (sock)
452 return sock;
453 fput_light(file, *fput_needed);
1da177e4 454 }
6cb153ca 455 return NULL;
1da177e4
LT
456}
457
458/**
459 * sock_alloc - allocate a socket
89bddce5 460 *
1da177e4
LT
461 * Allocate a new inode and socket object. The two are bound together
462 * and initialised. The socket is then returned. If we are out of inodes
463 * NULL is returned.
464 */
465
466static struct socket *sock_alloc(void)
467{
89bddce5
SH
468 struct inode *inode;
469 struct socket *sock;
1da177e4 470
a209dfc7 471 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
472 if (!inode)
473 return NULL;
474
475 sock = SOCKET_I(inode);
476
29a020d3 477 kmemcheck_annotate_bitfield(sock, type);
85fe4025 478 inode->i_ino = get_next_ino();
89bddce5 479 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
480 inode->i_uid = current_fsuid();
481 inode->i_gid = current_fsgid();
1da177e4 482
19e8d69c 483 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
484 return sock;
485}
486
487/*
488 * In theory you can't get an open on this inode, but /proc provides
489 * a back door. Remember to keep it shut otherwise you'll let the
490 * creepy crawlies in.
491 */
89bddce5 492
1da177e4
LT
493static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
494{
495 return -ENXIO;
496}
497
4b6f5d20 498const struct file_operations bad_sock_fops = {
1da177e4
LT
499 .owner = THIS_MODULE,
500 .open = sock_no_open,
6038f373 501 .llseek = noop_llseek,
1da177e4
LT
502};
503
504/**
505 * sock_release - close a socket
506 * @sock: socket to close
507 *
508 * The socket is released from the protocol stack if it has a release
509 * callback, and the inode is then released if the socket is bound to
89bddce5 510 * an inode not a file.
1da177e4 511 */
89bddce5 512
1da177e4
LT
513void sock_release(struct socket *sock)
514{
515 if (sock->ops) {
516 struct module *owner = sock->ops->owner;
517
518 sock->ops->release(sock);
519 sock->ops = NULL;
520 module_put(owner);
521 }
522
eaefd110 523 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
1da177e4
LT
524 printk(KERN_ERR "sock_release: fasync list not empty!\n");
525
b09e786b
MP
526 if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
527 return;
528
19e8d69c 529 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
530 if (!sock->file) {
531 iput(SOCK_INODE(sock));
532 return;
533 }
89bddce5 534 sock->file = NULL;
1da177e4 535}
c6d409cf 536EXPORT_SYMBOL(sock_release);
1da177e4 537
2244d07b 538int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
20d49473 539{
2244d07b 540 *tx_flags = 0;
20d49473 541 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
2244d07b 542 *tx_flags |= SKBTX_HW_TSTAMP;
20d49473 543 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
2244d07b 544 *tx_flags |= SKBTX_SW_TSTAMP;
6e3e939f
JB
545 if (sock_flag(sk, SOCK_WIFI_STATUS))
546 *tx_flags |= SKBTX_WIFI_STATUS;
20d49473
PO
547 return 0;
548}
549EXPORT_SYMBOL(sock_tx_timestamp);
550
228e548e
AB
551static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
552 struct msghdr *msg, size_t size)
1da177e4
LT
553{
554 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4 555
f8451725
HX
556 sock_update_classid(sock->sk);
557
1da177e4
LT
558 si->sock = sock;
559 si->scm = NULL;
560 si->msg = msg;
561 si->size = size;
562
1da177e4
LT
563 return sock->ops->sendmsg(iocb, sock, msg, size);
564}
565
228e548e
AB
566static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
567 struct msghdr *msg, size_t size)
568{
569 int err = security_socket_sendmsg(sock, msg, size);
570
571 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
572}
573
1da177e4
LT
574int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
575{
576 struct kiocb iocb;
577 struct sock_iocb siocb;
578 int ret;
579
580 init_sync_kiocb(&iocb, NULL);
581 iocb.private = &siocb;
582 ret = __sock_sendmsg(&iocb, sock, msg, size);
583 if (-EIOCBQUEUED == ret)
584 ret = wait_on_sync_kiocb(&iocb);
585 return ret;
586}
c6d409cf 587EXPORT_SYMBOL(sock_sendmsg);
1da177e4 588
894dc24c 589static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e
AB
590{
591 struct kiocb iocb;
592 struct sock_iocb siocb;
593 int ret;
594
595 init_sync_kiocb(&iocb, NULL);
596 iocb.private = &siocb;
597 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
598 if (-EIOCBQUEUED == ret)
599 ret = wait_on_sync_kiocb(&iocb);
600 return ret;
601}
602
1da177e4
LT
603int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
604 struct kvec *vec, size_t num, size_t size)
605{
606 mm_segment_t oldfs = get_fs();
607 int result;
608
609 set_fs(KERNEL_DS);
610 /*
611 * the following is safe, since for compiler definitions of kvec and
612 * iovec are identical, yielding the same in-core layout and alignment
613 */
89bddce5 614 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
615 msg->msg_iovlen = num;
616 result = sock_sendmsg(sock, msg, size);
617 set_fs(oldfs);
618 return result;
619}
c6d409cf 620EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 621
20d49473
PO
622static int ktime2ts(ktime_t kt, struct timespec *ts)
623{
624 if (kt.tv64) {
625 *ts = ktime_to_timespec(kt);
626 return 1;
627 } else {
628 return 0;
629 }
630}
631
92f37fd2
ED
632/*
633 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
634 */
635void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
636 struct sk_buff *skb)
637{
20d49473
PO
638 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
639 struct timespec ts[3];
640 int empty = 1;
641 struct skb_shared_hwtstamps *shhwtstamps =
642 skb_hwtstamps(skb);
643
644 /* Race occurred between timestamp enabling and packet
645 receiving. Fill in the current time for now. */
646 if (need_software_tstamp && skb->tstamp.tv64 == 0)
647 __net_timestamp(skb);
648
649 if (need_software_tstamp) {
650 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
651 struct timeval tv;
652 skb_get_timestamp(skb, &tv);
653 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
654 sizeof(tv), &tv);
655 } else {
842509b8 656 skb_get_timestampns(skb, &ts[0]);
20d49473 657 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 658 sizeof(ts[0]), &ts[0]);
20d49473
PO
659 }
660 }
661
662
663 memset(ts, 0, sizeof(ts));
664 if (skb->tstamp.tv64 &&
665 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
666 skb_get_timestampns(skb, ts + 0);
667 empty = 0;
668 }
669 if (shhwtstamps) {
670 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
671 ktime2ts(shhwtstamps->syststamp, ts + 1))
672 empty = 0;
673 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
674 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
675 empty = 0;
92f37fd2 676 }
20d49473
PO
677 if (!empty)
678 put_cmsg(msg, SOL_SOCKET,
679 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 680}
7c81fd8b
ACM
681EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
682
6e3e939f
JB
683void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
684 struct sk_buff *skb)
685{
686 int ack;
687
688 if (!sock_flag(sk, SOCK_WIFI_STATUS))
689 return;
690 if (!skb->wifi_acked_valid)
691 return;
692
693 ack = skb->wifi_acked;
694
695 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
696}
697EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
698
11165f14 699static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
700 struct sk_buff *skb)
3b885787
NH
701{
702 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
703 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
704 sizeof(__u32), &skb->dropcount);
705}
706
/*
 * Convenience wrapper: emit the timestamp control messages for @skb and
 * then the drop-count control message, in that order.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 714
a2e27255
ACM
715static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
716 struct msghdr *msg, size_t size, int flags)
1da177e4 717{
1da177e4
LT
718 struct sock_iocb *si = kiocb_to_siocb(iocb);
719
f8451725
HX
720 sock_update_classid(sock->sk);
721
1da177e4
LT
722 si->sock = sock;
723 si->scm = NULL;
724 si->msg = msg;
725 si->size = size;
726 si->flags = flags;
727
1da177e4
LT
728 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
729}
730
a2e27255
ACM
731static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
732 struct msghdr *msg, size_t size, int flags)
733{
734 int err = security_socket_recvmsg(sock, msg, size, flags);
735
736 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
737}
738
89bddce5 739int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
740 size_t size, int flags)
741{
742 struct kiocb iocb;
743 struct sock_iocb siocb;
744 int ret;
745
89bddce5 746 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
747 iocb.private = &siocb;
748 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
749 if (-EIOCBQUEUED == ret)
750 ret = wait_on_sync_kiocb(&iocb);
751 return ret;
752}
c6d409cf 753EXPORT_SYMBOL(sock_recvmsg);
1da177e4 754
a2e27255
ACM
755static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
756 size_t size, int flags)
757{
758 struct kiocb iocb;
759 struct sock_iocb siocb;
760 int ret;
761
762 init_sync_kiocb(&iocb, NULL);
763 iocb.private = &siocb;
764 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
765 if (-EIOCBQUEUED == ret)
766 ret = wait_on_sync_kiocb(&iocb);
767 return ret;
768}
769
c1249c0a
ML
770/**
771 * kernel_recvmsg - Receive a message from a socket (kernel space)
772 * @sock: The socket to receive the message from
773 * @msg: Received message
774 * @vec: Input s/g array for message data
775 * @num: Size of input s/g array
776 * @size: Number of bytes to read
777 * @flags: Message flags (MSG_DONTWAIT, etc...)
778 *
779 * On return the msg structure contains the scatter/gather array passed in the
780 * vec argument. The array is modified so that it consists of the unfilled
781 * portion of the original array.
782 *
783 * The returned value is the total number of bytes received, or an error.
784 */
89bddce5
SH
785int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
786 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
787{
788 mm_segment_t oldfs = get_fs();
789 int result;
790
791 set_fs(KERNEL_DS);
792 /*
793 * the following is safe, since for compiler definitions of kvec and
794 * iovec are identical, yielding the same in-core layout and alignment
795 */
89bddce5 796 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
797 result = sock_recvmsg(sock, msg, size, flags);
798 set_fs(oldfs);
799 return result;
800}
c6d409cf 801EXPORT_SYMBOL(kernel_recvmsg);
1da177e4
LT
802
803static void sock_aio_dtor(struct kiocb *iocb)
804{
805 kfree(iocb->private);
806}
807
ce1d4d3e
CH
808static ssize_t sock_sendpage(struct file *file, struct page *page,
809 int offset, size_t size, loff_t *ppos, int more)
1da177e4 810{
1da177e4
LT
811 struct socket *sock;
812 int flags;
813
ce1d4d3e
CH
814 sock = file->private_data;
815
35f9c09f
ED
816 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
817 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
818 flags |= more;
ce1d4d3e 819
e6949583 820 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 821}
1da177e4 822
9c55e01c 823static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 824 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
825 unsigned int flags)
826{
827 struct socket *sock = file->private_data;
828
997b37da
RDC
829 if (unlikely(!sock->ops->splice_read))
830 return -EINVAL;
831
f8451725
HX
832 sock_update_classid(sock->sk);
833
9c55e01c
JA
834 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
835}
836
ce1d4d3e 837static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 838 struct sock_iocb *siocb)
ce1d4d3e
CH
839{
840 if (!is_sync_kiocb(iocb)) {
841 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
842 if (!siocb)
843 return NULL;
1da177e4
LT
844 iocb->ki_dtor = sock_aio_dtor;
845 }
1da177e4 846
ce1d4d3e 847 siocb->kiocb = iocb;
ce1d4d3e
CH
848 iocb->private = siocb;
849 return siocb;
1da177e4
LT
850}
851
ce1d4d3e 852static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
853 struct file *file, const struct iovec *iov,
854 unsigned long nr_segs)
ce1d4d3e
CH
855{
856 struct socket *sock = file->private_data;
857 size_t size = 0;
858 int i;
1da177e4 859
89bddce5
SH
860 for (i = 0; i < nr_segs; i++)
861 size += iov[i].iov_len;
1da177e4 862
ce1d4d3e
CH
863 msg->msg_name = NULL;
864 msg->msg_namelen = 0;
865 msg->msg_control = NULL;
866 msg->msg_controllen = 0;
89bddce5 867 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
868 msg->msg_iovlen = nr_segs;
869 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
870
871 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
872}
873
027445c3
BP
874static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
875 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
876{
877 struct sock_iocb siocb, *x;
878
1da177e4
LT
879 if (pos != 0)
880 return -ESPIPE;
027445c3
BP
881
882 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
883 return 0;
884
027445c3
BP
885
886 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
887 if (!x)
888 return -ENOMEM;
027445c3 889 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
890}
891
ce1d4d3e 892static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
893 struct file *file, const struct iovec *iov,
894 unsigned long nr_segs)
1da177e4 895{
ce1d4d3e
CH
896 struct socket *sock = file->private_data;
897 size_t size = 0;
898 int i;
1da177e4 899
89bddce5
SH
900 for (i = 0; i < nr_segs; i++)
901 size += iov[i].iov_len;
1da177e4 902
ce1d4d3e
CH
903 msg->msg_name = NULL;
904 msg->msg_namelen = 0;
905 msg->msg_control = NULL;
906 msg->msg_controllen = 0;
89bddce5 907 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
908 msg->msg_iovlen = nr_segs;
909 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
910 if (sock->type == SOCK_SEQPACKET)
911 msg->msg_flags |= MSG_EOR;
1da177e4 912
ce1d4d3e 913 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
914}
915
027445c3
BP
916static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
917 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
918{
919 struct sock_iocb siocb, *x;
1da177e4 920
ce1d4d3e
CH
921 if (pos != 0)
922 return -ESPIPE;
027445c3 923
027445c3 924 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
925 if (!x)
926 return -ENOMEM;
1da177e4 927
027445c3 928 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
929}
930
1da177e4
LT
931/*
932 * Atomic setting of ioctl hooks to avoid race
933 * with module unload.
934 */
935
4a3e2f71 936static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 937static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 938
881d966b 939void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 940{
4a3e2f71 941 mutex_lock(&br_ioctl_mutex);
1da177e4 942 br_ioctl_hook = hook;
4a3e2f71 943 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
944}
945EXPORT_SYMBOL(brioctl_set);
946
4a3e2f71 947static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 948static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 949
881d966b 950void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 951{
4a3e2f71 952 mutex_lock(&vlan_ioctl_mutex);
1da177e4 953 vlan_ioctl_hook = hook;
4a3e2f71 954 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
955}
956EXPORT_SYMBOL(vlan_ioctl_set);
957
4a3e2f71 958static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 959static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 960
89bddce5 961void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 962{
4a3e2f71 963 mutex_lock(&dlci_ioctl_mutex);
1da177e4 964 dlci_ioctl_hook = hook;
4a3e2f71 965 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
966}
967EXPORT_SYMBOL(dlci_ioctl_set);
968
6b96018b
AB
969static long sock_do_ioctl(struct net *net, struct socket *sock,
970 unsigned int cmd, unsigned long arg)
971{
972 int err;
973 void __user *argp = (void __user *)arg;
974
975 err = sock->ops->ioctl(sock, cmd, arg);
976
977 /*
978 * If this ioctl is unknown try to hand it down
979 * to the NIC driver.
980 */
981 if (err == -ENOIOCTLCMD)
982 err = dev_ioctl(net, cmd, argp);
983
984 return err;
985}
986
1da177e4
LT
987/*
988 * With an ioctl, arg may well be a user mode pointer, but we don't know
989 * what to do with it - that's up to the protocol still.
990 */
991
992static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
993{
994 struct socket *sock;
881d966b 995 struct sock *sk;
1da177e4
LT
996 void __user *argp = (void __user *)arg;
997 int pid, err;
881d966b 998 struct net *net;
1da177e4 999
b69aee04 1000 sock = file->private_data;
881d966b 1001 sk = sock->sk;
3b1e0a65 1002 net = sock_net(sk);
1da177e4 1003 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1004 err = dev_ioctl(net, cmd, argp);
1da177e4 1005 } else
3d23e349 1006#ifdef CONFIG_WEXT_CORE
1da177e4 1007 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1008 err = dev_ioctl(net, cmd, argp);
1da177e4 1009 } else
3d23e349 1010#endif
89bddce5 1011 switch (cmd) {
1da177e4
LT
1012 case FIOSETOWN:
1013 case SIOCSPGRP:
1014 err = -EFAULT;
1015 if (get_user(pid, (int __user *)argp))
1016 break;
1017 err = f_setown(sock->file, pid, 1);
1018 break;
1019 case FIOGETOWN:
1020 case SIOCGPGRP:
609d7fa9 1021 err = put_user(f_getown(sock->file),
89bddce5 1022 (int __user *)argp);
1da177e4
LT
1023 break;
1024 case SIOCGIFBR:
1025 case SIOCSIFBR:
1026 case SIOCBRADDBR:
1027 case SIOCBRDELBR:
1028 err = -ENOPKG;
1029 if (!br_ioctl_hook)
1030 request_module("bridge");
1031
4a3e2f71 1032 mutex_lock(&br_ioctl_mutex);
89bddce5 1033 if (br_ioctl_hook)
881d966b 1034 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1035 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1036 break;
1037 case SIOCGIFVLAN:
1038 case SIOCSIFVLAN:
1039 err = -ENOPKG;
1040 if (!vlan_ioctl_hook)
1041 request_module("8021q");
1042
4a3e2f71 1043 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1044 if (vlan_ioctl_hook)
881d966b 1045 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1046 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1047 break;
1da177e4
LT
1048 case SIOCADDDLCI:
1049 case SIOCDELDLCI:
1050 err = -ENOPKG;
1051 if (!dlci_ioctl_hook)
1052 request_module("dlci");
1053
7512cbf6
PE
1054 mutex_lock(&dlci_ioctl_mutex);
1055 if (dlci_ioctl_hook)
1da177e4 1056 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1057 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1058 break;
1059 default:
6b96018b 1060 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1061 break;
89bddce5 1062 }
1da177e4
LT
1063 return err;
1064}
1065
1066int sock_create_lite(int family, int type, int protocol, struct socket **res)
1067{
1068 int err;
1069 struct socket *sock = NULL;
89bddce5 1070
1da177e4
LT
1071 err = security_socket_create(family, type, protocol, 1);
1072 if (err)
1073 goto out;
1074
1075 sock = sock_alloc();
1076 if (!sock) {
1077 err = -ENOMEM;
1078 goto out;
1079 }
1080
1da177e4 1081 sock->type = type;
7420ed23
VY
1082 err = security_socket_post_create(sock, family, type, protocol, 1);
1083 if (err)
1084 goto out_release;
1085
1da177e4
LT
1086out:
1087 *res = sock;
1088 return err;
7420ed23
VY
1089out_release:
1090 sock_release(sock);
1091 sock = NULL;
1092 goto out;
1da177e4 1093}
c6d409cf 1094EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1095
1096/* No kernel lock held - perfect */
89bddce5 1097static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1098{
1099 struct socket *sock;
1100
1101 /*
89bddce5 1102 * We can't return errors to poll, so it's either yes or no.
1da177e4 1103 */
b69aee04 1104 sock = file->private_data;
1da177e4
LT
1105 return sock->ops->poll(file, sock, wait);
1106}
1107
89bddce5 1108static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1109{
b69aee04 1110 struct socket *sock = file->private_data;
1da177e4
LT
1111
1112 return sock->ops->mmap(file, sock, vma);
1113}
1114
20380731 1115static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1116{
1117 /*
89bddce5
SH
1118 * It was possible the inode is NULL we were
1119 * closing an unfinished socket.
1da177e4
LT
1120 */
1121
89bddce5 1122 if (!inode) {
1da177e4
LT
1123 printk(KERN_DEBUG "sock_close: NULL inode\n");
1124 return 0;
1125 }
1da177e4
LT
1126 sock_release(SOCKET_I(inode));
1127 return 0;
1128}
1129
1130/*
1131 * Update the socket async list
1132 *
1133 * Fasync_list locking strategy.
1134 *
1135 * 1. fasync_list is modified only under process context socket lock
1136 * i.e. under semaphore.
1137 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1138 * or under socket lock
1da177e4
LT
1139 */
1140
1141static int sock_fasync(int fd, struct file *filp, int on)
1142{
989a2979
ED
1143 struct socket *sock = filp->private_data;
1144 struct sock *sk = sock->sk;
eaefd110 1145 struct socket_wq *wq;
1da177e4 1146
989a2979 1147 if (sk == NULL)
1da177e4 1148 return -EINVAL;
1da177e4
LT
1149
1150 lock_sock(sk);
eaefd110
ED
1151 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1152 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1153
eaefd110 1154 if (!wq->fasync_list)
989a2979
ED
1155 sock_reset_flag(sk, SOCK_FASYNC);
1156 else
bcdce719 1157 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1158
989a2979 1159 release_sock(sk);
1da177e4
LT
1160 return 0;
1161}
1162
43815482 1163/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1164
1165int sock_wake_async(struct socket *sock, int how, int band)
1166{
43815482
ED
1167 struct socket_wq *wq;
1168
1169 if (!sock)
1170 return -1;
1171 rcu_read_lock();
1172 wq = rcu_dereference(sock->wq);
1173 if (!wq || !wq->fasync_list) {
1174 rcu_read_unlock();
1da177e4 1175 return -1;
43815482 1176 }
89bddce5 1177 switch (how) {
8d8ad9d7 1178 case SOCK_WAKE_WAITD:
1da177e4
LT
1179 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1180 break;
1181 goto call_kill;
8d8ad9d7 1182 case SOCK_WAKE_SPACE:
1da177e4
LT
1183 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1184 break;
1185 /* fall through */
8d8ad9d7 1186 case SOCK_WAKE_IO:
89bddce5 1187call_kill:
43815482 1188 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1189 break;
8d8ad9d7 1190 case SOCK_WAKE_URG:
43815482 1191 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1192 }
43815482 1193 rcu_read_unlock();
1da177e4
LT
1194 return 0;
1195}
c6d409cf 1196EXPORT_SYMBOL(sock_wake_async);
1da177e4 1197
721db93a 1198int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1199 struct socket **res, int kern)
1da177e4
LT
1200{
1201 int err;
1202 struct socket *sock;
55737fda 1203 const struct net_proto_family *pf;
1da177e4
LT
1204
1205 /*
89bddce5 1206 * Check protocol is in range
1da177e4
LT
1207 */
1208 if (family < 0 || family >= NPROTO)
1209 return -EAFNOSUPPORT;
1210 if (type < 0 || type >= SOCK_MAX)
1211 return -EINVAL;
1212
1213 /* Compatibility.
1214
1215 This uglymoron is moved from INET layer to here to avoid
1216 deadlock in module load.
1217 */
1218 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1219 static int warned;
1da177e4
LT
1220 if (!warned) {
1221 warned = 1;
89bddce5
SH
1222 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1223 current->comm);
1da177e4
LT
1224 }
1225 family = PF_PACKET;
1226 }
1227
1228 err = security_socket_create(family, type, protocol, kern);
1229 if (err)
1230 return err;
89bddce5 1231
55737fda
SH
1232 /*
1233 * Allocate the socket and allow the family to set things up. if
1234 * the protocol is 0, the family is instructed to select an appropriate
1235 * default.
1236 */
1237 sock = sock_alloc();
1238 if (!sock) {
e87cc472 1239 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1240 return -ENFILE; /* Not exactly a match, but its the
1241 closest posix thing */
1242 }
1243
1244 sock->type = type;
1245
95a5afca 1246#ifdef CONFIG_MODULES
89bddce5
SH
1247 /* Attempt to load a protocol module if the find failed.
1248 *
1249 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1250 * requested real, full-featured networking support upon configuration.
1251 * Otherwise module support will break!
1252 */
190683a9 1253 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1254 request_module("net-pf-%d", family);
1da177e4
LT
1255#endif
1256
55737fda
SH
1257 rcu_read_lock();
1258 pf = rcu_dereference(net_families[family]);
1259 err = -EAFNOSUPPORT;
1260 if (!pf)
1261 goto out_release;
1da177e4
LT
1262
1263 /*
1264 * We will call the ->create function, that possibly is in a loadable
1265 * module, so we have to bump that loadable module refcnt first.
1266 */
55737fda 1267 if (!try_module_get(pf->owner))
1da177e4
LT
1268 goto out_release;
1269
55737fda
SH
1270 /* Now protected by module ref count */
1271 rcu_read_unlock();
1272
3f378b68 1273 err = pf->create(net, sock, protocol, kern);
55737fda 1274 if (err < 0)
1da177e4 1275 goto out_module_put;
a79af59e 1276
1da177e4
LT
1277 /*
1278 * Now to bump the refcnt of the [loadable] module that owns this
1279 * socket at sock_release time we decrement its refcnt.
1280 */
55737fda
SH
1281 if (!try_module_get(sock->ops->owner))
1282 goto out_module_busy;
1283
1da177e4
LT
1284 /*
1285 * Now that we're done with the ->create function, the [loadable]
1286 * module can have its refcnt decremented
1287 */
55737fda 1288 module_put(pf->owner);
7420ed23
VY
1289 err = security_socket_post_create(sock, family, type, protocol, kern);
1290 if (err)
3b185525 1291 goto out_sock_release;
55737fda 1292 *res = sock;
1da177e4 1293
55737fda
SH
1294 return 0;
1295
1296out_module_busy:
1297 err = -EAFNOSUPPORT;
1da177e4 1298out_module_put:
55737fda
SH
1299 sock->ops = NULL;
1300 module_put(pf->owner);
1301out_sock_release:
1da177e4 1302 sock_release(sock);
55737fda
SH
1303 return err;
1304
1305out_release:
1306 rcu_read_unlock();
1307 goto out_sock_release;
1da177e4 1308}
721db93a 1309EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1310
1311int sock_create(int family, int type, int protocol, struct socket **res)
1312{
1b8d7ae4 1313 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1314}
c6d409cf 1315EXPORT_SYMBOL(sock_create);
1da177e4
LT
1316
1317int sock_create_kern(int family, int type, int protocol, struct socket **res)
1318{
1b8d7ae4 1319 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1320}
c6d409cf 1321EXPORT_SYMBOL(sock_create_kern);
1da177e4 1322
3e0fa65f 1323SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1324{
1325 int retval;
1326 struct socket *sock;
a677a039
UD
1327 int flags;
1328
e38b36f3
UD
1329 /* Check the SOCK_* constants for consistency. */
1330 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1331 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1332 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1333 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1334
a677a039 1335 flags = type & ~SOCK_TYPE_MASK;
77d27200 1336 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1337 return -EINVAL;
1338 type &= SOCK_TYPE_MASK;
1da177e4 1339
aaca0bdc
UD
1340 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1341 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1342
1da177e4
LT
1343 retval = sock_create(family, type, protocol, &sock);
1344 if (retval < 0)
1345 goto out;
1346
77d27200 1347 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1348 if (retval < 0)
1349 goto out_release;
1350
1351out:
1352 /* It may be already another descriptor 8) Not kernel problem. */
1353 return retval;
1354
1355out_release:
1356 sock_release(sock);
1357 return retval;
1358}
1359
1360/*
1361 * Create a pair of connected sockets.
1362 */
1363
3e0fa65f
HC
1364SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1365 int __user *, usockvec)
1da177e4
LT
1366{
1367 struct socket *sock1, *sock2;
1368 int fd1, fd2, err;
db349509 1369 struct file *newfile1, *newfile2;
a677a039
UD
1370 int flags;
1371
1372 flags = type & ~SOCK_TYPE_MASK;
77d27200 1373 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1374 return -EINVAL;
1375 type &= SOCK_TYPE_MASK;
1da177e4 1376
aaca0bdc
UD
1377 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1378 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1379
1da177e4
LT
1380 /*
1381 * Obtain the first socket and check if the underlying protocol
1382 * supports the socketpair call.
1383 */
1384
1385 err = sock_create(family, type, protocol, &sock1);
1386 if (err < 0)
1387 goto out;
1388
1389 err = sock_create(family, type, protocol, &sock2);
1390 if (err < 0)
1391 goto out_release_1;
1392
1393 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1394 if (err < 0)
1da177e4
LT
1395 goto out_release_both;
1396
7cbe66b6 1397 fd1 = sock_alloc_file(sock1, &newfile1, flags);
bf3c23d1
DM
1398 if (unlikely(fd1 < 0)) {
1399 err = fd1;
db349509 1400 goto out_release_both;
bf3c23d1 1401 }
1da177e4 1402
7cbe66b6 1403 fd2 = sock_alloc_file(sock2, &newfile2, flags);
198de4d7
AV
1404 if (unlikely(fd2 < 0)) {
1405 err = fd2;
1406 fput(newfile1);
1407 put_unused_fd(fd1);
1408 sock_release(sock2);
1409 goto out;
db349509
AV
1410 }
1411
157cf649 1412 audit_fd_pair(fd1, fd2);
db349509
AV
1413 fd_install(fd1, newfile1);
1414 fd_install(fd2, newfile2);
1da177e4
LT
1415 /* fd1 and fd2 may be already another descriptors.
1416 * Not kernel problem.
1417 */
1418
89bddce5 1419 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1420 if (!err)
1421 err = put_user(fd2, &usockvec[1]);
1422 if (!err)
1423 return 0;
1424
1425 sys_close(fd2);
1426 sys_close(fd1);
1427 return err;
1428
1da177e4 1429out_release_both:
89bddce5 1430 sock_release(sock2);
1da177e4 1431out_release_1:
89bddce5 1432 sock_release(sock1);
1da177e4
LT
1433out:
1434 return err;
1435}
1436
1da177e4
LT
1437/*
1438 * Bind a name to a socket. Nothing much to do here since it's
1439 * the protocol's responsibility to handle the local address.
1440 *
1441 * We move the socket address to kernel space before we call
1442 * the protocol layer (having also checked the address is ok).
1443 */
1444
20f37034 1445SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1446{
1447 struct socket *sock;
230b1839 1448 struct sockaddr_storage address;
6cb153ca 1449 int err, fput_needed;
1da177e4 1450
89bddce5 1451 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1452 if (sock) {
43db362d 1453 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1454 if (err >= 0) {
1455 err = security_socket_bind(sock,
230b1839 1456 (struct sockaddr *)&address,
89bddce5 1457 addrlen);
6cb153ca
BL
1458 if (!err)
1459 err = sock->ops->bind(sock,
89bddce5 1460 (struct sockaddr *)
230b1839 1461 &address, addrlen);
1da177e4 1462 }
6cb153ca 1463 fput_light(sock->file, fput_needed);
89bddce5 1464 }
1da177e4
LT
1465 return err;
1466}
1467
1da177e4
LT
1468/*
1469 * Perform a listen. Basically, we allow the protocol to do anything
1470 * necessary for a listen, and if that works, we mark the socket as
1471 * ready for listening.
1472 */
1473
3e0fa65f 1474SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1475{
1476 struct socket *sock;
6cb153ca 1477 int err, fput_needed;
b8e1f9b5 1478 int somaxconn;
89bddce5
SH
1479
1480 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1481 if (sock) {
8efa6e93 1482 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1483 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1484 backlog = somaxconn;
1da177e4
LT
1485
1486 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1487 if (!err)
1488 err = sock->ops->listen(sock, backlog);
1da177e4 1489
6cb153ca 1490 fput_light(sock->file, fput_needed);
1da177e4
LT
1491 }
1492 return err;
1493}
1494
1da177e4
LT
1495/*
1496 * For accept, we attempt to create a new socket, set up the link
1497 * with the client, wake up the client, then return the new
1498 * connected fd. We collect the address of the connector in kernel
1499 * space and move it to user at the very end. This is unclean because
1500 * we open the socket then return an error.
1501 *
1502 * 1003.1g adds the ability to recvmsg() to query connection pending
1503 * status to recvmsg. We need to add that support in a way thats
1504 * clean when we restucture accept also.
1505 */
1506
20f37034
HC
1507SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1508 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1509{
1510 struct socket *sock, *newsock;
39d8c1b6 1511 struct file *newfile;
6cb153ca 1512 int err, len, newfd, fput_needed;
230b1839 1513 struct sockaddr_storage address;
1da177e4 1514
77d27200 1515 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1516 return -EINVAL;
1517
1518 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1519 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1520
6cb153ca 1521 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1522 if (!sock)
1523 goto out;
1524
1525 err = -ENFILE;
c6d409cf
ED
1526 newsock = sock_alloc();
1527 if (!newsock)
1da177e4
LT
1528 goto out_put;
1529
1530 newsock->type = sock->type;
1531 newsock->ops = sock->ops;
1532
1da177e4
LT
1533 /*
1534 * We don't need try_module_get here, as the listening socket (sock)
1535 * has the protocol module (sock->ops->owner) held.
1536 */
1537 __module_get(newsock->ops->owner);
1538
7cbe66b6 1539 newfd = sock_alloc_file(newsock, &newfile, flags);
39d8c1b6
DM
1540 if (unlikely(newfd < 0)) {
1541 err = newfd;
9a1875e6
DM
1542 sock_release(newsock);
1543 goto out_put;
39d8c1b6
DM
1544 }
1545
a79af59e
FF
1546 err = security_socket_accept(sock, newsock);
1547 if (err)
39d8c1b6 1548 goto out_fd;
a79af59e 1549
1da177e4
LT
1550 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1551 if (err < 0)
39d8c1b6 1552 goto out_fd;
1da177e4
LT
1553
1554 if (upeer_sockaddr) {
230b1839 1555 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1556 &len, 2) < 0) {
1da177e4 1557 err = -ECONNABORTED;
39d8c1b6 1558 goto out_fd;
1da177e4 1559 }
43db362d 1560 err = move_addr_to_user(&address,
230b1839 1561 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1562 if (err < 0)
39d8c1b6 1563 goto out_fd;
1da177e4
LT
1564 }
1565
1566 /* File flags are not inherited via accept() unlike another OSes. */
1567
39d8c1b6
DM
1568 fd_install(newfd, newfile);
1569 err = newfd;
1da177e4 1570
1da177e4 1571out_put:
6cb153ca 1572 fput_light(sock->file, fput_needed);
1da177e4
LT
1573out:
1574 return err;
39d8c1b6 1575out_fd:
9606a216 1576 fput(newfile);
39d8c1b6 1577 put_unused_fd(newfd);
1da177e4
LT
1578 goto out_put;
1579}
1580
20f37034
HC
1581SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1582 int __user *, upeer_addrlen)
aaca0bdc 1583{
de11defe 1584 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1585}
1586
1da177e4
LT
1587/*
1588 * Attempt to connect to a socket with the server address. The address
1589 * is in user space so we verify it is OK and move it to kernel space.
1590 *
1591 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1592 * break bindings
1593 *
1594 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1595 * other SEQPACKET protocols that take time to connect() as it doesn't
1596 * include the -EINPROGRESS status for such sockets.
1597 */
1598
20f37034
HC
1599SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1600 int, addrlen)
1da177e4
LT
1601{
1602 struct socket *sock;
230b1839 1603 struct sockaddr_storage address;
6cb153ca 1604 int err, fput_needed;
1da177e4 1605
6cb153ca 1606 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1607 if (!sock)
1608 goto out;
43db362d 1609 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1610 if (err < 0)
1611 goto out_put;
1612
89bddce5 1613 err =
230b1839 1614 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1615 if (err)
1616 goto out_put;
1617
230b1839 1618 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1619 sock->file->f_flags);
1620out_put:
6cb153ca 1621 fput_light(sock->file, fput_needed);
1da177e4
LT
1622out:
1623 return err;
1624}
1625
1626/*
1627 * Get the local address ('name') of a socket object. Move the obtained
1628 * name to user space.
1629 */
1630
20f37034
HC
1631SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1632 int __user *, usockaddr_len)
1da177e4
LT
1633{
1634 struct socket *sock;
230b1839 1635 struct sockaddr_storage address;
6cb153ca 1636 int len, err, fput_needed;
89bddce5 1637
6cb153ca 1638 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1639 if (!sock)
1640 goto out;
1641
1642 err = security_socket_getsockname(sock);
1643 if (err)
1644 goto out_put;
1645
230b1839 1646 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1647 if (err)
1648 goto out_put;
43db362d 1649 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1650
1651out_put:
6cb153ca 1652 fput_light(sock->file, fput_needed);
1da177e4
LT
1653out:
1654 return err;
1655}
1656
1657/*
1658 * Get the remote address ('name') of a socket object. Move the obtained
1659 * name to user space.
1660 */
1661
20f37034
HC
1662SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1663 int __user *, usockaddr_len)
1da177e4
LT
1664{
1665 struct socket *sock;
230b1839 1666 struct sockaddr_storage address;
6cb153ca 1667 int len, err, fput_needed;
1da177e4 1668
89bddce5
SH
1669 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1670 if (sock != NULL) {
1da177e4
LT
1671 err = security_socket_getpeername(sock);
1672 if (err) {
6cb153ca 1673 fput_light(sock->file, fput_needed);
1da177e4
LT
1674 return err;
1675 }
1676
89bddce5 1677 err =
230b1839 1678 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1679 1);
1da177e4 1680 if (!err)
43db362d 1681 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1682 usockaddr_len);
6cb153ca 1683 fput_light(sock->file, fput_needed);
1da177e4
LT
1684 }
1685 return err;
1686}
1687
1688/*
1689 * Send a datagram to a given address. We move the address into kernel
1690 * space and check the user space data area is readable before invoking
1691 * the protocol.
1692 */
1693
3e0fa65f 1694SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1695 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1696 int, addr_len)
1da177e4
LT
1697{
1698 struct socket *sock;
230b1839 1699 struct sockaddr_storage address;
1da177e4
LT
1700 int err;
1701 struct msghdr msg;
1702 struct iovec iov;
6cb153ca 1703 int fput_needed;
6cb153ca 1704
253eacc0
LT
1705 if (len > INT_MAX)
1706 len = INT_MAX;
de0fa95c
PE
1707 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1708 if (!sock)
4387ff75 1709 goto out;
6cb153ca 1710
89bddce5
SH
1711 iov.iov_base = buff;
1712 iov.iov_len = len;
1713 msg.msg_name = NULL;
1714 msg.msg_iov = &iov;
1715 msg.msg_iovlen = 1;
1716 msg.msg_control = NULL;
1717 msg.msg_controllen = 0;
1718 msg.msg_namelen = 0;
6cb153ca 1719 if (addr) {
43db362d 1720 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1721 if (err < 0)
1722 goto out_put;
230b1839 1723 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1724 msg.msg_namelen = addr_len;
1da177e4
LT
1725 }
1726 if (sock->file->f_flags & O_NONBLOCK)
1727 flags |= MSG_DONTWAIT;
1728 msg.msg_flags = flags;
1729 err = sock_sendmsg(sock, &msg, len);
1730
89bddce5 1731out_put:
de0fa95c 1732 fput_light(sock->file, fput_needed);
4387ff75 1733out:
1da177e4
LT
1734 return err;
1735}
1736
1737/*
89bddce5 1738 * Send a datagram down a socket.
1da177e4
LT
1739 */
1740
3e0fa65f 1741SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1742 unsigned int, flags)
1da177e4
LT
1743{
1744 return sys_sendto(fd, buff, len, flags, NULL, 0);
1745}
1746
1747/*
89bddce5 1748 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1749 * sender. We verify the buffers are writable and if needed move the
1750 * sender address from kernel to user space.
1751 */
1752
3e0fa65f 1753SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1754 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1755 int __user *, addr_len)
1da177e4
LT
1756{
1757 struct socket *sock;
1758 struct iovec iov;
1759 struct msghdr msg;
230b1839 1760 struct sockaddr_storage address;
89bddce5 1761 int err, err2;
6cb153ca
BL
1762 int fput_needed;
1763
253eacc0
LT
1764 if (size > INT_MAX)
1765 size = INT_MAX;
de0fa95c 1766 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1767 if (!sock)
de0fa95c 1768 goto out;
1da177e4 1769
89bddce5
SH
1770 msg.msg_control = NULL;
1771 msg.msg_controllen = 0;
1772 msg.msg_iovlen = 1;
1773 msg.msg_iov = &iov;
1774 iov.iov_len = size;
1775 iov.iov_base = ubuf;
230b1839
YH
1776 msg.msg_name = (struct sockaddr *)&address;
1777 msg.msg_namelen = sizeof(address);
1da177e4
LT
1778 if (sock->file->f_flags & O_NONBLOCK)
1779 flags |= MSG_DONTWAIT;
89bddce5 1780 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1781
89bddce5 1782 if (err >= 0 && addr != NULL) {
43db362d 1783 err2 = move_addr_to_user(&address,
230b1839 1784 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1785 if (err2 < 0)
1786 err = err2;
1da177e4 1787 }
de0fa95c
PE
1788
1789 fput_light(sock->file, fput_needed);
4387ff75 1790out:
1da177e4
LT
1791 return err;
1792}
1793
1794/*
89bddce5 1795 * Receive a datagram from a socket.
1da177e4
LT
1796 */
1797
89bddce5 1798asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
95c96174 1799 unsigned int flags)
1da177e4
LT
1800{
1801 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1802}
1803
1804/*
1805 * Set a socket option. Because we don't know the option lengths we have
1806 * to pass the user mode parameter for the protocols to sort out.
1807 */
1808
20f37034
HC
1809SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1810 char __user *, optval, int, optlen)
1da177e4 1811{
6cb153ca 1812 int err, fput_needed;
1da177e4
LT
1813 struct socket *sock;
1814
1815 if (optlen < 0)
1816 return -EINVAL;
89bddce5
SH
1817
1818 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1819 if (sock != NULL) {
1820 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1821 if (err)
1822 goto out_put;
1da177e4
LT
1823
1824 if (level == SOL_SOCKET)
89bddce5
SH
1825 err =
1826 sock_setsockopt(sock, level, optname, optval,
1827 optlen);
1da177e4 1828 else
89bddce5
SH
1829 err =
1830 sock->ops->setsockopt(sock, level, optname, optval,
1831 optlen);
6cb153ca
BL
1832out_put:
1833 fput_light(sock->file, fput_needed);
1da177e4
LT
1834 }
1835 return err;
1836}
1837
1838/*
1839 * Get a socket option. Because we don't know the option lengths we have
1840 * to pass a user mode parameter for the protocols to sort out.
1841 */
1842
20f37034
HC
1843SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1844 char __user *, optval, int __user *, optlen)
1da177e4 1845{
6cb153ca 1846 int err, fput_needed;
1da177e4
LT
1847 struct socket *sock;
1848
89bddce5
SH
1849 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1850 if (sock != NULL) {
6cb153ca
BL
1851 err = security_socket_getsockopt(sock, level, optname);
1852 if (err)
1853 goto out_put;
1da177e4
LT
1854
1855 if (level == SOL_SOCKET)
89bddce5
SH
1856 err =
1857 sock_getsockopt(sock, level, optname, optval,
1858 optlen);
1da177e4 1859 else
89bddce5
SH
1860 err =
1861 sock->ops->getsockopt(sock, level, optname, optval,
1862 optlen);
6cb153ca
BL
1863out_put:
1864 fput_light(sock->file, fput_needed);
1da177e4
LT
1865 }
1866 return err;
1867}
1868
1da177e4
LT
1869/*
1870 * Shutdown a socket.
1871 */
1872
754fe8d2 1873SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1874{
6cb153ca 1875 int err, fput_needed;
1da177e4
LT
1876 struct socket *sock;
1877
89bddce5
SH
1878 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1879 if (sock != NULL) {
1da177e4 1880 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1881 if (!err)
1882 err = sock->ops->shutdown(sock, how);
1883 fput_light(sock->file, fput_needed);
1da177e4
LT
1884 }
1885 return err;
1886}
1887
/*
 * Helpers yielding the address of a message-header field that has the same
 * type (int / unsigned) in the native and the compat layout on our
 * platforms.
 *
 * These expand in the caller's scope and expect three names to exist
 * there: 'flags', '<msg>' and '<msg>_compat'.  When MSG_CMSG_COMPAT is set
 * in 'flags' the field of '<msg>_compat' is addressed, otherwise the field
 * of '<msg>'.
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1894
c71d8ebe
TH
/*
 * Destination address of the last successful send, kept by sendmmsg() so
 * that __sys_sendmsg() can recognise a repeated destination.
 */
struct used_address {
	struct sockaddr_storage name;	/* copy of the address bytes */
	unsigned int name_len;		/* length of the address in 'name' */
};
1899
228e548e 1900static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 1901 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1902 struct used_address *used_address)
1da177e4 1903{
89bddce5
SH
1904 struct compat_msghdr __user *msg_compat =
1905 (struct compat_msghdr __user *)msg;
230b1839 1906 struct sockaddr_storage address;
1da177e4 1907 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1908 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1909 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1910 /* 20 is size of ipv6_pktinfo */
1da177e4 1911 unsigned char *ctl_buf = ctl;
a74e9106 1912 int err, ctl_len, total_len;
89bddce5 1913
1da177e4
LT
1914 err = -EFAULT;
1915 if (MSG_CMSG_COMPAT & flags) {
228e548e 1916 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 1917 return -EFAULT;
228e548e 1918 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1919 return -EFAULT;
1920
228e548e 1921 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
1922 err = -EMSGSIZE;
1923 if (msg_sys->msg_iovlen > UIO_MAXIOV)
1924 goto out;
1925 err = -ENOMEM;
1926 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
1927 GFP_KERNEL);
1da177e4 1928 if (!iov)
228e548e 1929 goto out;
1da177e4
LT
1930 }
1931
1932 /* This will also move the address data into kernel space */
1933 if (MSG_CMSG_COMPAT & flags) {
43db362d 1934 err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
1da177e4 1935 } else
43db362d 1936 err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
89bddce5 1937 if (err < 0)
1da177e4
LT
1938 goto out_freeiov;
1939 total_len = err;
1940
1941 err = -ENOBUFS;
1942
228e548e 1943 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1944 goto out_freeiov;
228e548e 1945 ctl_len = msg_sys->msg_controllen;
1da177e4 1946 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1947 err =
228e548e 1948 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1949 sizeof(ctl));
1da177e4
LT
1950 if (err)
1951 goto out_freeiov;
228e548e
AB
1952 ctl_buf = msg_sys->msg_control;
1953 ctl_len = msg_sys->msg_controllen;
1da177e4 1954 } else if (ctl_len) {
89bddce5 1955 if (ctl_len > sizeof(ctl)) {
1da177e4 1956 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1957 if (ctl_buf == NULL)
1da177e4
LT
1958 goto out_freeiov;
1959 }
1960 err = -EFAULT;
1961 /*
228e548e 1962 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1963 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1964 * checking falls down on this.
1965 */
fb8621bb 1966 if (copy_from_user(ctl_buf,
228e548e 1967 (void __user __force *)msg_sys->msg_control,
89bddce5 1968 ctl_len))
1da177e4 1969 goto out_freectl;
228e548e 1970 msg_sys->msg_control = ctl_buf;
1da177e4 1971 }
228e548e 1972 msg_sys->msg_flags = flags;
1da177e4
LT
1973
1974 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1975 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1976 /*
1977 * If this is sendmmsg() and current destination address is same as
1978 * previously succeeded address, omit asking LSM's decision.
1979 * used_address->name_len is initialized to UINT_MAX so that the first
1980 * destination address never matches.
1981 */
bc909d9d
MD
1982 if (used_address && msg_sys->msg_name &&
1983 used_address->name_len == msg_sys->msg_namelen &&
1984 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
1985 used_address->name_len)) {
1986 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
1987 goto out_freectl;
1988 }
1989 err = sock_sendmsg(sock, msg_sys, total_len);
1990 /*
1991 * If this is sendmmsg() and sending to current destination address was
1992 * successful, remember it.
1993 */
1994 if (used_address && err >= 0) {
1995 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1996 if (msg_sys->msg_name)
1997 memcpy(&used_address->name, msg_sys->msg_name,
1998 used_address->name_len);
c71d8ebe 1999 }
1da177e4
LT
2000
2001out_freectl:
89bddce5 2002 if (ctl_buf != ctl)
1da177e4
LT
2003 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2004out_freeiov:
2005 if (iov != iovstack)
a74e9106 2006 kfree(iov);
228e548e
AB
2007out:
2008 return err;
2009}
2010
2011/*
2012 * BSD sendmsg interface
2013 */
2014
95c96174 2015SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
228e548e
AB
2016{
2017 int fput_needed, err;
2018 struct msghdr msg_sys;
2019 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2020
2021 if (!sock)
2022 goto out;
2023
c71d8ebe 2024 err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2025
6cb153ca 2026 fput_light(sock->file, fput_needed);
89bddce5 2027out:
1da177e4
LT
2028 return err;
2029}
2030
228e548e
AB
2031/*
2032 * Linux sendmmsg interface
2033 */
2034
2035int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2036 unsigned int flags)
2037{
2038 int fput_needed, err, datagrams;
2039 struct socket *sock;
2040 struct mmsghdr __user *entry;
2041 struct compat_mmsghdr __user *compat_entry;
2042 struct msghdr msg_sys;
c71d8ebe 2043 struct used_address used_address;
228e548e 2044
98382f41
AB
2045 if (vlen > UIO_MAXIOV)
2046 vlen = UIO_MAXIOV;
228e548e
AB
2047
2048 datagrams = 0;
2049
2050 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2051 if (!sock)
2052 return err;
2053
c71d8ebe 2054 used_address.name_len = UINT_MAX;
228e548e
AB
2055 entry = mmsg;
2056 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2057 err = 0;
228e548e
AB
2058
2059 while (datagrams < vlen) {
228e548e
AB
2060 if (MSG_CMSG_COMPAT & flags) {
2061 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
c71d8ebe 2062 &msg_sys, flags, &used_address);
228e548e
AB
2063 if (err < 0)
2064 break;
2065 err = __put_user(err, &compat_entry->msg_len);
2066 ++compat_entry;
2067 } else {
2068 err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
c71d8ebe 2069 &msg_sys, flags, &used_address);
228e548e
AB
2070 if (err < 0)
2071 break;
2072 err = put_user(err, &entry->msg_len);
2073 ++entry;
2074 }
2075
2076 if (err)
2077 break;
2078 ++datagrams;
2079 }
2080
228e548e
AB
2081 fput_light(sock->file, fput_needed);
2082
728ffb86
AB
2083 /* We only return an error if no datagrams were able to be sent */
2084 if (datagrams != 0)
228e548e
AB
2085 return datagrams;
2086
228e548e
AB
2087 return err;
2088}
2089
2090SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2091 unsigned int, vlen, unsigned int, flags)
2092{
2093 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2094}
2095
a2e27255 2096static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 2097 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2098{
89bddce5
SH
2099 struct compat_msghdr __user *msg_compat =
2100 (struct compat_msghdr __user *)msg;
1da177e4 2101 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2102 struct iovec *iov = iovstack;
1da177e4 2103 unsigned long cmsg_ptr;
a74e9106 2104 int err, total_len, len;
1da177e4
LT
2105
2106 /* kernel mode address */
230b1839 2107 struct sockaddr_storage addr;
1da177e4
LT
2108
2109 /* user mode address pointers */
2110 struct sockaddr __user *uaddr;
2111 int __user *uaddr_len;
89bddce5 2112
1da177e4 2113 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2114 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2115 return -EFAULT;
c6d409cf 2116 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
89bddce5 2117 return -EFAULT;
1da177e4 2118
a2e27255 2119 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2120 err = -EMSGSIZE;
2121 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2122 goto out;
2123 err = -ENOMEM;
2124 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2125 GFP_KERNEL);
1da177e4 2126 if (!iov)
a2e27255 2127 goto out;
1da177e4
LT
2128 }
2129
2130 /*
89bddce5
SH
2131 * Save the user-mode address (verify_iovec will change the
2132 * kernel msghdr to use the kernel address space)
1da177e4 2133 */
89bddce5 2134
a2e27255 2135 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4
LT
2136 uaddr_len = COMPAT_NAMELEN(msg);
2137 if (MSG_CMSG_COMPAT & flags) {
43db362d 2138 err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4 2139 } else
43db362d 2140 err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4
LT
2141 if (err < 0)
2142 goto out_freeiov;
89bddce5 2143 total_len = err;
1da177e4 2144
a2e27255
ACM
2145 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2146 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2147
1da177e4
LT
2148 if (sock->file->f_flags & O_NONBLOCK)
2149 flags |= MSG_DONTWAIT;
a2e27255
ACM
2150 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2151 total_len, flags);
1da177e4
LT
2152 if (err < 0)
2153 goto out_freeiov;
2154 len = err;
2155
2156 if (uaddr != NULL) {
43db362d 2157 err = move_addr_to_user(&addr,
a2e27255 2158 msg_sys->msg_namelen, uaddr,
89bddce5 2159 uaddr_len);
1da177e4
LT
2160 if (err < 0)
2161 goto out_freeiov;
2162 }
a2e27255 2163 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2164 COMPAT_FLAGS(msg));
1da177e4
LT
2165 if (err)
2166 goto out_freeiov;
2167 if (MSG_CMSG_COMPAT & flags)
a2e27255 2168 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2169 &msg_compat->msg_controllen);
2170 else
a2e27255 2171 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2172 &msg->msg_controllen);
2173 if (err)
2174 goto out_freeiov;
2175 err = len;
2176
2177out_freeiov:
2178 if (iov != iovstack)
a74e9106 2179 kfree(iov);
a2e27255
ACM
2180out:
2181 return err;
2182}
2183
2184/*
2185 * BSD recvmsg interface
2186 */
2187
2188SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2189 unsigned int, flags)
2190{
2191 int fput_needed, err;
2192 struct msghdr msg_sys;
2193 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2194
2195 if (!sock)
2196 goto out;
2197
2198 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2199
6cb153ca 2200 fput_light(sock->file, fput_needed);
1da177e4
LT
2201out:
2202 return err;
2203}
2204
a2e27255
ACM
2205/*
2206 * Linux recvmmsg interface
2207 */
2208
2209int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2210 unsigned int flags, struct timespec *timeout)
2211{
2212 int fput_needed, err, datagrams;
2213 struct socket *sock;
2214 struct mmsghdr __user *entry;
d7256d0e 2215 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2216 struct msghdr msg_sys;
2217 struct timespec end_time;
2218
2219 if (timeout &&
2220 poll_select_set_timeout(&end_time, timeout->tv_sec,
2221 timeout->tv_nsec))
2222 return -EINVAL;
2223
2224 datagrams = 0;
2225
2226 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2227 if (!sock)
2228 return err;
2229
2230 err = sock_error(sock->sk);
2231 if (err)
2232 goto out_put;
2233
2234 entry = mmsg;
d7256d0e 2235 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2236
2237 while (datagrams < vlen) {
2238 /*
2239 * No need to ask LSM for more than the first datagram.
2240 */
d7256d0e
JMG
2241 if (MSG_CMSG_COMPAT & flags) {
2242 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
b9eb8b87
AB
2243 &msg_sys, flags & ~MSG_WAITFORONE,
2244 datagrams);
d7256d0e
JMG
2245 if (err < 0)
2246 break;
2247 err = __put_user(err, &compat_entry->msg_len);
2248 ++compat_entry;
2249 } else {
2250 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
b9eb8b87
AB
2251 &msg_sys, flags & ~MSG_WAITFORONE,
2252 datagrams);
d7256d0e
JMG
2253 if (err < 0)
2254 break;
2255 err = put_user(err, &entry->msg_len);
2256 ++entry;
2257 }
2258
a2e27255
ACM
2259 if (err)
2260 break;
a2e27255
ACM
2261 ++datagrams;
2262
71c5c159
BB
2263 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2264 if (flags & MSG_WAITFORONE)
2265 flags |= MSG_DONTWAIT;
2266
a2e27255
ACM
2267 if (timeout) {
2268 ktime_get_ts(timeout);
2269 *timeout = timespec_sub(end_time, *timeout);
2270 if (timeout->tv_sec < 0) {
2271 timeout->tv_sec = timeout->tv_nsec = 0;
2272 break;
2273 }
2274
2275 /* Timeout, return less than vlen datagrams */
2276 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2277 break;
2278 }
2279
2280 /* Out of band data, return right away */
2281 if (msg_sys.msg_flags & MSG_OOB)
2282 break;
2283 }
2284
2285out_put:
2286 fput_light(sock->file, fput_needed);
1da177e4 2287
a2e27255
ACM
2288 if (err == 0)
2289 return datagrams;
2290
2291 if (datagrams != 0) {
2292 /*
2293 * We may return less entries than requested (vlen) if the
2294 * sock is non block and there aren't enough datagrams...
2295 */
2296 if (err != -EAGAIN) {
2297 /*
2298 * ... or if recvmsg returns an error after we
2299 * received some datagrams, where we record the
2300 * error to return on the next call or if the
2301 * app asks about it using getsockopt(SO_ERROR).
2302 */
2303 sock->sk->sk_err = -err;
2304 }
2305
2306 return datagrams;
2307 }
2308
2309 return err;
2310}
2311
2312SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2313 unsigned int, vlen, unsigned int, flags,
2314 struct timespec __user *, timeout)
2315{
2316 int datagrams;
2317 struct timespec timeout_sys;
2318
2319 if (!timeout)
2320 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2321
2322 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2323 return -EFAULT;
2324
2325 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2326
2327 if (datagrams > 0 &&
2328 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2329 datagrams = -EFAULT;
2330
2331 return datagrams;
2332}
2333
2334#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call number.
 *
 * NOTE(review): the call names in the per-entry comments assume the usual
 * SYS_* numbering, which is defined outside this chunk; confirm against
 * the header that defines SYS_SOCKET..SYS_SENDMMSG.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0: unused */
	AL(3),	/*  1: socket */
	AL(3),	/*  2: bind */
	AL(3),	/*  3: connect */
	AL(2),	/*  4: listen */
	AL(3),	/*  5: accept */
	AL(3),	/*  6: getsockname */
	AL(3),	/*  7: getpeername */
	AL(4),	/*  8: socketpair */
	AL(4),	/*  9: send */
	AL(4),	/* 10: recv */
	AL(6),	/* 11: sendto */
	AL(6),	/* 12: recvfrom */
	AL(2),	/* 13: shutdown */
	AL(5),	/* 14: setsockopt */
	AL(5),	/* 15: getsockopt */
	AL(3),	/* 16: sendmsg */
	AL(3),	/* 17: recvmsg */
	AL(4),	/* 18: accept4 */
	AL(5),	/* 19: recvmmsg */
	AL(4),	/* 20: sendmmsg */
};

#undef AL
2345
2346/*
89bddce5 2347 * System call vectors.
1da177e4
LT
2348 *
2349 * Argument checking cleaned up. Saved 20% in size.
2350 * This function doesn't need to set the kernel lock because
89bddce5 2351 * it is set by the callees.
1da177e4
LT
2352 */
2353
3e0fa65f 2354SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4
LT
2355{
2356 unsigned long a[6];
89bddce5 2357 unsigned long a0, a1;
1da177e4 2358 int err;
47379052 2359 unsigned int len;
1da177e4 2360
228e548e 2361 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2362 return -EINVAL;
2363
47379052
AV
2364 len = nargs[call];
2365 if (len > sizeof(a))
2366 return -EINVAL;
2367
1da177e4 2368 /* copy_from_user should be SMP safe. */
47379052 2369 if (copy_from_user(a, args, len))
1da177e4 2370 return -EFAULT;
3ec3b2fb 2371
f3298dc4 2372 audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb 2373
89bddce5
SH
2374 a0 = a[0];
2375 a1 = a[1];
2376
2377 switch (call) {
2378 case SYS_SOCKET:
2379 err = sys_socket(a0, a1, a[2]);
2380 break;
2381 case SYS_BIND:
2382 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2383 break;
2384 case SYS_CONNECT:
2385 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2386 break;
2387 case SYS_LISTEN:
2388 err = sys_listen(a0, a1);
2389 break;
2390 case SYS_ACCEPT:
de11defe
UD
2391 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2392 (int __user *)a[2], 0);
89bddce5
SH
2393 break;
2394 case SYS_GETSOCKNAME:
2395 err =
2396 sys_getsockname(a0, (struct sockaddr __user *)a1,
2397 (int __user *)a[2]);
2398 break;
2399 case SYS_GETPEERNAME:
2400 err =
2401 sys_getpeername(a0, (struct sockaddr __user *)a1,
2402 (int __user *)a[2]);
2403 break;
2404 case SYS_SOCKETPAIR:
2405 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2406 break;
2407 case SYS_SEND:
2408 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2409 break;
2410 case SYS_SENDTO:
2411 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2412 (struct sockaddr __user *)a[4], a[5]);
2413 break;
2414 case SYS_RECV:
2415 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2416 break;
2417 case SYS_RECVFROM:
2418 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2419 (struct sockaddr __user *)a[4],
2420 (int __user *)a[5]);
2421 break;
2422 case SYS_SHUTDOWN:
2423 err = sys_shutdown(a0, a1);
2424 break;
2425 case SYS_SETSOCKOPT:
2426 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2427 break;
2428 case SYS_GETSOCKOPT:
2429 err =
2430 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2431 (int __user *)a[4]);
2432 break;
2433 case SYS_SENDMSG:
2434 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2435 break;
228e548e
AB
2436 case SYS_SENDMMSG:
2437 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2438 break;
89bddce5
SH
2439 case SYS_RECVMSG:
2440 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2441 break;
a2e27255
ACM
2442 case SYS_RECVMMSG:
2443 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2444 (struct timespec __user *)a[4]);
2445 break;
de11defe
UD
2446 case SYS_ACCEPT4:
2447 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2448 (int __user *)a[2], a[3]);
aaca0bdc 2449 break;
89bddce5
SH
2450 default:
2451 err = -EINVAL;
2452 break;
1da177e4
LT
2453 }
2454 return err;
2455}
2456
89bddce5 2457#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2458
55737fda
SH
2459/**
2460 * sock_register - add a socket protocol handler
2461 * @ops: description of protocol
2462 *
1da177e4
LT
2463 * This function is called by a protocol handler that wants to
2464 * advertise its address family, and have it linked into the
55737fda
SH
2465 * socket interface. The value ops->family coresponds to the
2466 * socket system call protocol family.
1da177e4 2467 */
f0fd27d4 2468int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2469{
2470 int err;
2471
2472 if (ops->family >= NPROTO) {
89bddce5
SH
2473 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2474 NPROTO);
1da177e4
LT
2475 return -ENOBUFS;
2476 }
55737fda
SH
2477
2478 spin_lock(&net_family_lock);
190683a9
ED
2479 if (rcu_dereference_protected(net_families[ops->family],
2480 lockdep_is_held(&net_family_lock)))
55737fda
SH
2481 err = -EEXIST;
2482 else {
cf778b00 2483 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2484 err = 0;
2485 }
55737fda
SH
2486 spin_unlock(&net_family_lock);
2487
89bddce5 2488 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2489 return err;
2490}
c6d409cf 2491EXPORT_SYMBOL(sock_register);
1da177e4 2492
55737fda
SH
2493/**
2494 * sock_unregister - remove a protocol handler
2495 * @family: protocol family to remove
2496 *
1da177e4
LT
2497 * This function is called by a protocol handler that wants to
2498 * remove its address family, and have it unlinked from the
55737fda
SH
2499 * new socket creation.
2500 *
2501 * If protocol handler is a module, then it can use module reference
2502 * counts to protect against new references. If protocol handler is not
2503 * a module then it needs to provide its own protection in
2504 * the ops->create routine.
1da177e4 2505 */
f0fd27d4 2506void sock_unregister(int family)
1da177e4 2507{
f0fd27d4 2508 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2509
55737fda 2510 spin_lock(&net_family_lock);
a9b3cd7f 2511 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2512 spin_unlock(&net_family_lock);
2513
2514 synchronize_rcu();
2515
89bddce5 2516 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4 2517}
c6d409cf 2518EXPORT_SYMBOL(sock_unregister);
1da177e4 2519
77d76ea3 2520static int __init sock_init(void)
1da177e4 2521{
b3e19d92 2522 int err;
2ca794e5
EB
2523 /*
2524 * Initialize the network sysctl infrastructure.
2525 */
2526 err = net_sysctl_init();
2527 if (err)
2528 goto out;
b3e19d92 2529
1da177e4 2530 /*
89bddce5 2531 * Initialize sock SLAB cache.
1da177e4 2532 */
89bddce5 2533
1da177e4
LT
2534 sk_init();
2535
1da177e4 2536 /*
89bddce5 2537 * Initialize skbuff SLAB cache
1da177e4
LT
2538 */
2539 skb_init();
1da177e4
LT
2540
2541 /*
89bddce5 2542 * Initialize the protocols module.
1da177e4
LT
2543 */
2544
2545 init_inodecache();
b3e19d92
NP
2546
2547 err = register_filesystem(&sock_fs_type);
2548 if (err)
2549 goto out_fs;
1da177e4 2550 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2551 if (IS_ERR(sock_mnt)) {
2552 err = PTR_ERR(sock_mnt);
2553 goto out_mount;
2554 }
77d76ea3
AK
2555
2556 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2557 */
2558
2559#ifdef CONFIG_NETFILTER
2560 netfilter_init();
2561#endif
cbeb321a 2562
c1f19b51
RC
2563#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2564 skb_timestamping_init();
2565#endif
2566
b3e19d92
NP
2567out:
2568 return err;
2569
2570out_mount:
2571 unregister_filesystem(&sock_fs_type);
2572out_fs:
2573 goto out;
1da177e4
LT
2574}
2575
77d76ea3
AK
2576core_initcall(sock_init); /* early initcall */
2577
1da177e4
LT
2578#ifdef CONFIG_PROC_FS
2579void socket_seq_show(struct seq_file *seq)
2580{
2581 int cpu;
2582 int counter = 0;
2583
6f912042 2584 for_each_possible_cpu(cpu)
89bddce5 2585 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2586
2587 /* It can be negative, by the way. 8) */
2588 if (counter < 0)
2589 counter = 0;
2590
2591 seq_printf(seq, "sockets: used %d\n", counter);
2592}
89bddce5 2593#endif /* CONFIG_PROC_FS */
1da177e4 2594
89bbfc95 2595#ifdef CONFIG_COMPAT
6b96018b 2596static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2597 unsigned int cmd, void __user *up)
7a229387 2598{
7a229387
AB
2599 mm_segment_t old_fs = get_fs();
2600 struct timeval ktv;
2601 int err;
2602
2603 set_fs(KERNEL_DS);
6b96018b 2604 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2605 set_fs(old_fs);
644595f8 2606 if (!err)
ed6fe9d6 2607 err = compat_put_timeval(&ktv, up);
644595f8 2608
7a229387
AB
2609 return err;
2610}
2611
6b96018b 2612static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2613 unsigned int cmd, void __user *up)
7a229387 2614{
7a229387
AB
2615 mm_segment_t old_fs = get_fs();
2616 struct timespec kts;
2617 int err;
2618
2619 set_fs(KERNEL_DS);
6b96018b 2620 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2621 set_fs(old_fs);
644595f8 2622 if (!err)
ed6fe9d6 2623 err = compat_put_timespec(&kts, up);
644595f8 2624
7a229387
AB
2625 return err;
2626}
2627
6b96018b 2628static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2629{
2630 struct ifreq __user *uifr;
2631 int err;
2632
2633 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2634 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2635 return -EFAULT;
2636
6b96018b 2637 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2638 if (err)
2639 return err;
2640
6b96018b 2641 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2642 return -EFAULT;
2643
2644 return 0;
2645}
2646
6b96018b 2647static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2648{
6b96018b 2649 struct compat_ifconf ifc32;
7a229387
AB
2650 struct ifconf ifc;
2651 struct ifconf __user *uifc;
6b96018b 2652 struct compat_ifreq __user *ifr32;
7a229387
AB
2653 struct ifreq __user *ifr;
2654 unsigned int i, j;
2655 int err;
2656
6b96018b 2657 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2658 return -EFAULT;
2659
43da5f2e 2660 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2661 if (ifc32.ifcbuf == 0) {
2662 ifc32.ifc_len = 0;
2663 ifc.ifc_len = 0;
2664 ifc.ifc_req = NULL;
2665 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2666 } else {
c6d409cf
ED
2667 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2668 sizeof(struct ifreq);
7a229387
AB
2669 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2670 ifc.ifc_len = len;
2671 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2672 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2673 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2674 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2675 return -EFAULT;
2676 ifr++;
2677 ifr32++;
2678 }
2679 }
2680 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2681 return -EFAULT;
2682
6b96018b 2683 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2684 if (err)
2685 return err;
2686
2687 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2688 return -EFAULT;
2689
2690 ifr = ifc.ifc_req;
2691 ifr32 = compat_ptr(ifc32.ifcbuf);
2692 for (i = 0, j = 0;
c6d409cf
ED
2693 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2694 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2695 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2696 return -EFAULT;
2697 ifr32++;
2698 ifr++;
2699 }
2700
2701 if (ifc32.ifcbuf == 0) {
2702 /* Translate from 64-bit structure multiple to
2703 * a 32-bit one.
2704 */
2705 i = ifc.ifc_len;
6b96018b 2706 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2707 ifc32.ifc_len = i;
2708 } else {
2709 ifc32.ifc_len = i;
2710 }
6b96018b 2711 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2712 return -EFAULT;
2713
2714 return 0;
2715}
2716
6b96018b 2717static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2718{
3a7da39d
BH
2719 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2720 bool convert_in = false, convert_out = false;
2721 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2722 struct ethtool_rxnfc __user *rxnfc;
7a229387 2723 struct ifreq __user *ifr;
3a7da39d
BH
2724 u32 rule_cnt = 0, actual_rule_cnt;
2725 u32 ethcmd;
7a229387 2726 u32 data;
3a7da39d 2727 int ret;
7a229387 2728
3a7da39d
BH
2729 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2730 return -EFAULT;
7a229387 2731
3a7da39d
BH
2732 compat_rxnfc = compat_ptr(data);
2733
2734 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2735 return -EFAULT;
2736
3a7da39d
BH
2737 /* Most ethtool structures are defined without padding.
2738 * Unfortunately struct ethtool_rxnfc is an exception.
2739 */
2740 switch (ethcmd) {
2741 default:
2742 break;
2743 case ETHTOOL_GRXCLSRLALL:
2744 /* Buffer size is variable */
2745 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2746 return -EFAULT;
2747 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2748 return -ENOMEM;
2749 buf_size += rule_cnt * sizeof(u32);
2750 /* fall through */
2751 case ETHTOOL_GRXRINGS:
2752 case ETHTOOL_GRXCLSRLCNT:
2753 case ETHTOOL_GRXCLSRULE:
55664f32 2754 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2755 convert_out = true;
2756 /* fall through */
2757 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2758 buf_size += sizeof(struct ethtool_rxnfc);
2759 convert_in = true;
2760 break;
2761 }
2762
2763 ifr = compat_alloc_user_space(buf_size);
2764 rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8);
2765
2766 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2767 return -EFAULT;
2768
3a7da39d
BH
2769 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2770 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2771 return -EFAULT;
2772
3a7da39d 2773 if (convert_in) {
127fe533 2774 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2775 * fs.ring_cookie and at the end of fs, but nowhere else.
2776 */
127fe533
AD
2777 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2778 sizeof(compat_rxnfc->fs.m_ext) !=
2779 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2780 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2781 BUILD_BUG_ON(
2782 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2783 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2784 offsetof(struct ethtool_rxnfc, fs.location) -
2785 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2786
2787 if (copy_in_user(rxnfc, compat_rxnfc,
127fe533 2788 (void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2789 (void *)rxnfc) ||
2790 copy_in_user(&rxnfc->fs.ring_cookie,
2791 &compat_rxnfc->fs.ring_cookie,
2792 (void *)(&rxnfc->fs.location + 1) -
2793 (void *)&rxnfc->fs.ring_cookie) ||
2794 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2795 sizeof(rxnfc->rule_cnt)))
2796 return -EFAULT;
2797 }
2798
2799 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2800 if (ret)
2801 return ret;
2802
2803 if (convert_out) {
2804 if (copy_in_user(compat_rxnfc, rxnfc,
127fe533 2805 (const void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2806 (const void *)rxnfc) ||
2807 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2808 &rxnfc->fs.ring_cookie,
2809 (const void *)(&rxnfc->fs.location + 1) -
2810 (const void *)&rxnfc->fs.ring_cookie) ||
2811 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2812 sizeof(rxnfc->rule_cnt)))
2813 return -EFAULT;
2814
2815 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2816 /* As an optimisation, we only copy the actual
2817 * number of rules that the underlying
2818 * function returned. Since Mallory might
2819 * change the rule count in user memory, we
2820 * check that it is less than the rule count
2821 * originally given (as the user buffer size),
2822 * which has been range-checked.
2823 */
2824 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2825 return -EFAULT;
2826 if (actual_rule_cnt < rule_cnt)
2827 rule_cnt = actual_rule_cnt;
2828 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2829 &rxnfc->rule_locs[0],
2830 rule_cnt * sizeof(u32)))
2831 return -EFAULT;
2832 }
2833 }
2834
2835 return 0;
7a229387
AB
2836}
2837
7a50a240
AB
2838static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2839{
2840 void __user *uptr;
2841 compat_uptr_t uptr32;
2842 struct ifreq __user *uifr;
2843
c6d409cf 2844 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2845 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2846 return -EFAULT;
2847
2848 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2849 return -EFAULT;
2850
2851 uptr = compat_ptr(uptr32);
2852
2853 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2854 return -EFAULT;
2855
2856 return dev_ioctl(net, SIOCWANDEV, uifr);
2857}
2858
6b96018b
AB
2859static int bond_ioctl(struct net *net, unsigned int cmd,
2860 struct compat_ifreq __user *ifr32)
7a229387
AB
2861{
2862 struct ifreq kifr;
2863 struct ifreq __user *uifr;
7a229387
AB
2864 mm_segment_t old_fs;
2865 int err;
2866 u32 data;
2867 void __user *datap;
2868
2869 switch (cmd) {
2870 case SIOCBONDENSLAVE:
2871 case SIOCBONDRELEASE:
2872 case SIOCBONDSETHWADDR:
2873 case SIOCBONDCHANGEACTIVE:
6b96018b 2874 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2875 return -EFAULT;
2876
2877 old_fs = get_fs();
c6d409cf 2878 set_fs(KERNEL_DS);
c3f52ae6 2879 err = dev_ioctl(net, cmd,
2880 (struct ifreq __user __force *) &kifr);
c6d409cf 2881 set_fs(old_fs);
7a229387
AB
2882
2883 return err;
2884 case SIOCBONDSLAVEINFOQUERY:
2885 case SIOCBONDINFOQUERY:
2886 uifr = compat_alloc_user_space(sizeof(*uifr));
2887 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2888 return -EFAULT;
2889
2890 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2891 return -EFAULT;
2892
2893 datap = compat_ptr(data);
2894 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2895 return -EFAULT;
2896
6b96018b 2897 return dev_ioctl(net, cmd, uifr);
7a229387 2898 default:
07d106d0 2899 return -ENOIOCTLCMD;
ccbd6a5a 2900 }
7a229387
AB
2901}
2902
6b96018b
AB
2903static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2904 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2905{
2906 struct ifreq __user *u_ifreq64;
7a229387
AB
2907 char tmp_buf[IFNAMSIZ];
2908 void __user *data64;
2909 u32 data32;
2910
2911 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2912 IFNAMSIZ))
2913 return -EFAULT;
2914 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2915 return -EFAULT;
2916 data64 = compat_ptr(data32);
2917
2918 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2919
2920 /* Don't check these user accesses, just let that get trapped
2921 * in the ioctl handler instead.
2922 */
2923 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2924 IFNAMSIZ))
2925 return -EFAULT;
2926 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
2927 return -EFAULT;
2928
6b96018b 2929 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2930}
2931
6b96018b
AB
2932static int dev_ifsioc(struct net *net, struct socket *sock,
2933 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2934{
a2116ed2 2935 struct ifreq __user *uifr;
7a229387
AB
2936 int err;
2937
a2116ed2
AB
2938 uifr = compat_alloc_user_space(sizeof(*uifr));
2939 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2940 return -EFAULT;
2941
2942 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2943
7a229387
AB
2944 if (!err) {
2945 switch (cmd) {
2946 case SIOCGIFFLAGS:
2947 case SIOCGIFMETRIC:
2948 case SIOCGIFMTU:
2949 case SIOCGIFMEM:
2950 case SIOCGIFHWADDR:
2951 case SIOCGIFINDEX:
2952 case SIOCGIFADDR:
2953 case SIOCGIFBRDADDR:
2954 case SIOCGIFDSTADDR:
2955 case SIOCGIFNETMASK:
fab2532b 2956 case SIOCGIFPFLAGS:
7a229387 2957 case SIOCGIFTXQLEN:
fab2532b
AB
2958 case SIOCGMIIPHY:
2959 case SIOCGMIIREG:
a2116ed2 2960 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2961 err = -EFAULT;
2962 break;
2963 }
2964 }
2965 return err;
2966}
2967
a2116ed2
AB
2968static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2969 struct compat_ifreq __user *uifr32)
2970{
2971 struct ifreq ifr;
2972 struct compat_ifmap __user *uifmap32;
2973 mm_segment_t old_fs;
2974 int err;
2975
2976 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2977 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
2978 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2979 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2980 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2981 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
2982 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
2983 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
2984 if (err)
2985 return -EFAULT;
2986
2987 old_fs = get_fs();
c6d409cf 2988 set_fs(KERNEL_DS);
c3f52ae6 2989 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2990 set_fs(old_fs);
a2116ed2
AB
2991
2992 if (cmd == SIOCGIFMAP && !err) {
2993 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
2994 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2995 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2996 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2997 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
2998 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
2999 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
3000 if (err)
3001 err = -EFAULT;
3002 }
3003 return err;
3004}
3005
3006static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
3007{
3008 void __user *uptr;
3009 compat_uptr_t uptr32;
3010 struct ifreq __user *uifr;
3011
c6d409cf 3012 uifr = compat_alloc_user_space(sizeof(*uifr));
a2116ed2
AB
3013 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
3014 return -EFAULT;
3015
3016 if (get_user(uptr32, &uifr32->ifr_data))
3017 return -EFAULT;
3018
3019 uptr = compat_ptr(uptr32);
3020
3021 if (put_user(uptr, &uifr->ifr_data))
3022 return -EFAULT;
3023
3024 return dev_ioctl(net, SIOCSHWTSTAMP, uifr);
3025}
3026
7a229387 3027struct rtentry32 {
c6d409cf 3028 u32 rt_pad1;
7a229387
AB
3029 struct sockaddr rt_dst; /* target address */
3030 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3031 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3032 unsigned short rt_flags;
3033 short rt_pad2;
3034 u32 rt_pad3;
3035 unsigned char rt_tos;
3036 unsigned char rt_class;
3037 short rt_pad4;
3038 short rt_metric; /* +1 for binary compatibility! */
7a229387 3039 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3040 u32 rt_mtu; /* per route MTU/Window */
3041 u32 rt_window; /* Window clamping */
7a229387
AB
3042 unsigned short rt_irtt; /* Initial RTT */
3043};
3044
3045struct in6_rtmsg32 {
3046 struct in6_addr rtmsg_dst;
3047 struct in6_addr rtmsg_src;
3048 struct in6_addr rtmsg_gateway;
3049 u32 rtmsg_type;
3050 u16 rtmsg_dst_len;
3051 u16 rtmsg_src_len;
3052 u32 rtmsg_metric;
3053 u32 rtmsg_info;
3054 u32 rtmsg_flags;
3055 s32 rtmsg_ifindex;
3056};
3057
6b96018b
AB
3058static int routing_ioctl(struct net *net, struct socket *sock,
3059 unsigned int cmd, void __user *argp)
7a229387
AB
3060{
3061 int ret;
3062 void *r = NULL;
3063 struct in6_rtmsg r6;
3064 struct rtentry r4;
3065 char devname[16];
3066 u32 rtdev;
3067 mm_segment_t old_fs = get_fs();
3068
6b96018b
AB
3069 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3070 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3071 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3072 3 * sizeof(struct in6_addr));
c6d409cf
ED
3073 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3074 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3075 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3076 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3077 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3078 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3079 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3080
3081 r = (void *) &r6;
3082 } else { /* ipv4 */
6b96018b 3083 struct rtentry32 __user *ur4 = argp;
c6d409cf 3084 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3085 3 * sizeof(struct sockaddr));
c6d409cf
ED
3086 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
3087 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
3088 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
3089 ret |= __get_user(r4.rt_window, &(ur4->rt_window));
3090 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
3091 ret |= __get_user(rtdev, &(ur4->rt_dev));
7a229387 3092 if (rtdev) {
c6d409cf 3093 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3094 r4.rt_dev = (char __user __force *)devname;
3095 devname[15] = 0;
7a229387
AB
3096 } else
3097 r4.rt_dev = NULL;
3098
3099 r = (void *) &r4;
3100 }
3101
3102 if (ret) {
3103 ret = -EFAULT;
3104 goto out;
3105 }
3106
c6d409cf 3107 set_fs(KERNEL_DS);
6b96018b 3108 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3109 set_fs(old_fs);
7a229387
AB
3110
3111out:
7a229387
AB
3112 return ret;
3113}
3114
3115/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3116 * for some operations; this forces use of the newer bridge-utils that
25985edc 3117 * use compatible ioctls
7a229387 3118 */
6b96018b 3119static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3120{
6b96018b 3121 compat_ulong_t tmp;
7a229387 3122
6b96018b 3123 if (get_user(tmp, argp))
7a229387
AB
3124 return -EFAULT;
3125 if (tmp == BRCTL_GET_VERSION)
3126 return BRCTL_VERSION + 1;
3127 return -EINVAL;
3128}
3129
6b96018b
AB
3130static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3131 unsigned int cmd, unsigned long arg)
3132{
3133 void __user *argp = compat_ptr(arg);
3134 struct sock *sk = sock->sk;
3135 struct net *net = sock_net(sk);
7a229387 3136
6b96018b
AB
3137 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3138 return siocdevprivate_ioctl(net, cmd, argp);
3139
3140 switch (cmd) {
3141 case SIOCSIFBR:
3142 case SIOCGIFBR:
3143 return old_bridge_ioctl(argp);
3144 case SIOCGIFNAME:
3145 return dev_ifname32(net, argp);
3146 case SIOCGIFCONF:
3147 return dev_ifconf(net, argp);
3148 case SIOCETHTOOL:
3149 return ethtool_ioctl(net, argp);
7a50a240
AB
3150 case SIOCWANDEV:
3151 return compat_siocwandev(net, argp);
a2116ed2
AB
3152 case SIOCGIFMAP:
3153 case SIOCSIFMAP:
3154 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3155 case SIOCBONDENSLAVE:
3156 case SIOCBONDRELEASE:
3157 case SIOCBONDSETHWADDR:
3158 case SIOCBONDSLAVEINFOQUERY:
3159 case SIOCBONDINFOQUERY:
3160 case SIOCBONDCHANGEACTIVE:
3161 return bond_ioctl(net, cmd, argp);
3162 case SIOCADDRT:
3163 case SIOCDELRT:
3164 return routing_ioctl(net, sock, cmd, argp);
3165 case SIOCGSTAMP:
3166 return do_siocgstamp(net, sock, cmd, argp);
3167 case SIOCGSTAMPNS:
3168 return do_siocgstampns(net, sock, cmd, argp);
a2116ed2
AB
3169 case SIOCSHWTSTAMP:
3170 return compat_siocshwtstamp(net, argp);
6b96018b
AB
3171
3172 case FIOSETOWN:
3173 case SIOCSPGRP:
3174 case FIOGETOWN:
3175 case SIOCGPGRP:
3176 case SIOCBRADDBR:
3177 case SIOCBRDELBR:
3178 case SIOCGIFVLAN:
3179 case SIOCSIFVLAN:
3180 case SIOCADDDLCI:
3181 case SIOCDELDLCI:
3182 return sock_ioctl(file, cmd, arg);
3183
3184 case SIOCGIFFLAGS:
3185 case SIOCSIFFLAGS:
3186 case SIOCGIFMETRIC:
3187 case SIOCSIFMETRIC:
3188 case SIOCGIFMTU:
3189 case SIOCSIFMTU:
3190 case SIOCGIFMEM:
3191 case SIOCSIFMEM:
3192 case SIOCGIFHWADDR:
3193 case SIOCSIFHWADDR:
3194 case SIOCADDMULTI:
3195 case SIOCDELMULTI:
3196 case SIOCGIFINDEX:
6b96018b
AB
3197 case SIOCGIFADDR:
3198 case SIOCSIFADDR:
3199 case SIOCSIFHWBROADCAST:
6b96018b 3200 case SIOCDIFADDR:
6b96018b
AB
3201 case SIOCGIFBRDADDR:
3202 case SIOCSIFBRDADDR:
3203 case SIOCGIFDSTADDR:
3204 case SIOCSIFDSTADDR:
3205 case SIOCGIFNETMASK:
3206 case SIOCSIFNETMASK:
3207 case SIOCSIFPFLAGS:
3208 case SIOCGIFPFLAGS:
3209 case SIOCGIFTXQLEN:
3210 case SIOCSIFTXQLEN:
3211 case SIOCBRADDIF:
3212 case SIOCBRDELIF:
9177efd3
AB
3213 case SIOCSIFNAME:
3214 case SIOCGMIIPHY:
3215 case SIOCGMIIREG:
3216 case SIOCSMIIREG:
6b96018b 3217 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3218
6b96018b
AB
3219 case SIOCSARP:
3220 case SIOCGARP:
3221 case SIOCDARP:
6b96018b 3222 case SIOCATMARK:
9177efd3
AB
3223 return sock_do_ioctl(net, sock, cmd, arg);
3224 }
3225
6b96018b
AB
3226 return -ENOIOCTLCMD;
3227}
7a229387 3228
95c96174 3229static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3230 unsigned long arg)
89bbfc95
SP
3231{
3232 struct socket *sock = file->private_data;
3233 int ret = -ENOIOCTLCMD;
87de87d5
DM
3234 struct sock *sk;
3235 struct net *net;
3236
3237 sk = sock->sk;
3238 net = sock_net(sk);
89bbfc95
SP
3239
3240 if (sock->ops->compat_ioctl)
3241 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3242
87de87d5
DM
3243 if (ret == -ENOIOCTLCMD &&
3244 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3245 ret = compat_wext_handle_ioctl(net, cmd, arg);
3246
6b96018b
AB
3247 if (ret == -ENOIOCTLCMD)
3248 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3249
89bbfc95
SP
3250 return ret;
3251}
3252#endif
3253
ac5a488e
SS
3254int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3255{
3256 return sock->ops->bind(sock, addr, addrlen);
3257}
c6d409cf 3258EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3259
3260int kernel_listen(struct socket *sock, int backlog)
3261{
3262 return sock->ops->listen(sock, backlog);
3263}
c6d409cf 3264EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3265
3266int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3267{
3268 struct sock *sk = sock->sk;
3269 int err;
3270
3271 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3272 newsock);
3273 if (err < 0)
3274 goto done;
3275
3276 err = sock->ops->accept(sock, *newsock, flags);
3277 if (err < 0) {
3278 sock_release(*newsock);
fa8705b0 3279 *newsock = NULL;
ac5a488e
SS
3280 goto done;
3281 }
3282
3283 (*newsock)->ops = sock->ops;
1b08534e 3284 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3285
3286done:
3287 return err;
3288}
c6d409cf 3289EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3290
3291int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3292 int flags)
ac5a488e
SS
3293{
3294 return sock->ops->connect(sock, addr, addrlen, flags);
3295}
c6d409cf 3296EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3297
3298int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3299 int *addrlen)
3300{
3301 return sock->ops->getname(sock, addr, addrlen, 0);
3302}
c6d409cf 3303EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3304
3305int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3306 int *addrlen)
3307{
3308 return sock->ops->getname(sock, addr, addrlen, 1);
3309}
c6d409cf 3310EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3311
3312int kernel_getsockopt(struct socket *sock, int level, int optname,
3313 char *optval, int *optlen)
3314{
3315 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3316 char __user *uoptval;
3317 int __user *uoptlen;
ac5a488e
SS
3318 int err;
3319
fb8621bb
NK
3320 uoptval = (char __user __force *) optval;
3321 uoptlen = (int __user __force *) optlen;
3322
ac5a488e
SS
3323 set_fs(KERNEL_DS);
3324 if (level == SOL_SOCKET)
fb8621bb 3325 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3326 else
fb8621bb
NK
3327 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3328 uoptlen);
ac5a488e
SS
3329 set_fs(oldfs);
3330 return err;
3331}
c6d409cf 3332EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3333
3334int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3335 char *optval, unsigned int optlen)
ac5a488e
SS
3336{
3337 mm_segment_t oldfs = get_fs();
fb8621bb 3338 char __user *uoptval;
ac5a488e
SS
3339 int err;
3340
fb8621bb
NK
3341 uoptval = (char __user __force *) optval;
3342
ac5a488e
SS
3343 set_fs(KERNEL_DS);
3344 if (level == SOL_SOCKET)
fb8621bb 3345 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3346 else
fb8621bb 3347 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3348 optlen);
3349 set_fs(oldfs);
3350 return err;
3351}
c6d409cf 3352EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3353
3354int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3355 size_t size, int flags)
3356{
f8451725
HX
3357 sock_update_classid(sock->sk);
3358
ac5a488e
SS
3359 if (sock->ops->sendpage)
3360 return sock->ops->sendpage(sock, page, offset, size, flags);
3361
3362 return sock_no_sendpage(sock, page, offset, size, flags);
3363}
c6d409cf 3364EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3365
3366int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3367{
3368 mm_segment_t oldfs = get_fs();
3369 int err;
3370
3371 set_fs(KERNEL_DS);
3372 err = sock->ops->ioctl(sock, cmd, arg);
3373 set_fs(oldfs);
3374
3375 return err;
3376}
c6d409cf 3377EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3378
91cf45f0
TM
3379int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3380{
3381 return sock->ops->shutdown(sock, how);
3382}
91cf45f0 3383EXPORT_SYMBOL(kernel_sock_shutdown);