net sysctl: Register an empty /proc/sys/net
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4
LT
72#include <linux/wanrouter.h>
73#include <linux/if_bridge.h>
20380731
ACM
74#include <linux/if_frad.h>
75#include <linux/if_vlan.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
1da177e4
LT
91
92#include <asm/uaccess.h>
93#include <asm/unistd.h>
94
95#include <net/compat.h>
87de87d5 96#include <net/wext.h>
f8451725 97#include <net/cls_cgroup.h>
1da177e4
LT
98
99#include <net/sock.h>
100#include <linux/netfilter.h>
101
6b96018b
AB
102#include <linux/if_tun.h>
103#include <linux/ipv6_route.h>
104#include <linux/route.h>
6b96018b
AB
105#include <linux/sockios.h>
106#include <linux/atalk.h>
107
1da177e4 108static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
109static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
110 unsigned long nr_segs, loff_t pos);
111static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
112 unsigned long nr_segs, loff_t pos);
89bddce5 113static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
114
115static int sock_close(struct inode *inode, struct file *file);
116static unsigned int sock_poll(struct file *file,
117 struct poll_table_struct *wait);
89bddce5 118static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
119#ifdef CONFIG_COMPAT
120static long compat_sock_ioctl(struct file *file,
89bddce5 121 unsigned int cmd, unsigned long arg);
89bbfc95 122#endif
1da177e4 123static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
124static ssize_t sock_sendpage(struct file *file, struct page *page,
125 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 126static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 127 struct pipe_inode_info *pipe, size_t len,
9c55e01c 128 unsigned int flags);
1da177e4 129
1da177e4
LT
130/*
131 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
132 * in the operation structures but are done directly via the socketcall() multiplexor.
133 */
134
da7071d7 135static const struct file_operations socket_file_ops = {
1da177e4
LT
136 .owner = THIS_MODULE,
137 .llseek = no_llseek,
138 .aio_read = sock_aio_read,
139 .aio_write = sock_aio_write,
140 .poll = sock_poll,
141 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
142#ifdef CONFIG_COMPAT
143 .compat_ioctl = compat_sock_ioctl,
144#endif
1da177e4
LT
145 .mmap = sock_mmap,
146 .open = sock_no_open, /* special open code to disallow open via /proc */
147 .release = sock_close,
148 .fasync = sock_fasync,
5274f052
JA
149 .sendpage = sock_sendpage,
150 .splice_write = generic_splice_sendpage,
9c55e01c 151 .splice_read = sock_splice_read,
1da177e4
LT
152};
153
154/*
155 * The protocol list. Each protocol is registered in here.
156 */
157
1da177e4 158static DEFINE_SPINLOCK(net_family_lock);
190683a9 159static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 160
1da177e4
LT
161/*
162 * Statistics counters of the socket lists
163 */
164
c6d409cf 165static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
166
167/*
89bddce5
SH
168 * Support routines.
169 * Move socket addresses back and forth across the kernel/user
170 * divide and look after the messy bits.
1da177e4
LT
171 */
172
1da177e4
LT
173/**
174 * move_addr_to_kernel - copy a socket address into kernel space
175 * @uaddr: Address in user space
176 * @kaddr: Address in kernel space
177 * @ulen: Length in user space
178 *
179 * The address is copied into kernel space. If the provided address is
180 * too long an error code of -EINVAL is returned. If the copy gives
181 * invalid addresses -EFAULT is returned. On a success 0 is returned.
182 */
183
43db362d 184int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 185{
230b1839 186 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 187 return -EINVAL;
89bddce5 188 if (ulen == 0)
1da177e4 189 return 0;
89bddce5 190 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 191 return -EFAULT;
3ec3b2fb 192 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
193}
194
195/**
196 * move_addr_to_user - copy an address to user space
197 * @kaddr: kernel space address
198 * @klen: length of address in kernel
199 * @uaddr: user space address
200 * @ulen: pointer to user length field
201 *
202 * The value pointed to by ulen on entry is the buffer length available.
203 * This is overwritten with the buffer space used. -EINVAL is returned
204 * if an overlong buffer is specified or a negative buffer size. -EFAULT
205 * is returned if either the buffer or the length field are not
206 * accessible.
207 * After copying the data up to the limit the user specifies, the true
208 * length of the data is written over the length limit the user
209 * specified. Zero is returned for a success.
210 */
89bddce5 211
43db362d 212static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 213 void __user *uaddr, int __user *ulen)
1da177e4
LT
214{
215 int err;
216 int len;
217
89bddce5
SH
218 err = get_user(len, ulen);
219 if (err)
1da177e4 220 return err;
89bddce5
SH
221 if (len > klen)
222 len = klen;
230b1839 223 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 224 return -EINVAL;
89bddce5 225 if (len) {
d6fe3945
SG
226 if (audit_sockaddr(klen, kaddr))
227 return -ENOMEM;
89bddce5 228 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
229 return -EFAULT;
230 }
231 /*
89bddce5
SH
232 * "fromlen shall refer to the value before truncation.."
233 * 1003.1g
1da177e4
LT
234 */
235 return __put_user(klen, ulen);
236}
237
e18b890b 238static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
239
240static struct inode *sock_alloc_inode(struct super_block *sb)
241{
242 struct socket_alloc *ei;
eaefd110 243 struct socket_wq *wq;
89bddce5 244
e94b1766 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
246 if (!ei)
247 return NULL;
eaefd110
ED
248 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
249 if (!wq) {
43815482
ED
250 kmem_cache_free(sock_inode_cachep, ei);
251 return NULL;
252 }
eaefd110
ED
253 init_waitqueue_head(&wq->wait);
254 wq->fasync_list = NULL;
255 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482 268 struct socket_alloc *ei;
eaefd110 269 struct socket_wq *wq;
43815482
ED
270
271 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 272 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 273 kfree_rcu(wq, rcu);
43815482 274 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
275}
276
51cc5068 277static void init_once(void *foo)
1da177e4 278{
89bddce5 279 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 280
a35afb83 281 inode_init_once(&ei->vfs_inode);
1da177e4 282}
89bddce5 283
1da177e4
LT
284static int init_inodecache(void)
285{
286 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
287 sizeof(struct socket_alloc),
288 0,
289 (SLAB_HWCACHE_ALIGN |
290 SLAB_RECLAIM_ACCOUNT |
291 SLAB_MEM_SPREAD),
20c2df83 292 init_once);
1da177e4
LT
293 if (sock_inode_cachep == NULL)
294 return -ENOMEM;
295 return 0;
296}
297
b87221de 298static const struct super_operations sockfs_ops = {
c6d409cf
ED
299 .alloc_inode = sock_alloc_inode,
300 .destroy_inode = sock_destroy_inode,
301 .statfs = simple_statfs,
1da177e4
LT
302};
303
c23fbb6b
ED
304/*
305 * sockfs_dname() is called from d_path().
306 */
307static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
308{
309 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
310 dentry->d_inode->i_ino);
311}
312
3ba13d17 313static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 314 .d_dname = sockfs_dname,
1da177e4
LT
315};
316
c74a1cbb
AV
317static struct dentry *sockfs_mount(struct file_system_type *fs_type,
318 int flags, const char *dev_name, void *data)
319{
320 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
321 &sockfs_dentry_operations, SOCKFS_MAGIC);
322}
323
324static struct vfsmount *sock_mnt __read_mostly;
325
326static struct file_system_type sock_fs_type = {
327 .name = "sockfs",
328 .mount = sockfs_mount,
329 .kill_sb = kill_anon_super,
330};
331
1da177e4
LT
332/*
333 * Obtains the first available file descriptor and sets it up for use.
334 *
39d8c1b6
DM
335 * These functions create file structures and maps them to fd space
336 * of the current process. On success it returns file descriptor
1da177e4
LT
337 * and file struct implicitly stored in sock->file.
338 * Note that another thread may close file descriptor before we return
339 * from this function. We use the fact that now we do not refer
340 * to socket after mapping. If one day we will need it, this
341 * function will increment ref. count on file by 1.
342 *
343 * In any case returned fd MAY BE not valid!
344 * This race condition is unavoidable
345 * with shared fd spaces, we cannot solve it inside kernel,
346 * but we take care of internal coherence yet.
347 */
348
7cbe66b6 349static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
1da177e4 350{
7cbe66b6 351 struct qstr name = { .name = "" };
2c48b9c4 352 struct path path;
7cbe66b6 353 struct file *file;
1da177e4 354 int fd;
1da177e4 355
a677a039 356 fd = get_unused_fd_flags(flags);
7cbe66b6
AV
357 if (unlikely(fd < 0))
358 return fd;
1da177e4 359
4b936885 360 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
2c48b9c4 361 if (unlikely(!path.dentry)) {
7cbe66b6 362 put_unused_fd(fd);
39d8c1b6 363 return -ENOMEM;
7cbe66b6 364 }
2c48b9c4 365 path.mnt = mntget(sock_mnt);
39d8c1b6 366
2c48b9c4 367 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 368 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 369
2c48b9c4 370 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 371 &socket_file_ops);
cc3808f8
AV
372 if (unlikely(!file)) {
373 /* drop dentry, keep inode */
7de9c6ee 374 ihold(path.dentry->d_inode);
2c48b9c4 375 path_put(&path);
cc3808f8
AV
376 put_unused_fd(fd);
377 return -ENFILE;
378 }
379
380 sock->file = file;
77d27200 381 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6
DM
382 file->f_pos = 0;
383 file->private_data = sock;
1da177e4 384
7cbe66b6
AV
385 *f = file;
386 return fd;
39d8c1b6
DM
387}
388
/*
 * Allocate a file for @sock with sock_alloc_file() and install it into
 * the reserved descriptor. Returns the fd, or the negative error from
 * sock_alloc_file() (in which case nothing is installed).
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *filp;
	int fd;

	fd = sock_alloc_file(sock, &filp, flags);
	if (unlikely(fd < 0))
		return fd;

	fd_install(fd, filp);
	return fd;
}
EXPORT_SYMBOL(sock_map_fd);
1da177e4 400
6cb153ca
BL
401static struct socket *sock_from_file(struct file *file, int *err)
402{
6cb153ca
BL
403 if (file->f_op == &socket_file_ops)
404 return file->private_data; /* set in sock_map_fd */
405
23bb80d2
ED
406 *err = -ENOTSOCK;
407 return NULL;
6cb153ca
BL
408}
409
1da177e4 410/**
c6d409cf 411 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
412 * @fd: file handle
413 * @err: pointer to an error code return
414 *
415 * The file handle passed in is locked and the socket it is bound
416 * too is returned. If an error occurs the err pointer is overwritten
417 * with a negative errno code and NULL is returned. The function checks
418 * for both invalid handles and passing a handle which is not a socket.
419 *
420 * On a success the socket object pointer is returned.
421 */
422
423struct socket *sockfd_lookup(int fd, int *err)
424{
425 struct file *file;
1da177e4
LT
426 struct socket *sock;
427
89bddce5
SH
428 file = fget(fd);
429 if (!file) {
1da177e4
LT
430 *err = -EBADF;
431 return NULL;
432 }
89bddce5 433
6cb153ca
BL
434 sock = sock_from_file(file, err);
435 if (!sock)
1da177e4 436 fput(file);
6cb153ca
BL
437 return sock;
438}
c6d409cf 439EXPORT_SYMBOL(sockfd_lookup);
1da177e4 440
6cb153ca
BL
441static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
442{
443 struct file *file;
444 struct socket *sock;
445
3672558c 446 *err = -EBADF;
6cb153ca
BL
447 file = fget_light(fd, fput_needed);
448 if (file) {
449 sock = sock_from_file(file, err);
450 if (sock)
451 return sock;
452 fput_light(file, *fput_needed);
1da177e4 453 }
6cb153ca 454 return NULL;
1da177e4
LT
455}
456
457/**
458 * sock_alloc - allocate a socket
89bddce5 459 *
1da177e4
LT
460 * Allocate a new inode and socket object. The two are bound together
461 * and initialised. The socket is then returned. If we are out of inodes
462 * NULL is returned.
463 */
464
465static struct socket *sock_alloc(void)
466{
89bddce5
SH
467 struct inode *inode;
468 struct socket *sock;
1da177e4 469
a209dfc7 470 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
471 if (!inode)
472 return NULL;
473
474 sock = SOCKET_I(inode);
475
29a020d3 476 kmemcheck_annotate_bitfield(sock, type);
85fe4025 477 inode->i_ino = get_next_ino();
89bddce5 478 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
479 inode->i_uid = current_fsuid();
480 inode->i_gid = current_fsgid();
1da177e4 481
4e69489a 482 percpu_add(sockets_in_use, 1);
1da177e4
LT
483 return sock;
484}
485
/*
 *	In theory you can't get an open on this inode, but /proc provides
 *	a back door. Remember to keep it shut otherwise you'll let the
 *	creepy crawlies in.
 *
 *	Both arguments are ignored; every open attempt fails with -ENXIO.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	const int refused = -ENXIO;

	return refused;
}
496
4b6f5d20 497const struct file_operations bad_sock_fops = {
1da177e4
LT
498 .owner = THIS_MODULE,
499 .open = sock_no_open,
6038f373 500 .llseek = noop_llseek,
1da177e4
LT
501};
502
503/**
504 * sock_release - close a socket
505 * @sock: socket to close
506 *
507 * The socket is released from the protocol stack if it has a release
508 * callback, and the inode is then released if the socket is bound to
89bddce5 509 * an inode not a file.
1da177e4 510 */
89bddce5 511
1da177e4
LT
512void sock_release(struct socket *sock)
513{
514 if (sock->ops) {
515 struct module *owner = sock->ops->owner;
516
517 sock->ops->release(sock);
518 sock->ops = NULL;
519 module_put(owner);
520 }
521
eaefd110 522 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
1da177e4
LT
523 printk(KERN_ERR "sock_release: fasync list not empty!\n");
524
4e69489a 525 percpu_sub(sockets_in_use, 1);
1da177e4
LT
526 if (!sock->file) {
527 iput(SOCK_INODE(sock));
528 return;
529 }
89bddce5 530 sock->file = NULL;
1da177e4 531}
c6d409cf 532EXPORT_SYMBOL(sock_release);
1da177e4 533
2244d07b 534int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
20d49473 535{
2244d07b 536 *tx_flags = 0;
20d49473 537 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
2244d07b 538 *tx_flags |= SKBTX_HW_TSTAMP;
20d49473 539 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
2244d07b 540 *tx_flags |= SKBTX_SW_TSTAMP;
6e3e939f
JB
541 if (sock_flag(sk, SOCK_WIFI_STATUS))
542 *tx_flags |= SKBTX_WIFI_STATUS;
20d49473
PO
543 return 0;
544}
545EXPORT_SYMBOL(sock_tx_timestamp);
546
228e548e
AB
547static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
548 struct msghdr *msg, size_t size)
1da177e4
LT
549{
550 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4 551
f8451725
HX
552 sock_update_classid(sock->sk);
553
5bc1421e
NH
554 sock_update_netprioidx(sock->sk);
555
1da177e4
LT
556 si->sock = sock;
557 si->scm = NULL;
558 si->msg = msg;
559 si->size = size;
560
1da177e4
LT
561 return sock->ops->sendmsg(iocb, sock, msg, size);
562}
563
228e548e
AB
564static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
565 struct msghdr *msg, size_t size)
566{
567 int err = security_socket_sendmsg(sock, msg, size);
568
569 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
570}
571
1da177e4
LT
572int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
573{
574 struct kiocb iocb;
575 struct sock_iocb siocb;
576 int ret;
577
578 init_sync_kiocb(&iocb, NULL);
579 iocb.private = &siocb;
580 ret = __sock_sendmsg(&iocb, sock, msg, size);
581 if (-EIOCBQUEUED == ret)
582 ret = wait_on_sync_kiocb(&iocb);
583 return ret;
584}
c6d409cf 585EXPORT_SYMBOL(sock_sendmsg);
1da177e4 586
894dc24c 587static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e
AB
588{
589 struct kiocb iocb;
590 struct sock_iocb siocb;
591 int ret;
592
593 init_sync_kiocb(&iocb, NULL);
594 iocb.private = &siocb;
595 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
596 if (-EIOCBQUEUED == ret)
597 ret = wait_on_sync_kiocb(&iocb);
598 return ret;
599}
600
1da177e4
LT
601int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
602 struct kvec *vec, size_t num, size_t size)
603{
604 mm_segment_t oldfs = get_fs();
605 int result;
606
607 set_fs(KERNEL_DS);
608 /*
609 * the following is safe, since for compiler definitions of kvec and
610 * iovec are identical, yielding the same in-core layout and alignment
611 */
89bddce5 612 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
613 msg->msg_iovlen = num;
614 result = sock_sendmsg(sock, msg, size);
615 set_fs(oldfs);
616 return result;
617}
c6d409cf 618EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 619
20d49473
PO
620static int ktime2ts(ktime_t kt, struct timespec *ts)
621{
622 if (kt.tv64) {
623 *ts = ktime_to_timespec(kt);
624 return 1;
625 } else {
626 return 0;
627 }
628}
629
92f37fd2
ED
630/*
631 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
632 */
633void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
634 struct sk_buff *skb)
635{
20d49473
PO
636 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
637 struct timespec ts[3];
638 int empty = 1;
639 struct skb_shared_hwtstamps *shhwtstamps =
640 skb_hwtstamps(skb);
641
642 /* Race occurred between timestamp enabling and packet
643 receiving. Fill in the current time for now. */
644 if (need_software_tstamp && skb->tstamp.tv64 == 0)
645 __net_timestamp(skb);
646
647 if (need_software_tstamp) {
648 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
649 struct timeval tv;
650 skb_get_timestamp(skb, &tv);
651 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
652 sizeof(tv), &tv);
653 } else {
842509b8 654 skb_get_timestampns(skb, &ts[0]);
20d49473 655 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 656 sizeof(ts[0]), &ts[0]);
20d49473
PO
657 }
658 }
659
660
661 memset(ts, 0, sizeof(ts));
662 if (skb->tstamp.tv64 &&
663 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
664 skb_get_timestampns(skb, ts + 0);
665 empty = 0;
666 }
667 if (shhwtstamps) {
668 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
669 ktime2ts(shhwtstamps->syststamp, ts + 1))
670 empty = 0;
671 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
672 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
673 empty = 0;
92f37fd2 674 }
20d49473
PO
675 if (!empty)
676 put_cmsg(msg, SOL_SOCKET,
677 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 678}
7c81fd8b
ACM
679EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
680
6e3e939f
JB
681void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
682 struct sk_buff *skb)
683{
684 int ack;
685
686 if (!sock_flag(sk, SOCK_WIFI_STATUS))
687 return;
688 if (!skb->wifi_acked_valid)
689 return;
690
691 ack = skb->wifi_acked;
692
693 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
694}
695EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
696
11165f14 697static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
698 struct sk_buff *skb)
3b885787
NH
699{
700 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
701 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
702 sizeof(__u32), &skb->dropcount);
703}
704
/*
 * Convenience wrapper: run sock_recv_timestamp() and then
 * sock_recv_drops() for the same message, socket and skb.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	/* timestamps first, then the drop counter */
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 712
a2e27255
ACM
713static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
714 struct msghdr *msg, size_t size, int flags)
1da177e4 715{
1da177e4
LT
716 struct sock_iocb *si = kiocb_to_siocb(iocb);
717
f8451725
HX
718 sock_update_classid(sock->sk);
719
1da177e4
LT
720 si->sock = sock;
721 si->scm = NULL;
722 si->msg = msg;
723 si->size = size;
724 si->flags = flags;
725
1da177e4
LT
726 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
727}
728
a2e27255
ACM
729static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
730 struct msghdr *msg, size_t size, int flags)
731{
732 int err = security_socket_recvmsg(sock, msg, size, flags);
733
734 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
735}
736
89bddce5 737int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
738 size_t size, int flags)
739{
740 struct kiocb iocb;
741 struct sock_iocb siocb;
742 int ret;
743
89bddce5 744 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
745 iocb.private = &siocb;
746 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
747 if (-EIOCBQUEUED == ret)
748 ret = wait_on_sync_kiocb(&iocb);
749 return ret;
750}
c6d409cf 751EXPORT_SYMBOL(sock_recvmsg);
1da177e4 752
a2e27255
ACM
753static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
754 size_t size, int flags)
755{
756 struct kiocb iocb;
757 struct sock_iocb siocb;
758 int ret;
759
760 init_sync_kiocb(&iocb, NULL);
761 iocb.private = &siocb;
762 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
763 if (-EIOCBQUEUED == ret)
764 ret = wait_on_sync_kiocb(&iocb);
765 return ret;
766}
767
c1249c0a
ML
768/**
769 * kernel_recvmsg - Receive a message from a socket (kernel space)
770 * @sock: The socket to receive the message from
771 * @msg: Received message
772 * @vec: Input s/g array for message data
773 * @num: Size of input s/g array
774 * @size: Number of bytes to read
775 * @flags: Message flags (MSG_DONTWAIT, etc...)
776 *
777 * On return the msg structure contains the scatter/gather array passed in the
778 * vec argument. The array is modified so that it consists of the unfilled
779 * portion of the original array.
780 *
781 * The returned value is the total number of bytes received, or an error.
782 */
89bddce5
SH
783int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
784 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
785{
786 mm_segment_t oldfs = get_fs();
787 int result;
788
789 set_fs(KERNEL_DS);
790 /*
791 * the following is safe, since for compiler definitions of kvec and
792 * iovec are identical, yielding the same in-core layout and alignment
793 */
89bddce5 794 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
795 result = sock_recvmsg(sock, msg, size, flags);
796 set_fs(oldfs);
797 return result;
798}
c6d409cf 799EXPORT_SYMBOL(kernel_recvmsg);
1da177e4
LT
800
801static void sock_aio_dtor(struct kiocb *iocb)
802{
803 kfree(iocb->private);
804}
805
ce1d4d3e
CH
806static ssize_t sock_sendpage(struct file *file, struct page *page,
807 int offset, size_t size, loff_t *ppos, int more)
1da177e4 808{
1da177e4
LT
809 struct socket *sock;
810 int flags;
811
ce1d4d3e
CH
812 sock = file->private_data;
813
35f9c09f
ED
814 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
815 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
816 flags |= more;
ce1d4d3e 817
e6949583 818 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 819}
1da177e4 820
9c55e01c 821static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 822 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
823 unsigned int flags)
824{
825 struct socket *sock = file->private_data;
826
997b37da
RDC
827 if (unlikely(!sock->ops->splice_read))
828 return -EINVAL;
829
f8451725
HX
830 sock_update_classid(sock->sk);
831
9c55e01c
JA
832 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
833}
834
ce1d4d3e 835static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 836 struct sock_iocb *siocb)
ce1d4d3e
CH
837{
838 if (!is_sync_kiocb(iocb)) {
839 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
840 if (!siocb)
841 return NULL;
1da177e4
LT
842 iocb->ki_dtor = sock_aio_dtor;
843 }
1da177e4 844
ce1d4d3e 845 siocb->kiocb = iocb;
ce1d4d3e
CH
846 iocb->private = siocb;
847 return siocb;
1da177e4
LT
848}
849
ce1d4d3e 850static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
851 struct file *file, const struct iovec *iov,
852 unsigned long nr_segs)
ce1d4d3e
CH
853{
854 struct socket *sock = file->private_data;
855 size_t size = 0;
856 int i;
1da177e4 857
89bddce5
SH
858 for (i = 0; i < nr_segs; i++)
859 size += iov[i].iov_len;
1da177e4 860
ce1d4d3e
CH
861 msg->msg_name = NULL;
862 msg->msg_namelen = 0;
863 msg->msg_control = NULL;
864 msg->msg_controllen = 0;
89bddce5 865 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
866 msg->msg_iovlen = nr_segs;
867 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
868
869 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
870}
871
027445c3
BP
872static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
873 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
874{
875 struct sock_iocb siocb, *x;
876
1da177e4
LT
877 if (pos != 0)
878 return -ESPIPE;
027445c3
BP
879
880 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
881 return 0;
882
027445c3
BP
883
884 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
885 if (!x)
886 return -ENOMEM;
027445c3 887 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
888}
889
ce1d4d3e 890static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
891 struct file *file, const struct iovec *iov,
892 unsigned long nr_segs)
1da177e4 893{
ce1d4d3e
CH
894 struct socket *sock = file->private_data;
895 size_t size = 0;
896 int i;
1da177e4 897
89bddce5
SH
898 for (i = 0; i < nr_segs; i++)
899 size += iov[i].iov_len;
1da177e4 900
ce1d4d3e
CH
901 msg->msg_name = NULL;
902 msg->msg_namelen = 0;
903 msg->msg_control = NULL;
904 msg->msg_controllen = 0;
89bddce5 905 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
906 msg->msg_iovlen = nr_segs;
907 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
908 if (sock->type == SOCK_SEQPACKET)
909 msg->msg_flags |= MSG_EOR;
1da177e4 910
ce1d4d3e 911 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
912}
913
027445c3
BP
914static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
915 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
916{
917 struct sock_iocb siocb, *x;
1da177e4 918
ce1d4d3e
CH
919 if (pos != 0)
920 return -ESPIPE;
027445c3 921
027445c3 922 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
923 if (!x)
924 return -ENOMEM;
1da177e4 925
027445c3 926 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
927}
928
1da177e4
LT
929/*
930 * Atomic setting of ioctl hooks to avoid race
931 * with module unload.
932 */
933
4a3e2f71 934static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 935static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 936
881d966b 937void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 938{
4a3e2f71 939 mutex_lock(&br_ioctl_mutex);
1da177e4 940 br_ioctl_hook = hook;
4a3e2f71 941 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
942}
943EXPORT_SYMBOL(brioctl_set);
944
4a3e2f71 945static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 946static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 947
881d966b 948void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 949{
4a3e2f71 950 mutex_lock(&vlan_ioctl_mutex);
1da177e4 951 vlan_ioctl_hook = hook;
4a3e2f71 952 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
953}
954EXPORT_SYMBOL(vlan_ioctl_set);
955
4a3e2f71 956static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 957static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 958
89bddce5 959void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 960{
4a3e2f71 961 mutex_lock(&dlci_ioctl_mutex);
1da177e4 962 dlci_ioctl_hook = hook;
4a3e2f71 963 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
964}
965EXPORT_SYMBOL(dlci_ioctl_set);
966
6b96018b
AB
967static long sock_do_ioctl(struct net *net, struct socket *sock,
968 unsigned int cmd, unsigned long arg)
969{
970 int err;
971 void __user *argp = (void __user *)arg;
972
973 err = sock->ops->ioctl(sock, cmd, arg);
974
975 /*
976 * If this ioctl is unknown try to hand it down
977 * to the NIC driver.
978 */
979 if (err == -ENOIOCTLCMD)
980 err = dev_ioctl(net, cmd, argp);
981
982 return err;
983}
984
1da177e4
LT
985/*
986 * With an ioctl, arg may well be a user mode pointer, but we don't know
987 * what to do with it - that's up to the protocol still.
988 */
989
990static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
991{
992 struct socket *sock;
881d966b 993 struct sock *sk;
1da177e4
LT
994 void __user *argp = (void __user *)arg;
995 int pid, err;
881d966b 996 struct net *net;
1da177e4 997
b69aee04 998 sock = file->private_data;
881d966b 999 sk = sock->sk;
3b1e0a65 1000 net = sock_net(sk);
1da177e4 1001 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1002 err = dev_ioctl(net, cmd, argp);
1da177e4 1003 } else
3d23e349 1004#ifdef CONFIG_WEXT_CORE
1da177e4 1005 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1006 err = dev_ioctl(net, cmd, argp);
1da177e4 1007 } else
3d23e349 1008#endif
89bddce5 1009 switch (cmd) {
1da177e4
LT
1010 case FIOSETOWN:
1011 case SIOCSPGRP:
1012 err = -EFAULT;
1013 if (get_user(pid, (int __user *)argp))
1014 break;
1015 err = f_setown(sock->file, pid, 1);
1016 break;
1017 case FIOGETOWN:
1018 case SIOCGPGRP:
609d7fa9 1019 err = put_user(f_getown(sock->file),
89bddce5 1020 (int __user *)argp);
1da177e4
LT
1021 break;
1022 case SIOCGIFBR:
1023 case SIOCSIFBR:
1024 case SIOCBRADDBR:
1025 case SIOCBRDELBR:
1026 err = -ENOPKG;
1027 if (!br_ioctl_hook)
1028 request_module("bridge");
1029
4a3e2f71 1030 mutex_lock(&br_ioctl_mutex);
89bddce5 1031 if (br_ioctl_hook)
881d966b 1032 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1033 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1034 break;
1035 case SIOCGIFVLAN:
1036 case SIOCSIFVLAN:
1037 err = -ENOPKG;
1038 if (!vlan_ioctl_hook)
1039 request_module("8021q");
1040
4a3e2f71 1041 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1042 if (vlan_ioctl_hook)
881d966b 1043 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1044 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1045 break;
1da177e4
LT
1046 case SIOCADDDLCI:
1047 case SIOCDELDLCI:
1048 err = -ENOPKG;
1049 if (!dlci_ioctl_hook)
1050 request_module("dlci");
1051
7512cbf6
PE
1052 mutex_lock(&dlci_ioctl_mutex);
1053 if (dlci_ioctl_hook)
1da177e4 1054 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1055 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1056 break;
1057 default:
6b96018b 1058 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1059 break;
89bddce5 1060 }
1da177e4
LT
1061 return err;
1062}
1063
1064int sock_create_lite(int family, int type, int protocol, struct socket **res)
1065{
1066 int err;
1067 struct socket *sock = NULL;
89bddce5 1068
1da177e4
LT
1069 err = security_socket_create(family, type, protocol, 1);
1070 if (err)
1071 goto out;
1072
1073 sock = sock_alloc();
1074 if (!sock) {
1075 err = -ENOMEM;
1076 goto out;
1077 }
1078
1da177e4 1079 sock->type = type;
7420ed23
VY
1080 err = security_socket_post_create(sock, family, type, protocol, 1);
1081 if (err)
1082 goto out_release;
1083
1da177e4
LT
1084out:
1085 *res = sock;
1086 return err;
7420ed23
VY
1087out_release:
1088 sock_release(sock);
1089 sock = NULL;
1090 goto out;
1da177e4 1091}
c6d409cf 1092EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1093
1094/* No kernel lock held - perfect */
89bddce5 1095static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1096{
1097 struct socket *sock;
1098
1099 /*
89bddce5 1100 * We can't return errors to poll, so it's either yes or no.
1da177e4 1101 */
b69aee04 1102 sock = file->private_data;
1da177e4
LT
1103 return sock->ops->poll(file, sock, wait);
1104}
1105
89bddce5 1106static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1107{
b69aee04 1108 struct socket *sock = file->private_data;
1da177e4
LT
1109
1110 return sock->ops->mmap(file, sock, vma);
1111}
1112
20380731 1113static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1114{
1115 /*
89bddce5
SH
1116 * It was possible the inode is NULL we were
1117 * closing an unfinished socket.
1da177e4
LT
1118 */
1119
89bddce5 1120 if (!inode) {
1da177e4
LT
1121 printk(KERN_DEBUG "sock_close: NULL inode\n");
1122 return 0;
1123 }
1da177e4
LT
1124 sock_release(SOCKET_I(inode));
1125 return 0;
1126}
1127
1128/*
1129 * Update the socket async list
1130 *
1131 * Fasync_list locking strategy.
1132 *
1133 * 1. fasync_list is modified only under process context socket lock
1134 * i.e. under semaphore.
1135 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1136 * or under socket lock
1da177e4
LT
1137 */
1138
1139static int sock_fasync(int fd, struct file *filp, int on)
1140{
989a2979
ED
1141 struct socket *sock = filp->private_data;
1142 struct sock *sk = sock->sk;
eaefd110 1143 struct socket_wq *wq;
1da177e4 1144
989a2979 1145 if (sk == NULL)
1da177e4 1146 return -EINVAL;
1da177e4
LT
1147
1148 lock_sock(sk);
eaefd110
ED
1149 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1150 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1151
eaefd110 1152 if (!wq->fasync_list)
989a2979
ED
1153 sock_reset_flag(sk, SOCK_FASYNC);
1154 else
bcdce719 1155 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1156
989a2979 1157 release_sock(sk);
1da177e4
LT
1158 return 0;
1159}
1160
43815482 1161/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1162
1163int sock_wake_async(struct socket *sock, int how, int band)
1164{
43815482
ED
1165 struct socket_wq *wq;
1166
1167 if (!sock)
1168 return -1;
1169 rcu_read_lock();
1170 wq = rcu_dereference(sock->wq);
1171 if (!wq || !wq->fasync_list) {
1172 rcu_read_unlock();
1da177e4 1173 return -1;
43815482 1174 }
89bddce5 1175 switch (how) {
8d8ad9d7 1176 case SOCK_WAKE_WAITD:
1da177e4
LT
1177 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1178 break;
1179 goto call_kill;
8d8ad9d7 1180 case SOCK_WAKE_SPACE:
1da177e4
LT
1181 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1182 break;
1183 /* fall through */
8d8ad9d7 1184 case SOCK_WAKE_IO:
89bddce5 1185call_kill:
43815482 1186 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1187 break;
8d8ad9d7 1188 case SOCK_WAKE_URG:
43815482 1189 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1190 }
43815482 1191 rcu_read_unlock();
1da177e4
LT
1192 return 0;
1193}
c6d409cf 1194EXPORT_SYMBOL(sock_wake_async);
1da177e4 1195
721db93a 1196int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1197 struct socket **res, int kern)
1da177e4
LT
1198{
1199 int err;
1200 struct socket *sock;
55737fda 1201 const struct net_proto_family *pf;
1da177e4
LT
1202
1203 /*
89bddce5 1204 * Check protocol is in range
1da177e4
LT
1205 */
1206 if (family < 0 || family >= NPROTO)
1207 return -EAFNOSUPPORT;
1208 if (type < 0 || type >= SOCK_MAX)
1209 return -EINVAL;
1210
1211 /* Compatibility.
1212
1213 This uglymoron is moved from INET layer to here to avoid
1214 deadlock in module load.
1215 */
1216 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1217 static int warned;
1da177e4
LT
1218 if (!warned) {
1219 warned = 1;
89bddce5
SH
1220 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1221 current->comm);
1da177e4
LT
1222 }
1223 family = PF_PACKET;
1224 }
1225
1226 err = security_socket_create(family, type, protocol, kern);
1227 if (err)
1228 return err;
89bddce5 1229
55737fda
SH
1230 /*
1231 * Allocate the socket and allow the family to set things up. if
1232 * the protocol is 0, the family is instructed to select an appropriate
1233 * default.
1234 */
1235 sock = sock_alloc();
1236 if (!sock) {
1237 if (net_ratelimit())
1238 printk(KERN_WARNING "socket: no more sockets\n");
1239 return -ENFILE; /* Not exactly a match, but its the
1240 closest posix thing */
1241 }
1242
1243 sock->type = type;
1244
95a5afca 1245#ifdef CONFIG_MODULES
89bddce5
SH
1246 /* Attempt to load a protocol module if the find failed.
1247 *
1248 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1249 * requested real, full-featured networking support upon configuration.
1250 * Otherwise module support will break!
1251 */
190683a9 1252 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1253 request_module("net-pf-%d", family);
1da177e4
LT
1254#endif
1255
55737fda
SH
1256 rcu_read_lock();
1257 pf = rcu_dereference(net_families[family]);
1258 err = -EAFNOSUPPORT;
1259 if (!pf)
1260 goto out_release;
1da177e4
LT
1261
1262 /*
1263 * We will call the ->create function, that possibly is in a loadable
1264 * module, so we have to bump that loadable module refcnt first.
1265 */
55737fda 1266 if (!try_module_get(pf->owner))
1da177e4
LT
1267 goto out_release;
1268
55737fda
SH
1269 /* Now protected by module ref count */
1270 rcu_read_unlock();
1271
3f378b68 1272 err = pf->create(net, sock, protocol, kern);
55737fda 1273 if (err < 0)
1da177e4 1274 goto out_module_put;
a79af59e 1275
1da177e4
LT
1276 /*
1277 * Now to bump the refcnt of the [loadable] module that owns this
1278 * socket at sock_release time we decrement its refcnt.
1279 */
55737fda
SH
1280 if (!try_module_get(sock->ops->owner))
1281 goto out_module_busy;
1282
1da177e4
LT
1283 /*
1284 * Now that we're done with the ->create function, the [loadable]
1285 * module can have its refcnt decremented
1286 */
55737fda 1287 module_put(pf->owner);
7420ed23
VY
1288 err = security_socket_post_create(sock, family, type, protocol, kern);
1289 if (err)
3b185525 1290 goto out_sock_release;
55737fda 1291 *res = sock;
1da177e4 1292
55737fda
SH
1293 return 0;
1294
1295out_module_busy:
1296 err = -EAFNOSUPPORT;
1da177e4 1297out_module_put:
55737fda
SH
1298 sock->ops = NULL;
1299 module_put(pf->owner);
1300out_sock_release:
1da177e4 1301 sock_release(sock);
55737fda
SH
1302 return err;
1303
1304out_release:
1305 rcu_read_unlock();
1306 goto out_sock_release;
1da177e4 1307}
721db93a 1308EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1309
1310int sock_create(int family, int type, int protocol, struct socket **res)
1311{
1b8d7ae4 1312 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1313}
c6d409cf 1314EXPORT_SYMBOL(sock_create);
1da177e4
LT
1315
1316int sock_create_kern(int family, int type, int protocol, struct socket **res)
1317{
1b8d7ae4 1318 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1319}
c6d409cf 1320EXPORT_SYMBOL(sock_create_kern);
1da177e4 1321
3e0fa65f 1322SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1323{
1324 int retval;
1325 struct socket *sock;
a677a039
UD
1326 int flags;
1327
e38b36f3
UD
1328 /* Check the SOCK_* constants for consistency. */
1329 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1330 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1331 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1332 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1333
a677a039 1334 flags = type & ~SOCK_TYPE_MASK;
77d27200 1335 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1336 return -EINVAL;
1337 type &= SOCK_TYPE_MASK;
1da177e4 1338
aaca0bdc
UD
1339 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1340 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1341
1da177e4
LT
1342 retval = sock_create(family, type, protocol, &sock);
1343 if (retval < 0)
1344 goto out;
1345
77d27200 1346 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1347 if (retval < 0)
1348 goto out_release;
1349
1350out:
1351 /* It may be already another descriptor 8) Not kernel problem. */
1352 return retval;
1353
1354out_release:
1355 sock_release(sock);
1356 return retval;
1357}
1358
1359/*
1360 * Create a pair of connected sockets.
1361 */
1362
3e0fa65f
HC
1363SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1364 int __user *, usockvec)
1da177e4
LT
1365{
1366 struct socket *sock1, *sock2;
1367 int fd1, fd2, err;
db349509 1368 struct file *newfile1, *newfile2;
a677a039
UD
1369 int flags;
1370
1371 flags = type & ~SOCK_TYPE_MASK;
77d27200 1372 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1373 return -EINVAL;
1374 type &= SOCK_TYPE_MASK;
1da177e4 1375
aaca0bdc
UD
1376 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1377 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1378
1da177e4
LT
1379 /*
1380 * Obtain the first socket and check if the underlying protocol
1381 * supports the socketpair call.
1382 */
1383
1384 err = sock_create(family, type, protocol, &sock1);
1385 if (err < 0)
1386 goto out;
1387
1388 err = sock_create(family, type, protocol, &sock2);
1389 if (err < 0)
1390 goto out_release_1;
1391
1392 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1393 if (err < 0)
1da177e4
LT
1394 goto out_release_both;
1395
7cbe66b6 1396 fd1 = sock_alloc_file(sock1, &newfile1, flags);
bf3c23d1
DM
1397 if (unlikely(fd1 < 0)) {
1398 err = fd1;
db349509 1399 goto out_release_both;
bf3c23d1 1400 }
1da177e4 1401
7cbe66b6 1402 fd2 = sock_alloc_file(sock2, &newfile2, flags);
198de4d7
AV
1403 if (unlikely(fd2 < 0)) {
1404 err = fd2;
1405 fput(newfile1);
1406 put_unused_fd(fd1);
1407 sock_release(sock2);
1408 goto out;
db349509
AV
1409 }
1410
157cf649 1411 audit_fd_pair(fd1, fd2);
db349509
AV
1412 fd_install(fd1, newfile1);
1413 fd_install(fd2, newfile2);
1da177e4
LT
1414 /* fd1 and fd2 may be already another descriptors.
1415 * Not kernel problem.
1416 */
1417
89bddce5 1418 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1419 if (!err)
1420 err = put_user(fd2, &usockvec[1]);
1421 if (!err)
1422 return 0;
1423
1424 sys_close(fd2);
1425 sys_close(fd1);
1426 return err;
1427
1da177e4 1428out_release_both:
89bddce5 1429 sock_release(sock2);
1da177e4 1430out_release_1:
89bddce5 1431 sock_release(sock1);
1da177e4
LT
1432out:
1433 return err;
1434}
1435
1da177e4
LT
1436/*
1437 * Bind a name to a socket. Nothing much to do here since it's
1438 * the protocol's responsibility to handle the local address.
1439 *
1440 * We move the socket address to kernel space before we call
1441 * the protocol layer (having also checked the address is ok).
1442 */
1443
20f37034 1444SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1445{
1446 struct socket *sock;
230b1839 1447 struct sockaddr_storage address;
6cb153ca 1448 int err, fput_needed;
1da177e4 1449
89bddce5 1450 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1451 if (sock) {
43db362d 1452 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1453 if (err >= 0) {
1454 err = security_socket_bind(sock,
230b1839 1455 (struct sockaddr *)&address,
89bddce5 1456 addrlen);
6cb153ca
BL
1457 if (!err)
1458 err = sock->ops->bind(sock,
89bddce5 1459 (struct sockaddr *)
230b1839 1460 &address, addrlen);
1da177e4 1461 }
6cb153ca 1462 fput_light(sock->file, fput_needed);
89bddce5 1463 }
1da177e4
LT
1464 return err;
1465}
1466
1da177e4
LT
1467/*
1468 * Perform a listen. Basically, we allow the protocol to do anything
1469 * necessary for a listen, and if that works, we mark the socket as
1470 * ready for listening.
1471 */
1472
3e0fa65f 1473SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1474{
1475 struct socket *sock;
6cb153ca 1476 int err, fput_needed;
b8e1f9b5 1477 int somaxconn;
89bddce5
SH
1478
1479 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1480 if (sock) {
8efa6e93 1481 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1482 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1483 backlog = somaxconn;
1da177e4
LT
1484
1485 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1486 if (!err)
1487 err = sock->ops->listen(sock, backlog);
1da177e4 1488
6cb153ca 1489 fput_light(sock->file, fput_needed);
1da177e4
LT
1490 }
1491 return err;
1492}
1493
1da177e4
LT
1494/*
1495 * For accept, we attempt to create a new socket, set up the link
1496 * with the client, wake up the client, then return the new
1497 * connected fd. We collect the address of the connector in kernel
1498 * space and move it to user at the very end. This is unclean because
1499 * we open the socket then return an error.
1500 *
1501 * 1003.1g adds the ability to recvmsg() to query connection pending
1502 * status to recvmsg. We need to add that support in a way thats
1503 * clean when we restucture accept also.
1504 */
1505
20f37034
HC
1506SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1507 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1508{
1509 struct socket *sock, *newsock;
39d8c1b6 1510 struct file *newfile;
6cb153ca 1511 int err, len, newfd, fput_needed;
230b1839 1512 struct sockaddr_storage address;
1da177e4 1513
77d27200 1514 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1515 return -EINVAL;
1516
1517 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1518 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1519
6cb153ca 1520 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1521 if (!sock)
1522 goto out;
1523
1524 err = -ENFILE;
c6d409cf
ED
1525 newsock = sock_alloc();
1526 if (!newsock)
1da177e4
LT
1527 goto out_put;
1528
1529 newsock->type = sock->type;
1530 newsock->ops = sock->ops;
1531
1da177e4
LT
1532 /*
1533 * We don't need try_module_get here, as the listening socket (sock)
1534 * has the protocol module (sock->ops->owner) held.
1535 */
1536 __module_get(newsock->ops->owner);
1537
7cbe66b6 1538 newfd = sock_alloc_file(newsock, &newfile, flags);
39d8c1b6
DM
1539 if (unlikely(newfd < 0)) {
1540 err = newfd;
9a1875e6
DM
1541 sock_release(newsock);
1542 goto out_put;
39d8c1b6
DM
1543 }
1544
a79af59e
FF
1545 err = security_socket_accept(sock, newsock);
1546 if (err)
39d8c1b6 1547 goto out_fd;
a79af59e 1548
1da177e4
LT
1549 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1550 if (err < 0)
39d8c1b6 1551 goto out_fd;
1da177e4
LT
1552
1553 if (upeer_sockaddr) {
230b1839 1554 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1555 &len, 2) < 0) {
1da177e4 1556 err = -ECONNABORTED;
39d8c1b6 1557 goto out_fd;
1da177e4 1558 }
43db362d 1559 err = move_addr_to_user(&address,
230b1839 1560 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1561 if (err < 0)
39d8c1b6 1562 goto out_fd;
1da177e4
LT
1563 }
1564
1565 /* File flags are not inherited via accept() unlike another OSes. */
1566
39d8c1b6
DM
1567 fd_install(newfd, newfile);
1568 err = newfd;
1da177e4 1569
1da177e4 1570out_put:
6cb153ca 1571 fput_light(sock->file, fput_needed);
1da177e4
LT
1572out:
1573 return err;
39d8c1b6 1574out_fd:
9606a216 1575 fput(newfile);
39d8c1b6 1576 put_unused_fd(newfd);
1da177e4
LT
1577 goto out_put;
1578}
1579
20f37034
HC
1580SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1581 int __user *, upeer_addrlen)
aaca0bdc 1582{
de11defe 1583 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1584}
1585
1da177e4
LT
1586/*
1587 * Attempt to connect to a socket with the server address. The address
1588 * is in user space so we verify it is OK and move it to kernel space.
1589 *
1590 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1591 * break bindings
1592 *
1593 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1594 * other SEQPACKET protocols that take time to connect() as it doesn't
1595 * include the -EINPROGRESS status for such sockets.
1596 */
1597
20f37034
HC
1598SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1599 int, addrlen)
1da177e4
LT
1600{
1601 struct socket *sock;
230b1839 1602 struct sockaddr_storage address;
6cb153ca 1603 int err, fput_needed;
1da177e4 1604
6cb153ca 1605 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1606 if (!sock)
1607 goto out;
43db362d 1608 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1609 if (err < 0)
1610 goto out_put;
1611
89bddce5 1612 err =
230b1839 1613 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1614 if (err)
1615 goto out_put;
1616
230b1839 1617 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1618 sock->file->f_flags);
1619out_put:
6cb153ca 1620 fput_light(sock->file, fput_needed);
1da177e4
LT
1621out:
1622 return err;
1623}
1624
1625/*
1626 * Get the local address ('name') of a socket object. Move the obtained
1627 * name to user space.
1628 */
1629
20f37034
HC
1630SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1631 int __user *, usockaddr_len)
1da177e4
LT
1632{
1633 struct socket *sock;
230b1839 1634 struct sockaddr_storage address;
6cb153ca 1635 int len, err, fput_needed;
89bddce5 1636
6cb153ca 1637 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1638 if (!sock)
1639 goto out;
1640
1641 err = security_socket_getsockname(sock);
1642 if (err)
1643 goto out_put;
1644
230b1839 1645 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1646 if (err)
1647 goto out_put;
43db362d 1648 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1649
1650out_put:
6cb153ca 1651 fput_light(sock->file, fput_needed);
1da177e4
LT
1652out:
1653 return err;
1654}
1655
1656/*
1657 * Get the remote address ('name') of a socket object. Move the obtained
1658 * name to user space.
1659 */
1660
20f37034
HC
1661SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1662 int __user *, usockaddr_len)
1da177e4
LT
1663{
1664 struct socket *sock;
230b1839 1665 struct sockaddr_storage address;
6cb153ca 1666 int len, err, fput_needed;
1da177e4 1667
89bddce5
SH
1668 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1669 if (sock != NULL) {
1da177e4
LT
1670 err = security_socket_getpeername(sock);
1671 if (err) {
6cb153ca 1672 fput_light(sock->file, fput_needed);
1da177e4
LT
1673 return err;
1674 }
1675
89bddce5 1676 err =
230b1839 1677 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1678 1);
1da177e4 1679 if (!err)
43db362d 1680 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1681 usockaddr_len);
6cb153ca 1682 fput_light(sock->file, fput_needed);
1da177e4
LT
1683 }
1684 return err;
1685}
1686
1687/*
1688 * Send a datagram to a given address. We move the address into kernel
1689 * space and check the user space data area is readable before invoking
1690 * the protocol.
1691 */
1692
3e0fa65f 1693SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1694 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1695 int, addr_len)
1da177e4
LT
1696{
1697 struct socket *sock;
230b1839 1698 struct sockaddr_storage address;
1da177e4
LT
1699 int err;
1700 struct msghdr msg;
1701 struct iovec iov;
6cb153ca 1702 int fput_needed;
6cb153ca 1703
253eacc0
LT
1704 if (len > INT_MAX)
1705 len = INT_MAX;
de0fa95c
PE
1706 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1707 if (!sock)
4387ff75 1708 goto out;
6cb153ca 1709
89bddce5
SH
1710 iov.iov_base = buff;
1711 iov.iov_len = len;
1712 msg.msg_name = NULL;
1713 msg.msg_iov = &iov;
1714 msg.msg_iovlen = 1;
1715 msg.msg_control = NULL;
1716 msg.msg_controllen = 0;
1717 msg.msg_namelen = 0;
6cb153ca 1718 if (addr) {
43db362d 1719 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1720 if (err < 0)
1721 goto out_put;
230b1839 1722 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1723 msg.msg_namelen = addr_len;
1da177e4
LT
1724 }
1725 if (sock->file->f_flags & O_NONBLOCK)
1726 flags |= MSG_DONTWAIT;
1727 msg.msg_flags = flags;
1728 err = sock_sendmsg(sock, &msg, len);
1729
89bddce5 1730out_put:
de0fa95c 1731 fput_light(sock->file, fput_needed);
4387ff75 1732out:
1da177e4
LT
1733 return err;
1734}
1735
1736/*
89bddce5 1737 * Send a datagram down a socket.
1da177e4
LT
1738 */
1739
3e0fa65f 1740SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1741 unsigned int, flags)
1da177e4
LT
1742{
1743 return sys_sendto(fd, buff, len, flags, NULL, 0);
1744}
1745
1746/*
89bddce5 1747 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1748 * sender. We verify the buffers are writable and if needed move the
1749 * sender address from kernel to user space.
1750 */
1751
3e0fa65f 1752SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1753 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1754 int __user *, addr_len)
1da177e4
LT
1755{
1756 struct socket *sock;
1757 struct iovec iov;
1758 struct msghdr msg;
230b1839 1759 struct sockaddr_storage address;
89bddce5 1760 int err, err2;
6cb153ca
BL
1761 int fput_needed;
1762
253eacc0
LT
1763 if (size > INT_MAX)
1764 size = INT_MAX;
de0fa95c 1765 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1766 if (!sock)
de0fa95c 1767 goto out;
1da177e4 1768
89bddce5
SH
1769 msg.msg_control = NULL;
1770 msg.msg_controllen = 0;
1771 msg.msg_iovlen = 1;
1772 msg.msg_iov = &iov;
1773 iov.iov_len = size;
1774 iov.iov_base = ubuf;
230b1839
YH
1775 msg.msg_name = (struct sockaddr *)&address;
1776 msg.msg_namelen = sizeof(address);
1da177e4
LT
1777 if (sock->file->f_flags & O_NONBLOCK)
1778 flags |= MSG_DONTWAIT;
89bddce5 1779 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1780
89bddce5 1781 if (err >= 0 && addr != NULL) {
43db362d 1782 err2 = move_addr_to_user(&address,
230b1839 1783 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1784 if (err2 < 0)
1785 err = err2;
1da177e4 1786 }
de0fa95c
PE
1787
1788 fput_light(sock->file, fput_needed);
4387ff75 1789out:
1da177e4
LT
1790 return err;
1791}
1792
1793/*
89bddce5 1794 * Receive a datagram from a socket.
1da177e4
LT
1795 */
1796
89bddce5 1797asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
95c96174 1798 unsigned int flags)
1da177e4
LT
1799{
1800 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1801}
1802
1803/*
1804 * Set a socket option. Because we don't know the option lengths we have
1805 * to pass the user mode parameter for the protocols to sort out.
1806 */
1807
20f37034
HC
1808SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1809 char __user *, optval, int, optlen)
1da177e4 1810{
6cb153ca 1811 int err, fput_needed;
1da177e4
LT
1812 struct socket *sock;
1813
1814 if (optlen < 0)
1815 return -EINVAL;
89bddce5
SH
1816
1817 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1818 if (sock != NULL) {
1819 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1820 if (err)
1821 goto out_put;
1da177e4
LT
1822
1823 if (level == SOL_SOCKET)
89bddce5
SH
1824 err =
1825 sock_setsockopt(sock, level, optname, optval,
1826 optlen);
1da177e4 1827 else
89bddce5
SH
1828 err =
1829 sock->ops->setsockopt(sock, level, optname, optval,
1830 optlen);
6cb153ca
BL
1831out_put:
1832 fput_light(sock->file, fput_needed);
1da177e4
LT
1833 }
1834 return err;
1835}
1836
1837/*
1838 * Get a socket option. Because we don't know the option lengths we have
1839 * to pass a user mode parameter for the protocols to sort out.
1840 */
1841
20f37034
HC
1842SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1843 char __user *, optval, int __user *, optlen)
1da177e4 1844{
6cb153ca 1845 int err, fput_needed;
1da177e4
LT
1846 struct socket *sock;
1847
89bddce5
SH
1848 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1849 if (sock != NULL) {
6cb153ca
BL
1850 err = security_socket_getsockopt(sock, level, optname);
1851 if (err)
1852 goto out_put;
1da177e4
LT
1853
1854 if (level == SOL_SOCKET)
89bddce5
SH
1855 err =
1856 sock_getsockopt(sock, level, optname, optval,
1857 optlen);
1da177e4 1858 else
89bddce5
SH
1859 err =
1860 sock->ops->getsockopt(sock, level, optname, optval,
1861 optlen);
6cb153ca
BL
1862out_put:
1863 fput_light(sock->file, fput_needed);
1da177e4
LT
1864 }
1865 return err;
1866}
1867
1da177e4
LT
1868/*
1869 * Shutdown a socket.
1870 */
1871
754fe8d2 1872SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1873{
6cb153ca 1874 int err, fput_needed;
1da177e4
LT
1875 struct socket *sock;
1876
89bddce5
SH
1877 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1878 if (sock != NULL) {
1da177e4 1879 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1880 if (!err)
1881 err = sock->ops->shutdown(sock, how);
1882 fput_light(sock->file, fput_needed);
1da177e4
LT
1883 }
1884 return err;
1885}
1886
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * Each one reads a variable called `flags` from the scope it is expanded
 * in, and needs both `msg` and a pointer named `msg` with a `_compat`
 * suffix to be visible there.
 */
#define COMPAT_MSG(msg, member) \
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1893
c71d8ebe
TH
1894struct used_address {
1895 struct sockaddr_storage name;
1896 unsigned int name_len;
1897};
1898
228e548e 1899static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 1900 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1901 struct used_address *used_address)
1da177e4 1902{
89bddce5
SH
1903 struct compat_msghdr __user *msg_compat =
1904 (struct compat_msghdr __user *)msg;
230b1839 1905 struct sockaddr_storage address;
1da177e4 1906 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1907 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1908 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1909 /* 20 is size of ipv6_pktinfo */
1da177e4 1910 unsigned char *ctl_buf = ctl;
1da177e4 1911 int err, ctl_len, iov_size, total_len;
89bddce5 1912
1da177e4
LT
1913 err = -EFAULT;
1914 if (MSG_CMSG_COMPAT & flags) {
228e548e 1915 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 1916 return -EFAULT;
228e548e 1917 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1918 return -EFAULT;
1919
1da177e4
LT
1920 /* do not move before msg_sys is valid */
1921 err = -EMSGSIZE;
228e548e
AB
1922 if (msg_sys->msg_iovlen > UIO_MAXIOV)
1923 goto out;
1da177e4 1924
89bddce5 1925 /* Check whether to allocate the iovec area */
1da177e4 1926 err = -ENOMEM;
228e548e
AB
1927 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
1928 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1da177e4
LT
1929 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1930 if (!iov)
228e548e 1931 goto out;
1da177e4
LT
1932 }
1933
1934 /* This will also move the address data into kernel space */
1935 if (MSG_CMSG_COMPAT & flags) {
43db362d 1936 err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
1da177e4 1937 } else
43db362d 1938 err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
89bddce5 1939 if (err < 0)
1da177e4
LT
1940 goto out_freeiov;
1941 total_len = err;
1942
1943 err = -ENOBUFS;
1944
228e548e 1945 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1946 goto out_freeiov;
228e548e 1947 ctl_len = msg_sys->msg_controllen;
1da177e4 1948 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1949 err =
228e548e 1950 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1951 sizeof(ctl));
1da177e4
LT
1952 if (err)
1953 goto out_freeiov;
228e548e
AB
1954 ctl_buf = msg_sys->msg_control;
1955 ctl_len = msg_sys->msg_controllen;
1da177e4 1956 } else if (ctl_len) {
89bddce5 1957 if (ctl_len > sizeof(ctl)) {
1da177e4 1958 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1959 if (ctl_buf == NULL)
1da177e4
LT
1960 goto out_freeiov;
1961 }
1962 err = -EFAULT;
1963 /*
228e548e 1964 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1965 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1966 * checking falls down on this.
1967 */
fb8621bb 1968 if (copy_from_user(ctl_buf,
228e548e 1969 (void __user __force *)msg_sys->msg_control,
89bddce5 1970 ctl_len))
1da177e4 1971 goto out_freectl;
228e548e 1972 msg_sys->msg_control = ctl_buf;
1da177e4 1973 }
228e548e 1974 msg_sys->msg_flags = flags;
1da177e4
LT
1975
1976 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1977 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1978 /*
1979 * If this is sendmmsg() and current destination address is same as
1980 * previously succeeded address, omit asking LSM's decision.
1981 * used_address->name_len is initialized to UINT_MAX so that the first
1982 * destination address never matches.
1983 */
bc909d9d
MD
1984 if (used_address && msg_sys->msg_name &&
1985 used_address->name_len == msg_sys->msg_namelen &&
1986 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
1987 used_address->name_len)) {
1988 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
1989 goto out_freectl;
1990 }
1991 err = sock_sendmsg(sock, msg_sys, total_len);
1992 /*
1993 * If this is sendmmsg() and sending to current destination address was
1994 * successful, remember it.
1995 */
1996 if (used_address && err >= 0) {
1997 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1998 if (msg_sys->msg_name)
1999 memcpy(&used_address->name, msg_sys->msg_name,
2000 used_address->name_len);
c71d8ebe 2001 }
1da177e4
LT
2002
2003out_freectl:
89bddce5 2004 if (ctl_buf != ctl)
1da177e4
LT
2005 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2006out_freeiov:
2007 if (iov != iovstack)
2008 sock_kfree_s(sock->sk, iov, iov_size);
228e548e
AB
2009out:
2010 return err;
2011}
2012
2013/*
2014 * BSD sendmsg interface
2015 */
2016
95c96174 2017SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
228e548e
AB
2018{
2019 int fput_needed, err;
2020 struct msghdr msg_sys;
2021 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2022
2023 if (!sock)
2024 goto out;
2025
c71d8ebe 2026 err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2027
6cb153ca 2028 fput_light(sock->file, fput_needed);
89bddce5 2029out:
1da177e4
LT
2030 return err;
2031}
2032
228e548e
AB
2033/*
2034 * Linux sendmmsg interface
2035 */
2036
2037int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2038 unsigned int flags)
2039{
2040 int fput_needed, err, datagrams;
2041 struct socket *sock;
2042 struct mmsghdr __user *entry;
2043 struct compat_mmsghdr __user *compat_entry;
2044 struct msghdr msg_sys;
c71d8ebe 2045 struct used_address used_address;
228e548e 2046
98382f41
AB
2047 if (vlen > UIO_MAXIOV)
2048 vlen = UIO_MAXIOV;
228e548e
AB
2049
2050 datagrams = 0;
2051
2052 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2053 if (!sock)
2054 return err;
2055
c71d8ebe 2056 used_address.name_len = UINT_MAX;
228e548e
AB
2057 entry = mmsg;
2058 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2059 err = 0;
228e548e
AB
2060
2061 while (datagrams < vlen) {
228e548e
AB
2062 if (MSG_CMSG_COMPAT & flags) {
2063 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
c71d8ebe 2064 &msg_sys, flags, &used_address);
228e548e
AB
2065 if (err < 0)
2066 break;
2067 err = __put_user(err, &compat_entry->msg_len);
2068 ++compat_entry;
2069 } else {
2070 err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
c71d8ebe 2071 &msg_sys, flags, &used_address);
228e548e
AB
2072 if (err < 0)
2073 break;
2074 err = put_user(err, &entry->msg_len);
2075 ++entry;
2076 }
2077
2078 if (err)
2079 break;
2080 ++datagrams;
2081 }
2082
228e548e
AB
2083 fput_light(sock->file, fput_needed);
2084
728ffb86
AB
2085 /* We only return an error if no datagrams were able to be sent */
2086 if (datagrams != 0)
228e548e
AB
2087 return datagrams;
2088
228e548e
AB
2089 return err;
2090}
2091
2092SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2093 unsigned int, vlen, unsigned int, flags)
2094{
2095 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2096}
2097
a2e27255 2098static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 2099 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2100{
89bddce5
SH
2101 struct compat_msghdr __user *msg_compat =
2102 (struct compat_msghdr __user *)msg;
1da177e4 2103 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2104 struct iovec *iov = iovstack;
1da177e4
LT
2105 unsigned long cmsg_ptr;
2106 int err, iov_size, total_len, len;
2107
2108 /* kernel mode address */
230b1839 2109 struct sockaddr_storage addr;
1da177e4
LT
2110
2111 /* user mode address pointers */
2112 struct sockaddr __user *uaddr;
2113 int __user *uaddr_len;
89bddce5 2114
1da177e4 2115 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2116 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2117 return -EFAULT;
c6d409cf 2118 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
89bddce5 2119 return -EFAULT;
1da177e4 2120
1da177e4 2121 err = -EMSGSIZE;
a2e27255
ACM
2122 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2123 goto out;
89bddce5
SH
2124
2125 /* Check whether to allocate the iovec area */
1da177e4 2126 err = -ENOMEM;
a2e27255
ACM
2127 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
2128 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1da177e4
LT
2129 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
2130 if (!iov)
a2e27255 2131 goto out;
1da177e4
LT
2132 }
2133
2134 /*
89bddce5
SH
2135 * Save the user-mode address (verify_iovec will change the
2136 * kernel msghdr to use the kernel address space)
1da177e4 2137 */
89bddce5 2138
a2e27255 2139 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4
LT
2140 uaddr_len = COMPAT_NAMELEN(msg);
2141 if (MSG_CMSG_COMPAT & flags) {
43db362d 2142 err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4 2143 } else
43db362d 2144 err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4
LT
2145 if (err < 0)
2146 goto out_freeiov;
89bddce5 2147 total_len = err;
1da177e4 2148
a2e27255
ACM
2149 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2150 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2151
1da177e4
LT
2152 if (sock->file->f_flags & O_NONBLOCK)
2153 flags |= MSG_DONTWAIT;
a2e27255
ACM
2154 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2155 total_len, flags);
1da177e4
LT
2156 if (err < 0)
2157 goto out_freeiov;
2158 len = err;
2159
2160 if (uaddr != NULL) {
43db362d 2161 err = move_addr_to_user(&addr,
a2e27255 2162 msg_sys->msg_namelen, uaddr,
89bddce5 2163 uaddr_len);
1da177e4
LT
2164 if (err < 0)
2165 goto out_freeiov;
2166 }
a2e27255 2167 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2168 COMPAT_FLAGS(msg));
1da177e4
LT
2169 if (err)
2170 goto out_freeiov;
2171 if (MSG_CMSG_COMPAT & flags)
a2e27255 2172 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2173 &msg_compat->msg_controllen);
2174 else
a2e27255 2175 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2176 &msg->msg_controllen);
2177 if (err)
2178 goto out_freeiov;
2179 err = len;
2180
2181out_freeiov:
2182 if (iov != iovstack)
2183 sock_kfree_s(sock->sk, iov, iov_size);
a2e27255
ACM
2184out:
2185 return err;
2186}
2187
2188/*
2189 * BSD recvmsg interface
2190 */
2191
2192SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2193 unsigned int, flags)
2194{
2195 int fput_needed, err;
2196 struct msghdr msg_sys;
2197 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2198
2199 if (!sock)
2200 goto out;
2201
2202 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2203
6cb153ca 2204 fput_light(sock->file, fput_needed);
1da177e4
LT
2205out:
2206 return err;
2207}
2208
a2e27255
ACM
2209/*
2210 * Linux recvmmsg interface
2211 */
2212
2213int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2214 unsigned int flags, struct timespec *timeout)
2215{
2216 int fput_needed, err, datagrams;
2217 struct socket *sock;
2218 struct mmsghdr __user *entry;
d7256d0e 2219 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2220 struct msghdr msg_sys;
2221 struct timespec end_time;
2222
2223 if (timeout &&
2224 poll_select_set_timeout(&end_time, timeout->tv_sec,
2225 timeout->tv_nsec))
2226 return -EINVAL;
2227
2228 datagrams = 0;
2229
2230 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2231 if (!sock)
2232 return err;
2233
2234 err = sock_error(sock->sk);
2235 if (err)
2236 goto out_put;
2237
2238 entry = mmsg;
d7256d0e 2239 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2240
2241 while (datagrams < vlen) {
2242 /*
2243 * No need to ask LSM for more than the first datagram.
2244 */
d7256d0e
JMG
2245 if (MSG_CMSG_COMPAT & flags) {
2246 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
b9eb8b87
AB
2247 &msg_sys, flags & ~MSG_WAITFORONE,
2248 datagrams);
d7256d0e
JMG
2249 if (err < 0)
2250 break;
2251 err = __put_user(err, &compat_entry->msg_len);
2252 ++compat_entry;
2253 } else {
2254 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
b9eb8b87
AB
2255 &msg_sys, flags & ~MSG_WAITFORONE,
2256 datagrams);
d7256d0e
JMG
2257 if (err < 0)
2258 break;
2259 err = put_user(err, &entry->msg_len);
2260 ++entry;
2261 }
2262
a2e27255
ACM
2263 if (err)
2264 break;
a2e27255
ACM
2265 ++datagrams;
2266
71c5c159
BB
2267 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2268 if (flags & MSG_WAITFORONE)
2269 flags |= MSG_DONTWAIT;
2270
a2e27255
ACM
2271 if (timeout) {
2272 ktime_get_ts(timeout);
2273 *timeout = timespec_sub(end_time, *timeout);
2274 if (timeout->tv_sec < 0) {
2275 timeout->tv_sec = timeout->tv_nsec = 0;
2276 break;
2277 }
2278
2279 /* Timeout, return less than vlen datagrams */
2280 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2281 break;
2282 }
2283
2284 /* Out of band data, return right away */
2285 if (msg_sys.msg_flags & MSG_OOB)
2286 break;
2287 }
2288
2289out_put:
2290 fput_light(sock->file, fput_needed);
1da177e4 2291
a2e27255
ACM
2292 if (err == 0)
2293 return datagrams;
2294
2295 if (datagrams != 0) {
2296 /*
2297 * We may return less entries than requested (vlen) if the
2298 * sock is non block and there aren't enough datagrams...
2299 */
2300 if (err != -EAGAIN) {
2301 /*
2302 * ... or if recvmsg returns an error after we
2303 * received some datagrams, where we record the
2304 * error to return on the next call or if the
2305 * app asks about it using getsockopt(SO_ERROR).
2306 */
2307 sock->sk->sk_err = -err;
2308 }
2309
2310 return datagrams;
2311 }
2312
2313 return err;
2314}
2315
2316SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2317 unsigned int, vlen, unsigned int, flags,
2318 struct timespec __user *, timeout)
2319{
2320 int datagrams;
2321 struct timespec timeout_sys;
2322
2323 if (!timeout)
2324 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2325
2326 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2327 return -EFAULT;
2328
2329 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2330
2331 if (datagrams > 0 &&
2332 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2333 datagrams = -EFAULT;
2334
2335 return datagrams;
2336}
2337
2338#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Size in bytes of the argument vector that sys_socketcall copies from
 * user space, indexed by call number.  Slot 0 is a placeholder: call
 * numbers below 1 are rejected before the table is consulted.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	/*  0 ..  4 */ AL(0), AL(3), AL(3), AL(3), AL(2),
	/*  5 ..  9 */ AL(3), AL(3), AL(3), AL(4), AL(4),
	/* 10 .. 14 */ AL(4), AL(6), AL(6), AL(2), AL(5),
	/* 15 .. 19 */ AL(5), AL(3), AL(3), AL(4), AL(5),
	/* 20       */ AL(4)
};

#undef AL
2349
2350/*
89bddce5 2351 * System call vectors.
1da177e4
LT
2352 *
2353 * Argument checking cleaned up. Saved 20% in size.
2354 * This function doesn't need to set the kernel lock because
89bddce5 2355 * it is set by the callees.
1da177e4
LT
2356 */
2357
3e0fa65f 2358SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4
LT
2359{
2360 unsigned long a[6];
89bddce5 2361 unsigned long a0, a1;
1da177e4 2362 int err;
47379052 2363 unsigned int len;
1da177e4 2364
228e548e 2365 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2366 return -EINVAL;
2367
47379052
AV
2368 len = nargs[call];
2369 if (len > sizeof(a))
2370 return -EINVAL;
2371
1da177e4 2372 /* copy_from_user should be SMP safe. */
47379052 2373 if (copy_from_user(a, args, len))
1da177e4 2374 return -EFAULT;
3ec3b2fb 2375
f3298dc4 2376 audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb 2377
89bddce5
SH
2378 a0 = a[0];
2379 a1 = a[1];
2380
2381 switch (call) {
2382 case SYS_SOCKET:
2383 err = sys_socket(a0, a1, a[2]);
2384 break;
2385 case SYS_BIND:
2386 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2387 break;
2388 case SYS_CONNECT:
2389 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2390 break;
2391 case SYS_LISTEN:
2392 err = sys_listen(a0, a1);
2393 break;
2394 case SYS_ACCEPT:
de11defe
UD
2395 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2396 (int __user *)a[2], 0);
89bddce5
SH
2397 break;
2398 case SYS_GETSOCKNAME:
2399 err =
2400 sys_getsockname(a0, (struct sockaddr __user *)a1,
2401 (int __user *)a[2]);
2402 break;
2403 case SYS_GETPEERNAME:
2404 err =
2405 sys_getpeername(a0, (struct sockaddr __user *)a1,
2406 (int __user *)a[2]);
2407 break;
2408 case SYS_SOCKETPAIR:
2409 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2410 break;
2411 case SYS_SEND:
2412 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2413 break;
2414 case SYS_SENDTO:
2415 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2416 (struct sockaddr __user *)a[4], a[5]);
2417 break;
2418 case SYS_RECV:
2419 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2420 break;
2421 case SYS_RECVFROM:
2422 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2423 (struct sockaddr __user *)a[4],
2424 (int __user *)a[5]);
2425 break;
2426 case SYS_SHUTDOWN:
2427 err = sys_shutdown(a0, a1);
2428 break;
2429 case SYS_SETSOCKOPT:
2430 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2431 break;
2432 case SYS_GETSOCKOPT:
2433 err =
2434 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2435 (int __user *)a[4]);
2436 break;
2437 case SYS_SENDMSG:
2438 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2439 break;
228e548e
AB
2440 case SYS_SENDMMSG:
2441 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2442 break;
89bddce5
SH
2443 case SYS_RECVMSG:
2444 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2445 break;
a2e27255
ACM
2446 case SYS_RECVMMSG:
2447 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2448 (struct timespec __user *)a[4]);
2449 break;
de11defe
UD
2450 case SYS_ACCEPT4:
2451 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2452 (int __user *)a[2], a[3]);
aaca0bdc 2453 break;
89bddce5
SH
2454 default:
2455 err = -EINVAL;
2456 break;
1da177e4
LT
2457 }
2458 return err;
2459}
2460
89bddce5 2461#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2462
55737fda
SH
2463/**
2464 * sock_register - add a socket protocol handler
2465 * @ops: description of protocol
2466 *
1da177e4
LT
2467 * This function is called by a protocol handler that wants to
2468 * advertise its address family, and have it linked into the
55737fda
SH
2469 * socket interface. The value ops->family coresponds to the
2470 * socket system call protocol family.
1da177e4 2471 */
f0fd27d4 2472int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2473{
2474 int err;
2475
2476 if (ops->family >= NPROTO) {
89bddce5
SH
2477 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2478 NPROTO);
1da177e4
LT
2479 return -ENOBUFS;
2480 }
55737fda
SH
2481
2482 spin_lock(&net_family_lock);
190683a9
ED
2483 if (rcu_dereference_protected(net_families[ops->family],
2484 lockdep_is_held(&net_family_lock)))
55737fda
SH
2485 err = -EEXIST;
2486 else {
cf778b00 2487 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2488 err = 0;
2489 }
55737fda
SH
2490 spin_unlock(&net_family_lock);
2491
89bddce5 2492 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2493 return err;
2494}
c6d409cf 2495EXPORT_SYMBOL(sock_register);
1da177e4 2496
55737fda
SH
2497/**
2498 * sock_unregister - remove a protocol handler
2499 * @family: protocol family to remove
2500 *
1da177e4
LT
2501 * This function is called by a protocol handler that wants to
2502 * remove its address family, and have it unlinked from the
55737fda
SH
2503 * new socket creation.
2504 *
2505 * If protocol handler is a module, then it can use module reference
2506 * counts to protect against new references. If protocol handler is not
2507 * a module then it needs to provide its own protection in
2508 * the ops->create routine.
1da177e4 2509 */
f0fd27d4 2510void sock_unregister(int family)
1da177e4 2511{
f0fd27d4 2512 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2513
55737fda 2514 spin_lock(&net_family_lock);
a9b3cd7f 2515 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2516 spin_unlock(&net_family_lock);
2517
2518 synchronize_rcu();
2519
89bddce5 2520 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4 2521}
c6d409cf 2522EXPORT_SYMBOL(sock_unregister);
1da177e4 2523
77d76ea3 2524static int __init sock_init(void)
1da177e4 2525{
b3e19d92
NP
2526 int err;
2527
1da177e4 2528 /*
89bddce5 2529 * Initialize sock SLAB cache.
1da177e4 2530 */
89bddce5 2531
1da177e4
LT
2532 sk_init();
2533
1da177e4 2534 /*
89bddce5 2535 * Initialize skbuff SLAB cache
1da177e4
LT
2536 */
2537 skb_init();
1da177e4
LT
2538
2539 /*
89bddce5 2540 * Initialize the protocols module.
1da177e4
LT
2541 */
2542
2543 init_inodecache();
b3e19d92
NP
2544
2545 err = register_filesystem(&sock_fs_type);
2546 if (err)
2547 goto out_fs;
1da177e4 2548 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2549 if (IS_ERR(sock_mnt)) {
2550 err = PTR_ERR(sock_mnt);
2551 goto out_mount;
2552 }
77d76ea3
AK
2553
2554 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2555 */
2556
2557#ifdef CONFIG_NETFILTER
2558 netfilter_init();
2559#endif
cbeb321a 2560
c1f19b51
RC
2561#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2562 skb_timestamping_init();
2563#endif
2564
b3e19d92
NP
2565out:
2566 return err;
2567
2568out_mount:
2569 unregister_filesystem(&sock_fs_type);
2570out_fs:
2571 goto out;
1da177e4
LT
2572}
2573
77d76ea3
AK
2574core_initcall(sock_init); /* early initcall */
2575
1da177e4
LT
2576#ifdef CONFIG_PROC_FS
2577void socket_seq_show(struct seq_file *seq)
2578{
2579 int cpu;
2580 int counter = 0;
2581
6f912042 2582 for_each_possible_cpu(cpu)
89bddce5 2583 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2584
2585 /* It can be negative, by the way. 8) */
2586 if (counter < 0)
2587 counter = 0;
2588
2589 seq_printf(seq, "sockets: used %d\n", counter);
2590}
89bddce5 2591#endif /* CONFIG_PROC_FS */
1da177e4 2592
89bbfc95 2593#ifdef CONFIG_COMPAT
6b96018b 2594static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2595 unsigned int cmd, void __user *up)
7a229387 2596{
7a229387
AB
2597 mm_segment_t old_fs = get_fs();
2598 struct timeval ktv;
2599 int err;
2600
2601 set_fs(KERNEL_DS);
6b96018b 2602 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2603 set_fs(old_fs);
644595f8
PA
2604 if (!err)
2605 err = compat_put_timeval(up, &ktv);
2606
7a229387
AB
2607 return err;
2608}
2609
6b96018b 2610static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2611 unsigned int cmd, void __user *up)
7a229387 2612{
7a229387
AB
2613 mm_segment_t old_fs = get_fs();
2614 struct timespec kts;
2615 int err;
2616
2617 set_fs(KERNEL_DS);
6b96018b 2618 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2619 set_fs(old_fs);
644595f8
PA
2620 if (!err)
2621 err = compat_put_timespec(up, &kts);
2622
7a229387
AB
2623 return err;
2624}
2625
6b96018b 2626static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2627{
2628 struct ifreq __user *uifr;
2629 int err;
2630
2631 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2632 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2633 return -EFAULT;
2634
6b96018b 2635 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2636 if (err)
2637 return err;
2638
6b96018b 2639 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2640 return -EFAULT;
2641
2642 return 0;
2643}
2644
6b96018b 2645static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2646{
6b96018b 2647 struct compat_ifconf ifc32;
7a229387
AB
2648 struct ifconf ifc;
2649 struct ifconf __user *uifc;
6b96018b 2650 struct compat_ifreq __user *ifr32;
7a229387
AB
2651 struct ifreq __user *ifr;
2652 unsigned int i, j;
2653 int err;
2654
6b96018b 2655 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2656 return -EFAULT;
2657
2658 if (ifc32.ifcbuf == 0) {
2659 ifc32.ifc_len = 0;
2660 ifc.ifc_len = 0;
2661 ifc.ifc_req = NULL;
2662 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2663 } else {
c6d409cf
ED
2664 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2665 sizeof(struct ifreq);
7a229387
AB
2666 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2667 ifc.ifc_len = len;
2668 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2669 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2670 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2671 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2672 return -EFAULT;
2673 ifr++;
2674 ifr32++;
2675 }
2676 }
2677 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2678 return -EFAULT;
2679
6b96018b 2680 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2681 if (err)
2682 return err;
2683
2684 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2685 return -EFAULT;
2686
2687 ifr = ifc.ifc_req;
2688 ifr32 = compat_ptr(ifc32.ifcbuf);
2689 for (i = 0, j = 0;
c6d409cf
ED
2690 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2691 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2692 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2693 return -EFAULT;
2694 ifr32++;
2695 ifr++;
2696 }
2697
2698 if (ifc32.ifcbuf == 0) {
2699 /* Translate from 64-bit structure multiple to
2700 * a 32-bit one.
2701 */
2702 i = ifc.ifc_len;
6b96018b 2703 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2704 ifc32.ifc_len = i;
2705 } else {
2706 ifc32.ifc_len = i;
2707 }
6b96018b 2708 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2709 return -EFAULT;
2710
2711 return 0;
2712}
2713
6b96018b 2714static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2715{
3a7da39d
BH
2716 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2717 bool convert_in = false, convert_out = false;
2718 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2719 struct ethtool_rxnfc __user *rxnfc;
7a229387 2720 struct ifreq __user *ifr;
3a7da39d
BH
2721 u32 rule_cnt = 0, actual_rule_cnt;
2722 u32 ethcmd;
7a229387 2723 u32 data;
3a7da39d 2724 int ret;
7a229387 2725
3a7da39d
BH
2726 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2727 return -EFAULT;
7a229387 2728
3a7da39d
BH
2729 compat_rxnfc = compat_ptr(data);
2730
2731 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2732 return -EFAULT;
2733
3a7da39d
BH
2734 /* Most ethtool structures are defined without padding.
2735 * Unfortunately struct ethtool_rxnfc is an exception.
2736 */
2737 switch (ethcmd) {
2738 default:
2739 break;
2740 case ETHTOOL_GRXCLSRLALL:
2741 /* Buffer size is variable */
2742 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2743 return -EFAULT;
2744 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2745 return -ENOMEM;
2746 buf_size += rule_cnt * sizeof(u32);
2747 /* fall through */
2748 case ETHTOOL_GRXRINGS:
2749 case ETHTOOL_GRXCLSRLCNT:
2750 case ETHTOOL_GRXCLSRULE:
55664f32 2751 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2752 convert_out = true;
2753 /* fall through */
2754 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2755 buf_size += sizeof(struct ethtool_rxnfc);
2756 convert_in = true;
2757 break;
2758 }
2759
2760 ifr = compat_alloc_user_space(buf_size);
2761 rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8);
2762
2763 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2764 return -EFAULT;
2765
3a7da39d
BH
2766 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2767 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2768 return -EFAULT;
2769
3a7da39d 2770 if (convert_in) {
127fe533 2771 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2772 * fs.ring_cookie and at the end of fs, but nowhere else.
2773 */
127fe533
AD
2774 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2775 sizeof(compat_rxnfc->fs.m_ext) !=
2776 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2777 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2778 BUILD_BUG_ON(
2779 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2780 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2781 offsetof(struct ethtool_rxnfc, fs.location) -
2782 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2783
2784 if (copy_in_user(rxnfc, compat_rxnfc,
127fe533 2785 (void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2786 (void *)rxnfc) ||
2787 copy_in_user(&rxnfc->fs.ring_cookie,
2788 &compat_rxnfc->fs.ring_cookie,
2789 (void *)(&rxnfc->fs.location + 1) -
2790 (void *)&rxnfc->fs.ring_cookie) ||
2791 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2792 sizeof(rxnfc->rule_cnt)))
2793 return -EFAULT;
2794 }
2795
2796 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2797 if (ret)
2798 return ret;
2799
2800 if (convert_out) {
2801 if (copy_in_user(compat_rxnfc, rxnfc,
127fe533 2802 (const void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2803 (const void *)rxnfc) ||
2804 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2805 &rxnfc->fs.ring_cookie,
2806 (const void *)(&rxnfc->fs.location + 1) -
2807 (const void *)&rxnfc->fs.ring_cookie) ||
2808 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2809 sizeof(rxnfc->rule_cnt)))
2810 return -EFAULT;
2811
2812 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2813 /* As an optimisation, we only copy the actual
2814 * number of rules that the underlying
2815 * function returned. Since Mallory might
2816 * change the rule count in user memory, we
2817 * check that it is less than the rule count
2818 * originally given (as the user buffer size),
2819 * which has been range-checked.
2820 */
2821 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2822 return -EFAULT;
2823 if (actual_rule_cnt < rule_cnt)
2824 rule_cnt = actual_rule_cnt;
2825 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2826 &rxnfc->rule_locs[0],
2827 rule_cnt * sizeof(u32)))
2828 return -EFAULT;
2829 }
2830 }
2831
2832 return 0;
7a229387
AB
2833}
2834
7a50a240
AB
2835static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2836{
2837 void __user *uptr;
2838 compat_uptr_t uptr32;
2839 struct ifreq __user *uifr;
2840
c6d409cf 2841 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2842 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2843 return -EFAULT;
2844
2845 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2846 return -EFAULT;
2847
2848 uptr = compat_ptr(uptr32);
2849
2850 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2851 return -EFAULT;
2852
2853 return dev_ioctl(net, SIOCWANDEV, uifr);
2854}
2855
6b96018b
AB
2856static int bond_ioctl(struct net *net, unsigned int cmd,
2857 struct compat_ifreq __user *ifr32)
7a229387
AB
2858{
2859 struct ifreq kifr;
2860 struct ifreq __user *uifr;
7a229387
AB
2861 mm_segment_t old_fs;
2862 int err;
2863 u32 data;
2864 void __user *datap;
2865
2866 switch (cmd) {
2867 case SIOCBONDENSLAVE:
2868 case SIOCBONDRELEASE:
2869 case SIOCBONDSETHWADDR:
2870 case SIOCBONDCHANGEACTIVE:
6b96018b 2871 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2872 return -EFAULT;
2873
2874 old_fs = get_fs();
c6d409cf 2875 set_fs(KERNEL_DS);
c3f52ae6 2876 err = dev_ioctl(net, cmd,
2877 (struct ifreq __user __force *) &kifr);
c6d409cf 2878 set_fs(old_fs);
7a229387
AB
2879
2880 return err;
2881 case SIOCBONDSLAVEINFOQUERY:
2882 case SIOCBONDINFOQUERY:
2883 uifr = compat_alloc_user_space(sizeof(*uifr));
2884 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2885 return -EFAULT;
2886
2887 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2888 return -EFAULT;
2889
2890 datap = compat_ptr(data);
2891 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2892 return -EFAULT;
2893
6b96018b 2894 return dev_ioctl(net, cmd, uifr);
7a229387 2895 default:
07d106d0 2896 return -ENOIOCTLCMD;
ccbd6a5a 2897 }
7a229387
AB
2898}
2899
6b96018b
AB
2900static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2901 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2902{
2903 struct ifreq __user *u_ifreq64;
7a229387
AB
2904 char tmp_buf[IFNAMSIZ];
2905 void __user *data64;
2906 u32 data32;
2907
2908 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2909 IFNAMSIZ))
2910 return -EFAULT;
2911 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2912 return -EFAULT;
2913 data64 = compat_ptr(data32);
2914
2915 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2916
2917 /* Don't check these user accesses, just let that get trapped
2918 * in the ioctl handler instead.
2919 */
2920 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2921 IFNAMSIZ))
2922 return -EFAULT;
2923 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
2924 return -EFAULT;
2925
6b96018b 2926 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2927}
2928
6b96018b
AB
2929static int dev_ifsioc(struct net *net, struct socket *sock,
2930 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2931{
a2116ed2 2932 struct ifreq __user *uifr;
7a229387
AB
2933 int err;
2934
a2116ed2
AB
2935 uifr = compat_alloc_user_space(sizeof(*uifr));
2936 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2937 return -EFAULT;
2938
2939 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2940
7a229387
AB
2941 if (!err) {
2942 switch (cmd) {
2943 case SIOCGIFFLAGS:
2944 case SIOCGIFMETRIC:
2945 case SIOCGIFMTU:
2946 case SIOCGIFMEM:
2947 case SIOCGIFHWADDR:
2948 case SIOCGIFINDEX:
2949 case SIOCGIFADDR:
2950 case SIOCGIFBRDADDR:
2951 case SIOCGIFDSTADDR:
2952 case SIOCGIFNETMASK:
fab2532b 2953 case SIOCGIFPFLAGS:
7a229387 2954 case SIOCGIFTXQLEN:
fab2532b
AB
2955 case SIOCGMIIPHY:
2956 case SIOCGMIIREG:
a2116ed2 2957 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2958 err = -EFAULT;
2959 break;
2960 }
2961 }
2962 return err;
2963}
2964
a2116ed2
AB
2965static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2966 struct compat_ifreq __user *uifr32)
2967{
2968 struct ifreq ifr;
2969 struct compat_ifmap __user *uifmap32;
2970 mm_segment_t old_fs;
2971 int err;
2972
2973 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2974 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
2975 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2976 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2977 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2978 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
2979 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
2980 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
2981 if (err)
2982 return -EFAULT;
2983
2984 old_fs = get_fs();
c6d409cf 2985 set_fs(KERNEL_DS);
c3f52ae6 2986 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2987 set_fs(old_fs);
a2116ed2
AB
2988
2989 if (cmd == SIOCGIFMAP && !err) {
2990 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
2991 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2992 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2993 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2994 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
2995 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
2996 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
2997 if (err)
2998 err = -EFAULT;
2999 }
3000 return err;
3001}
3002
3003static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
3004{
3005 void __user *uptr;
3006 compat_uptr_t uptr32;
3007 struct ifreq __user *uifr;
3008
c6d409cf 3009 uifr = compat_alloc_user_space(sizeof(*uifr));
a2116ed2
AB
3010 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
3011 return -EFAULT;
3012
3013 if (get_user(uptr32, &uifr32->ifr_data))
3014 return -EFAULT;
3015
3016 uptr = compat_ptr(uptr32);
3017
3018 if (put_user(uptr, &uifr->ifr_data))
3019 return -EFAULT;
3020
3021 return dev_ioctl(net, SIOCSHWTSTAMP, uifr);
3022}
3023
7a229387 3024struct rtentry32 {
c6d409cf 3025 u32 rt_pad1;
7a229387
AB
3026 struct sockaddr rt_dst; /* target address */
3027 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3028 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3029 unsigned short rt_flags;
3030 short rt_pad2;
3031 u32 rt_pad3;
3032 unsigned char rt_tos;
3033 unsigned char rt_class;
3034 short rt_pad4;
3035 short rt_metric; /* +1 for binary compatibility! */
7a229387 3036 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3037 u32 rt_mtu; /* per route MTU/Window */
3038 u32 rt_window; /* Window clamping */
7a229387
AB
3039 unsigned short rt_irtt; /* Initial RTT */
3040};
3041
3042struct in6_rtmsg32 {
3043 struct in6_addr rtmsg_dst;
3044 struct in6_addr rtmsg_src;
3045 struct in6_addr rtmsg_gateway;
3046 u32 rtmsg_type;
3047 u16 rtmsg_dst_len;
3048 u16 rtmsg_src_len;
3049 u32 rtmsg_metric;
3050 u32 rtmsg_info;
3051 u32 rtmsg_flags;
3052 s32 rtmsg_ifindex;
3053};
3054
6b96018b
AB
3055static int routing_ioctl(struct net *net, struct socket *sock,
3056 unsigned int cmd, void __user *argp)
7a229387
AB
3057{
3058 int ret;
3059 void *r = NULL;
3060 struct in6_rtmsg r6;
3061 struct rtentry r4;
3062 char devname[16];
3063 u32 rtdev;
3064 mm_segment_t old_fs = get_fs();
3065
6b96018b
AB
3066 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3067 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3068 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3069 3 * sizeof(struct in6_addr));
c6d409cf
ED
3070 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3071 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3072 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3073 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3074 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3075 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3076 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3077
3078 r = (void *) &r6;
3079 } else { /* ipv4 */
6b96018b 3080 struct rtentry32 __user *ur4 = argp;
c6d409cf 3081 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3082 3 * sizeof(struct sockaddr));
c6d409cf
ED
3083 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
3084 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
3085 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
3086 ret |= __get_user(r4.rt_window, &(ur4->rt_window));
3087 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
3088 ret |= __get_user(rtdev, &(ur4->rt_dev));
7a229387 3089 if (rtdev) {
c6d409cf 3090 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3091 r4.rt_dev = (char __user __force *)devname;
3092 devname[15] = 0;
7a229387
AB
3093 } else
3094 r4.rt_dev = NULL;
3095
3096 r = (void *) &r4;
3097 }
3098
3099 if (ret) {
3100 ret = -EFAULT;
3101 goto out;
3102 }
3103
c6d409cf 3104 set_fs(KERNEL_DS);
6b96018b 3105 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3106 set_fs(old_fs);
7a229387
AB
3107
3108out:
7a229387
AB
3109 return ret;
3110}
3111
3112/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3113 * for some operations; this forces use of the newer bridge-utils that
25985edc 3114 * use compatible ioctls
7a229387 3115 */
6b96018b 3116static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3117{
6b96018b 3118 compat_ulong_t tmp;
7a229387 3119
6b96018b 3120 if (get_user(tmp, argp))
7a229387
AB
3121 return -EFAULT;
3122 if (tmp == BRCTL_GET_VERSION)
3123 return BRCTL_VERSION + 1;
3124 return -EINVAL;
3125}
3126
6b96018b
AB
3127static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3128 unsigned int cmd, unsigned long arg)
3129{
3130 void __user *argp = compat_ptr(arg);
3131 struct sock *sk = sock->sk;
3132 struct net *net = sock_net(sk);
7a229387 3133
6b96018b
AB
3134 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3135 return siocdevprivate_ioctl(net, cmd, argp);
3136
3137 switch (cmd) {
3138 case SIOCSIFBR:
3139 case SIOCGIFBR:
3140 return old_bridge_ioctl(argp);
3141 case SIOCGIFNAME:
3142 return dev_ifname32(net, argp);
3143 case SIOCGIFCONF:
3144 return dev_ifconf(net, argp);
3145 case SIOCETHTOOL:
3146 return ethtool_ioctl(net, argp);
7a50a240
AB
3147 case SIOCWANDEV:
3148 return compat_siocwandev(net, argp);
a2116ed2
AB
3149 case SIOCGIFMAP:
3150 case SIOCSIFMAP:
3151 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3152 case SIOCBONDENSLAVE:
3153 case SIOCBONDRELEASE:
3154 case SIOCBONDSETHWADDR:
3155 case SIOCBONDSLAVEINFOQUERY:
3156 case SIOCBONDINFOQUERY:
3157 case SIOCBONDCHANGEACTIVE:
3158 return bond_ioctl(net, cmd, argp);
3159 case SIOCADDRT:
3160 case SIOCDELRT:
3161 return routing_ioctl(net, sock, cmd, argp);
3162 case SIOCGSTAMP:
3163 return do_siocgstamp(net, sock, cmd, argp);
3164 case SIOCGSTAMPNS:
3165 return do_siocgstampns(net, sock, cmd, argp);
a2116ed2
AB
3166 case SIOCSHWTSTAMP:
3167 return compat_siocshwtstamp(net, argp);
6b96018b
AB
3168
3169 case FIOSETOWN:
3170 case SIOCSPGRP:
3171 case FIOGETOWN:
3172 case SIOCGPGRP:
3173 case SIOCBRADDBR:
3174 case SIOCBRDELBR:
3175 case SIOCGIFVLAN:
3176 case SIOCSIFVLAN:
3177 case SIOCADDDLCI:
3178 case SIOCDELDLCI:
3179 return sock_ioctl(file, cmd, arg);
3180
3181 case SIOCGIFFLAGS:
3182 case SIOCSIFFLAGS:
3183 case SIOCGIFMETRIC:
3184 case SIOCSIFMETRIC:
3185 case SIOCGIFMTU:
3186 case SIOCSIFMTU:
3187 case SIOCGIFMEM:
3188 case SIOCSIFMEM:
3189 case SIOCGIFHWADDR:
3190 case SIOCSIFHWADDR:
3191 case SIOCADDMULTI:
3192 case SIOCDELMULTI:
3193 case SIOCGIFINDEX:
6b96018b
AB
3194 case SIOCGIFADDR:
3195 case SIOCSIFADDR:
3196 case SIOCSIFHWBROADCAST:
6b96018b 3197 case SIOCDIFADDR:
6b96018b
AB
3198 case SIOCGIFBRDADDR:
3199 case SIOCSIFBRDADDR:
3200 case SIOCGIFDSTADDR:
3201 case SIOCSIFDSTADDR:
3202 case SIOCGIFNETMASK:
3203 case SIOCSIFNETMASK:
3204 case SIOCSIFPFLAGS:
3205 case SIOCGIFPFLAGS:
3206 case SIOCGIFTXQLEN:
3207 case SIOCSIFTXQLEN:
3208 case SIOCBRADDIF:
3209 case SIOCBRDELIF:
9177efd3
AB
3210 case SIOCSIFNAME:
3211 case SIOCGMIIPHY:
3212 case SIOCGMIIREG:
3213 case SIOCSMIIREG:
6b96018b 3214 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3215
6b96018b
AB
3216 case SIOCSARP:
3217 case SIOCGARP:
3218 case SIOCDARP:
6b96018b 3219 case SIOCATMARK:
9177efd3
AB
3220 return sock_do_ioctl(net, sock, cmd, arg);
3221 }
3222
6b96018b
AB
3223 return -ENOIOCTLCMD;
3224}
7a229387 3225
95c96174 3226static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3227 unsigned long arg)
89bbfc95
SP
3228{
3229 struct socket *sock = file->private_data;
3230 int ret = -ENOIOCTLCMD;
87de87d5
DM
3231 struct sock *sk;
3232 struct net *net;
3233
3234 sk = sock->sk;
3235 net = sock_net(sk);
89bbfc95
SP
3236
3237 if (sock->ops->compat_ioctl)
3238 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3239
87de87d5
DM
3240 if (ret == -ENOIOCTLCMD &&
3241 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3242 ret = compat_wext_handle_ioctl(net, cmd, arg);
3243
6b96018b
AB
3244 if (ret == -ENOIOCTLCMD)
3245 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3246
89bbfc95
SP
3247 return ret;
3248}
3249#endif
3250
ac5a488e
SS
3251int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3252{
3253 return sock->ops->bind(sock, addr, addrlen);
3254}
c6d409cf 3255EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3256
3257int kernel_listen(struct socket *sock, int backlog)
3258{
3259 return sock->ops->listen(sock, backlog);
3260}
c6d409cf 3261EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3262
3263int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3264{
3265 struct sock *sk = sock->sk;
3266 int err;
3267
3268 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3269 newsock);
3270 if (err < 0)
3271 goto done;
3272
3273 err = sock->ops->accept(sock, *newsock, flags);
3274 if (err < 0) {
3275 sock_release(*newsock);
fa8705b0 3276 *newsock = NULL;
ac5a488e
SS
3277 goto done;
3278 }
3279
3280 (*newsock)->ops = sock->ops;
1b08534e 3281 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3282
3283done:
3284 return err;
3285}
c6d409cf 3286EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3287
3288int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3289 int flags)
ac5a488e
SS
3290{
3291 return sock->ops->connect(sock, addr, addrlen, flags);
3292}
c6d409cf 3293EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3294
3295int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3296 int *addrlen)
3297{
3298 return sock->ops->getname(sock, addr, addrlen, 0);
3299}
c6d409cf 3300EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3301
3302int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3303 int *addrlen)
3304{
3305 return sock->ops->getname(sock, addr, addrlen, 1);
3306}
c6d409cf 3307EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3308
3309int kernel_getsockopt(struct socket *sock, int level, int optname,
3310 char *optval, int *optlen)
3311{
3312 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3313 char __user *uoptval;
3314 int __user *uoptlen;
ac5a488e
SS
3315 int err;
3316
fb8621bb
NK
3317 uoptval = (char __user __force *) optval;
3318 uoptlen = (int __user __force *) optlen;
3319
ac5a488e
SS
3320 set_fs(KERNEL_DS);
3321 if (level == SOL_SOCKET)
fb8621bb 3322 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3323 else
fb8621bb
NK
3324 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3325 uoptlen);
ac5a488e
SS
3326 set_fs(oldfs);
3327 return err;
3328}
c6d409cf 3329EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3330
3331int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3332 char *optval, unsigned int optlen)
ac5a488e
SS
3333{
3334 mm_segment_t oldfs = get_fs();
fb8621bb 3335 char __user *uoptval;
ac5a488e
SS
3336 int err;
3337
fb8621bb
NK
3338 uoptval = (char __user __force *) optval;
3339
ac5a488e
SS
3340 set_fs(KERNEL_DS);
3341 if (level == SOL_SOCKET)
fb8621bb 3342 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3343 else
fb8621bb 3344 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3345 optlen);
3346 set_fs(oldfs);
3347 return err;
3348}
c6d409cf 3349EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3350
3351int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3352 size_t size, int flags)
3353{
f8451725
HX
3354 sock_update_classid(sock->sk);
3355
ac5a488e
SS
3356 if (sock->ops->sendpage)
3357 return sock->ops->sendpage(sock, page, offset, size, flags);
3358
3359 return sock_no_sendpage(sock, page, offset, size, flags);
3360}
c6d409cf 3361EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3362
3363int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3364{
3365 mm_segment_t oldfs = get_fs();
3366 int err;
3367
3368 set_fs(KERNEL_DS);
3369 err = sock->ops->ioctl(sock, cmd, arg);
3370 set_fs(oldfs);
3371
3372 return err;
3373}
c6d409cf 3374EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3375
91cf45f0
TM
3376int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3377{
3378 return sock->ops->shutdown(sock, how);
3379}
91cf45f0 3380EXPORT_SYMBOL(kernel_sock_shutdown);