ipv6: Fix Smatch warning.
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4
LT
72#include <linux/wanrouter.h>
73#include <linux/if_bridge.h>
20380731
ACM
74#include <linux/if_frad.h>
75#include <linux/if_vlan.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
1da177e4
LT
91
92#include <asm/uaccess.h>
93#include <asm/unistd.h>
94
95#include <net/compat.h>
87de87d5 96#include <net/wext.h>
f8451725 97#include <net/cls_cgroup.h>
1da177e4
LT
98
99#include <net/sock.h>
100#include <linux/netfilter.h>
101
6b96018b
AB
102#include <linux/if_tun.h>
103#include <linux/ipv6_route.h>
104#include <linux/route.h>
6b96018b
AB
105#include <linux/sockios.h>
106#include <linux/atalk.h>
107
1da177e4 108static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
109static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
110 unsigned long nr_segs, loff_t pos);
111static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
112 unsigned long nr_segs, loff_t pos);
89bddce5 113static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
114
115static int sock_close(struct inode *inode, struct file *file);
116static unsigned int sock_poll(struct file *file,
117 struct poll_table_struct *wait);
89bddce5 118static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
119#ifdef CONFIG_COMPAT
120static long compat_sock_ioctl(struct file *file,
89bddce5 121 unsigned int cmd, unsigned long arg);
89bbfc95 122#endif
1da177e4 123static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
124static ssize_t sock_sendpage(struct file *file, struct page *page,
125 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 126static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 127 struct pipe_inode_info *pipe, size_t len,
9c55e01c 128 unsigned int flags);
1da177e4 129
1da177e4
LT
130/*
131 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
132 * in the operation structures but are done directly via the socketcall() multiplexor.
133 */
134
da7071d7 135static const struct file_operations socket_file_ops = {
1da177e4
LT
136 .owner = THIS_MODULE,
137 .llseek = no_llseek,
138 .aio_read = sock_aio_read,
139 .aio_write = sock_aio_write,
140 .poll = sock_poll,
141 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
142#ifdef CONFIG_COMPAT
143 .compat_ioctl = compat_sock_ioctl,
144#endif
1da177e4
LT
145 .mmap = sock_mmap,
146 .open = sock_no_open, /* special open code to disallow open via /proc */
147 .release = sock_close,
148 .fasync = sock_fasync,
5274f052
JA
149 .sendpage = sock_sendpage,
150 .splice_write = generic_splice_sendpage,
9c55e01c 151 .splice_read = sock_splice_read,
1da177e4
LT
152};
153
154/*
155 * The protocol list. Each protocol is registered in here.
156 */
157
1da177e4 158static DEFINE_SPINLOCK(net_family_lock);
190683a9 159static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 160
1da177e4
LT
161/*
162 * Statistics counters of the socket lists
163 */
164
c6d409cf 165static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
166
/*
 * Support routines.
 * Move socket addresses back and forth across the kernel/user
 * divide and look after the messy bits.
 */
172
1da177e4
LT
173/**
174 * move_addr_to_kernel - copy a socket address into kernel space
175 * @uaddr: Address in user space
176 * @kaddr: Address in kernel space
177 * @ulen: Length in user space
178 *
179 * The address is copied into kernel space. If the provided address is
180 * too long an error code of -EINVAL is returned. If the copy gives
181 * invalid addresses -EFAULT is returned. On a success 0 is returned.
182 */
183
230b1839 184int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
1da177e4 185{
230b1839 186 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 187 return -EINVAL;
89bddce5 188 if (ulen == 0)
1da177e4 189 return 0;
89bddce5 190 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 191 return -EFAULT;
3ec3b2fb 192 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
193}
194
195/**
196 * move_addr_to_user - copy an address to user space
197 * @kaddr: kernel space address
198 * @klen: length of address in kernel
199 * @uaddr: user space address
200 * @ulen: pointer to user length field
201 *
202 * The value pointed to by ulen on entry is the buffer length available.
203 * This is overwritten with the buffer space used. -EINVAL is returned
204 * if an overlong buffer is specified or a negative buffer size. -EFAULT
205 * is returned if either the buffer or the length field are not
206 * accessible.
207 * After copying the data up to the limit the user specifies, the true
208 * length of the data is written over the length limit the user
209 * specified. Zero is returned for a success.
210 */
89bddce5 211
11165f14 212static int move_addr_to_user(struct sockaddr *kaddr, int klen,
213 void __user *uaddr, int __user *ulen)
1da177e4
LT
214{
215 int err;
216 int len;
217
89bddce5
SH
218 err = get_user(len, ulen);
219 if (err)
1da177e4 220 return err;
89bddce5
SH
221 if (len > klen)
222 len = klen;
230b1839 223 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 224 return -EINVAL;
89bddce5 225 if (len) {
d6fe3945
SG
226 if (audit_sockaddr(klen, kaddr))
227 return -ENOMEM;
89bddce5 228 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
229 return -EFAULT;
230 }
231 /*
89bddce5
SH
232 * "fromlen shall refer to the value before truncation.."
233 * 1003.1g
1da177e4
LT
234 */
235 return __put_user(klen, ulen);
236}
237
e18b890b 238static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
239
240static struct inode *sock_alloc_inode(struct super_block *sb)
241{
242 struct socket_alloc *ei;
eaefd110 243 struct socket_wq *wq;
89bddce5 244
e94b1766 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
246 if (!ei)
247 return NULL;
eaefd110
ED
248 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
249 if (!wq) {
43815482
ED
250 kmem_cache_free(sock_inode_cachep, ei);
251 return NULL;
252 }
eaefd110
ED
253 init_waitqueue_head(&wq->wait);
254 wq->fasync_list = NULL;
255 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482 268 struct socket_alloc *ei;
eaefd110 269 struct socket_wq *wq;
43815482
ED
270
271 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 272 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 273 kfree_rcu(wq, rcu);
43815482 274 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
275}
276
51cc5068 277static void init_once(void *foo)
1da177e4 278{
89bddce5 279 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 280
a35afb83 281 inode_init_once(&ei->vfs_inode);
1da177e4 282}
89bddce5 283
1da177e4
LT
284static int init_inodecache(void)
285{
286 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
287 sizeof(struct socket_alloc),
288 0,
289 (SLAB_HWCACHE_ALIGN |
290 SLAB_RECLAIM_ACCOUNT |
291 SLAB_MEM_SPREAD),
20c2df83 292 init_once);
1da177e4
LT
293 if (sock_inode_cachep == NULL)
294 return -ENOMEM;
295 return 0;
296}
297
b87221de 298static const struct super_operations sockfs_ops = {
c6d409cf
ED
299 .alloc_inode = sock_alloc_inode,
300 .destroy_inode = sock_destroy_inode,
301 .statfs = simple_statfs,
1da177e4
LT
302};
303
c23fbb6b
ED
304/*
305 * sockfs_dname() is called from d_path().
306 */
307static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
308{
309 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
310 dentry->d_inode->i_ino);
311}
312
3ba13d17 313static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 314 .d_dname = sockfs_dname,
1da177e4
LT
315};
316
c74a1cbb
AV
317static struct dentry *sockfs_mount(struct file_system_type *fs_type,
318 int flags, const char *dev_name, void *data)
319{
320 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
321 &sockfs_dentry_operations, SOCKFS_MAGIC);
322}
323
324static struct vfsmount *sock_mnt __read_mostly;
325
326static struct file_system_type sock_fs_type = {
327 .name = "sockfs",
328 .mount = sockfs_mount,
329 .kill_sb = kill_anon_super,
330};
331
1da177e4
LT
332/*
333 * Obtains the first available file descriptor and sets it up for use.
334 *
39d8c1b6
DM
335 * These functions create file structures and maps them to fd space
336 * of the current process. On success it returns file descriptor
1da177e4
LT
337 * and file struct implicitly stored in sock->file.
338 * Note that another thread may close file descriptor before we return
339 * from this function. We use the fact that now we do not refer
340 * to socket after mapping. If one day we will need it, this
341 * function will increment ref. count on file by 1.
342 *
343 * In any case returned fd MAY BE not valid!
344 * This race condition is unavoidable
345 * with shared fd spaces, we cannot solve it inside kernel,
346 * but we take care of internal coherence yet.
347 */
348
7cbe66b6 349static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
1da177e4 350{
7cbe66b6 351 struct qstr name = { .name = "" };
2c48b9c4 352 struct path path;
7cbe66b6 353 struct file *file;
1da177e4 354 int fd;
1da177e4 355
a677a039 356 fd = get_unused_fd_flags(flags);
7cbe66b6
AV
357 if (unlikely(fd < 0))
358 return fd;
1da177e4 359
4b936885 360 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
2c48b9c4 361 if (unlikely(!path.dentry)) {
7cbe66b6 362 put_unused_fd(fd);
39d8c1b6 363 return -ENOMEM;
7cbe66b6 364 }
2c48b9c4 365 path.mnt = mntget(sock_mnt);
39d8c1b6 366
2c48b9c4 367 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 368 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 369
2c48b9c4 370 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 371 &socket_file_ops);
cc3808f8
AV
372 if (unlikely(!file)) {
373 /* drop dentry, keep inode */
7de9c6ee 374 ihold(path.dentry->d_inode);
2c48b9c4 375 path_put(&path);
cc3808f8
AV
376 put_unused_fd(fd);
377 return -ENFILE;
378 }
379
380 sock->file = file;
77d27200 381 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6
DM
382 file->f_pos = 0;
383 file->private_data = sock;
1da177e4 384
7cbe66b6
AV
385 *f = file;
386 return fd;
39d8c1b6
DM
387}
388
/*
 *	Allocate a file and descriptor for @sock with sock_alloc_file() and,
 *	on success, install the file in the descriptor table.  Returns the
 *	new descriptor, or the negative errno from sock_alloc_file().
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int fd;

	fd = sock_alloc_file(sock, &newfile, flags);
	if (unlikely(fd < 0))
		return fd;

	fd_install(fd, newfile);
	return fd;
}
EXPORT_SYMBOL(sock_map_fd);
1da177e4 400
6cb153ca
BL
401static struct socket *sock_from_file(struct file *file, int *err)
402{
6cb153ca
BL
403 if (file->f_op == &socket_file_ops)
404 return file->private_data; /* set in sock_map_fd */
405
23bb80d2
ED
406 *err = -ENOTSOCK;
407 return NULL;
6cb153ca
BL
408}
409
1da177e4 410/**
c6d409cf 411 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
412 * @fd: file handle
413 * @err: pointer to an error code return
414 *
415 * The file handle passed in is locked and the socket it is bound
416 * too is returned. If an error occurs the err pointer is overwritten
417 * with a negative errno code and NULL is returned. The function checks
418 * for both invalid handles and passing a handle which is not a socket.
419 *
420 * On a success the socket object pointer is returned.
421 */
422
423struct socket *sockfd_lookup(int fd, int *err)
424{
425 struct file *file;
1da177e4
LT
426 struct socket *sock;
427
89bddce5
SH
428 file = fget(fd);
429 if (!file) {
1da177e4
LT
430 *err = -EBADF;
431 return NULL;
432 }
89bddce5 433
6cb153ca
BL
434 sock = sock_from_file(file, err);
435 if (!sock)
1da177e4 436 fput(file);
6cb153ca
BL
437 return sock;
438}
c6d409cf 439EXPORT_SYMBOL(sockfd_lookup);
1da177e4 440
6cb153ca
BL
441static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
442{
443 struct file *file;
444 struct socket *sock;
445
3672558c 446 *err = -EBADF;
6cb153ca
BL
447 file = fget_light(fd, fput_needed);
448 if (file) {
449 sock = sock_from_file(file, err);
450 if (sock)
451 return sock;
452 fput_light(file, *fput_needed);
1da177e4 453 }
6cb153ca 454 return NULL;
1da177e4
LT
455}
456
457/**
458 * sock_alloc - allocate a socket
89bddce5 459 *
1da177e4
LT
460 * Allocate a new inode and socket object. The two are bound together
461 * and initialised. The socket is then returned. If we are out of inodes
462 * NULL is returned.
463 */
464
465static struct socket *sock_alloc(void)
466{
89bddce5
SH
467 struct inode *inode;
468 struct socket *sock;
1da177e4 469
a209dfc7 470 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
471 if (!inode)
472 return NULL;
473
474 sock = SOCKET_I(inode);
475
29a020d3 476 kmemcheck_annotate_bitfield(sock, type);
85fe4025 477 inode->i_ino = get_next_ino();
89bddce5 478 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
479 inode->i_uid = current_fsuid();
480 inode->i_gid = current_fsgid();
1da177e4 481
4e69489a 482 percpu_add(sockets_in_use, 1);
1da177e4
LT
483 return sock;
484}
485
486/*
487 * In theory you can't get an open on this inode, but /proc provides
488 * a back door. Remember to keep it shut otherwise you'll let the
489 * creepy crawlies in.
490 */
89bddce5 491
1da177e4
LT
492static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
493{
494 return -ENXIO;
495}
496
4b6f5d20 497const struct file_operations bad_sock_fops = {
1da177e4
LT
498 .owner = THIS_MODULE,
499 .open = sock_no_open,
6038f373 500 .llseek = noop_llseek,
1da177e4
LT
501};
502
503/**
504 * sock_release - close a socket
505 * @sock: socket to close
506 *
507 * The socket is released from the protocol stack if it has a release
508 * callback, and the inode is then released if the socket is bound to
89bddce5 509 * an inode not a file.
1da177e4 510 */
89bddce5 511
1da177e4
LT
512void sock_release(struct socket *sock)
513{
514 if (sock->ops) {
515 struct module *owner = sock->ops->owner;
516
517 sock->ops->release(sock);
518 sock->ops = NULL;
519 module_put(owner);
520 }
521
eaefd110 522 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
1da177e4
LT
523 printk(KERN_ERR "sock_release: fasync list not empty!\n");
524
4e69489a 525 percpu_sub(sockets_in_use, 1);
1da177e4
LT
526 if (!sock->file) {
527 iput(SOCK_INODE(sock));
528 return;
529 }
89bddce5 530 sock->file = NULL;
1da177e4 531}
c6d409cf 532EXPORT_SYMBOL(sock_release);
1da177e4 533
2244d07b 534int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
20d49473 535{
2244d07b 536 *tx_flags = 0;
20d49473 537 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
2244d07b 538 *tx_flags |= SKBTX_HW_TSTAMP;
20d49473 539 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
2244d07b 540 *tx_flags |= SKBTX_SW_TSTAMP;
6e3e939f
JB
541 if (sock_flag(sk, SOCK_WIFI_STATUS))
542 *tx_flags |= SKBTX_WIFI_STATUS;
20d49473
PO
543 return 0;
544}
545EXPORT_SYMBOL(sock_tx_timestamp);
546
228e548e
AB
547static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
548 struct msghdr *msg, size_t size)
1da177e4
LT
549{
550 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4 551
f8451725
HX
552 sock_update_classid(sock->sk);
553
5bc1421e
NH
554 sock_update_netprioidx(sock->sk);
555
1da177e4
LT
556 si->sock = sock;
557 si->scm = NULL;
558 si->msg = msg;
559 si->size = size;
560
1da177e4
LT
561 return sock->ops->sendmsg(iocb, sock, msg, size);
562}
563
228e548e
AB
564static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
565 struct msghdr *msg, size_t size)
566{
567 int err = security_socket_sendmsg(sock, msg, size);
568
569 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
570}
571
1da177e4
LT
572int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
573{
574 struct kiocb iocb;
575 struct sock_iocb siocb;
576 int ret;
577
578 init_sync_kiocb(&iocb, NULL);
579 iocb.private = &siocb;
580 ret = __sock_sendmsg(&iocb, sock, msg, size);
581 if (-EIOCBQUEUED == ret)
582 ret = wait_on_sync_kiocb(&iocb);
583 return ret;
584}
c6d409cf 585EXPORT_SYMBOL(sock_sendmsg);
1da177e4 586
894dc24c 587static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e
AB
588{
589 struct kiocb iocb;
590 struct sock_iocb siocb;
591 int ret;
592
593 init_sync_kiocb(&iocb, NULL);
594 iocb.private = &siocb;
595 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
596 if (-EIOCBQUEUED == ret)
597 ret = wait_on_sync_kiocb(&iocb);
598 return ret;
599}
600
1da177e4
LT
601int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
602 struct kvec *vec, size_t num, size_t size)
603{
604 mm_segment_t oldfs = get_fs();
605 int result;
606
607 set_fs(KERNEL_DS);
608 /*
609 * the following is safe, since for compiler definitions of kvec and
610 * iovec are identical, yielding the same in-core layout and alignment
611 */
89bddce5 612 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
613 msg->msg_iovlen = num;
614 result = sock_sendmsg(sock, msg, size);
615 set_fs(oldfs);
616 return result;
617}
c6d409cf 618EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 619
20d49473
PO
620static int ktime2ts(ktime_t kt, struct timespec *ts)
621{
622 if (kt.tv64) {
623 *ts = ktime_to_timespec(kt);
624 return 1;
625 } else {
626 return 0;
627 }
628}
629
92f37fd2
ED
630/*
631 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
632 */
633void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
634 struct sk_buff *skb)
635{
20d49473
PO
636 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
637 struct timespec ts[3];
638 int empty = 1;
639 struct skb_shared_hwtstamps *shhwtstamps =
640 skb_hwtstamps(skb);
641
642 /* Race occurred between timestamp enabling and packet
643 receiving. Fill in the current time for now. */
644 if (need_software_tstamp && skb->tstamp.tv64 == 0)
645 __net_timestamp(skb);
646
647 if (need_software_tstamp) {
648 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
649 struct timeval tv;
650 skb_get_timestamp(skb, &tv);
651 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
652 sizeof(tv), &tv);
653 } else {
842509b8 654 skb_get_timestampns(skb, &ts[0]);
20d49473 655 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 656 sizeof(ts[0]), &ts[0]);
20d49473
PO
657 }
658 }
659
660
661 memset(ts, 0, sizeof(ts));
662 if (skb->tstamp.tv64 &&
663 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
664 skb_get_timestampns(skb, ts + 0);
665 empty = 0;
666 }
667 if (shhwtstamps) {
668 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
669 ktime2ts(shhwtstamps->syststamp, ts + 1))
670 empty = 0;
671 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
672 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
673 empty = 0;
92f37fd2 674 }
20d49473
PO
675 if (!empty)
676 put_cmsg(msg, SOL_SOCKET,
677 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 678}
7c81fd8b
ACM
679EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
680
6e3e939f
JB
681void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
682 struct sk_buff *skb)
683{
684 int ack;
685
686 if (!sock_flag(sk, SOCK_WIFI_STATUS))
687 return;
688 if (!skb->wifi_acked_valid)
689 return;
690
691 ack = skb->wifi_acked;
692
693 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
694}
695EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
696
11165f14 697static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
698 struct sk_buff *skb)
3b885787
NH
699{
700 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
701 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
702 sizeof(__u32), &skb->dropcount);
703}
704
/*
 *	Attach receive ancillary data for @skb to @msg: first the timestamp
 *	(sock_recv_timestamp()), then the drop counter (sock_recv_drops()).
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 712
a2e27255
ACM
713static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
714 struct msghdr *msg, size_t size, int flags)
1da177e4 715{
1da177e4
LT
716 struct sock_iocb *si = kiocb_to_siocb(iocb);
717
f8451725
HX
718 sock_update_classid(sock->sk);
719
1da177e4
LT
720 si->sock = sock;
721 si->scm = NULL;
722 si->msg = msg;
723 si->size = size;
724 si->flags = flags;
725
1da177e4
LT
726 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
727}
728
a2e27255
ACM
729static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
730 struct msghdr *msg, size_t size, int flags)
731{
732 int err = security_socket_recvmsg(sock, msg, size, flags);
733
734 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
735}
736
89bddce5 737int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
738 size_t size, int flags)
739{
740 struct kiocb iocb;
741 struct sock_iocb siocb;
742 int ret;
743
89bddce5 744 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
745 iocb.private = &siocb;
746 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
747 if (-EIOCBQUEUED == ret)
748 ret = wait_on_sync_kiocb(&iocb);
749 return ret;
750}
c6d409cf 751EXPORT_SYMBOL(sock_recvmsg);
1da177e4 752
a2e27255
ACM
753static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
754 size_t size, int flags)
755{
756 struct kiocb iocb;
757 struct sock_iocb siocb;
758 int ret;
759
760 init_sync_kiocb(&iocb, NULL);
761 iocb.private = &siocb;
762 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
763 if (-EIOCBQUEUED == ret)
764 ret = wait_on_sync_kiocb(&iocb);
765 return ret;
766}
767
c1249c0a
ML
768/**
769 * kernel_recvmsg - Receive a message from a socket (kernel space)
770 * @sock: The socket to receive the message from
771 * @msg: Received message
772 * @vec: Input s/g array for message data
773 * @num: Size of input s/g array
774 * @size: Number of bytes to read
775 * @flags: Message flags (MSG_DONTWAIT, etc...)
776 *
777 * On return the msg structure contains the scatter/gather array passed in the
778 * vec argument. The array is modified so that it consists of the unfilled
779 * portion of the original array.
780 *
781 * The returned value is the total number of bytes received, or an error.
782 */
89bddce5
SH
783int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
784 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
785{
786 mm_segment_t oldfs = get_fs();
787 int result;
788
789 set_fs(KERNEL_DS);
790 /*
791 * the following is safe, since for compiler definitions of kvec and
792 * iovec are identical, yielding the same in-core layout and alignment
793 */
89bddce5 794 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
795 result = sock_recvmsg(sock, msg, size, flags);
796 set_fs(oldfs);
797 return result;
798}
c6d409cf 799EXPORT_SYMBOL(kernel_recvmsg);
1da177e4
LT
800
801static void sock_aio_dtor(struct kiocb *iocb)
802{
803 kfree(iocb->private);
804}
805
ce1d4d3e
CH
806static ssize_t sock_sendpage(struct file *file, struct page *page,
807 int offset, size_t size, loff_t *ppos, int more)
1da177e4 808{
1da177e4
LT
809 struct socket *sock;
810 int flags;
811
ce1d4d3e
CH
812 sock = file->private_data;
813
814 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
815 if (more)
816 flags |= MSG_MORE;
817
e6949583 818 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 819}
1da177e4 820
9c55e01c 821static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 822 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
823 unsigned int flags)
824{
825 struct socket *sock = file->private_data;
826
997b37da
RDC
827 if (unlikely(!sock->ops->splice_read))
828 return -EINVAL;
829
f8451725
HX
830 sock_update_classid(sock->sk);
831
9c55e01c
JA
832 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
833}
834
ce1d4d3e 835static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 836 struct sock_iocb *siocb)
ce1d4d3e
CH
837{
838 if (!is_sync_kiocb(iocb)) {
839 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
840 if (!siocb)
841 return NULL;
1da177e4
LT
842 iocb->ki_dtor = sock_aio_dtor;
843 }
1da177e4 844
ce1d4d3e 845 siocb->kiocb = iocb;
ce1d4d3e
CH
846 iocb->private = siocb;
847 return siocb;
1da177e4
LT
848}
849
ce1d4d3e 850static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
851 struct file *file, const struct iovec *iov,
852 unsigned long nr_segs)
ce1d4d3e
CH
853{
854 struct socket *sock = file->private_data;
855 size_t size = 0;
856 int i;
1da177e4 857
89bddce5
SH
858 for (i = 0; i < nr_segs; i++)
859 size += iov[i].iov_len;
1da177e4 860
ce1d4d3e
CH
861 msg->msg_name = NULL;
862 msg->msg_namelen = 0;
863 msg->msg_control = NULL;
864 msg->msg_controllen = 0;
89bddce5 865 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
866 msg->msg_iovlen = nr_segs;
867 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
868
869 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
870}
871
027445c3
BP
872static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
873 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
874{
875 struct sock_iocb siocb, *x;
876
1da177e4
LT
877 if (pos != 0)
878 return -ESPIPE;
027445c3
BP
879
880 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
881 return 0;
882
027445c3
BP
883
884 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
885 if (!x)
886 return -ENOMEM;
027445c3 887 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
888}
889
ce1d4d3e 890static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
891 struct file *file, const struct iovec *iov,
892 unsigned long nr_segs)
1da177e4 893{
ce1d4d3e
CH
894 struct socket *sock = file->private_data;
895 size_t size = 0;
896 int i;
1da177e4 897
89bddce5
SH
898 for (i = 0; i < nr_segs; i++)
899 size += iov[i].iov_len;
1da177e4 900
ce1d4d3e
CH
901 msg->msg_name = NULL;
902 msg->msg_namelen = 0;
903 msg->msg_control = NULL;
904 msg->msg_controllen = 0;
89bddce5 905 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
906 msg->msg_iovlen = nr_segs;
907 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
908 if (sock->type == SOCK_SEQPACKET)
909 msg->msg_flags |= MSG_EOR;
1da177e4 910
ce1d4d3e 911 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
912}
913
027445c3
BP
914static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
915 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
916{
917 struct sock_iocb siocb, *x;
1da177e4 918
ce1d4d3e
CH
919 if (pos != 0)
920 return -ESPIPE;
027445c3 921
027445c3 922 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
923 if (!x)
924 return -ENOMEM;
1da177e4 925
027445c3 926 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
927}
928
1da177e4
LT
929/*
930 * Atomic setting of ioctl hooks to avoid race
931 * with module unload.
932 */
933
4a3e2f71 934static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 935static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 936
881d966b 937void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 938{
4a3e2f71 939 mutex_lock(&br_ioctl_mutex);
1da177e4 940 br_ioctl_hook = hook;
4a3e2f71 941 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
942}
943EXPORT_SYMBOL(brioctl_set);
944
4a3e2f71 945static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 946static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 947
881d966b 948void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 949{
4a3e2f71 950 mutex_lock(&vlan_ioctl_mutex);
1da177e4 951 vlan_ioctl_hook = hook;
4a3e2f71 952 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
953}
954EXPORT_SYMBOL(vlan_ioctl_set);
955
4a3e2f71 956static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 957static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 958
89bddce5 959void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 960{
4a3e2f71 961 mutex_lock(&dlci_ioctl_mutex);
1da177e4 962 dlci_ioctl_hook = hook;
4a3e2f71 963 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
964}
965EXPORT_SYMBOL(dlci_ioctl_set);
966
6b96018b
AB
967static long sock_do_ioctl(struct net *net, struct socket *sock,
968 unsigned int cmd, unsigned long arg)
969{
970 int err;
971 void __user *argp = (void __user *)arg;
972
973 err = sock->ops->ioctl(sock, cmd, arg);
974
975 /*
976 * If this ioctl is unknown try to hand it down
977 * to the NIC driver.
978 */
979 if (err == -ENOIOCTLCMD)
980 err = dev_ioctl(net, cmd, argp);
981
982 return err;
983}
984
1da177e4
LT
985/*
986 * With an ioctl, arg may well be a user mode pointer, but we don't know
987 * what to do with it - that's up to the protocol still.
988 */
989
990static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
991{
992 struct socket *sock;
881d966b 993 struct sock *sk;
1da177e4
LT
994 void __user *argp = (void __user *)arg;
995 int pid, err;
881d966b 996 struct net *net;
1da177e4 997
b69aee04 998 sock = file->private_data;
881d966b 999 sk = sock->sk;
3b1e0a65 1000 net = sock_net(sk);
1da177e4 1001 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1002 err = dev_ioctl(net, cmd, argp);
1da177e4 1003 } else
3d23e349 1004#ifdef CONFIG_WEXT_CORE
1da177e4 1005 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1006 err = dev_ioctl(net, cmd, argp);
1da177e4 1007 } else
3d23e349 1008#endif
89bddce5 1009 switch (cmd) {
1da177e4
LT
1010 case FIOSETOWN:
1011 case SIOCSPGRP:
1012 err = -EFAULT;
1013 if (get_user(pid, (int __user *)argp))
1014 break;
1015 err = f_setown(sock->file, pid, 1);
1016 break;
1017 case FIOGETOWN:
1018 case SIOCGPGRP:
609d7fa9 1019 err = put_user(f_getown(sock->file),
89bddce5 1020 (int __user *)argp);
1da177e4
LT
1021 break;
1022 case SIOCGIFBR:
1023 case SIOCSIFBR:
1024 case SIOCBRADDBR:
1025 case SIOCBRDELBR:
1026 err = -ENOPKG;
1027 if (!br_ioctl_hook)
1028 request_module("bridge");
1029
4a3e2f71 1030 mutex_lock(&br_ioctl_mutex);
89bddce5 1031 if (br_ioctl_hook)
881d966b 1032 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1033 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1034 break;
1035 case SIOCGIFVLAN:
1036 case SIOCSIFVLAN:
1037 err = -ENOPKG;
1038 if (!vlan_ioctl_hook)
1039 request_module("8021q");
1040
4a3e2f71 1041 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1042 if (vlan_ioctl_hook)
881d966b 1043 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1044 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1045 break;
1da177e4
LT
1046 case SIOCADDDLCI:
1047 case SIOCDELDLCI:
1048 err = -ENOPKG;
1049 if (!dlci_ioctl_hook)
1050 request_module("dlci");
1051
7512cbf6
PE
1052 mutex_lock(&dlci_ioctl_mutex);
1053 if (dlci_ioctl_hook)
1da177e4 1054 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1055 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1056 break;
1057 default:
6b96018b 1058 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1059 break;
89bddce5 1060 }
1da177e4
LT
1061 return err;
1062}
1063
1064int sock_create_lite(int family, int type, int protocol, struct socket **res)
1065{
1066 int err;
1067 struct socket *sock = NULL;
89bddce5 1068
1da177e4
LT
1069 err = security_socket_create(family, type, protocol, 1);
1070 if (err)
1071 goto out;
1072
1073 sock = sock_alloc();
1074 if (!sock) {
1075 err = -ENOMEM;
1076 goto out;
1077 }
1078
1da177e4 1079 sock->type = type;
7420ed23
VY
1080 err = security_socket_post_create(sock, family, type, protocol, 1);
1081 if (err)
1082 goto out_release;
1083
1da177e4
LT
1084out:
1085 *res = sock;
1086 return err;
7420ed23
VY
1087out_release:
1088 sock_release(sock);
1089 sock = NULL;
1090 goto out;
1da177e4 1091}
c6d409cf 1092EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1093
1094/* No kernel lock held - perfect */
89bddce5 1095static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1096{
1097 struct socket *sock;
1098
1099 /*
89bddce5 1100 * We can't return errors to poll, so it's either yes or no.
1da177e4 1101 */
b69aee04 1102 sock = file->private_data;
1da177e4
LT
1103 return sock->ops->poll(file, sock, wait);
1104}
1105
89bddce5 1106static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1107{
b69aee04 1108 struct socket *sock = file->private_data;
1da177e4
LT
1109
1110 return sock->ops->mmap(file, sock, vma);
1111}
1112
20380731 1113static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1114{
1115 /*
89bddce5
SH
1116 * It was possible the inode is NULL we were
1117 * closing an unfinished socket.
1da177e4
LT
1118 */
1119
89bddce5 1120 if (!inode) {
1da177e4
LT
1121 printk(KERN_DEBUG "sock_close: NULL inode\n");
1122 return 0;
1123 }
1da177e4
LT
1124 sock_release(SOCKET_I(inode));
1125 return 0;
1126}
1127
1128/*
1129 * Update the socket async list
1130 *
1131 * Fasync_list locking strategy.
1132 *
1133 * 1. fasync_list is modified only under process context socket lock
1134 * i.e. under semaphore.
1135 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1136 * or under socket lock
1da177e4
LT
1137 */
1138
1139static int sock_fasync(int fd, struct file *filp, int on)
1140{
989a2979
ED
1141 struct socket *sock = filp->private_data;
1142 struct sock *sk = sock->sk;
eaefd110 1143 struct socket_wq *wq;
1da177e4 1144
989a2979 1145 if (sk == NULL)
1da177e4 1146 return -EINVAL;
1da177e4
LT
1147
1148 lock_sock(sk);
eaefd110
ED
1149 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1150 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1151
eaefd110 1152 if (!wq->fasync_list)
989a2979
ED
1153 sock_reset_flag(sk, SOCK_FASYNC);
1154 else
bcdce719 1155 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1156
989a2979 1157 release_sock(sk);
1da177e4
LT
1158 return 0;
1159}
1160
43815482 1161/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1162
1163int sock_wake_async(struct socket *sock, int how, int band)
1164{
43815482
ED
1165 struct socket_wq *wq;
1166
1167 if (!sock)
1168 return -1;
1169 rcu_read_lock();
1170 wq = rcu_dereference(sock->wq);
1171 if (!wq || !wq->fasync_list) {
1172 rcu_read_unlock();
1da177e4 1173 return -1;
43815482 1174 }
89bddce5 1175 switch (how) {
8d8ad9d7 1176 case SOCK_WAKE_WAITD:
1da177e4
LT
1177 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1178 break;
1179 goto call_kill;
8d8ad9d7 1180 case SOCK_WAKE_SPACE:
1da177e4
LT
1181 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1182 break;
1183 /* fall through */
8d8ad9d7 1184 case SOCK_WAKE_IO:
89bddce5 1185call_kill:
43815482 1186 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1187 break;
8d8ad9d7 1188 case SOCK_WAKE_URG:
43815482 1189 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1190 }
43815482 1191 rcu_read_unlock();
1da177e4
LT
1192 return 0;
1193}
c6d409cf 1194EXPORT_SYMBOL(sock_wake_async);
1da177e4 1195
721db93a 1196int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1197 struct socket **res, int kern)
1da177e4
LT
1198{
1199 int err;
1200 struct socket *sock;
55737fda 1201 const struct net_proto_family *pf;
1da177e4
LT
1202
1203 /*
89bddce5 1204 * Check protocol is in range
1da177e4
LT
1205 */
1206 if (family < 0 || family >= NPROTO)
1207 return -EAFNOSUPPORT;
1208 if (type < 0 || type >= SOCK_MAX)
1209 return -EINVAL;
1210
1211 /* Compatibility.
1212
1213 This uglymoron is moved from INET layer to here to avoid
1214 deadlock in module load.
1215 */
1216 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1217 static int warned;
1da177e4
LT
1218 if (!warned) {
1219 warned = 1;
89bddce5
SH
1220 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1221 current->comm);
1da177e4
LT
1222 }
1223 family = PF_PACKET;
1224 }
1225
1226 err = security_socket_create(family, type, protocol, kern);
1227 if (err)
1228 return err;
89bddce5 1229
55737fda
SH
1230 /*
1231 * Allocate the socket and allow the family to set things up. if
1232 * the protocol is 0, the family is instructed to select an appropriate
1233 * default.
1234 */
1235 sock = sock_alloc();
1236 if (!sock) {
1237 if (net_ratelimit())
1238 printk(KERN_WARNING "socket: no more sockets\n");
1239 return -ENFILE; /* Not exactly a match, but its the
1240 closest posix thing */
1241 }
1242
1243 sock->type = type;
1244
95a5afca 1245#ifdef CONFIG_MODULES
89bddce5
SH
1246 /* Attempt to load a protocol module if the find failed.
1247 *
1248 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1249 * requested real, full-featured networking support upon configuration.
1250 * Otherwise module support will break!
1251 */
190683a9 1252 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1253 request_module("net-pf-%d", family);
1da177e4
LT
1254#endif
1255
55737fda
SH
1256 rcu_read_lock();
1257 pf = rcu_dereference(net_families[family]);
1258 err = -EAFNOSUPPORT;
1259 if (!pf)
1260 goto out_release;
1da177e4
LT
1261
1262 /*
1263 * We will call the ->create function, that possibly is in a loadable
1264 * module, so we have to bump that loadable module refcnt first.
1265 */
55737fda 1266 if (!try_module_get(pf->owner))
1da177e4
LT
1267 goto out_release;
1268
55737fda
SH
1269 /* Now protected by module ref count */
1270 rcu_read_unlock();
1271
3f378b68 1272 err = pf->create(net, sock, protocol, kern);
55737fda 1273 if (err < 0)
1da177e4 1274 goto out_module_put;
a79af59e 1275
1da177e4
LT
1276 /*
1277 * Now to bump the refcnt of the [loadable] module that owns this
1278 * socket at sock_release time we decrement its refcnt.
1279 */
55737fda
SH
1280 if (!try_module_get(sock->ops->owner))
1281 goto out_module_busy;
1282
1da177e4
LT
1283 /*
1284 * Now that we're done with the ->create function, the [loadable]
1285 * module can have its refcnt decremented
1286 */
55737fda 1287 module_put(pf->owner);
7420ed23
VY
1288 err = security_socket_post_create(sock, family, type, protocol, kern);
1289 if (err)
3b185525 1290 goto out_sock_release;
55737fda 1291 *res = sock;
1da177e4 1292
55737fda
SH
1293 return 0;
1294
1295out_module_busy:
1296 err = -EAFNOSUPPORT;
1da177e4 1297out_module_put:
55737fda
SH
1298 sock->ops = NULL;
1299 module_put(pf->owner);
1300out_sock_release:
1da177e4 1301 sock_release(sock);
55737fda
SH
1302 return err;
1303
1304out_release:
1305 rcu_read_unlock();
1306 goto out_sock_release;
1da177e4 1307}
721db93a 1308EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1309
1310int sock_create(int family, int type, int protocol, struct socket **res)
1311{
1b8d7ae4 1312 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1313}
c6d409cf 1314EXPORT_SYMBOL(sock_create);
1da177e4
LT
1315
1316int sock_create_kern(int family, int type, int protocol, struct socket **res)
1317{
1b8d7ae4 1318 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1319}
c6d409cf 1320EXPORT_SYMBOL(sock_create_kern);
1da177e4 1321
3e0fa65f 1322SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1323{
1324 int retval;
1325 struct socket *sock;
a677a039
UD
1326 int flags;
1327
e38b36f3
UD
1328 /* Check the SOCK_* constants for consistency. */
1329 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1330 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1331 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1332 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1333
a677a039 1334 flags = type & ~SOCK_TYPE_MASK;
77d27200 1335 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1336 return -EINVAL;
1337 type &= SOCK_TYPE_MASK;
1da177e4 1338
aaca0bdc
UD
1339 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1340 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1341
1da177e4
LT
1342 retval = sock_create(family, type, protocol, &sock);
1343 if (retval < 0)
1344 goto out;
1345
77d27200 1346 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1347 if (retval < 0)
1348 goto out_release;
1349
1350out:
1351 /* It may be already another descriptor 8) Not kernel problem. */
1352 return retval;
1353
1354out_release:
1355 sock_release(sock);
1356 return retval;
1357}
1358
1359/*
1360 * Create a pair of connected sockets.
1361 */
1362
3e0fa65f
HC
1363SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1364 int __user *, usockvec)
1da177e4
LT
1365{
1366 struct socket *sock1, *sock2;
1367 int fd1, fd2, err;
db349509 1368 struct file *newfile1, *newfile2;
a677a039
UD
1369 int flags;
1370
1371 flags = type & ~SOCK_TYPE_MASK;
77d27200 1372 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1373 return -EINVAL;
1374 type &= SOCK_TYPE_MASK;
1da177e4 1375
aaca0bdc
UD
1376 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1377 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1378
1da177e4
LT
1379 /*
1380 * Obtain the first socket and check if the underlying protocol
1381 * supports the socketpair call.
1382 */
1383
1384 err = sock_create(family, type, protocol, &sock1);
1385 if (err < 0)
1386 goto out;
1387
1388 err = sock_create(family, type, protocol, &sock2);
1389 if (err < 0)
1390 goto out_release_1;
1391
1392 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1393 if (err < 0)
1da177e4
LT
1394 goto out_release_both;
1395
7cbe66b6 1396 fd1 = sock_alloc_file(sock1, &newfile1, flags);
bf3c23d1
DM
1397 if (unlikely(fd1 < 0)) {
1398 err = fd1;
db349509 1399 goto out_release_both;
bf3c23d1 1400 }
1da177e4 1401
7cbe66b6 1402 fd2 = sock_alloc_file(sock2, &newfile2, flags);
198de4d7
AV
1403 if (unlikely(fd2 < 0)) {
1404 err = fd2;
1405 fput(newfile1);
1406 put_unused_fd(fd1);
1407 sock_release(sock2);
1408 goto out;
db349509
AV
1409 }
1410
157cf649 1411 audit_fd_pair(fd1, fd2);
db349509
AV
1412 fd_install(fd1, newfile1);
1413 fd_install(fd2, newfile2);
1da177e4
LT
1414 /* fd1 and fd2 may be already another descriptors.
1415 * Not kernel problem.
1416 */
1417
89bddce5 1418 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1419 if (!err)
1420 err = put_user(fd2, &usockvec[1]);
1421 if (!err)
1422 return 0;
1423
1424 sys_close(fd2);
1425 sys_close(fd1);
1426 return err;
1427
1da177e4 1428out_release_both:
89bddce5 1429 sock_release(sock2);
1da177e4 1430out_release_1:
89bddce5 1431 sock_release(sock1);
1da177e4
LT
1432out:
1433 return err;
1434}
1435
1da177e4
LT
1436/*
1437 * Bind a name to a socket. Nothing much to do here since it's
1438 * the protocol's responsibility to handle the local address.
1439 *
1440 * We move the socket address to kernel space before we call
1441 * the protocol layer (having also checked the address is ok).
1442 */
1443
20f37034 1444SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1445{
1446 struct socket *sock;
230b1839 1447 struct sockaddr_storage address;
6cb153ca 1448 int err, fput_needed;
1da177e4 1449
89bddce5 1450 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1451 if (sock) {
230b1839 1452 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
89bddce5
SH
1453 if (err >= 0) {
1454 err = security_socket_bind(sock,
230b1839 1455 (struct sockaddr *)&address,
89bddce5 1456 addrlen);
6cb153ca
BL
1457 if (!err)
1458 err = sock->ops->bind(sock,
89bddce5 1459 (struct sockaddr *)
230b1839 1460 &address, addrlen);
1da177e4 1461 }
6cb153ca 1462 fput_light(sock->file, fput_needed);
89bddce5 1463 }
1da177e4
LT
1464 return err;
1465}
1466
1da177e4
LT
1467/*
1468 * Perform a listen. Basically, we allow the protocol to do anything
1469 * necessary for a listen, and if that works, we mark the socket as
1470 * ready for listening.
1471 */
1472
3e0fa65f 1473SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1474{
1475 struct socket *sock;
6cb153ca 1476 int err, fput_needed;
b8e1f9b5 1477 int somaxconn;
89bddce5
SH
1478
1479 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1480 if (sock) {
8efa6e93 1481 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
b8e1f9b5
PE
1482 if ((unsigned)backlog > somaxconn)
1483 backlog = somaxconn;
1da177e4
LT
1484
1485 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1486 if (!err)
1487 err = sock->ops->listen(sock, backlog);
1da177e4 1488
6cb153ca 1489 fput_light(sock->file, fput_needed);
1da177e4
LT
1490 }
1491 return err;
1492}
1493
1da177e4
LT
1494/*
1495 * For accept, we attempt to create a new socket, set up the link
1496 * with the client, wake up the client, then return the new
1497 * connected fd. We collect the address of the connector in kernel
1498 * space and move it to user at the very end. This is unclean because
1499 * we open the socket then return an error.
1500 *
1501 * 1003.1g adds the ability to recvmsg() to query connection pending
1502 * status to recvmsg. We need to add that support in a way thats
1503 * clean when we restucture accept also.
1504 */
1505
20f37034
HC
1506SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1507 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1508{
1509 struct socket *sock, *newsock;
39d8c1b6 1510 struct file *newfile;
6cb153ca 1511 int err, len, newfd, fput_needed;
230b1839 1512 struct sockaddr_storage address;
1da177e4 1513
77d27200 1514 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1515 return -EINVAL;
1516
1517 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1518 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1519
6cb153ca 1520 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1521 if (!sock)
1522 goto out;
1523
1524 err = -ENFILE;
c6d409cf
ED
1525 newsock = sock_alloc();
1526 if (!newsock)
1da177e4
LT
1527 goto out_put;
1528
1529 newsock->type = sock->type;
1530 newsock->ops = sock->ops;
1531
1da177e4
LT
1532 /*
1533 * We don't need try_module_get here, as the listening socket (sock)
1534 * has the protocol module (sock->ops->owner) held.
1535 */
1536 __module_get(newsock->ops->owner);
1537
7cbe66b6 1538 newfd = sock_alloc_file(newsock, &newfile, flags);
39d8c1b6
DM
1539 if (unlikely(newfd < 0)) {
1540 err = newfd;
9a1875e6
DM
1541 sock_release(newsock);
1542 goto out_put;
39d8c1b6
DM
1543 }
1544
a79af59e
FF
1545 err = security_socket_accept(sock, newsock);
1546 if (err)
39d8c1b6 1547 goto out_fd;
a79af59e 1548
1da177e4
LT
1549 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1550 if (err < 0)
39d8c1b6 1551 goto out_fd;
1da177e4
LT
1552
1553 if (upeer_sockaddr) {
230b1839 1554 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1555 &len, 2) < 0) {
1da177e4 1556 err = -ECONNABORTED;
39d8c1b6 1557 goto out_fd;
1da177e4 1558 }
230b1839
YH
1559 err = move_addr_to_user((struct sockaddr *)&address,
1560 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1561 if (err < 0)
39d8c1b6 1562 goto out_fd;
1da177e4
LT
1563 }
1564
1565 /* File flags are not inherited via accept() unlike another OSes. */
1566
39d8c1b6
DM
1567 fd_install(newfd, newfile);
1568 err = newfd;
1da177e4 1569
1da177e4 1570out_put:
6cb153ca 1571 fput_light(sock->file, fput_needed);
1da177e4
LT
1572out:
1573 return err;
39d8c1b6 1574out_fd:
9606a216 1575 fput(newfile);
39d8c1b6 1576 put_unused_fd(newfd);
1da177e4
LT
1577 goto out_put;
1578}
1579
20f37034
HC
1580SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1581 int __user *, upeer_addrlen)
aaca0bdc 1582{
de11defe 1583 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1584}
1585
1da177e4
LT
1586/*
1587 * Attempt to connect to a socket with the server address. The address
1588 * is in user space so we verify it is OK and move it to kernel space.
1589 *
1590 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1591 * break bindings
1592 *
1593 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1594 * other SEQPACKET protocols that take time to connect() as it doesn't
1595 * include the -EINPROGRESS status for such sockets.
1596 */
1597
20f37034
HC
1598SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1599 int, addrlen)
1da177e4
LT
1600{
1601 struct socket *sock;
230b1839 1602 struct sockaddr_storage address;
6cb153ca 1603 int err, fput_needed;
1da177e4 1604
6cb153ca 1605 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1606 if (!sock)
1607 goto out;
230b1839 1608 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
1da177e4
LT
1609 if (err < 0)
1610 goto out_put;
1611
89bddce5 1612 err =
230b1839 1613 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1614 if (err)
1615 goto out_put;
1616
230b1839 1617 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1618 sock->file->f_flags);
1619out_put:
6cb153ca 1620 fput_light(sock->file, fput_needed);
1da177e4
LT
1621out:
1622 return err;
1623}
1624
1625/*
1626 * Get the local address ('name') of a socket object. Move the obtained
1627 * name to user space.
1628 */
1629
20f37034
HC
1630SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1631 int __user *, usockaddr_len)
1da177e4
LT
1632{
1633 struct socket *sock;
230b1839 1634 struct sockaddr_storage address;
6cb153ca 1635 int len, err, fput_needed;
89bddce5 1636
6cb153ca 1637 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1638 if (!sock)
1639 goto out;
1640
1641 err = security_socket_getsockname(sock);
1642 if (err)
1643 goto out_put;
1644
230b1839 1645 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1646 if (err)
1647 goto out_put;
230b1839 1648 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1649
1650out_put:
6cb153ca 1651 fput_light(sock->file, fput_needed);
1da177e4
LT
1652out:
1653 return err;
1654}
1655
1656/*
1657 * Get the remote address ('name') of a socket object. Move the obtained
1658 * name to user space.
1659 */
1660
20f37034
HC
1661SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1662 int __user *, usockaddr_len)
1da177e4
LT
1663{
1664 struct socket *sock;
230b1839 1665 struct sockaddr_storage address;
6cb153ca 1666 int len, err, fput_needed;
1da177e4 1667
89bddce5
SH
1668 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1669 if (sock != NULL) {
1da177e4
LT
1670 err = security_socket_getpeername(sock);
1671 if (err) {
6cb153ca 1672 fput_light(sock->file, fput_needed);
1da177e4
LT
1673 return err;
1674 }
1675
89bddce5 1676 err =
230b1839 1677 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1678 1);
1da177e4 1679 if (!err)
230b1839 1680 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr,
89bddce5 1681 usockaddr_len);
6cb153ca 1682 fput_light(sock->file, fput_needed);
1da177e4
LT
1683 }
1684 return err;
1685}
1686
1687/*
1688 * Send a datagram to a given address. We move the address into kernel
1689 * space and check the user space data area is readable before invoking
1690 * the protocol.
1691 */
1692
3e0fa65f
HC
1693SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1694 unsigned, flags, struct sockaddr __user *, addr,
1695 int, addr_len)
1da177e4
LT
1696{
1697 struct socket *sock;
230b1839 1698 struct sockaddr_storage address;
1da177e4
LT
1699 int err;
1700 struct msghdr msg;
1701 struct iovec iov;
6cb153ca 1702 int fput_needed;
6cb153ca 1703
253eacc0
LT
1704 if (len > INT_MAX)
1705 len = INT_MAX;
de0fa95c
PE
1706 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1707 if (!sock)
4387ff75 1708 goto out;
6cb153ca 1709
89bddce5
SH
1710 iov.iov_base = buff;
1711 iov.iov_len = len;
1712 msg.msg_name = NULL;
1713 msg.msg_iov = &iov;
1714 msg.msg_iovlen = 1;
1715 msg.msg_control = NULL;
1716 msg.msg_controllen = 0;
1717 msg.msg_namelen = 0;
6cb153ca 1718 if (addr) {
230b1839 1719 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address);
1da177e4
LT
1720 if (err < 0)
1721 goto out_put;
230b1839 1722 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1723 msg.msg_namelen = addr_len;
1da177e4
LT
1724 }
1725 if (sock->file->f_flags & O_NONBLOCK)
1726 flags |= MSG_DONTWAIT;
1727 msg.msg_flags = flags;
1728 err = sock_sendmsg(sock, &msg, len);
1729
89bddce5 1730out_put:
de0fa95c 1731 fput_light(sock->file, fput_needed);
4387ff75 1732out:
1da177e4
LT
1733 return err;
1734}
1735
1736/*
89bddce5 1737 * Send a datagram down a socket.
1da177e4
LT
1738 */
1739
3e0fa65f
HC
1740SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
1741 unsigned, flags)
1da177e4
LT
1742{
1743 return sys_sendto(fd, buff, len, flags, NULL, 0);
1744}
1745
1746/*
89bddce5 1747 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1748 * sender. We verify the buffers are writable and if needed move the
1749 * sender address from kernel to user space.
1750 */
1751
3e0fa65f
HC
1752SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1753 unsigned, flags, struct sockaddr __user *, addr,
1754 int __user *, addr_len)
1da177e4
LT
1755{
1756 struct socket *sock;
1757 struct iovec iov;
1758 struct msghdr msg;
230b1839 1759 struct sockaddr_storage address;
89bddce5 1760 int err, err2;
6cb153ca
BL
1761 int fput_needed;
1762
253eacc0
LT
1763 if (size > INT_MAX)
1764 size = INT_MAX;
de0fa95c 1765 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1766 if (!sock)
de0fa95c 1767 goto out;
1da177e4 1768
89bddce5
SH
1769 msg.msg_control = NULL;
1770 msg.msg_controllen = 0;
1771 msg.msg_iovlen = 1;
1772 msg.msg_iov = &iov;
1773 iov.iov_len = size;
1774 iov.iov_base = ubuf;
230b1839
YH
1775 msg.msg_name = (struct sockaddr *)&address;
1776 msg.msg_namelen = sizeof(address);
1da177e4
LT
1777 if (sock->file->f_flags & O_NONBLOCK)
1778 flags |= MSG_DONTWAIT;
89bddce5 1779 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1780
89bddce5 1781 if (err >= 0 && addr != NULL) {
230b1839
YH
1782 err2 = move_addr_to_user((struct sockaddr *)&address,
1783 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1784 if (err2 < 0)
1785 err = err2;
1da177e4 1786 }
de0fa95c
PE
1787
1788 fput_light(sock->file, fput_needed);
4387ff75 1789out:
1da177e4
LT
1790 return err;
1791}
1792
1793/*
89bddce5 1794 * Receive a datagram from a socket.
1da177e4
LT
1795 */
1796
89bddce5
SH
1797asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1798 unsigned flags)
1da177e4
LT
1799{
1800 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1801}
1802
1803/*
1804 * Set a socket option. Because we don't know the option lengths we have
1805 * to pass the user mode parameter for the protocols to sort out.
1806 */
1807
20f37034
HC
1808SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1809 char __user *, optval, int, optlen)
1da177e4 1810{
6cb153ca 1811 int err, fput_needed;
1da177e4
LT
1812 struct socket *sock;
1813
1814 if (optlen < 0)
1815 return -EINVAL;
89bddce5
SH
1816
1817 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1818 if (sock != NULL) {
1819 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1820 if (err)
1821 goto out_put;
1da177e4
LT
1822
1823 if (level == SOL_SOCKET)
89bddce5
SH
1824 err =
1825 sock_setsockopt(sock, level, optname, optval,
1826 optlen);
1da177e4 1827 else
89bddce5
SH
1828 err =
1829 sock->ops->setsockopt(sock, level, optname, optval,
1830 optlen);
6cb153ca
BL
1831out_put:
1832 fput_light(sock->file, fput_needed);
1da177e4
LT
1833 }
1834 return err;
1835}
1836
1837/*
1838 * Get a socket option. Because we don't know the option lengths we have
1839 * to pass a user mode parameter for the protocols to sort out.
1840 */
1841
20f37034
HC
1842SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1843 char __user *, optval, int __user *, optlen)
1da177e4 1844{
6cb153ca 1845 int err, fput_needed;
1da177e4
LT
1846 struct socket *sock;
1847
89bddce5
SH
1848 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1849 if (sock != NULL) {
6cb153ca
BL
1850 err = security_socket_getsockopt(sock, level, optname);
1851 if (err)
1852 goto out_put;
1da177e4
LT
1853
1854 if (level == SOL_SOCKET)
89bddce5
SH
1855 err =
1856 sock_getsockopt(sock, level, optname, optval,
1857 optlen);
1da177e4 1858 else
89bddce5
SH
1859 err =
1860 sock->ops->getsockopt(sock, level, optname, optval,
1861 optlen);
6cb153ca
BL
1862out_put:
1863 fput_light(sock->file, fput_needed);
1da177e4
LT
1864 }
1865 return err;
1866}
1867
1da177e4
LT
1868/*
1869 * Shutdown a socket.
1870 */
1871
754fe8d2 1872SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1873{
6cb153ca 1874 int err, fput_needed;
1da177e4
LT
1875 struct socket *sock;
1876
89bddce5
SH
1877 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1878 if (sock != NULL) {
1da177e4 1879 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1880 if (!err)
1881 err = sock->ops->shutdown(sock, how);
1882 fput_light(sock->file, fput_needed);
1da177e4
LT
1883 }
1884 return err;
1885}
1886
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG(msg, member) picks &msg_compat->member when MSG_CMSG_COMPAT
 * is set in `flags' and &msg->member otherwise.  The expansion site must
 * therefore have `flags', `<msg>' and `<msg>_compat' in scope.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1893
c71d8ebe
TH
1894struct used_address {
1895 struct sockaddr_storage name;
1896 unsigned int name_len;
1897};
1898
228e548e 1899static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
c71d8ebe
TH
1900 struct msghdr *msg_sys, unsigned flags,
1901 struct used_address *used_address)
1da177e4 1902{
89bddce5
SH
1903 struct compat_msghdr __user *msg_compat =
1904 (struct compat_msghdr __user *)msg;
230b1839 1905 struct sockaddr_storage address;
1da177e4 1906 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1907 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1908 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1909 /* 20 is size of ipv6_pktinfo */
1da177e4 1910 unsigned char *ctl_buf = ctl;
1da177e4 1911 int err, ctl_len, iov_size, total_len;
89bddce5 1912
1da177e4
LT
1913 err = -EFAULT;
1914 if (MSG_CMSG_COMPAT & flags) {
228e548e 1915 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 1916 return -EFAULT;
228e548e 1917 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1918 return -EFAULT;
1919
1da177e4
LT
1920 /* do not move before msg_sys is valid */
1921 err = -EMSGSIZE;
228e548e
AB
1922 if (msg_sys->msg_iovlen > UIO_MAXIOV)
1923 goto out;
1da177e4 1924
89bddce5 1925 /* Check whether to allocate the iovec area */
1da177e4 1926 err = -ENOMEM;
228e548e
AB
1927 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
1928 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1da177e4
LT
1929 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1930 if (!iov)
228e548e 1931 goto out;
1da177e4
LT
1932 }
1933
1934 /* This will also move the address data into kernel space */
1935 if (MSG_CMSG_COMPAT & flags) {
228e548e 1936 err = verify_compat_iovec(msg_sys, iov,
230b1839
YH
1937 (struct sockaddr *)&address,
1938 VERIFY_READ);
1da177e4 1939 } else
228e548e 1940 err = verify_iovec(msg_sys, iov,
230b1839
YH
1941 (struct sockaddr *)&address,
1942 VERIFY_READ);
89bddce5 1943 if (err < 0)
1da177e4
LT
1944 goto out_freeiov;
1945 total_len = err;
1946
1947 err = -ENOBUFS;
1948
228e548e 1949 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1950 goto out_freeiov;
228e548e 1951 ctl_len = msg_sys->msg_controllen;
1da177e4 1952 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1953 err =
228e548e 1954 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1955 sizeof(ctl));
1da177e4
LT
1956 if (err)
1957 goto out_freeiov;
228e548e
AB
1958 ctl_buf = msg_sys->msg_control;
1959 ctl_len = msg_sys->msg_controllen;
1da177e4 1960 } else if (ctl_len) {
89bddce5 1961 if (ctl_len > sizeof(ctl)) {
1da177e4 1962 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1963 if (ctl_buf == NULL)
1da177e4
LT
1964 goto out_freeiov;
1965 }
1966 err = -EFAULT;
1967 /*
228e548e 1968 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1969 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1970 * checking falls down on this.
1971 */
fb8621bb 1972 if (copy_from_user(ctl_buf,
228e548e 1973 (void __user __force *)msg_sys->msg_control,
89bddce5 1974 ctl_len))
1da177e4 1975 goto out_freectl;
228e548e 1976 msg_sys->msg_control = ctl_buf;
1da177e4 1977 }
228e548e 1978 msg_sys->msg_flags = flags;
1da177e4
LT
1979
1980 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1981 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1982 /*
1983 * If this is sendmmsg() and current destination address is same as
1984 * previously succeeded address, omit asking LSM's decision.
1985 * used_address->name_len is initialized to UINT_MAX so that the first
1986 * destination address never matches.
1987 */
bc909d9d
MD
1988 if (used_address && msg_sys->msg_name &&
1989 used_address->name_len == msg_sys->msg_namelen &&
1990 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
1991 used_address->name_len)) {
1992 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
1993 goto out_freectl;
1994 }
1995 err = sock_sendmsg(sock, msg_sys, total_len);
1996 /*
1997 * If this is sendmmsg() and sending to current destination address was
1998 * successful, remember it.
1999 */
2000 if (used_address && err >= 0) {
2001 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2002 if (msg_sys->msg_name)
2003 memcpy(&used_address->name, msg_sys->msg_name,
2004 used_address->name_len);
c71d8ebe 2005 }
1da177e4
LT
2006
2007out_freectl:
89bddce5 2008 if (ctl_buf != ctl)
1da177e4
LT
2009 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2010out_freeiov:
2011 if (iov != iovstack)
2012 sock_kfree_s(sock->sk, iov, iov_size);
228e548e
AB
2013out:
2014 return err;
2015}
2016
2017/*
2018 * BSD sendmsg interface
2019 */
2020
2021SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
2022{
2023 int fput_needed, err;
2024 struct msghdr msg_sys;
2025 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2026
2027 if (!sock)
2028 goto out;
2029
c71d8ebe 2030 err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2031
6cb153ca 2032 fput_light(sock->file, fput_needed);
89bddce5 2033out:
1da177e4
LT
2034 return err;
2035}
2036
228e548e
AB
2037/*
2038 * Linux sendmmsg interface
2039 */
2040
2041int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2042 unsigned int flags)
2043{
2044 int fput_needed, err, datagrams;
2045 struct socket *sock;
2046 struct mmsghdr __user *entry;
2047 struct compat_mmsghdr __user *compat_entry;
2048 struct msghdr msg_sys;
c71d8ebe 2049 struct used_address used_address;
228e548e 2050
98382f41
AB
2051 if (vlen > UIO_MAXIOV)
2052 vlen = UIO_MAXIOV;
228e548e
AB
2053
2054 datagrams = 0;
2055
2056 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2057 if (!sock)
2058 return err;
2059
c71d8ebe 2060 used_address.name_len = UINT_MAX;
228e548e
AB
2061 entry = mmsg;
2062 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2063 err = 0;
228e548e
AB
2064
2065 while (datagrams < vlen) {
228e548e
AB
2066 if (MSG_CMSG_COMPAT & flags) {
2067 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
c71d8ebe 2068 &msg_sys, flags, &used_address);
228e548e
AB
2069 if (err < 0)
2070 break;
2071 err = __put_user(err, &compat_entry->msg_len);
2072 ++compat_entry;
2073 } else {
2074 err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
c71d8ebe 2075 &msg_sys, flags, &used_address);
228e548e
AB
2076 if (err < 0)
2077 break;
2078 err = put_user(err, &entry->msg_len);
2079 ++entry;
2080 }
2081
2082 if (err)
2083 break;
2084 ++datagrams;
2085 }
2086
228e548e
AB
2087 fput_light(sock->file, fput_needed);
2088
728ffb86
AB
2089 /* We only return an error if no datagrams were able to be sent */
2090 if (datagrams != 0)
228e548e
AB
2091 return datagrams;
2092
228e548e
AB
2093 return err;
2094}
2095
2096SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2097 unsigned int, vlen, unsigned int, flags)
2098{
2099 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2100}
2101
a2e27255
ACM
2102static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
2103 struct msghdr *msg_sys, unsigned flags, int nosec)
1da177e4 2104{
89bddce5
SH
2105 struct compat_msghdr __user *msg_compat =
2106 (struct compat_msghdr __user *)msg;
1da177e4 2107 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2108 struct iovec *iov = iovstack;
1da177e4
LT
2109 unsigned long cmsg_ptr;
2110 int err, iov_size, total_len, len;
2111
2112 /* kernel mode address */
230b1839 2113 struct sockaddr_storage addr;
1da177e4
LT
2114
2115 /* user mode address pointers */
2116 struct sockaddr __user *uaddr;
2117 int __user *uaddr_len;
89bddce5 2118
1da177e4 2119 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2120 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2121 return -EFAULT;
c6d409cf 2122 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
89bddce5 2123 return -EFAULT;
1da177e4 2124
1da177e4 2125 err = -EMSGSIZE;
a2e27255
ACM
2126 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2127 goto out;
89bddce5
SH
2128
2129 /* Check whether to allocate the iovec area */
1da177e4 2130 err = -ENOMEM;
a2e27255
ACM
2131 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
2132 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1da177e4
LT
2133 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
2134 if (!iov)
a2e27255 2135 goto out;
1da177e4
LT
2136 }
2137
2138 /*
89bddce5
SH
2139 * Save the user-mode address (verify_iovec will change the
2140 * kernel msghdr to use the kernel address space)
1da177e4 2141 */
89bddce5 2142
a2e27255 2143 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4
LT
2144 uaddr_len = COMPAT_NAMELEN(msg);
2145 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2146 err = verify_compat_iovec(msg_sys, iov,
230b1839
YH
2147 (struct sockaddr *)&addr,
2148 VERIFY_WRITE);
1da177e4 2149 } else
a2e27255 2150 err = verify_iovec(msg_sys, iov,
230b1839
YH
2151 (struct sockaddr *)&addr,
2152 VERIFY_WRITE);
1da177e4
LT
2153 if (err < 0)
2154 goto out_freeiov;
89bddce5 2155 total_len = err;
1da177e4 2156
a2e27255
ACM
2157 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2158 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2159
1da177e4
LT
2160 if (sock->file->f_flags & O_NONBLOCK)
2161 flags |= MSG_DONTWAIT;
a2e27255
ACM
2162 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2163 total_len, flags);
1da177e4
LT
2164 if (err < 0)
2165 goto out_freeiov;
2166 len = err;
2167
2168 if (uaddr != NULL) {
230b1839 2169 err = move_addr_to_user((struct sockaddr *)&addr,
a2e27255 2170 msg_sys->msg_namelen, uaddr,
89bddce5 2171 uaddr_len);
1da177e4
LT
2172 if (err < 0)
2173 goto out_freeiov;
2174 }
a2e27255 2175 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2176 COMPAT_FLAGS(msg));
1da177e4
LT
2177 if (err)
2178 goto out_freeiov;
2179 if (MSG_CMSG_COMPAT & flags)
a2e27255 2180 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2181 &msg_compat->msg_controllen);
2182 else
a2e27255 2183 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2184 &msg->msg_controllen);
2185 if (err)
2186 goto out_freeiov;
2187 err = len;
2188
2189out_freeiov:
2190 if (iov != iovstack)
2191 sock_kfree_s(sock->sk, iov, iov_size);
a2e27255
ACM
2192out:
2193 return err;
2194}
2195
2196/*
2197 * BSD recvmsg interface
2198 */
2199
2200SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2201 unsigned int, flags)
2202{
2203 int fput_needed, err;
2204 struct msghdr msg_sys;
2205 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2206
2207 if (!sock)
2208 goto out;
2209
2210 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2211
6cb153ca 2212 fput_light(sock->file, fput_needed);
1da177e4
LT
2213out:
2214 return err;
2215}
2216
a2e27255
ACM
2217/*
2218 * Linux recvmmsg interface
2219 */
2220
2221int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2222 unsigned int flags, struct timespec *timeout)
2223{
2224 int fput_needed, err, datagrams;
2225 struct socket *sock;
2226 struct mmsghdr __user *entry;
d7256d0e 2227 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2228 struct msghdr msg_sys;
2229 struct timespec end_time;
2230
2231 if (timeout &&
2232 poll_select_set_timeout(&end_time, timeout->tv_sec,
2233 timeout->tv_nsec))
2234 return -EINVAL;
2235
2236 datagrams = 0;
2237
2238 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2239 if (!sock)
2240 return err;
2241
2242 err = sock_error(sock->sk);
2243 if (err)
2244 goto out_put;
2245
2246 entry = mmsg;
d7256d0e 2247 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2248
2249 while (datagrams < vlen) {
2250 /*
2251 * No need to ask LSM for more than the first datagram.
2252 */
d7256d0e
JMG
2253 if (MSG_CMSG_COMPAT & flags) {
2254 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
b9eb8b87
AB
2255 &msg_sys, flags & ~MSG_WAITFORONE,
2256 datagrams);
d7256d0e
JMG
2257 if (err < 0)
2258 break;
2259 err = __put_user(err, &compat_entry->msg_len);
2260 ++compat_entry;
2261 } else {
2262 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
b9eb8b87
AB
2263 &msg_sys, flags & ~MSG_WAITFORONE,
2264 datagrams);
d7256d0e
JMG
2265 if (err < 0)
2266 break;
2267 err = put_user(err, &entry->msg_len);
2268 ++entry;
2269 }
2270
a2e27255
ACM
2271 if (err)
2272 break;
a2e27255
ACM
2273 ++datagrams;
2274
71c5c159
BB
2275 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2276 if (flags & MSG_WAITFORONE)
2277 flags |= MSG_DONTWAIT;
2278
a2e27255
ACM
2279 if (timeout) {
2280 ktime_get_ts(timeout);
2281 *timeout = timespec_sub(end_time, *timeout);
2282 if (timeout->tv_sec < 0) {
2283 timeout->tv_sec = timeout->tv_nsec = 0;
2284 break;
2285 }
2286
2287 /* Timeout, return less than vlen datagrams */
2288 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2289 break;
2290 }
2291
2292 /* Out of band data, return right away */
2293 if (msg_sys.msg_flags & MSG_OOB)
2294 break;
2295 }
2296
2297out_put:
2298 fput_light(sock->file, fput_needed);
1da177e4 2299
a2e27255
ACM
2300 if (err == 0)
2301 return datagrams;
2302
2303 if (datagrams != 0) {
2304 /*
2305 * We may return less entries than requested (vlen) if the
2306 * sock is non block and there aren't enough datagrams...
2307 */
2308 if (err != -EAGAIN) {
2309 /*
2310 * ... or if recvmsg returns an error after we
2311 * received some datagrams, where we record the
2312 * error to return on the next call or if the
2313 * app asks about it using getsockopt(SO_ERROR).
2314 */
2315 sock->sk->sk_err = -err;
2316 }
2317
2318 return datagrams;
2319 }
2320
2321 return err;
2322}
2323
2324SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2325 unsigned int, vlen, unsigned int, flags,
2326 struct timespec __user *, timeout)
2327{
2328 int datagrams;
2329 struct timespec timeout_sys;
2330
2331 if (!timeout)
2332 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2333
2334 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2335 return -EFAULT;
2336
2337 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2338
2339 if (datagrams > 0 &&
2340 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2341 datagrams = -EFAULT;
2342
2343 return datagrams;
2344}
2345
2346#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by the
 * socketcall call number.  Slot 0 is a placeholder: sys_socketcall
 * rejects call numbers below 1 before consulting this table.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	[0]  = AL(0),
	[1]  = AL(3),
	[2]  = AL(3),
	[3]  = AL(3),
	[4]  = AL(2),
	[5]  = AL(3),
	[6]  = AL(3),
	[7]  = AL(3),
	[8]  = AL(4),
	[9]  = AL(4),
	[10] = AL(4),
	[11] = AL(6),
	[12] = AL(6),
	[13] = AL(2),
	[14] = AL(5),
	[15] = AL(5),
	[16] = AL(3),
	[17] = AL(3),
	[18] = AL(4),
	[19] = AL(5),
	[20] = AL(4),
};

#undef AL
2357
2358/*
89bddce5 2359 * System call vectors.
1da177e4
LT
2360 *
2361 * Argument checking cleaned up. Saved 20% in size.
2362 * This function doesn't need to set the kernel lock because
89bddce5 2363 * it is set by the callees.
1da177e4
LT
2364 */
2365
3e0fa65f 2366SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4
LT
2367{
2368 unsigned long a[6];
89bddce5 2369 unsigned long a0, a1;
1da177e4 2370 int err;
47379052 2371 unsigned int len;
1da177e4 2372
228e548e 2373 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2374 return -EINVAL;
2375
47379052
AV
2376 len = nargs[call];
2377 if (len > sizeof(a))
2378 return -EINVAL;
2379
1da177e4 2380 /* copy_from_user should be SMP safe. */
47379052 2381 if (copy_from_user(a, args, len))
1da177e4 2382 return -EFAULT;
3ec3b2fb 2383
f3298dc4 2384 audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb 2385
89bddce5
SH
2386 a0 = a[0];
2387 a1 = a[1];
2388
2389 switch (call) {
2390 case SYS_SOCKET:
2391 err = sys_socket(a0, a1, a[2]);
2392 break;
2393 case SYS_BIND:
2394 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2395 break;
2396 case SYS_CONNECT:
2397 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2398 break;
2399 case SYS_LISTEN:
2400 err = sys_listen(a0, a1);
2401 break;
2402 case SYS_ACCEPT:
de11defe
UD
2403 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2404 (int __user *)a[2], 0);
89bddce5
SH
2405 break;
2406 case SYS_GETSOCKNAME:
2407 err =
2408 sys_getsockname(a0, (struct sockaddr __user *)a1,
2409 (int __user *)a[2]);
2410 break;
2411 case SYS_GETPEERNAME:
2412 err =
2413 sys_getpeername(a0, (struct sockaddr __user *)a1,
2414 (int __user *)a[2]);
2415 break;
2416 case SYS_SOCKETPAIR:
2417 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2418 break;
2419 case SYS_SEND:
2420 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2421 break;
2422 case SYS_SENDTO:
2423 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2424 (struct sockaddr __user *)a[4], a[5]);
2425 break;
2426 case SYS_RECV:
2427 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2428 break;
2429 case SYS_RECVFROM:
2430 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2431 (struct sockaddr __user *)a[4],
2432 (int __user *)a[5]);
2433 break;
2434 case SYS_SHUTDOWN:
2435 err = sys_shutdown(a0, a1);
2436 break;
2437 case SYS_SETSOCKOPT:
2438 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2439 break;
2440 case SYS_GETSOCKOPT:
2441 err =
2442 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2443 (int __user *)a[4]);
2444 break;
2445 case SYS_SENDMSG:
2446 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2447 break;
228e548e
AB
2448 case SYS_SENDMMSG:
2449 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2450 break;
89bddce5
SH
2451 case SYS_RECVMSG:
2452 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2453 break;
a2e27255
ACM
2454 case SYS_RECVMMSG:
2455 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2456 (struct timespec __user *)a[4]);
2457 break;
de11defe
UD
2458 case SYS_ACCEPT4:
2459 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2460 (int __user *)a[2], a[3]);
aaca0bdc 2461 break;
89bddce5
SH
2462 default:
2463 err = -EINVAL;
2464 break;
1da177e4
LT
2465 }
2466 return err;
2467}
2468
89bddce5 2469#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2470
55737fda
SH
2471/**
2472 * sock_register - add a socket protocol handler
2473 * @ops: description of protocol
2474 *
1da177e4
LT
2475 * This function is called by a protocol handler that wants to
2476 * advertise its address family, and have it linked into the
55737fda
SH
2477 * socket interface. The value ops->family coresponds to the
2478 * socket system call protocol family.
1da177e4 2479 */
f0fd27d4 2480int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2481{
2482 int err;
2483
2484 if (ops->family >= NPROTO) {
89bddce5
SH
2485 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2486 NPROTO);
1da177e4
LT
2487 return -ENOBUFS;
2488 }
55737fda
SH
2489
2490 spin_lock(&net_family_lock);
190683a9
ED
2491 if (rcu_dereference_protected(net_families[ops->family],
2492 lockdep_is_held(&net_family_lock)))
55737fda
SH
2493 err = -EEXIST;
2494 else {
cf778b00 2495 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2496 err = 0;
2497 }
55737fda
SH
2498 spin_unlock(&net_family_lock);
2499
89bddce5 2500 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2501 return err;
2502}
c6d409cf 2503EXPORT_SYMBOL(sock_register);
1da177e4 2504
55737fda
SH
2505/**
2506 * sock_unregister - remove a protocol handler
2507 * @family: protocol family to remove
2508 *
1da177e4
LT
2509 * This function is called by a protocol handler that wants to
2510 * remove its address family, and have it unlinked from the
55737fda
SH
2511 * new socket creation.
2512 *
2513 * If protocol handler is a module, then it can use module reference
2514 * counts to protect against new references. If protocol handler is not
2515 * a module then it needs to provide its own protection in
2516 * the ops->create routine.
1da177e4 2517 */
f0fd27d4 2518void sock_unregister(int family)
1da177e4 2519{
f0fd27d4 2520 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2521
55737fda 2522 spin_lock(&net_family_lock);
a9b3cd7f 2523 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2524 spin_unlock(&net_family_lock);
2525
2526 synchronize_rcu();
2527
89bddce5 2528 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4 2529}
c6d409cf 2530EXPORT_SYMBOL(sock_unregister);
1da177e4 2531
77d76ea3 2532static int __init sock_init(void)
1da177e4 2533{
b3e19d92
NP
2534 int err;
2535
1da177e4 2536 /*
89bddce5 2537 * Initialize sock SLAB cache.
1da177e4 2538 */
89bddce5 2539
1da177e4
LT
2540 sk_init();
2541
1da177e4 2542 /*
89bddce5 2543 * Initialize skbuff SLAB cache
1da177e4
LT
2544 */
2545 skb_init();
1da177e4
LT
2546
2547 /*
89bddce5 2548 * Initialize the protocols module.
1da177e4
LT
2549 */
2550
2551 init_inodecache();
b3e19d92
NP
2552
2553 err = register_filesystem(&sock_fs_type);
2554 if (err)
2555 goto out_fs;
1da177e4 2556 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2557 if (IS_ERR(sock_mnt)) {
2558 err = PTR_ERR(sock_mnt);
2559 goto out_mount;
2560 }
77d76ea3
AK
2561
2562 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2563 */
2564
2565#ifdef CONFIG_NETFILTER
2566 netfilter_init();
2567#endif
cbeb321a 2568
c1f19b51
RC
2569#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2570 skb_timestamping_init();
2571#endif
2572
b3e19d92
NP
2573out:
2574 return err;
2575
2576out_mount:
2577 unregister_filesystem(&sock_fs_type);
2578out_fs:
2579 goto out;
1da177e4
LT
2580}
2581
77d76ea3
AK
2582core_initcall(sock_init); /* early initcall */
2583
1da177e4
LT
2584#ifdef CONFIG_PROC_FS
2585void socket_seq_show(struct seq_file *seq)
2586{
2587 int cpu;
2588 int counter = 0;
2589
6f912042 2590 for_each_possible_cpu(cpu)
89bddce5 2591 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2592
2593 /* It can be negative, by the way. 8) */
2594 if (counter < 0)
2595 counter = 0;
2596
2597 seq_printf(seq, "sockets: used %d\n", counter);
2598}
89bddce5 2599#endif /* CONFIG_PROC_FS */
1da177e4 2600
89bbfc95 2601#ifdef CONFIG_COMPAT
6b96018b
AB
2602static int do_siocgstamp(struct net *net, struct socket *sock,
2603 unsigned int cmd, struct compat_timeval __user *up)
7a229387 2604{
7a229387
AB
2605 mm_segment_t old_fs = get_fs();
2606 struct timeval ktv;
2607 int err;
2608
2609 set_fs(KERNEL_DS);
6b96018b 2610 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387
AB
2611 set_fs(old_fs);
2612 if (!err) {
2613 err = put_user(ktv.tv_sec, &up->tv_sec);
2614 err |= __put_user(ktv.tv_usec, &up->tv_usec);
2615 }
2616 return err;
2617}
2618
6b96018b
AB
2619static int do_siocgstampns(struct net *net, struct socket *sock,
2620 unsigned int cmd, struct compat_timespec __user *up)
7a229387 2621{
7a229387
AB
2622 mm_segment_t old_fs = get_fs();
2623 struct timespec kts;
2624 int err;
2625
2626 set_fs(KERNEL_DS);
6b96018b 2627 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387
AB
2628 set_fs(old_fs);
2629 if (!err) {
2630 err = put_user(kts.tv_sec, &up->tv_sec);
2631 err |= __put_user(kts.tv_nsec, &up->tv_nsec);
2632 }
2633 return err;
2634}
2635
6b96018b 2636static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2637{
2638 struct ifreq __user *uifr;
2639 int err;
2640
2641 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2642 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2643 return -EFAULT;
2644
6b96018b 2645 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2646 if (err)
2647 return err;
2648
6b96018b 2649 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2650 return -EFAULT;
2651
2652 return 0;
2653}
2654
6b96018b 2655static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2656{
6b96018b 2657 struct compat_ifconf ifc32;
7a229387
AB
2658 struct ifconf ifc;
2659 struct ifconf __user *uifc;
6b96018b 2660 struct compat_ifreq __user *ifr32;
7a229387
AB
2661 struct ifreq __user *ifr;
2662 unsigned int i, j;
2663 int err;
2664
6b96018b 2665 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2666 return -EFAULT;
2667
2668 if (ifc32.ifcbuf == 0) {
2669 ifc32.ifc_len = 0;
2670 ifc.ifc_len = 0;
2671 ifc.ifc_req = NULL;
2672 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2673 } else {
c6d409cf
ED
2674 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2675 sizeof(struct ifreq);
7a229387
AB
2676 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2677 ifc.ifc_len = len;
2678 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2679 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2680 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2681 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2682 return -EFAULT;
2683 ifr++;
2684 ifr32++;
2685 }
2686 }
2687 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2688 return -EFAULT;
2689
6b96018b 2690 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2691 if (err)
2692 return err;
2693
2694 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2695 return -EFAULT;
2696
2697 ifr = ifc.ifc_req;
2698 ifr32 = compat_ptr(ifc32.ifcbuf);
2699 for (i = 0, j = 0;
c6d409cf
ED
2700 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2701 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2702 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2703 return -EFAULT;
2704 ifr32++;
2705 ifr++;
2706 }
2707
2708 if (ifc32.ifcbuf == 0) {
2709 /* Translate from 64-bit structure multiple to
2710 * a 32-bit one.
2711 */
2712 i = ifc.ifc_len;
6b96018b 2713 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2714 ifc32.ifc_len = i;
2715 } else {
2716 ifc32.ifc_len = i;
2717 }
6b96018b 2718 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2719 return -EFAULT;
2720
2721 return 0;
2722}
2723
6b96018b 2724static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2725{
3a7da39d
BH
2726 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2727 bool convert_in = false, convert_out = false;
2728 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2729 struct ethtool_rxnfc __user *rxnfc;
7a229387 2730 struct ifreq __user *ifr;
3a7da39d
BH
2731 u32 rule_cnt = 0, actual_rule_cnt;
2732 u32 ethcmd;
7a229387 2733 u32 data;
3a7da39d 2734 int ret;
7a229387 2735
3a7da39d
BH
2736 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2737 return -EFAULT;
7a229387 2738
3a7da39d
BH
2739 compat_rxnfc = compat_ptr(data);
2740
2741 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2742 return -EFAULT;
2743
3a7da39d
BH
2744 /* Most ethtool structures are defined without padding.
2745 * Unfortunately struct ethtool_rxnfc is an exception.
2746 */
2747 switch (ethcmd) {
2748 default:
2749 break;
2750 case ETHTOOL_GRXCLSRLALL:
2751 /* Buffer size is variable */
2752 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2753 return -EFAULT;
2754 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2755 return -ENOMEM;
2756 buf_size += rule_cnt * sizeof(u32);
2757 /* fall through */
2758 case ETHTOOL_GRXRINGS:
2759 case ETHTOOL_GRXCLSRLCNT:
2760 case ETHTOOL_GRXCLSRULE:
55664f32 2761 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2762 convert_out = true;
2763 /* fall through */
2764 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2765 buf_size += sizeof(struct ethtool_rxnfc);
2766 convert_in = true;
2767 break;
2768 }
2769
2770 ifr = compat_alloc_user_space(buf_size);
2771 rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8);
2772
2773 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2774 return -EFAULT;
2775
3a7da39d
BH
2776 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2777 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2778 return -EFAULT;
2779
3a7da39d 2780 if (convert_in) {
127fe533 2781 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2782 * fs.ring_cookie and at the end of fs, but nowhere else.
2783 */
127fe533
AD
2784 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2785 sizeof(compat_rxnfc->fs.m_ext) !=
2786 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2787 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2788 BUILD_BUG_ON(
2789 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2790 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2791 offsetof(struct ethtool_rxnfc, fs.location) -
2792 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2793
2794 if (copy_in_user(rxnfc, compat_rxnfc,
127fe533 2795 (void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2796 (void *)rxnfc) ||
2797 copy_in_user(&rxnfc->fs.ring_cookie,
2798 &compat_rxnfc->fs.ring_cookie,
2799 (void *)(&rxnfc->fs.location + 1) -
2800 (void *)&rxnfc->fs.ring_cookie) ||
2801 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2802 sizeof(rxnfc->rule_cnt)))
2803 return -EFAULT;
2804 }
2805
2806 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2807 if (ret)
2808 return ret;
2809
2810 if (convert_out) {
2811 if (copy_in_user(compat_rxnfc, rxnfc,
127fe533 2812 (const void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2813 (const void *)rxnfc) ||
2814 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2815 &rxnfc->fs.ring_cookie,
2816 (const void *)(&rxnfc->fs.location + 1) -
2817 (const void *)&rxnfc->fs.ring_cookie) ||
2818 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2819 sizeof(rxnfc->rule_cnt)))
2820 return -EFAULT;
2821
2822 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2823 /* As an optimisation, we only copy the actual
2824 * number of rules that the underlying
2825 * function returned. Since Mallory might
2826 * change the rule count in user memory, we
2827 * check that it is less than the rule count
2828 * originally given (as the user buffer size),
2829 * which has been range-checked.
2830 */
2831 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2832 return -EFAULT;
2833 if (actual_rule_cnt < rule_cnt)
2834 rule_cnt = actual_rule_cnt;
2835 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2836 &rxnfc->rule_locs[0],
2837 rule_cnt * sizeof(u32)))
2838 return -EFAULT;
2839 }
2840 }
2841
2842 return 0;
7a229387
AB
2843}
2844
7a50a240
AB
2845static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2846{
2847 void __user *uptr;
2848 compat_uptr_t uptr32;
2849 struct ifreq __user *uifr;
2850
c6d409cf 2851 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2852 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2853 return -EFAULT;
2854
2855 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2856 return -EFAULT;
2857
2858 uptr = compat_ptr(uptr32);
2859
2860 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2861 return -EFAULT;
2862
2863 return dev_ioctl(net, SIOCWANDEV, uifr);
2864}
2865
6b96018b
AB
2866static int bond_ioctl(struct net *net, unsigned int cmd,
2867 struct compat_ifreq __user *ifr32)
7a229387
AB
2868{
2869 struct ifreq kifr;
2870 struct ifreq __user *uifr;
7a229387
AB
2871 mm_segment_t old_fs;
2872 int err;
2873 u32 data;
2874 void __user *datap;
2875
2876 switch (cmd) {
2877 case SIOCBONDENSLAVE:
2878 case SIOCBONDRELEASE:
2879 case SIOCBONDSETHWADDR:
2880 case SIOCBONDCHANGEACTIVE:
6b96018b 2881 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2882 return -EFAULT;
2883
2884 old_fs = get_fs();
c6d409cf 2885 set_fs(KERNEL_DS);
c3f52ae6 2886 err = dev_ioctl(net, cmd,
2887 (struct ifreq __user __force *) &kifr);
c6d409cf 2888 set_fs(old_fs);
7a229387
AB
2889
2890 return err;
2891 case SIOCBONDSLAVEINFOQUERY:
2892 case SIOCBONDINFOQUERY:
2893 uifr = compat_alloc_user_space(sizeof(*uifr));
2894 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2895 return -EFAULT;
2896
2897 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2898 return -EFAULT;
2899
2900 datap = compat_ptr(data);
2901 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2902 return -EFAULT;
2903
6b96018b 2904 return dev_ioctl(net, cmd, uifr);
7a229387 2905 default:
07d106d0 2906 return -ENOIOCTLCMD;
ccbd6a5a 2907 }
7a229387
AB
2908}
2909
6b96018b
AB
2910static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2911 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2912{
2913 struct ifreq __user *u_ifreq64;
7a229387
AB
2914 char tmp_buf[IFNAMSIZ];
2915 void __user *data64;
2916 u32 data32;
2917
2918 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2919 IFNAMSIZ))
2920 return -EFAULT;
2921 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2922 return -EFAULT;
2923 data64 = compat_ptr(data32);
2924
2925 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2926
2927 /* Don't check these user accesses, just let that get trapped
2928 * in the ioctl handler instead.
2929 */
2930 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2931 IFNAMSIZ))
2932 return -EFAULT;
2933 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
2934 return -EFAULT;
2935
6b96018b 2936 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2937}
2938
6b96018b
AB
2939static int dev_ifsioc(struct net *net, struct socket *sock,
2940 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2941{
a2116ed2 2942 struct ifreq __user *uifr;
7a229387
AB
2943 int err;
2944
a2116ed2
AB
2945 uifr = compat_alloc_user_space(sizeof(*uifr));
2946 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2947 return -EFAULT;
2948
2949 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2950
7a229387
AB
2951 if (!err) {
2952 switch (cmd) {
2953 case SIOCGIFFLAGS:
2954 case SIOCGIFMETRIC:
2955 case SIOCGIFMTU:
2956 case SIOCGIFMEM:
2957 case SIOCGIFHWADDR:
2958 case SIOCGIFINDEX:
2959 case SIOCGIFADDR:
2960 case SIOCGIFBRDADDR:
2961 case SIOCGIFDSTADDR:
2962 case SIOCGIFNETMASK:
fab2532b 2963 case SIOCGIFPFLAGS:
7a229387 2964 case SIOCGIFTXQLEN:
fab2532b
AB
2965 case SIOCGMIIPHY:
2966 case SIOCGMIIREG:
a2116ed2 2967 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2968 err = -EFAULT;
2969 break;
2970 }
2971 }
2972 return err;
2973}
2974
a2116ed2
AB
2975static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2976 struct compat_ifreq __user *uifr32)
2977{
2978 struct ifreq ifr;
2979 struct compat_ifmap __user *uifmap32;
2980 mm_segment_t old_fs;
2981 int err;
2982
2983 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2984 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
2985 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2986 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2987 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2988 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
2989 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
2990 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
2991 if (err)
2992 return -EFAULT;
2993
2994 old_fs = get_fs();
c6d409cf 2995 set_fs(KERNEL_DS);
c3f52ae6 2996 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2997 set_fs(old_fs);
a2116ed2
AB
2998
2999 if (cmd == SIOCGIFMAP && !err) {
3000 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3001 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3002 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3003 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3004 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
3005 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
3006 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
3007 if (err)
3008 err = -EFAULT;
3009 }
3010 return err;
3011}
3012
3013static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
3014{
3015 void __user *uptr;
3016 compat_uptr_t uptr32;
3017 struct ifreq __user *uifr;
3018
c6d409cf 3019 uifr = compat_alloc_user_space(sizeof(*uifr));
a2116ed2
AB
3020 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
3021 return -EFAULT;
3022
3023 if (get_user(uptr32, &uifr32->ifr_data))
3024 return -EFAULT;
3025
3026 uptr = compat_ptr(uptr32);
3027
3028 if (put_user(uptr, &uifr->ifr_data))
3029 return -EFAULT;
3030
3031 return dev_ioctl(net, SIOCSHWTSTAMP, uifr);
3032}
3033
7a229387 3034struct rtentry32 {
c6d409cf 3035 u32 rt_pad1;
7a229387
AB
3036 struct sockaddr rt_dst; /* target address */
3037 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3038 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3039 unsigned short rt_flags;
3040 short rt_pad2;
3041 u32 rt_pad3;
3042 unsigned char rt_tos;
3043 unsigned char rt_class;
3044 short rt_pad4;
3045 short rt_metric; /* +1 for binary compatibility! */
7a229387 3046 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3047 u32 rt_mtu; /* per route MTU/Window */
3048 u32 rt_window; /* Window clamping */
7a229387
AB
3049 unsigned short rt_irtt; /* Initial RTT */
3050};
3051
3052struct in6_rtmsg32 {
3053 struct in6_addr rtmsg_dst;
3054 struct in6_addr rtmsg_src;
3055 struct in6_addr rtmsg_gateway;
3056 u32 rtmsg_type;
3057 u16 rtmsg_dst_len;
3058 u16 rtmsg_src_len;
3059 u32 rtmsg_metric;
3060 u32 rtmsg_info;
3061 u32 rtmsg_flags;
3062 s32 rtmsg_ifindex;
3063};
3064
6b96018b
AB
3065static int routing_ioctl(struct net *net, struct socket *sock,
3066 unsigned int cmd, void __user *argp)
7a229387
AB
3067{
3068 int ret;
3069 void *r = NULL;
3070 struct in6_rtmsg r6;
3071 struct rtentry r4;
3072 char devname[16];
3073 u32 rtdev;
3074 mm_segment_t old_fs = get_fs();
3075
6b96018b
AB
3076 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3077 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3078 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3079 3 * sizeof(struct in6_addr));
c6d409cf
ED
3080 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3081 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3082 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3083 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3084 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3085 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3086 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3087
3088 r = (void *) &r6;
3089 } else { /* ipv4 */
6b96018b 3090 struct rtentry32 __user *ur4 = argp;
c6d409cf 3091 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3092 3 * sizeof(struct sockaddr));
c6d409cf
ED
3093 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
3094 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
3095 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
3096 ret |= __get_user(r4.rt_window, &(ur4->rt_window));
3097 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
3098 ret |= __get_user(rtdev, &(ur4->rt_dev));
7a229387 3099 if (rtdev) {
c6d409cf 3100 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3101 r4.rt_dev = (char __user __force *)devname;
3102 devname[15] = 0;
7a229387
AB
3103 } else
3104 r4.rt_dev = NULL;
3105
3106 r = (void *) &r4;
3107 }
3108
3109 if (ret) {
3110 ret = -EFAULT;
3111 goto out;
3112 }
3113
c6d409cf 3114 set_fs(KERNEL_DS);
6b96018b 3115 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3116 set_fs(old_fs);
7a229387
AB
3117
3118out:
7a229387
AB
3119 return ret;
3120}
3121
3122/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3123 * for some operations; this forces use of the newer bridge-utils that
25985edc 3124 * use compatible ioctls
7a229387 3125 */
6b96018b 3126static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3127{
6b96018b 3128 compat_ulong_t tmp;
7a229387 3129
6b96018b 3130 if (get_user(tmp, argp))
7a229387
AB
3131 return -EFAULT;
3132 if (tmp == BRCTL_GET_VERSION)
3133 return BRCTL_VERSION + 1;
3134 return -EINVAL;
3135}
3136
6b96018b
AB
3137static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3138 unsigned int cmd, unsigned long arg)
3139{
3140 void __user *argp = compat_ptr(arg);
3141 struct sock *sk = sock->sk;
3142 struct net *net = sock_net(sk);
7a229387 3143
6b96018b
AB
3144 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3145 return siocdevprivate_ioctl(net, cmd, argp);
3146
3147 switch (cmd) {
3148 case SIOCSIFBR:
3149 case SIOCGIFBR:
3150 return old_bridge_ioctl(argp);
3151 case SIOCGIFNAME:
3152 return dev_ifname32(net, argp);
3153 case SIOCGIFCONF:
3154 return dev_ifconf(net, argp);
3155 case SIOCETHTOOL:
3156 return ethtool_ioctl(net, argp);
7a50a240
AB
3157 case SIOCWANDEV:
3158 return compat_siocwandev(net, argp);
a2116ed2
AB
3159 case SIOCGIFMAP:
3160 case SIOCSIFMAP:
3161 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3162 case SIOCBONDENSLAVE:
3163 case SIOCBONDRELEASE:
3164 case SIOCBONDSETHWADDR:
3165 case SIOCBONDSLAVEINFOQUERY:
3166 case SIOCBONDINFOQUERY:
3167 case SIOCBONDCHANGEACTIVE:
3168 return bond_ioctl(net, cmd, argp);
3169 case SIOCADDRT:
3170 case SIOCDELRT:
3171 return routing_ioctl(net, sock, cmd, argp);
3172 case SIOCGSTAMP:
3173 return do_siocgstamp(net, sock, cmd, argp);
3174 case SIOCGSTAMPNS:
3175 return do_siocgstampns(net, sock, cmd, argp);
a2116ed2
AB
3176 case SIOCSHWTSTAMP:
3177 return compat_siocshwtstamp(net, argp);
6b96018b
AB
3178
3179 case FIOSETOWN:
3180 case SIOCSPGRP:
3181 case FIOGETOWN:
3182 case SIOCGPGRP:
3183 case SIOCBRADDBR:
3184 case SIOCBRDELBR:
3185 case SIOCGIFVLAN:
3186 case SIOCSIFVLAN:
3187 case SIOCADDDLCI:
3188 case SIOCDELDLCI:
3189 return sock_ioctl(file, cmd, arg);
3190
3191 case SIOCGIFFLAGS:
3192 case SIOCSIFFLAGS:
3193 case SIOCGIFMETRIC:
3194 case SIOCSIFMETRIC:
3195 case SIOCGIFMTU:
3196 case SIOCSIFMTU:
3197 case SIOCGIFMEM:
3198 case SIOCSIFMEM:
3199 case SIOCGIFHWADDR:
3200 case SIOCSIFHWADDR:
3201 case SIOCADDMULTI:
3202 case SIOCDELMULTI:
3203 case SIOCGIFINDEX:
6b96018b
AB
3204 case SIOCGIFADDR:
3205 case SIOCSIFADDR:
3206 case SIOCSIFHWBROADCAST:
6b96018b 3207 case SIOCDIFADDR:
6b96018b
AB
3208 case SIOCGIFBRDADDR:
3209 case SIOCSIFBRDADDR:
3210 case SIOCGIFDSTADDR:
3211 case SIOCSIFDSTADDR:
3212 case SIOCGIFNETMASK:
3213 case SIOCSIFNETMASK:
3214 case SIOCSIFPFLAGS:
3215 case SIOCGIFPFLAGS:
3216 case SIOCGIFTXQLEN:
3217 case SIOCSIFTXQLEN:
3218 case SIOCBRADDIF:
3219 case SIOCBRDELIF:
9177efd3
AB
3220 case SIOCSIFNAME:
3221 case SIOCGMIIPHY:
3222 case SIOCGMIIREG:
3223 case SIOCSMIIREG:
6b96018b 3224 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3225
6b96018b
AB
3226 case SIOCSARP:
3227 case SIOCGARP:
3228 case SIOCDARP:
6b96018b 3229 case SIOCATMARK:
9177efd3
AB
3230 return sock_do_ioctl(net, sock, cmd, arg);
3231 }
3232
6b96018b
AB
3233 return -ENOIOCTLCMD;
3234}
7a229387 3235
89bbfc95 3236static long compat_sock_ioctl(struct file *file, unsigned cmd,
89bddce5 3237 unsigned long arg)
89bbfc95
SP
3238{
3239 struct socket *sock = file->private_data;
3240 int ret = -ENOIOCTLCMD;
87de87d5
DM
3241 struct sock *sk;
3242 struct net *net;
3243
3244 sk = sock->sk;
3245 net = sock_net(sk);
89bbfc95
SP
3246
3247 if (sock->ops->compat_ioctl)
3248 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3249
87de87d5
DM
3250 if (ret == -ENOIOCTLCMD &&
3251 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3252 ret = compat_wext_handle_ioctl(net, cmd, arg);
3253
6b96018b
AB
3254 if (ret == -ENOIOCTLCMD)
3255 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3256
89bbfc95
SP
3257 return ret;
3258}
3259#endif
3260
ac5a488e
SS
3261int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3262{
3263 return sock->ops->bind(sock, addr, addrlen);
3264}
c6d409cf 3265EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3266
3267int kernel_listen(struct socket *sock, int backlog)
3268{
3269 return sock->ops->listen(sock, backlog);
3270}
c6d409cf 3271EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3272
3273int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3274{
3275 struct sock *sk = sock->sk;
3276 int err;
3277
3278 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3279 newsock);
3280 if (err < 0)
3281 goto done;
3282
3283 err = sock->ops->accept(sock, *newsock, flags);
3284 if (err < 0) {
3285 sock_release(*newsock);
fa8705b0 3286 *newsock = NULL;
ac5a488e
SS
3287 goto done;
3288 }
3289
3290 (*newsock)->ops = sock->ops;
1b08534e 3291 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3292
3293done:
3294 return err;
3295}
c6d409cf 3296EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3297
3298int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3299 int flags)
ac5a488e
SS
3300{
3301 return sock->ops->connect(sock, addr, addrlen, flags);
3302}
c6d409cf 3303EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3304
3305int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3306 int *addrlen)
3307{
3308 return sock->ops->getname(sock, addr, addrlen, 0);
3309}
c6d409cf 3310EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3311
3312int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3313 int *addrlen)
3314{
3315 return sock->ops->getname(sock, addr, addrlen, 1);
3316}
c6d409cf 3317EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3318
3319int kernel_getsockopt(struct socket *sock, int level, int optname,
3320 char *optval, int *optlen)
3321{
3322 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3323 char __user *uoptval;
3324 int __user *uoptlen;
ac5a488e
SS
3325 int err;
3326
fb8621bb
NK
3327 uoptval = (char __user __force *) optval;
3328 uoptlen = (int __user __force *) optlen;
3329
ac5a488e
SS
3330 set_fs(KERNEL_DS);
3331 if (level == SOL_SOCKET)
fb8621bb 3332 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3333 else
fb8621bb
NK
3334 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3335 uoptlen);
ac5a488e
SS
3336 set_fs(oldfs);
3337 return err;
3338}
c6d409cf 3339EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3340
3341int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3342 char *optval, unsigned int optlen)
ac5a488e
SS
3343{
3344 mm_segment_t oldfs = get_fs();
fb8621bb 3345 char __user *uoptval;
ac5a488e
SS
3346 int err;
3347
fb8621bb
NK
3348 uoptval = (char __user __force *) optval;
3349
ac5a488e
SS
3350 set_fs(KERNEL_DS);
3351 if (level == SOL_SOCKET)
fb8621bb 3352 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3353 else
fb8621bb 3354 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3355 optlen);
3356 set_fs(oldfs);
3357 return err;
3358}
c6d409cf 3359EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3360
3361int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3362 size_t size, int flags)
3363{
f8451725
HX
3364 sock_update_classid(sock->sk);
3365
ac5a488e
SS
3366 if (sock->ops->sendpage)
3367 return sock->ops->sendpage(sock, page, offset, size, flags);
3368
3369 return sock_no_sendpage(sock, page, offset, size, flags);
3370}
c6d409cf 3371EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3372
3373int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3374{
3375 mm_segment_t oldfs = get_fs();
3376 int err;
3377
3378 set_fs(KERNEL_DS);
3379 err = sock->ops->ioctl(sock, cmd, arg);
3380 set_fs(oldfs);
3381
3382 return err;
3383}
c6d409cf 3384EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3385
91cf45f0
TM
3386int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3387{
3388 return sock->ops->shutdown(sock, how);
3389}
91cf45f0 3390EXPORT_SYMBOL(kernel_sock_shutdown);