tun: experimental zero copy tx support
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4
LT
72#include <linux/wanrouter.h>
73#include <linux/if_bridge.h>
20380731
ACM
74#include <linux/if_frad.h>
75#include <linux/if_vlan.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
1da177e4
LT
91
92#include <asm/uaccess.h>
93#include <asm/unistd.h>
94
95#include <net/compat.h>
87de87d5 96#include <net/wext.h>
f8451725 97#include <net/cls_cgroup.h>
1da177e4
LT
98
99#include <net/sock.h>
100#include <linux/netfilter.h>
101
6b96018b
AB
102#include <linux/if_tun.h>
103#include <linux/ipv6_route.h>
104#include <linux/route.h>
6b96018b
AB
105#include <linux/sockios.h>
106#include <linux/atalk.h>
107
1da177e4 108static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
109static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
110 unsigned long nr_segs, loff_t pos);
111static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
112 unsigned long nr_segs, loff_t pos);
89bddce5 113static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
114
115static int sock_close(struct inode *inode, struct file *file);
116static unsigned int sock_poll(struct file *file,
117 struct poll_table_struct *wait);
89bddce5 118static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
119#ifdef CONFIG_COMPAT
120static long compat_sock_ioctl(struct file *file,
89bddce5 121 unsigned int cmd, unsigned long arg);
89bbfc95 122#endif
1da177e4 123static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
124static ssize_t sock_sendpage(struct file *file, struct page *page,
125 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 126static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 127 struct pipe_inode_info *pipe, size_t len,
9c55e01c 128 unsigned int flags);
1da177e4 129
1da177e4
LT
130/*
131 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
132 * in the operation structures but are done directly via the socketcall() multiplexor.
133 */
134
da7071d7 135static const struct file_operations socket_file_ops = {
1da177e4
LT
136 .owner = THIS_MODULE,
137 .llseek = no_llseek,
138 .aio_read = sock_aio_read,
139 .aio_write = sock_aio_write,
140 .poll = sock_poll,
141 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
142#ifdef CONFIG_COMPAT
143 .compat_ioctl = compat_sock_ioctl,
144#endif
1da177e4
LT
145 .mmap = sock_mmap,
146 .open = sock_no_open, /* special open code to disallow open via /proc */
147 .release = sock_close,
148 .fasync = sock_fasync,
5274f052
JA
149 .sendpage = sock_sendpage,
150 .splice_write = generic_splice_sendpage,
9c55e01c 151 .splice_read = sock_splice_read,
1da177e4
LT
152};
153
154/*
155 * The protocol list. Each protocol is registered in here.
156 */
157
1da177e4 158static DEFINE_SPINLOCK(net_family_lock);
190683a9 159static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 160
1da177e4
LT
161/*
162 * Statistics counters of the socket lists
163 */
164
c6d409cf 165static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
166
167/*
89bddce5
SH
168 * Support routines.
169 * Move socket addresses back and forth across the kernel/user
170 * divide and look after the messy bits.
1da177e4
LT
171 */
172
1da177e4
LT
173/**
174 * move_addr_to_kernel - copy a socket address into kernel space
175 * @uaddr: Address in user space
176 * @kaddr: Address in kernel space
177 * @ulen: Length in user space
178 *
179 * The address is copied into kernel space. If the provided address is
180 * too long an error code of -EINVAL is returned. If the copy gives
181 * invalid addresses -EFAULT is returned. On a success 0 is returned.
182 */
183
43db362d 184int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 185{
230b1839 186 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 187 return -EINVAL;
89bddce5 188 if (ulen == 0)
1da177e4 189 return 0;
89bddce5 190 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 191 return -EFAULT;
3ec3b2fb 192 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
193}
194
195/**
196 * move_addr_to_user - copy an address to user space
197 * @kaddr: kernel space address
198 * @klen: length of address in kernel
199 * @uaddr: user space address
200 * @ulen: pointer to user length field
201 *
202 * The value pointed to by ulen on entry is the buffer length available.
203 * This is overwritten with the buffer space used. -EINVAL is returned
204 * if an overlong buffer is specified or a negative buffer size. -EFAULT
205 * is returned if either the buffer or the length field are not
206 * accessible.
207 * After copying the data up to the limit the user specifies, the true
208 * length of the data is written over the length limit the user
209 * specified. Zero is returned for a success.
210 */
89bddce5 211
43db362d 212static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 213 void __user *uaddr, int __user *ulen)
1da177e4
LT
214{
215 int err;
216 int len;
217
89bddce5
SH
218 err = get_user(len, ulen);
219 if (err)
1da177e4 220 return err;
89bddce5
SH
221 if (len > klen)
222 len = klen;
230b1839 223 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 224 return -EINVAL;
89bddce5 225 if (len) {
d6fe3945
SG
226 if (audit_sockaddr(klen, kaddr))
227 return -ENOMEM;
89bddce5 228 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
229 return -EFAULT;
230 }
231 /*
89bddce5
SH
232 * "fromlen shall refer to the value before truncation.."
233 * 1003.1g
1da177e4
LT
234 */
235 return __put_user(klen, ulen);
236}
237
e18b890b 238static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
239
240static struct inode *sock_alloc_inode(struct super_block *sb)
241{
242 struct socket_alloc *ei;
eaefd110 243 struct socket_wq *wq;
89bddce5 244
e94b1766 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
246 if (!ei)
247 return NULL;
eaefd110
ED
248 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
249 if (!wq) {
43815482
ED
250 kmem_cache_free(sock_inode_cachep, ei);
251 return NULL;
252 }
eaefd110
ED
253 init_waitqueue_head(&wq->wait);
254 wq->fasync_list = NULL;
255 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482 268 struct socket_alloc *ei;
eaefd110 269 struct socket_wq *wq;
43815482
ED
270
271 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 272 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 273 kfree_rcu(wq, rcu);
43815482 274 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
275}
276
51cc5068 277static void init_once(void *foo)
1da177e4 278{
89bddce5 279 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 280
a35afb83 281 inode_init_once(&ei->vfs_inode);
1da177e4 282}
89bddce5 283
1da177e4
LT
284static int init_inodecache(void)
285{
286 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
287 sizeof(struct socket_alloc),
288 0,
289 (SLAB_HWCACHE_ALIGN |
290 SLAB_RECLAIM_ACCOUNT |
291 SLAB_MEM_SPREAD),
20c2df83 292 init_once);
1da177e4
LT
293 if (sock_inode_cachep == NULL)
294 return -ENOMEM;
295 return 0;
296}
297
b87221de 298static const struct super_operations sockfs_ops = {
c6d409cf
ED
299 .alloc_inode = sock_alloc_inode,
300 .destroy_inode = sock_destroy_inode,
301 .statfs = simple_statfs,
1da177e4
LT
302};
303
c23fbb6b
ED
304/*
305 * sockfs_dname() is called from d_path().
306 */
307static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
308{
309 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
310 dentry->d_inode->i_ino);
311}
312
3ba13d17 313static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 314 .d_dname = sockfs_dname,
1da177e4
LT
315};
316
c74a1cbb
AV
317static struct dentry *sockfs_mount(struct file_system_type *fs_type,
318 int flags, const char *dev_name, void *data)
319{
320 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
321 &sockfs_dentry_operations, SOCKFS_MAGIC);
322}
323
324static struct vfsmount *sock_mnt __read_mostly;
325
326static struct file_system_type sock_fs_type = {
327 .name = "sockfs",
328 .mount = sockfs_mount,
329 .kill_sb = kill_anon_super,
330};
331
1da177e4
LT
332/*
333 * Obtains the first available file descriptor and sets it up for use.
334 *
39d8c1b6
DM
335 * These functions create file structures and map them to fd space
336 * of the current process. On success it returns file descriptor
1da177e4
LT
337 * and file struct implicitly stored in sock->file.
338 * Note that another thread may close file descriptor before we return
339 * from this function. We use the fact that now we do not refer
340 * to socket after mapping. If one day we will need it, this
341 * function will increment ref. count on file by 1.
342 *
343 * In any case returned fd MAY BE not valid!
344 * This race condition is unavoidable
345 * with shared fd spaces, we cannot solve it inside kernel,
346 * but we take care of internal coherence yet.
347 */
348
7cbe66b6 349static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
1da177e4 350{
7cbe66b6 351 struct qstr name = { .name = "" };
2c48b9c4 352 struct path path;
7cbe66b6 353 struct file *file;
1da177e4 354 int fd;
1da177e4 355
a677a039 356 fd = get_unused_fd_flags(flags);
7cbe66b6
AV
357 if (unlikely(fd < 0))
358 return fd;
1da177e4 359
4b936885 360 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
2c48b9c4 361 if (unlikely(!path.dentry)) {
7cbe66b6 362 put_unused_fd(fd);
39d8c1b6 363 return -ENOMEM;
7cbe66b6 364 }
2c48b9c4 365 path.mnt = mntget(sock_mnt);
39d8c1b6 366
2c48b9c4 367 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 368 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 369
2c48b9c4 370 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 371 &socket_file_ops);
cc3808f8
AV
372 if (unlikely(!file)) {
373 /* drop dentry, keep inode */
7de9c6ee 374 ihold(path.dentry->d_inode);
2c48b9c4 375 path_put(&path);
cc3808f8
AV
376 put_unused_fd(fd);
377 return -ENFILE;
378 }
379
380 sock->file = file;
77d27200 381 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6
DM
382 file->f_pos = 0;
383 file->private_data = sock;
1da177e4 384
7cbe66b6
AV
385 *f = file;
386 return fd;
39d8c1b6
DM
387}
388
/*
 * Allocate a file for @sock and install it in the current process's
 * descriptor table.  Returns the new descriptor, or the negative errno
 * from sock_alloc_file().
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int fd;

	fd = sock_alloc_file(sock, &newfile, flags);
	if (unlikely(fd < 0))
		return fd;

	fd_install(fd, newfile);
	return fd;
}
EXPORT_SYMBOL(sock_map_fd);
1da177e4 400
6cb153ca
BL
401static struct socket *sock_from_file(struct file *file, int *err)
402{
6cb153ca
BL
403 if (file->f_op == &socket_file_ops)
404 return file->private_data; /* set in sock_map_fd */
405
23bb80d2
ED
406 *err = -ENOTSOCK;
407 return NULL;
6cb153ca
BL
408}
409
1da177e4 410/**
c6d409cf 411 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
412 * @fd: file handle
413 * @err: pointer to an error code return
414 *
415 * The file handle passed in is locked and the socket it is bound
416 * to is returned. If an error occurs the err pointer is overwritten
417 * with a negative errno code and NULL is returned. The function checks
418 * for both invalid handles and passing a handle which is not a socket.
419 *
420 * On a success the socket object pointer is returned.
421 */
422
423struct socket *sockfd_lookup(int fd, int *err)
424{
425 struct file *file;
1da177e4
LT
426 struct socket *sock;
427
89bddce5
SH
428 file = fget(fd);
429 if (!file) {
1da177e4
LT
430 *err = -EBADF;
431 return NULL;
432 }
89bddce5 433
6cb153ca
BL
434 sock = sock_from_file(file, err);
435 if (!sock)
1da177e4 436 fput(file);
6cb153ca
BL
437 return sock;
438}
c6d409cf 439EXPORT_SYMBOL(sockfd_lookup);
1da177e4 440
6cb153ca
BL
441static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
442{
443 struct file *file;
444 struct socket *sock;
445
3672558c 446 *err = -EBADF;
6cb153ca
BL
447 file = fget_light(fd, fput_needed);
448 if (file) {
449 sock = sock_from_file(file, err);
450 if (sock)
451 return sock;
452 fput_light(file, *fput_needed);
1da177e4 453 }
6cb153ca 454 return NULL;
1da177e4
LT
455}
456
457/**
458 * sock_alloc - allocate a socket
89bddce5 459 *
1da177e4
LT
460 * Allocate a new inode and socket object. The two are bound together
461 * and initialised. The socket is then returned. If we are out of inodes
462 * NULL is returned.
463 */
464
465static struct socket *sock_alloc(void)
466{
89bddce5
SH
467 struct inode *inode;
468 struct socket *sock;
1da177e4 469
a209dfc7 470 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
471 if (!inode)
472 return NULL;
473
474 sock = SOCKET_I(inode);
475
29a020d3 476 kmemcheck_annotate_bitfield(sock, type);
85fe4025 477 inode->i_ino = get_next_ino();
89bddce5 478 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
479 inode->i_uid = current_fsuid();
480 inode->i_gid = current_fsgid();
1da177e4 481
19e8d69c 482 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
483 return sock;
484}
485
486/*
487 * In theory you can't get an open on this inode, but /proc provides
488 * a back door. Remember to keep it shut otherwise you'll let the
489 * creepy crawlies in.
490 */
89bddce5 491
1da177e4
LT
492static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
493{
494 return -ENXIO;
495}
496
4b6f5d20 497const struct file_operations bad_sock_fops = {
1da177e4
LT
498 .owner = THIS_MODULE,
499 .open = sock_no_open,
6038f373 500 .llseek = noop_llseek,
1da177e4
LT
501};
502
503/**
504 * sock_release - close a socket
505 * @sock: socket to close
506 *
507 * The socket is released from the protocol stack if it has a release
508 * callback, and the inode is then released if the socket is bound to
89bddce5 509 * an inode not a file.
1da177e4 510 */
89bddce5 511
1da177e4
LT
512void sock_release(struct socket *sock)
513{
514 if (sock->ops) {
515 struct module *owner = sock->ops->owner;
516
517 sock->ops->release(sock);
518 sock->ops = NULL;
519 module_put(owner);
520 }
521
eaefd110 522 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
1da177e4
LT
523 printk(KERN_ERR "sock_release: fasync list not empty!\n");
524
b09e786b
MP
525 if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
526 return;
527
19e8d69c 528 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
529 if (!sock->file) {
530 iput(SOCK_INODE(sock));
531 return;
532 }
89bddce5 533 sock->file = NULL;
1da177e4 534}
c6d409cf 535EXPORT_SYMBOL(sock_release);
1da177e4 536
2244d07b 537int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
20d49473 538{
2244d07b 539 *tx_flags = 0;
20d49473 540 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
2244d07b 541 *tx_flags |= SKBTX_HW_TSTAMP;
20d49473 542 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
2244d07b 543 *tx_flags |= SKBTX_SW_TSTAMP;
6e3e939f
JB
544 if (sock_flag(sk, SOCK_WIFI_STATUS))
545 *tx_flags |= SKBTX_WIFI_STATUS;
20d49473
PO
546 return 0;
547}
548EXPORT_SYMBOL(sock_tx_timestamp);
549
228e548e
AB
550static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
551 struct msghdr *msg, size_t size)
1da177e4
LT
552{
553 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4 554
f8451725
HX
555 sock_update_classid(sock->sk);
556
5bc1421e
NH
557 sock_update_netprioidx(sock->sk);
558
1da177e4
LT
559 si->sock = sock;
560 si->scm = NULL;
561 si->msg = msg;
562 si->size = size;
563
1da177e4
LT
564 return sock->ops->sendmsg(iocb, sock, msg, size);
565}
566
228e548e
AB
567static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
568 struct msghdr *msg, size_t size)
569{
570 int err = security_socket_sendmsg(sock, msg, size);
571
572 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
573}
574
1da177e4
LT
575int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
576{
577 struct kiocb iocb;
578 struct sock_iocb siocb;
579 int ret;
580
581 init_sync_kiocb(&iocb, NULL);
582 iocb.private = &siocb;
583 ret = __sock_sendmsg(&iocb, sock, msg, size);
584 if (-EIOCBQUEUED == ret)
585 ret = wait_on_sync_kiocb(&iocb);
586 return ret;
587}
c6d409cf 588EXPORT_SYMBOL(sock_sendmsg);
1da177e4 589
894dc24c 590static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e
AB
591{
592 struct kiocb iocb;
593 struct sock_iocb siocb;
594 int ret;
595
596 init_sync_kiocb(&iocb, NULL);
597 iocb.private = &siocb;
598 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
599 if (-EIOCBQUEUED == ret)
600 ret = wait_on_sync_kiocb(&iocb);
601 return ret;
602}
603
1da177e4
LT
604int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
605 struct kvec *vec, size_t num, size_t size)
606{
607 mm_segment_t oldfs = get_fs();
608 int result;
609
610 set_fs(KERNEL_DS);
611 /*
612 * the following is safe, since for compiler definitions of kvec and
613 * iovec are identical, yielding the same in-core layout and alignment
614 */
89bddce5 615 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
616 msg->msg_iovlen = num;
617 result = sock_sendmsg(sock, msg, size);
618 set_fs(oldfs);
619 return result;
620}
c6d409cf 621EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 622
20d49473
PO
623static int ktime2ts(ktime_t kt, struct timespec *ts)
624{
625 if (kt.tv64) {
626 *ts = ktime_to_timespec(kt);
627 return 1;
628 } else {
629 return 0;
630 }
631}
632
92f37fd2
ED
633/*
634 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
635 */
636void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
637 struct sk_buff *skb)
638{
20d49473
PO
639 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
640 struct timespec ts[3];
641 int empty = 1;
642 struct skb_shared_hwtstamps *shhwtstamps =
643 skb_hwtstamps(skb);
644
645 /* Race occurred between timestamp enabling and packet
646 receiving. Fill in the current time for now. */
647 if (need_software_tstamp && skb->tstamp.tv64 == 0)
648 __net_timestamp(skb);
649
650 if (need_software_tstamp) {
651 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
652 struct timeval tv;
653 skb_get_timestamp(skb, &tv);
654 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
655 sizeof(tv), &tv);
656 } else {
842509b8 657 skb_get_timestampns(skb, &ts[0]);
20d49473 658 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 659 sizeof(ts[0]), &ts[0]);
20d49473
PO
660 }
661 }
662
663
664 memset(ts, 0, sizeof(ts));
665 if (skb->tstamp.tv64 &&
666 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
667 skb_get_timestampns(skb, ts + 0);
668 empty = 0;
669 }
670 if (shhwtstamps) {
671 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
672 ktime2ts(shhwtstamps->syststamp, ts + 1))
673 empty = 0;
674 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
675 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
676 empty = 0;
92f37fd2 677 }
20d49473
PO
678 if (!empty)
679 put_cmsg(msg, SOL_SOCKET,
680 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 681}
7c81fd8b
ACM
682EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
683
6e3e939f
JB
684void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
685 struct sk_buff *skb)
686{
687 int ack;
688
689 if (!sock_flag(sk, SOCK_WIFI_STATUS))
690 return;
691 if (!skb->wifi_acked_valid)
692 return;
693
694 ack = skb->wifi_acked;
695
696 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
697}
698EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
699
11165f14 700static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
701 struct sk_buff *skb)
3b885787
NH
702{
703 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
704 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
705 sizeof(__u32), &skb->dropcount);
706}
707
/* Append timestamp and drop-count control messages for @skb to @msg. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 715
a2e27255
ACM
716static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
717 struct msghdr *msg, size_t size, int flags)
1da177e4 718{
1da177e4
LT
719 struct sock_iocb *si = kiocb_to_siocb(iocb);
720
f8451725
HX
721 sock_update_classid(sock->sk);
722
1da177e4
LT
723 si->sock = sock;
724 si->scm = NULL;
725 si->msg = msg;
726 si->size = size;
727 si->flags = flags;
728
1da177e4
LT
729 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
730}
731
a2e27255
ACM
732static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
733 struct msghdr *msg, size_t size, int flags)
734{
735 int err = security_socket_recvmsg(sock, msg, size, flags);
736
737 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
738}
739
89bddce5 740int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
741 size_t size, int flags)
742{
743 struct kiocb iocb;
744 struct sock_iocb siocb;
745 int ret;
746
89bddce5 747 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
748 iocb.private = &siocb;
749 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
750 if (-EIOCBQUEUED == ret)
751 ret = wait_on_sync_kiocb(&iocb);
752 return ret;
753}
c6d409cf 754EXPORT_SYMBOL(sock_recvmsg);
1da177e4 755
a2e27255
ACM
756static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
757 size_t size, int flags)
758{
759 struct kiocb iocb;
760 struct sock_iocb siocb;
761 int ret;
762
763 init_sync_kiocb(&iocb, NULL);
764 iocb.private = &siocb;
765 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
766 if (-EIOCBQUEUED == ret)
767 ret = wait_on_sync_kiocb(&iocb);
768 return ret;
769}
770
c1249c0a
ML
771/**
772 * kernel_recvmsg - Receive a message from a socket (kernel space)
773 * @sock: The socket to receive the message from
774 * @msg: Received message
775 * @vec: Input s/g array for message data
776 * @num: Size of input s/g array
777 * @size: Number of bytes to read
778 * @flags: Message flags (MSG_DONTWAIT, etc...)
779 *
780 * On return the msg structure contains the scatter/gather array passed in the
781 * vec argument. The array is modified so that it consists of the unfilled
782 * portion of the original array.
783 *
784 * The returned value is the total number of bytes received, or an error.
785 */
89bddce5
SH
786int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
787 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
788{
789 mm_segment_t oldfs = get_fs();
790 int result;
791
792 set_fs(KERNEL_DS);
793 /*
794 * the following is safe, since for compiler definitions of kvec and
795 * iovec are identical, yielding the same in-core layout and alignment
796 */
89bddce5 797 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
798 result = sock_recvmsg(sock, msg, size, flags);
799 set_fs(oldfs);
800 return result;
801}
c6d409cf 802EXPORT_SYMBOL(kernel_recvmsg);
1da177e4
LT
803
804static void sock_aio_dtor(struct kiocb *iocb)
805{
806 kfree(iocb->private);
807}
808
ce1d4d3e
CH
809static ssize_t sock_sendpage(struct file *file, struct page *page,
810 int offset, size_t size, loff_t *ppos, int more)
1da177e4 811{
1da177e4
LT
812 struct socket *sock;
813 int flags;
814
ce1d4d3e
CH
815 sock = file->private_data;
816
35f9c09f
ED
817 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
818 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
819 flags |= more;
ce1d4d3e 820
e6949583 821 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 822}
1da177e4 823
9c55e01c 824static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 825 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
826 unsigned int flags)
827{
828 struct socket *sock = file->private_data;
829
997b37da
RDC
830 if (unlikely(!sock->ops->splice_read))
831 return -EINVAL;
832
f8451725
HX
833 sock_update_classid(sock->sk);
834
9c55e01c
JA
835 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
836}
837
ce1d4d3e 838static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 839 struct sock_iocb *siocb)
ce1d4d3e
CH
840{
841 if (!is_sync_kiocb(iocb)) {
842 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
843 if (!siocb)
844 return NULL;
1da177e4
LT
845 iocb->ki_dtor = sock_aio_dtor;
846 }
1da177e4 847
ce1d4d3e 848 siocb->kiocb = iocb;
ce1d4d3e
CH
849 iocb->private = siocb;
850 return siocb;
1da177e4
LT
851}
852
ce1d4d3e 853static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
854 struct file *file, const struct iovec *iov,
855 unsigned long nr_segs)
ce1d4d3e
CH
856{
857 struct socket *sock = file->private_data;
858 size_t size = 0;
859 int i;
1da177e4 860
89bddce5
SH
861 for (i = 0; i < nr_segs; i++)
862 size += iov[i].iov_len;
1da177e4 863
ce1d4d3e
CH
864 msg->msg_name = NULL;
865 msg->msg_namelen = 0;
866 msg->msg_control = NULL;
867 msg->msg_controllen = 0;
89bddce5 868 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
869 msg->msg_iovlen = nr_segs;
870 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
871
872 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
873}
874
027445c3
BP
875static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
876 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
877{
878 struct sock_iocb siocb, *x;
879
1da177e4
LT
880 if (pos != 0)
881 return -ESPIPE;
027445c3
BP
882
883 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
884 return 0;
885
027445c3
BP
886
887 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
888 if (!x)
889 return -ENOMEM;
027445c3 890 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
891}
892
ce1d4d3e 893static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
894 struct file *file, const struct iovec *iov,
895 unsigned long nr_segs)
1da177e4 896{
ce1d4d3e
CH
897 struct socket *sock = file->private_data;
898 size_t size = 0;
899 int i;
1da177e4 900
89bddce5
SH
901 for (i = 0; i < nr_segs; i++)
902 size += iov[i].iov_len;
1da177e4 903
ce1d4d3e
CH
904 msg->msg_name = NULL;
905 msg->msg_namelen = 0;
906 msg->msg_control = NULL;
907 msg->msg_controllen = 0;
89bddce5 908 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
909 msg->msg_iovlen = nr_segs;
910 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
911 if (sock->type == SOCK_SEQPACKET)
912 msg->msg_flags |= MSG_EOR;
1da177e4 913
ce1d4d3e 914 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
915}
916
027445c3
BP
917static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
918 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
919{
920 struct sock_iocb siocb, *x;
1da177e4 921
ce1d4d3e
CH
922 if (pos != 0)
923 return -ESPIPE;
027445c3 924
027445c3 925 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
926 if (!x)
927 return -ENOMEM;
1da177e4 928
027445c3 929 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
930}
931
1da177e4
LT
932/*
933 * Atomic setting of ioctl hooks to avoid race
934 * with module unload.
935 */
936
4a3e2f71 937static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 938static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 939
881d966b 940void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 941{
4a3e2f71 942 mutex_lock(&br_ioctl_mutex);
1da177e4 943 br_ioctl_hook = hook;
4a3e2f71 944 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
945}
946EXPORT_SYMBOL(brioctl_set);
947
4a3e2f71 948static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 949static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 950
881d966b 951void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 952{
4a3e2f71 953 mutex_lock(&vlan_ioctl_mutex);
1da177e4 954 vlan_ioctl_hook = hook;
4a3e2f71 955 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
956}
957EXPORT_SYMBOL(vlan_ioctl_set);
958
4a3e2f71 959static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 960static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 961
89bddce5 962void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 963{
4a3e2f71 964 mutex_lock(&dlci_ioctl_mutex);
1da177e4 965 dlci_ioctl_hook = hook;
4a3e2f71 966 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
967}
968EXPORT_SYMBOL(dlci_ioctl_set);
969
6b96018b
AB
970static long sock_do_ioctl(struct net *net, struct socket *sock,
971 unsigned int cmd, unsigned long arg)
972{
973 int err;
974 void __user *argp = (void __user *)arg;
975
976 err = sock->ops->ioctl(sock, cmd, arg);
977
978 /*
979 * If this ioctl is unknown try to hand it down
980 * to the NIC driver.
981 */
982 if (err == -ENOIOCTLCMD)
983 err = dev_ioctl(net, cmd, argp);
984
985 return err;
986}
987
1da177e4
LT
988/*
989 * With an ioctl, arg may well be a user mode pointer, but we don't know
990 * what to do with it - that's up to the protocol still.
991 */
992
993static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
994{
995 struct socket *sock;
881d966b 996 struct sock *sk;
1da177e4
LT
997 void __user *argp = (void __user *)arg;
998 int pid, err;
881d966b 999 struct net *net;
1da177e4 1000
b69aee04 1001 sock = file->private_data;
881d966b 1002 sk = sock->sk;
3b1e0a65 1003 net = sock_net(sk);
1da177e4 1004 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1005 err = dev_ioctl(net, cmd, argp);
1da177e4 1006 } else
3d23e349 1007#ifdef CONFIG_WEXT_CORE
1da177e4 1008 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1009 err = dev_ioctl(net, cmd, argp);
1da177e4 1010 } else
3d23e349 1011#endif
89bddce5 1012 switch (cmd) {
1da177e4
LT
1013 case FIOSETOWN:
1014 case SIOCSPGRP:
1015 err = -EFAULT;
1016 if (get_user(pid, (int __user *)argp))
1017 break;
1018 err = f_setown(sock->file, pid, 1);
1019 break;
1020 case FIOGETOWN:
1021 case SIOCGPGRP:
609d7fa9 1022 err = put_user(f_getown(sock->file),
89bddce5 1023 (int __user *)argp);
1da177e4
LT
1024 break;
1025 case SIOCGIFBR:
1026 case SIOCSIFBR:
1027 case SIOCBRADDBR:
1028 case SIOCBRDELBR:
1029 err = -ENOPKG;
1030 if (!br_ioctl_hook)
1031 request_module("bridge");
1032
4a3e2f71 1033 mutex_lock(&br_ioctl_mutex);
89bddce5 1034 if (br_ioctl_hook)
881d966b 1035 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1036 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1037 break;
1038 case SIOCGIFVLAN:
1039 case SIOCSIFVLAN:
1040 err = -ENOPKG;
1041 if (!vlan_ioctl_hook)
1042 request_module("8021q");
1043
4a3e2f71 1044 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1045 if (vlan_ioctl_hook)
881d966b 1046 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1047 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1048 break;
1da177e4
LT
1049 case SIOCADDDLCI:
1050 case SIOCDELDLCI:
1051 err = -ENOPKG;
1052 if (!dlci_ioctl_hook)
1053 request_module("dlci");
1054
7512cbf6
PE
1055 mutex_lock(&dlci_ioctl_mutex);
1056 if (dlci_ioctl_hook)
1da177e4 1057 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1058 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1059 break;
1060 default:
6b96018b 1061 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1062 break;
89bddce5 1063 }
1da177e4
LT
1064 return err;
1065}
1066
1067int sock_create_lite(int family, int type, int protocol, struct socket **res)
1068{
1069 int err;
1070 struct socket *sock = NULL;
89bddce5 1071
1da177e4
LT
1072 err = security_socket_create(family, type, protocol, 1);
1073 if (err)
1074 goto out;
1075
1076 sock = sock_alloc();
1077 if (!sock) {
1078 err = -ENOMEM;
1079 goto out;
1080 }
1081
1da177e4 1082 sock->type = type;
7420ed23
VY
1083 err = security_socket_post_create(sock, family, type, protocol, 1);
1084 if (err)
1085 goto out_release;
1086
1da177e4
LT
1087out:
1088 *res = sock;
1089 return err;
7420ed23
VY
1090out_release:
1091 sock_release(sock);
1092 sock = NULL;
1093 goto out;
1da177e4 1094}
c6d409cf 1095EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1096
1097/* No kernel lock held - perfect */
89bddce5 1098static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1099{
1100 struct socket *sock;
1101
1102 /*
89bddce5 1103 * We can't return errors to poll, so it's either yes or no.
1da177e4 1104 */
b69aee04 1105 sock = file->private_data;
1da177e4
LT
1106 return sock->ops->poll(file, sock, wait);
1107}
1108
89bddce5 1109static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1110{
b69aee04 1111 struct socket *sock = file->private_data;
1da177e4
LT
1112
1113 return sock->ops->mmap(file, sock, vma);
1114}
1115
20380731 1116static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1117{
1118 /*
89bddce5
SH
1119 * It was possible the inode is NULL we were
1120 * closing an unfinished socket.
1da177e4
LT
1121 */
1122
89bddce5 1123 if (!inode) {
1da177e4
LT
1124 printk(KERN_DEBUG "sock_close: NULL inode\n");
1125 return 0;
1126 }
1da177e4
LT
1127 sock_release(SOCKET_I(inode));
1128 return 0;
1129}
1130
1131/*
1132 * Update the socket async list
1133 *
1134 * Fasync_list locking strategy.
1135 *
1136 * 1. fasync_list is modified only under process context socket lock
1137 * i.e. under semaphore.
1138 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1139 * or under socket lock
1da177e4
LT
1140 */
1141
1142static int sock_fasync(int fd, struct file *filp, int on)
1143{
989a2979
ED
1144 struct socket *sock = filp->private_data;
1145 struct sock *sk = sock->sk;
eaefd110 1146 struct socket_wq *wq;
1da177e4 1147
989a2979 1148 if (sk == NULL)
1da177e4 1149 return -EINVAL;
1da177e4
LT
1150
1151 lock_sock(sk);
eaefd110
ED
1152 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1153 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1154
eaefd110 1155 if (!wq->fasync_list)
989a2979
ED
1156 sock_reset_flag(sk, SOCK_FASYNC);
1157 else
bcdce719 1158 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1159
989a2979 1160 release_sock(sk);
1da177e4
LT
1161 return 0;
1162}
1163
43815482 1164/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1165
1166int sock_wake_async(struct socket *sock, int how, int band)
1167{
43815482
ED
1168 struct socket_wq *wq;
1169
1170 if (!sock)
1171 return -1;
1172 rcu_read_lock();
1173 wq = rcu_dereference(sock->wq);
1174 if (!wq || !wq->fasync_list) {
1175 rcu_read_unlock();
1da177e4 1176 return -1;
43815482 1177 }
89bddce5 1178 switch (how) {
8d8ad9d7 1179 case SOCK_WAKE_WAITD:
1da177e4
LT
1180 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1181 break;
1182 goto call_kill;
8d8ad9d7 1183 case SOCK_WAKE_SPACE:
1da177e4
LT
1184 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1185 break;
1186 /* fall through */
8d8ad9d7 1187 case SOCK_WAKE_IO:
89bddce5 1188call_kill:
43815482 1189 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1190 break;
8d8ad9d7 1191 case SOCK_WAKE_URG:
43815482 1192 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1193 }
43815482 1194 rcu_read_unlock();
1da177e4
LT
1195 return 0;
1196}
c6d409cf 1197EXPORT_SYMBOL(sock_wake_async);
1da177e4 1198
721db93a 1199int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1200 struct socket **res, int kern)
1da177e4
LT
1201{
1202 int err;
1203 struct socket *sock;
55737fda 1204 const struct net_proto_family *pf;
1da177e4
LT
1205
1206 /*
89bddce5 1207 * Check protocol is in range
1da177e4
LT
1208 */
1209 if (family < 0 || family >= NPROTO)
1210 return -EAFNOSUPPORT;
1211 if (type < 0 || type >= SOCK_MAX)
1212 return -EINVAL;
1213
1214 /* Compatibility.
1215
1216 This uglymoron is moved from INET layer to here to avoid
1217 deadlock in module load.
1218 */
1219 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1220 static int warned;
1da177e4
LT
1221 if (!warned) {
1222 warned = 1;
89bddce5
SH
1223 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1224 current->comm);
1da177e4
LT
1225 }
1226 family = PF_PACKET;
1227 }
1228
1229 err = security_socket_create(family, type, protocol, kern);
1230 if (err)
1231 return err;
89bddce5 1232
55737fda
SH
1233 /*
1234 * Allocate the socket and allow the family to set things up. if
1235 * the protocol is 0, the family is instructed to select an appropriate
1236 * default.
1237 */
1238 sock = sock_alloc();
1239 if (!sock) {
e87cc472 1240 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1241 return -ENFILE; /* Not exactly a match, but its the
1242 closest posix thing */
1243 }
1244
1245 sock->type = type;
1246
95a5afca 1247#ifdef CONFIG_MODULES
89bddce5
SH
1248 /* Attempt to load a protocol module if the find failed.
1249 *
1250 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1251 * requested real, full-featured networking support upon configuration.
1252 * Otherwise module support will break!
1253 */
190683a9 1254 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1255 request_module("net-pf-%d", family);
1da177e4
LT
1256#endif
1257
55737fda
SH
1258 rcu_read_lock();
1259 pf = rcu_dereference(net_families[family]);
1260 err = -EAFNOSUPPORT;
1261 if (!pf)
1262 goto out_release;
1da177e4
LT
1263
1264 /*
1265 * We will call the ->create function, that possibly is in a loadable
1266 * module, so we have to bump that loadable module refcnt first.
1267 */
55737fda 1268 if (!try_module_get(pf->owner))
1da177e4
LT
1269 goto out_release;
1270
55737fda
SH
1271 /* Now protected by module ref count */
1272 rcu_read_unlock();
1273
3f378b68 1274 err = pf->create(net, sock, protocol, kern);
55737fda 1275 if (err < 0)
1da177e4 1276 goto out_module_put;
a79af59e 1277
1da177e4
LT
1278 /*
1279 * Now to bump the refcnt of the [loadable] module that owns this
1280 * socket at sock_release time we decrement its refcnt.
1281 */
55737fda
SH
1282 if (!try_module_get(sock->ops->owner))
1283 goto out_module_busy;
1284
1da177e4
LT
1285 /*
1286 * Now that we're done with the ->create function, the [loadable]
1287 * module can have its refcnt decremented
1288 */
55737fda 1289 module_put(pf->owner);
7420ed23
VY
1290 err = security_socket_post_create(sock, family, type, protocol, kern);
1291 if (err)
3b185525 1292 goto out_sock_release;
55737fda 1293 *res = sock;
1da177e4 1294
55737fda
SH
1295 return 0;
1296
1297out_module_busy:
1298 err = -EAFNOSUPPORT;
1da177e4 1299out_module_put:
55737fda
SH
1300 sock->ops = NULL;
1301 module_put(pf->owner);
1302out_sock_release:
1da177e4 1303 sock_release(sock);
55737fda
SH
1304 return err;
1305
1306out_release:
1307 rcu_read_unlock();
1308 goto out_sock_release;
1da177e4 1309}
721db93a 1310EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1311
1312int sock_create(int family, int type, int protocol, struct socket **res)
1313{
1b8d7ae4 1314 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1315}
c6d409cf 1316EXPORT_SYMBOL(sock_create);
1da177e4
LT
1317
1318int sock_create_kern(int family, int type, int protocol, struct socket **res)
1319{
1b8d7ae4 1320 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1321}
c6d409cf 1322EXPORT_SYMBOL(sock_create_kern);
1da177e4 1323
3e0fa65f 1324SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1325{
1326 int retval;
1327 struct socket *sock;
a677a039
UD
1328 int flags;
1329
e38b36f3
UD
1330 /* Check the SOCK_* constants for consistency. */
1331 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1332 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1333 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1334 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1335
a677a039 1336 flags = type & ~SOCK_TYPE_MASK;
77d27200 1337 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1338 return -EINVAL;
1339 type &= SOCK_TYPE_MASK;
1da177e4 1340
aaca0bdc
UD
1341 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1342 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1343
1da177e4
LT
1344 retval = sock_create(family, type, protocol, &sock);
1345 if (retval < 0)
1346 goto out;
1347
77d27200 1348 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1349 if (retval < 0)
1350 goto out_release;
1351
1352out:
1353 /* It may be already another descriptor 8) Not kernel problem. */
1354 return retval;
1355
1356out_release:
1357 sock_release(sock);
1358 return retval;
1359}
1360
1361/*
1362 * Create a pair of connected sockets.
1363 */
1364
3e0fa65f
HC
1365SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1366 int __user *, usockvec)
1da177e4
LT
1367{
1368 struct socket *sock1, *sock2;
1369 int fd1, fd2, err;
db349509 1370 struct file *newfile1, *newfile2;
a677a039
UD
1371 int flags;
1372
1373 flags = type & ~SOCK_TYPE_MASK;
77d27200 1374 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1375 return -EINVAL;
1376 type &= SOCK_TYPE_MASK;
1da177e4 1377
aaca0bdc
UD
1378 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1379 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1380
1da177e4
LT
1381 /*
1382 * Obtain the first socket and check if the underlying protocol
1383 * supports the socketpair call.
1384 */
1385
1386 err = sock_create(family, type, protocol, &sock1);
1387 if (err < 0)
1388 goto out;
1389
1390 err = sock_create(family, type, protocol, &sock2);
1391 if (err < 0)
1392 goto out_release_1;
1393
1394 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1395 if (err < 0)
1da177e4
LT
1396 goto out_release_both;
1397
7cbe66b6 1398 fd1 = sock_alloc_file(sock1, &newfile1, flags);
bf3c23d1
DM
1399 if (unlikely(fd1 < 0)) {
1400 err = fd1;
db349509 1401 goto out_release_both;
bf3c23d1 1402 }
1da177e4 1403
7cbe66b6 1404 fd2 = sock_alloc_file(sock2, &newfile2, flags);
198de4d7
AV
1405 if (unlikely(fd2 < 0)) {
1406 err = fd2;
1407 fput(newfile1);
1408 put_unused_fd(fd1);
1409 sock_release(sock2);
1410 goto out;
db349509
AV
1411 }
1412
157cf649 1413 audit_fd_pair(fd1, fd2);
db349509
AV
1414 fd_install(fd1, newfile1);
1415 fd_install(fd2, newfile2);
1da177e4
LT
1416 /* fd1 and fd2 may be already another descriptors.
1417 * Not kernel problem.
1418 */
1419
89bddce5 1420 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1421 if (!err)
1422 err = put_user(fd2, &usockvec[1]);
1423 if (!err)
1424 return 0;
1425
1426 sys_close(fd2);
1427 sys_close(fd1);
1428 return err;
1429
1da177e4 1430out_release_both:
89bddce5 1431 sock_release(sock2);
1da177e4 1432out_release_1:
89bddce5 1433 sock_release(sock1);
1da177e4
LT
1434out:
1435 return err;
1436}
1437
1da177e4
LT
1438/*
1439 * Bind a name to a socket. Nothing much to do here since it's
1440 * the protocol's responsibility to handle the local address.
1441 *
1442 * We move the socket address to kernel space before we call
1443 * the protocol layer (having also checked the address is ok).
1444 */
1445
20f37034 1446SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1447{
1448 struct socket *sock;
230b1839 1449 struct sockaddr_storage address;
6cb153ca 1450 int err, fput_needed;
1da177e4 1451
89bddce5 1452 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1453 if (sock) {
43db362d 1454 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1455 if (err >= 0) {
1456 err = security_socket_bind(sock,
230b1839 1457 (struct sockaddr *)&address,
89bddce5 1458 addrlen);
6cb153ca
BL
1459 if (!err)
1460 err = sock->ops->bind(sock,
89bddce5 1461 (struct sockaddr *)
230b1839 1462 &address, addrlen);
1da177e4 1463 }
6cb153ca 1464 fput_light(sock->file, fput_needed);
89bddce5 1465 }
1da177e4
LT
1466 return err;
1467}
1468
1da177e4
LT
1469/*
1470 * Perform a listen. Basically, we allow the protocol to do anything
1471 * necessary for a listen, and if that works, we mark the socket as
1472 * ready for listening.
1473 */
1474
3e0fa65f 1475SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1476{
1477 struct socket *sock;
6cb153ca 1478 int err, fput_needed;
b8e1f9b5 1479 int somaxconn;
89bddce5
SH
1480
1481 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1482 if (sock) {
8efa6e93 1483 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1484 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1485 backlog = somaxconn;
1da177e4
LT
1486
1487 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1488 if (!err)
1489 err = sock->ops->listen(sock, backlog);
1da177e4 1490
6cb153ca 1491 fput_light(sock->file, fput_needed);
1da177e4
LT
1492 }
1493 return err;
1494}
1495
1da177e4
LT
1496/*
1497 * For accept, we attempt to create a new socket, set up the link
1498 * with the client, wake up the client, then return the new
1499 * connected fd. We collect the address of the connector in kernel
1500 * space and move it to user at the very end. This is unclean because
1501 * we open the socket then return an error.
1502 *
1503 * 1003.1g adds the ability to recvmsg() to query connection pending
1504 * status to recvmsg. We need to add that support in a way thats
1505 * clean when we restucture accept also.
1506 */
1507
20f37034
HC
1508SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1509 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1510{
1511 struct socket *sock, *newsock;
39d8c1b6 1512 struct file *newfile;
6cb153ca 1513 int err, len, newfd, fput_needed;
230b1839 1514 struct sockaddr_storage address;
1da177e4 1515
77d27200 1516 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1517 return -EINVAL;
1518
1519 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1520 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1521
6cb153ca 1522 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1523 if (!sock)
1524 goto out;
1525
1526 err = -ENFILE;
c6d409cf
ED
1527 newsock = sock_alloc();
1528 if (!newsock)
1da177e4
LT
1529 goto out_put;
1530
1531 newsock->type = sock->type;
1532 newsock->ops = sock->ops;
1533
1da177e4
LT
1534 /*
1535 * We don't need try_module_get here, as the listening socket (sock)
1536 * has the protocol module (sock->ops->owner) held.
1537 */
1538 __module_get(newsock->ops->owner);
1539
7cbe66b6 1540 newfd = sock_alloc_file(newsock, &newfile, flags);
39d8c1b6
DM
1541 if (unlikely(newfd < 0)) {
1542 err = newfd;
9a1875e6
DM
1543 sock_release(newsock);
1544 goto out_put;
39d8c1b6
DM
1545 }
1546
a79af59e
FF
1547 err = security_socket_accept(sock, newsock);
1548 if (err)
39d8c1b6 1549 goto out_fd;
a79af59e 1550
1da177e4
LT
1551 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1552 if (err < 0)
39d8c1b6 1553 goto out_fd;
1da177e4
LT
1554
1555 if (upeer_sockaddr) {
230b1839 1556 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1557 &len, 2) < 0) {
1da177e4 1558 err = -ECONNABORTED;
39d8c1b6 1559 goto out_fd;
1da177e4 1560 }
43db362d 1561 err = move_addr_to_user(&address,
230b1839 1562 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1563 if (err < 0)
39d8c1b6 1564 goto out_fd;
1da177e4
LT
1565 }
1566
1567 /* File flags are not inherited via accept() unlike another OSes. */
1568
39d8c1b6
DM
1569 fd_install(newfd, newfile);
1570 err = newfd;
1da177e4 1571
1da177e4 1572out_put:
6cb153ca 1573 fput_light(sock->file, fput_needed);
1da177e4
LT
1574out:
1575 return err;
39d8c1b6 1576out_fd:
9606a216 1577 fput(newfile);
39d8c1b6 1578 put_unused_fd(newfd);
1da177e4
LT
1579 goto out_put;
1580}
1581
20f37034
HC
1582SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1583 int __user *, upeer_addrlen)
aaca0bdc 1584{
de11defe 1585 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1586}
1587
1da177e4
LT
1588/*
1589 * Attempt to connect to a socket with the server address. The address
1590 * is in user space so we verify it is OK and move it to kernel space.
1591 *
1592 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1593 * break bindings
1594 *
1595 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1596 * other SEQPACKET protocols that take time to connect() as it doesn't
1597 * include the -EINPROGRESS status for such sockets.
1598 */
1599
20f37034
HC
1600SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1601 int, addrlen)
1da177e4
LT
1602{
1603 struct socket *sock;
230b1839 1604 struct sockaddr_storage address;
6cb153ca 1605 int err, fput_needed;
1da177e4 1606
6cb153ca 1607 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1608 if (!sock)
1609 goto out;
43db362d 1610 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1611 if (err < 0)
1612 goto out_put;
1613
89bddce5 1614 err =
230b1839 1615 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1616 if (err)
1617 goto out_put;
1618
230b1839 1619 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1620 sock->file->f_flags);
1621out_put:
6cb153ca 1622 fput_light(sock->file, fput_needed);
1da177e4
LT
1623out:
1624 return err;
1625}
1626
1627/*
1628 * Get the local address ('name') of a socket object. Move the obtained
1629 * name to user space.
1630 */
1631
20f37034
HC
1632SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1633 int __user *, usockaddr_len)
1da177e4
LT
1634{
1635 struct socket *sock;
230b1839 1636 struct sockaddr_storage address;
6cb153ca 1637 int len, err, fput_needed;
89bddce5 1638
6cb153ca 1639 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1640 if (!sock)
1641 goto out;
1642
1643 err = security_socket_getsockname(sock);
1644 if (err)
1645 goto out_put;
1646
230b1839 1647 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1648 if (err)
1649 goto out_put;
43db362d 1650 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1651
1652out_put:
6cb153ca 1653 fput_light(sock->file, fput_needed);
1da177e4
LT
1654out:
1655 return err;
1656}
1657
1658/*
1659 * Get the remote address ('name') of a socket object. Move the obtained
1660 * name to user space.
1661 */
1662
20f37034
HC
1663SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1664 int __user *, usockaddr_len)
1da177e4
LT
1665{
1666 struct socket *sock;
230b1839 1667 struct sockaddr_storage address;
6cb153ca 1668 int len, err, fput_needed;
1da177e4 1669
89bddce5
SH
1670 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1671 if (sock != NULL) {
1da177e4
LT
1672 err = security_socket_getpeername(sock);
1673 if (err) {
6cb153ca 1674 fput_light(sock->file, fput_needed);
1da177e4
LT
1675 return err;
1676 }
1677
89bddce5 1678 err =
230b1839 1679 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1680 1);
1da177e4 1681 if (!err)
43db362d 1682 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1683 usockaddr_len);
6cb153ca 1684 fput_light(sock->file, fput_needed);
1da177e4
LT
1685 }
1686 return err;
1687}
1688
1689/*
1690 * Send a datagram to a given address. We move the address into kernel
1691 * space and check the user space data area is readable before invoking
1692 * the protocol.
1693 */
1694
3e0fa65f 1695SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1696 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1697 int, addr_len)
1da177e4
LT
1698{
1699 struct socket *sock;
230b1839 1700 struct sockaddr_storage address;
1da177e4
LT
1701 int err;
1702 struct msghdr msg;
1703 struct iovec iov;
6cb153ca 1704 int fput_needed;
6cb153ca 1705
253eacc0
LT
1706 if (len > INT_MAX)
1707 len = INT_MAX;
de0fa95c
PE
1708 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1709 if (!sock)
4387ff75 1710 goto out;
6cb153ca 1711
89bddce5
SH
1712 iov.iov_base = buff;
1713 iov.iov_len = len;
1714 msg.msg_name = NULL;
1715 msg.msg_iov = &iov;
1716 msg.msg_iovlen = 1;
1717 msg.msg_control = NULL;
1718 msg.msg_controllen = 0;
1719 msg.msg_namelen = 0;
6cb153ca 1720 if (addr) {
43db362d 1721 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1722 if (err < 0)
1723 goto out_put;
230b1839 1724 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1725 msg.msg_namelen = addr_len;
1da177e4
LT
1726 }
1727 if (sock->file->f_flags & O_NONBLOCK)
1728 flags |= MSG_DONTWAIT;
1729 msg.msg_flags = flags;
1730 err = sock_sendmsg(sock, &msg, len);
1731
89bddce5 1732out_put:
de0fa95c 1733 fput_light(sock->file, fput_needed);
4387ff75 1734out:
1da177e4
LT
1735 return err;
1736}
1737
1738/*
89bddce5 1739 * Send a datagram down a socket.
1da177e4
LT
1740 */
1741
3e0fa65f 1742SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1743 unsigned int, flags)
1da177e4
LT
1744{
1745 return sys_sendto(fd, buff, len, flags, NULL, 0);
1746}
1747
1748/*
89bddce5 1749 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1750 * sender. We verify the buffers are writable and if needed move the
1751 * sender address from kernel to user space.
1752 */
1753
3e0fa65f 1754SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1755 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1756 int __user *, addr_len)
1da177e4
LT
1757{
1758 struct socket *sock;
1759 struct iovec iov;
1760 struct msghdr msg;
230b1839 1761 struct sockaddr_storage address;
89bddce5 1762 int err, err2;
6cb153ca
BL
1763 int fput_needed;
1764
253eacc0
LT
1765 if (size > INT_MAX)
1766 size = INT_MAX;
de0fa95c 1767 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1768 if (!sock)
de0fa95c 1769 goto out;
1da177e4 1770
89bddce5
SH
1771 msg.msg_control = NULL;
1772 msg.msg_controllen = 0;
1773 msg.msg_iovlen = 1;
1774 msg.msg_iov = &iov;
1775 iov.iov_len = size;
1776 iov.iov_base = ubuf;
230b1839
YH
1777 msg.msg_name = (struct sockaddr *)&address;
1778 msg.msg_namelen = sizeof(address);
1da177e4
LT
1779 if (sock->file->f_flags & O_NONBLOCK)
1780 flags |= MSG_DONTWAIT;
89bddce5 1781 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1782
89bddce5 1783 if (err >= 0 && addr != NULL) {
43db362d 1784 err2 = move_addr_to_user(&address,
230b1839 1785 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1786 if (err2 < 0)
1787 err = err2;
1da177e4 1788 }
de0fa95c
PE
1789
1790 fput_light(sock->file, fput_needed);
4387ff75 1791out:
1da177e4
LT
1792 return err;
1793}
1794
1795/*
89bddce5 1796 * Receive a datagram from a socket.
1da177e4
LT
1797 */
1798
89bddce5 1799asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
95c96174 1800 unsigned int flags)
1da177e4
LT
1801{
1802 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1803}
1804
1805/*
1806 * Set a socket option. Because we don't know the option lengths we have
1807 * to pass the user mode parameter for the protocols to sort out.
1808 */
1809
20f37034
HC
1810SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1811 char __user *, optval, int, optlen)
1da177e4 1812{
6cb153ca 1813 int err, fput_needed;
1da177e4
LT
1814 struct socket *sock;
1815
1816 if (optlen < 0)
1817 return -EINVAL;
89bddce5
SH
1818
1819 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1820 if (sock != NULL) {
1821 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1822 if (err)
1823 goto out_put;
1da177e4
LT
1824
1825 if (level == SOL_SOCKET)
89bddce5
SH
1826 err =
1827 sock_setsockopt(sock, level, optname, optval,
1828 optlen);
1da177e4 1829 else
89bddce5
SH
1830 err =
1831 sock->ops->setsockopt(sock, level, optname, optval,
1832 optlen);
6cb153ca
BL
1833out_put:
1834 fput_light(sock->file, fput_needed);
1da177e4
LT
1835 }
1836 return err;
1837}
1838
1839/*
1840 * Get a socket option. Because we don't know the option lengths we have
1841 * to pass a user mode parameter for the protocols to sort out.
1842 */
1843
20f37034
HC
1844SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1845 char __user *, optval, int __user *, optlen)
1da177e4 1846{
6cb153ca 1847 int err, fput_needed;
1da177e4
LT
1848 struct socket *sock;
1849
89bddce5
SH
1850 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1851 if (sock != NULL) {
6cb153ca
BL
1852 err = security_socket_getsockopt(sock, level, optname);
1853 if (err)
1854 goto out_put;
1da177e4
LT
1855
1856 if (level == SOL_SOCKET)
89bddce5
SH
1857 err =
1858 sock_getsockopt(sock, level, optname, optval,
1859 optlen);
1da177e4 1860 else
89bddce5
SH
1861 err =
1862 sock->ops->getsockopt(sock, level, optname, optval,
1863 optlen);
6cb153ca
BL
1864out_put:
1865 fput_light(sock->file, fput_needed);
1da177e4
LT
1866 }
1867 return err;
1868}
1869
1da177e4
LT
1870/*
1871 * Shutdown a socket.
1872 */
1873
754fe8d2 1874SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1875{
6cb153ca 1876 int err, fput_needed;
1da177e4
LT
1877 struct socket *sock;
1878
89bddce5
SH
1879 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1880 if (sock != NULL) {
1da177e4 1881 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1882 if (!err)
1883 err = sock->ops->shutdown(sock, how);
1884 fput_light(sock->file, fput_needed);
1da177e4
LT
1885 }
1886 return err;
1887}
1888
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG() expects three names to be in scope at the point of use:
 * `flags`, the native header pointer `msg`, and its compat twin named
 * `msg##_compat`.  It picks the compat member when MSG_CMSG_COMPAT is
 * set in flags, the native one otherwise.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1895
c71d8ebe
TH
1896struct used_address {
1897 struct sockaddr_storage name;
1898 unsigned int name_len;
1899};
1900
228e548e 1901static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 1902 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1903 struct used_address *used_address)
1da177e4 1904{
89bddce5
SH
1905 struct compat_msghdr __user *msg_compat =
1906 (struct compat_msghdr __user *)msg;
230b1839 1907 struct sockaddr_storage address;
1da177e4 1908 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1909 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1910 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1911 /* 20 is size of ipv6_pktinfo */
1da177e4 1912 unsigned char *ctl_buf = ctl;
a74e9106 1913 int err, ctl_len, total_len;
89bddce5 1914
1da177e4
LT
1915 err = -EFAULT;
1916 if (MSG_CMSG_COMPAT & flags) {
228e548e 1917 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 1918 return -EFAULT;
228e548e 1919 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1920 return -EFAULT;
1921
228e548e 1922 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
1923 err = -EMSGSIZE;
1924 if (msg_sys->msg_iovlen > UIO_MAXIOV)
1925 goto out;
1926 err = -ENOMEM;
1927 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
1928 GFP_KERNEL);
1da177e4 1929 if (!iov)
228e548e 1930 goto out;
1da177e4
LT
1931 }
1932
1933 /* This will also move the address data into kernel space */
1934 if (MSG_CMSG_COMPAT & flags) {
43db362d 1935 err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
1da177e4 1936 } else
43db362d 1937 err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
89bddce5 1938 if (err < 0)
1da177e4
LT
1939 goto out_freeiov;
1940 total_len = err;
1941
1942 err = -ENOBUFS;
1943
228e548e 1944 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1945 goto out_freeiov;
228e548e 1946 ctl_len = msg_sys->msg_controllen;
1da177e4 1947 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1948 err =
228e548e 1949 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1950 sizeof(ctl));
1da177e4
LT
1951 if (err)
1952 goto out_freeiov;
228e548e
AB
1953 ctl_buf = msg_sys->msg_control;
1954 ctl_len = msg_sys->msg_controllen;
1da177e4 1955 } else if (ctl_len) {
89bddce5 1956 if (ctl_len > sizeof(ctl)) {
1da177e4 1957 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1958 if (ctl_buf == NULL)
1da177e4
LT
1959 goto out_freeiov;
1960 }
1961 err = -EFAULT;
1962 /*
228e548e 1963 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1964 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1965 * checking falls down on this.
1966 */
fb8621bb 1967 if (copy_from_user(ctl_buf,
228e548e 1968 (void __user __force *)msg_sys->msg_control,
89bddce5 1969 ctl_len))
1da177e4 1970 goto out_freectl;
228e548e 1971 msg_sys->msg_control = ctl_buf;
1da177e4 1972 }
228e548e 1973 msg_sys->msg_flags = flags;
1da177e4
LT
1974
1975 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1976 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1977 /*
1978 * If this is sendmmsg() and current destination address is same as
1979 * previously succeeded address, omit asking LSM's decision.
1980 * used_address->name_len is initialized to UINT_MAX so that the first
1981 * destination address never matches.
1982 */
bc909d9d
MD
1983 if (used_address && msg_sys->msg_name &&
1984 used_address->name_len == msg_sys->msg_namelen &&
1985 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
1986 used_address->name_len)) {
1987 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
1988 goto out_freectl;
1989 }
1990 err = sock_sendmsg(sock, msg_sys, total_len);
1991 /*
1992 * If this is sendmmsg() and sending to current destination address was
1993 * successful, remember it.
1994 */
1995 if (used_address && err >= 0) {
1996 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1997 if (msg_sys->msg_name)
1998 memcpy(&used_address->name, msg_sys->msg_name,
1999 used_address->name_len);
c71d8ebe 2000 }
1da177e4
LT
2001
2002out_freectl:
89bddce5 2003 if (ctl_buf != ctl)
1da177e4
LT
2004 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2005out_freeiov:
2006 if (iov != iovstack)
a74e9106 2007 kfree(iov);
228e548e
AB
2008out:
2009 return err;
2010}
2011
2012/*
2013 * BSD sendmsg interface
2014 */
2015
95c96174 2016SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
228e548e
AB
2017{
2018 int fput_needed, err;
2019 struct msghdr msg_sys;
2020 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2021
2022 if (!sock)
2023 goto out;
2024
c71d8ebe 2025 err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2026
6cb153ca 2027 fput_light(sock->file, fput_needed);
89bddce5 2028out:
1da177e4
LT
2029 return err;
2030}
2031
228e548e
AB
2032/*
2033 * Linux sendmmsg interface
2034 */
2035
2036int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2037 unsigned int flags)
2038{
2039 int fput_needed, err, datagrams;
2040 struct socket *sock;
2041 struct mmsghdr __user *entry;
2042 struct compat_mmsghdr __user *compat_entry;
2043 struct msghdr msg_sys;
c71d8ebe 2044 struct used_address used_address;
228e548e 2045
98382f41
AB
2046 if (vlen > UIO_MAXIOV)
2047 vlen = UIO_MAXIOV;
228e548e
AB
2048
2049 datagrams = 0;
2050
2051 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2052 if (!sock)
2053 return err;
2054
c71d8ebe 2055 used_address.name_len = UINT_MAX;
228e548e
AB
2056 entry = mmsg;
2057 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2058 err = 0;
228e548e
AB
2059
2060 while (datagrams < vlen) {
228e548e
AB
2061 if (MSG_CMSG_COMPAT & flags) {
2062 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
c71d8ebe 2063 &msg_sys, flags, &used_address);
228e548e
AB
2064 if (err < 0)
2065 break;
2066 err = __put_user(err, &compat_entry->msg_len);
2067 ++compat_entry;
2068 } else {
2069 err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
c71d8ebe 2070 &msg_sys, flags, &used_address);
228e548e
AB
2071 if (err < 0)
2072 break;
2073 err = put_user(err, &entry->msg_len);
2074 ++entry;
2075 }
2076
2077 if (err)
2078 break;
2079 ++datagrams;
2080 }
2081
228e548e
AB
2082 fput_light(sock->file, fput_needed);
2083
728ffb86
AB
2084 /* We only return an error if no datagrams were able to be sent */
2085 if (datagrams != 0)
228e548e
AB
2086 return datagrams;
2087
228e548e
AB
2088 return err;
2089}
2090
2091SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2092 unsigned int, vlen, unsigned int, flags)
2093{
2094 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2095}
2096
a2e27255 2097static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 2098 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2099{
89bddce5
SH
2100 struct compat_msghdr __user *msg_compat =
2101 (struct compat_msghdr __user *)msg;
1da177e4 2102 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2103 struct iovec *iov = iovstack;
1da177e4 2104 unsigned long cmsg_ptr;
a74e9106 2105 int err, total_len, len;
1da177e4
LT
2106
2107 /* kernel mode address */
230b1839 2108 struct sockaddr_storage addr;
1da177e4
LT
2109
2110 /* user mode address pointers */
2111 struct sockaddr __user *uaddr;
2112 int __user *uaddr_len;
89bddce5 2113
1da177e4 2114 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2115 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2116 return -EFAULT;
c6d409cf 2117 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
89bddce5 2118 return -EFAULT;
1da177e4 2119
a2e27255 2120 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2121 err = -EMSGSIZE;
2122 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2123 goto out;
2124 err = -ENOMEM;
2125 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2126 GFP_KERNEL);
1da177e4 2127 if (!iov)
a2e27255 2128 goto out;
1da177e4
LT
2129 }
2130
2131 /*
89bddce5
SH
2132 * Save the user-mode address (verify_iovec will change the
2133 * kernel msghdr to use the kernel address space)
1da177e4 2134 */
89bddce5 2135
a2e27255 2136 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4
LT
2137 uaddr_len = COMPAT_NAMELEN(msg);
2138 if (MSG_CMSG_COMPAT & flags) {
43db362d 2139 err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4 2140 } else
43db362d 2141 err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4
LT
2142 if (err < 0)
2143 goto out_freeiov;
89bddce5 2144 total_len = err;
1da177e4 2145
a2e27255
ACM
2146 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2147 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2148
1da177e4
LT
2149 if (sock->file->f_flags & O_NONBLOCK)
2150 flags |= MSG_DONTWAIT;
a2e27255
ACM
2151 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2152 total_len, flags);
1da177e4
LT
2153 if (err < 0)
2154 goto out_freeiov;
2155 len = err;
2156
2157 if (uaddr != NULL) {
43db362d 2158 err = move_addr_to_user(&addr,
a2e27255 2159 msg_sys->msg_namelen, uaddr,
89bddce5 2160 uaddr_len);
1da177e4
LT
2161 if (err < 0)
2162 goto out_freeiov;
2163 }
a2e27255 2164 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2165 COMPAT_FLAGS(msg));
1da177e4
LT
2166 if (err)
2167 goto out_freeiov;
2168 if (MSG_CMSG_COMPAT & flags)
a2e27255 2169 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2170 &msg_compat->msg_controllen);
2171 else
a2e27255 2172 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2173 &msg->msg_controllen);
2174 if (err)
2175 goto out_freeiov;
2176 err = len;
2177
2178out_freeiov:
2179 if (iov != iovstack)
a74e9106 2180 kfree(iov);
a2e27255
ACM
2181out:
2182 return err;
2183}
2184
2185/*
2186 * BSD recvmsg interface
2187 */
2188
2189SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2190 unsigned int, flags)
2191{
2192 int fput_needed, err;
2193 struct msghdr msg_sys;
2194 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2195
2196 if (!sock)
2197 goto out;
2198
2199 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2200
6cb153ca 2201 fput_light(sock->file, fput_needed);
1da177e4
LT
2202out:
2203 return err;
2204}
2205
a2e27255
ACM
2206/*
2207 * Linux recvmmsg interface
2208 */
2209
2210int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2211 unsigned int flags, struct timespec *timeout)
2212{
2213 int fput_needed, err, datagrams;
2214 struct socket *sock;
2215 struct mmsghdr __user *entry;
d7256d0e 2216 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2217 struct msghdr msg_sys;
2218 struct timespec end_time;
2219
2220 if (timeout &&
2221 poll_select_set_timeout(&end_time, timeout->tv_sec,
2222 timeout->tv_nsec))
2223 return -EINVAL;
2224
2225 datagrams = 0;
2226
2227 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2228 if (!sock)
2229 return err;
2230
2231 err = sock_error(sock->sk);
2232 if (err)
2233 goto out_put;
2234
2235 entry = mmsg;
d7256d0e 2236 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2237
2238 while (datagrams < vlen) {
2239 /*
2240 * No need to ask LSM for more than the first datagram.
2241 */
d7256d0e
JMG
2242 if (MSG_CMSG_COMPAT & flags) {
2243 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
b9eb8b87
AB
2244 &msg_sys, flags & ~MSG_WAITFORONE,
2245 datagrams);
d7256d0e
JMG
2246 if (err < 0)
2247 break;
2248 err = __put_user(err, &compat_entry->msg_len);
2249 ++compat_entry;
2250 } else {
2251 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
b9eb8b87
AB
2252 &msg_sys, flags & ~MSG_WAITFORONE,
2253 datagrams);
d7256d0e
JMG
2254 if (err < 0)
2255 break;
2256 err = put_user(err, &entry->msg_len);
2257 ++entry;
2258 }
2259
a2e27255
ACM
2260 if (err)
2261 break;
a2e27255
ACM
2262 ++datagrams;
2263
71c5c159
BB
2264 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2265 if (flags & MSG_WAITFORONE)
2266 flags |= MSG_DONTWAIT;
2267
a2e27255
ACM
2268 if (timeout) {
2269 ktime_get_ts(timeout);
2270 *timeout = timespec_sub(end_time, *timeout);
2271 if (timeout->tv_sec < 0) {
2272 timeout->tv_sec = timeout->tv_nsec = 0;
2273 break;
2274 }
2275
2276 /* Timeout, return less than vlen datagrams */
2277 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2278 break;
2279 }
2280
2281 /* Out of band data, return right away */
2282 if (msg_sys.msg_flags & MSG_OOB)
2283 break;
2284 }
2285
2286out_put:
2287 fput_light(sock->file, fput_needed);
1da177e4 2288
a2e27255
ACM
2289 if (err == 0)
2290 return datagrams;
2291
2292 if (datagrams != 0) {
2293 /*
2294 * We may return less entries than requested (vlen) if the
2295 * sock is non block and there aren't enough datagrams...
2296 */
2297 if (err != -EAGAIN) {
2298 /*
2299 * ... or if recvmsg returns an error after we
2300 * received some datagrams, where we record the
2301 * error to return on the next call or if the
2302 * app asks about it using getsockopt(SO_ERROR).
2303 */
2304 sock->sk->sk_err = -err;
2305 }
2306
2307 return datagrams;
2308 }
2309
2310 return err;
2311}
2312
2313SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2314 unsigned int, vlen, unsigned int, flags,
2315 struct timespec __user *, timeout)
2316{
2317 int datagrams;
2318 struct timespec timeout_sys;
2319
2320 if (!timeout)
2321 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2322
2323 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2324 return -EFAULT;
2325
2326 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2327
2328 if (datagrams > 0 &&
2329 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2330 datagrams = -EFAULT;
2331
2332 return datagrams;
2333}
2334
2335#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
2336/* Argument list sizes for sys_socketcall */
2337#define AL(x) ((x) * sizeof(unsigned long))
228e548e 2338static const unsigned char nargs[21] = {
c6d409cf
ED
2339 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
2340 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
2341 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
228e548e 2342 AL(4), AL(5), AL(4)
89bddce5
SH
2343};
2344
1da177e4
LT
2345#undef AL
2346
2347/*
89bddce5 2348 * System call vectors.
1da177e4
LT
2349 *
2350 * Argument checking cleaned up. Saved 20% in size.
2351 * This function doesn't need to set the kernel lock because
89bddce5 2352 * it is set by the callees.
1da177e4
LT
2353 */
2354
3e0fa65f 2355SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4
LT
2356{
2357 unsigned long a[6];
89bddce5 2358 unsigned long a0, a1;
1da177e4 2359 int err;
47379052 2360 unsigned int len;
1da177e4 2361
228e548e 2362 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2363 return -EINVAL;
2364
47379052
AV
2365 len = nargs[call];
2366 if (len > sizeof(a))
2367 return -EINVAL;
2368
1da177e4 2369 /* copy_from_user should be SMP safe. */
47379052 2370 if (copy_from_user(a, args, len))
1da177e4 2371 return -EFAULT;
3ec3b2fb 2372
f3298dc4 2373 audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb 2374
89bddce5
SH
2375 a0 = a[0];
2376 a1 = a[1];
2377
2378 switch (call) {
2379 case SYS_SOCKET:
2380 err = sys_socket(a0, a1, a[2]);
2381 break;
2382 case SYS_BIND:
2383 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2384 break;
2385 case SYS_CONNECT:
2386 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2387 break;
2388 case SYS_LISTEN:
2389 err = sys_listen(a0, a1);
2390 break;
2391 case SYS_ACCEPT:
de11defe
UD
2392 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2393 (int __user *)a[2], 0);
89bddce5
SH
2394 break;
2395 case SYS_GETSOCKNAME:
2396 err =
2397 sys_getsockname(a0, (struct sockaddr __user *)a1,
2398 (int __user *)a[2]);
2399 break;
2400 case SYS_GETPEERNAME:
2401 err =
2402 sys_getpeername(a0, (struct sockaddr __user *)a1,
2403 (int __user *)a[2]);
2404 break;
2405 case SYS_SOCKETPAIR:
2406 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2407 break;
2408 case SYS_SEND:
2409 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2410 break;
2411 case SYS_SENDTO:
2412 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2413 (struct sockaddr __user *)a[4], a[5]);
2414 break;
2415 case SYS_RECV:
2416 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2417 break;
2418 case SYS_RECVFROM:
2419 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2420 (struct sockaddr __user *)a[4],
2421 (int __user *)a[5]);
2422 break;
2423 case SYS_SHUTDOWN:
2424 err = sys_shutdown(a0, a1);
2425 break;
2426 case SYS_SETSOCKOPT:
2427 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2428 break;
2429 case SYS_GETSOCKOPT:
2430 err =
2431 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2432 (int __user *)a[4]);
2433 break;
2434 case SYS_SENDMSG:
2435 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2436 break;
228e548e
AB
2437 case SYS_SENDMMSG:
2438 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2439 break;
89bddce5
SH
2440 case SYS_RECVMSG:
2441 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2442 break;
a2e27255
ACM
2443 case SYS_RECVMMSG:
2444 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2445 (struct timespec __user *)a[4]);
2446 break;
de11defe
UD
2447 case SYS_ACCEPT4:
2448 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2449 (int __user *)a[2], a[3]);
aaca0bdc 2450 break;
89bddce5
SH
2451 default:
2452 err = -EINVAL;
2453 break;
1da177e4
LT
2454 }
2455 return err;
2456}
2457
89bddce5 2458#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2459
55737fda
SH
2460/**
2461 * sock_register - add a socket protocol handler
2462 * @ops: description of protocol
2463 *
1da177e4
LT
2464 * This function is called by a protocol handler that wants to
2465 * advertise its address family, and have it linked into the
55737fda
SH
2466 * socket interface. The value ops->family coresponds to the
2467 * socket system call protocol family.
1da177e4 2468 */
f0fd27d4 2469int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2470{
2471 int err;
2472
2473 if (ops->family >= NPROTO) {
89bddce5
SH
2474 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2475 NPROTO);
1da177e4
LT
2476 return -ENOBUFS;
2477 }
55737fda
SH
2478
2479 spin_lock(&net_family_lock);
190683a9
ED
2480 if (rcu_dereference_protected(net_families[ops->family],
2481 lockdep_is_held(&net_family_lock)))
55737fda
SH
2482 err = -EEXIST;
2483 else {
cf778b00 2484 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2485 err = 0;
2486 }
55737fda
SH
2487 spin_unlock(&net_family_lock);
2488
89bddce5 2489 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2490 return err;
2491}
c6d409cf 2492EXPORT_SYMBOL(sock_register);
1da177e4 2493
55737fda
SH
2494/**
2495 * sock_unregister - remove a protocol handler
2496 * @family: protocol family to remove
2497 *
1da177e4
LT
2498 * This function is called by a protocol handler that wants to
2499 * remove its address family, and have it unlinked from the
55737fda
SH
2500 * new socket creation.
2501 *
2502 * If protocol handler is a module, then it can use module reference
2503 * counts to protect against new references. If protocol handler is not
2504 * a module then it needs to provide its own protection in
2505 * the ops->create routine.
1da177e4 2506 */
f0fd27d4 2507void sock_unregister(int family)
1da177e4 2508{
f0fd27d4 2509 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2510
55737fda 2511 spin_lock(&net_family_lock);
a9b3cd7f 2512 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2513 spin_unlock(&net_family_lock);
2514
2515 synchronize_rcu();
2516
89bddce5 2517 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4 2518}
c6d409cf 2519EXPORT_SYMBOL(sock_unregister);
1da177e4 2520
77d76ea3 2521static int __init sock_init(void)
1da177e4 2522{
b3e19d92 2523 int err;
2ca794e5
EB
2524 /*
2525 * Initialize the network sysctl infrastructure.
2526 */
2527 err = net_sysctl_init();
2528 if (err)
2529 goto out;
b3e19d92 2530
1da177e4 2531 /*
89bddce5 2532 * Initialize sock SLAB cache.
1da177e4 2533 */
89bddce5 2534
1da177e4
LT
2535 sk_init();
2536
1da177e4 2537 /*
89bddce5 2538 * Initialize skbuff SLAB cache
1da177e4
LT
2539 */
2540 skb_init();
1da177e4
LT
2541
2542 /*
89bddce5 2543 * Initialize the protocols module.
1da177e4
LT
2544 */
2545
2546 init_inodecache();
b3e19d92
NP
2547
2548 err = register_filesystem(&sock_fs_type);
2549 if (err)
2550 goto out_fs;
1da177e4 2551 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2552 if (IS_ERR(sock_mnt)) {
2553 err = PTR_ERR(sock_mnt);
2554 goto out_mount;
2555 }
77d76ea3
AK
2556
2557 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2558 */
2559
2560#ifdef CONFIG_NETFILTER
2561 netfilter_init();
2562#endif
cbeb321a 2563
c1f19b51
RC
2564#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2565 skb_timestamping_init();
2566#endif
2567
b3e19d92
NP
2568out:
2569 return err;
2570
2571out_mount:
2572 unregister_filesystem(&sock_fs_type);
2573out_fs:
2574 goto out;
1da177e4
LT
2575}
2576
77d76ea3
AK
2577core_initcall(sock_init); /* early initcall */
2578
1da177e4
LT
2579#ifdef CONFIG_PROC_FS
2580void socket_seq_show(struct seq_file *seq)
2581{
2582 int cpu;
2583 int counter = 0;
2584
6f912042 2585 for_each_possible_cpu(cpu)
89bddce5 2586 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2587
2588 /* It can be negative, by the way. 8) */
2589 if (counter < 0)
2590 counter = 0;
2591
2592 seq_printf(seq, "sockets: used %d\n", counter);
2593}
89bddce5 2594#endif /* CONFIG_PROC_FS */
1da177e4 2595
89bbfc95 2596#ifdef CONFIG_COMPAT
6b96018b 2597static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2598 unsigned int cmd, void __user *up)
7a229387 2599{
7a229387
AB
2600 mm_segment_t old_fs = get_fs();
2601 struct timeval ktv;
2602 int err;
2603
2604 set_fs(KERNEL_DS);
6b96018b 2605 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2606 set_fs(old_fs);
644595f8
PA
2607 if (!err)
2608 err = compat_put_timeval(up, &ktv);
2609
7a229387
AB
2610 return err;
2611}
2612
6b96018b 2613static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2614 unsigned int cmd, void __user *up)
7a229387 2615{
7a229387
AB
2616 mm_segment_t old_fs = get_fs();
2617 struct timespec kts;
2618 int err;
2619
2620 set_fs(KERNEL_DS);
6b96018b 2621 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2622 set_fs(old_fs);
644595f8
PA
2623 if (!err)
2624 err = compat_put_timespec(up, &kts);
2625
7a229387
AB
2626 return err;
2627}
2628
6b96018b 2629static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2630{
2631 struct ifreq __user *uifr;
2632 int err;
2633
2634 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2635 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2636 return -EFAULT;
2637
6b96018b 2638 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2639 if (err)
2640 return err;
2641
6b96018b 2642 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2643 return -EFAULT;
2644
2645 return 0;
2646}
2647
6b96018b 2648static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2649{
6b96018b 2650 struct compat_ifconf ifc32;
7a229387
AB
2651 struct ifconf ifc;
2652 struct ifconf __user *uifc;
6b96018b 2653 struct compat_ifreq __user *ifr32;
7a229387
AB
2654 struct ifreq __user *ifr;
2655 unsigned int i, j;
2656 int err;
2657
6b96018b 2658 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2659 return -EFAULT;
2660
2661 if (ifc32.ifcbuf == 0) {
2662 ifc32.ifc_len = 0;
2663 ifc.ifc_len = 0;
2664 ifc.ifc_req = NULL;
2665 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2666 } else {
c6d409cf
ED
2667 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2668 sizeof(struct ifreq);
7a229387
AB
2669 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2670 ifc.ifc_len = len;
2671 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2672 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2673 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2674 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2675 return -EFAULT;
2676 ifr++;
2677 ifr32++;
2678 }
2679 }
2680 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2681 return -EFAULT;
2682
6b96018b 2683 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2684 if (err)
2685 return err;
2686
2687 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2688 return -EFAULT;
2689
2690 ifr = ifc.ifc_req;
2691 ifr32 = compat_ptr(ifc32.ifcbuf);
2692 for (i = 0, j = 0;
c6d409cf
ED
2693 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2694 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2695 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2696 return -EFAULT;
2697 ifr32++;
2698 ifr++;
2699 }
2700
2701 if (ifc32.ifcbuf == 0) {
2702 /* Translate from 64-bit structure multiple to
2703 * a 32-bit one.
2704 */
2705 i = ifc.ifc_len;
6b96018b 2706 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2707 ifc32.ifc_len = i;
2708 } else {
2709 ifc32.ifc_len = i;
2710 }
6b96018b 2711 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2712 return -EFAULT;
2713
2714 return 0;
2715}
2716
6b96018b 2717static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2718{
3a7da39d
BH
2719 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2720 bool convert_in = false, convert_out = false;
2721 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2722 struct ethtool_rxnfc __user *rxnfc;
7a229387 2723 struct ifreq __user *ifr;
3a7da39d
BH
2724 u32 rule_cnt = 0, actual_rule_cnt;
2725 u32 ethcmd;
7a229387 2726 u32 data;
3a7da39d 2727 int ret;
7a229387 2728
3a7da39d
BH
2729 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2730 return -EFAULT;
7a229387 2731
3a7da39d
BH
2732 compat_rxnfc = compat_ptr(data);
2733
2734 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2735 return -EFAULT;
2736
3a7da39d
BH
2737 /* Most ethtool structures are defined without padding.
2738 * Unfortunately struct ethtool_rxnfc is an exception.
2739 */
2740 switch (ethcmd) {
2741 default:
2742 break;
2743 case ETHTOOL_GRXCLSRLALL:
2744 /* Buffer size is variable */
2745 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2746 return -EFAULT;
2747 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2748 return -ENOMEM;
2749 buf_size += rule_cnt * sizeof(u32);
2750 /* fall through */
2751 case ETHTOOL_GRXRINGS:
2752 case ETHTOOL_GRXCLSRLCNT:
2753 case ETHTOOL_GRXCLSRULE:
55664f32 2754 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2755 convert_out = true;
2756 /* fall through */
2757 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2758 buf_size += sizeof(struct ethtool_rxnfc);
2759 convert_in = true;
2760 break;
2761 }
2762
2763 ifr = compat_alloc_user_space(buf_size);
2764 rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8);
2765
2766 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2767 return -EFAULT;
2768
3a7da39d
BH
2769 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2770 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2771 return -EFAULT;
2772
3a7da39d 2773 if (convert_in) {
127fe533 2774 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2775 * fs.ring_cookie and at the end of fs, but nowhere else.
2776 */
127fe533
AD
2777 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2778 sizeof(compat_rxnfc->fs.m_ext) !=
2779 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2780 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2781 BUILD_BUG_ON(
2782 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2783 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2784 offsetof(struct ethtool_rxnfc, fs.location) -
2785 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2786
2787 if (copy_in_user(rxnfc, compat_rxnfc,
127fe533 2788 (void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2789 (void *)rxnfc) ||
2790 copy_in_user(&rxnfc->fs.ring_cookie,
2791 &compat_rxnfc->fs.ring_cookie,
2792 (void *)(&rxnfc->fs.location + 1) -
2793 (void *)&rxnfc->fs.ring_cookie) ||
2794 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2795 sizeof(rxnfc->rule_cnt)))
2796 return -EFAULT;
2797 }
2798
2799 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2800 if (ret)
2801 return ret;
2802
2803 if (convert_out) {
2804 if (copy_in_user(compat_rxnfc, rxnfc,
127fe533 2805 (const void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2806 (const void *)rxnfc) ||
2807 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2808 &rxnfc->fs.ring_cookie,
2809 (const void *)(&rxnfc->fs.location + 1) -
2810 (const void *)&rxnfc->fs.ring_cookie) ||
2811 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2812 sizeof(rxnfc->rule_cnt)))
2813 return -EFAULT;
2814
2815 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2816 /* As an optimisation, we only copy the actual
2817 * number of rules that the underlying
2818 * function returned. Since Mallory might
2819 * change the rule count in user memory, we
2820 * check that it is less than the rule count
2821 * originally given (as the user buffer size),
2822 * which has been range-checked.
2823 */
2824 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2825 return -EFAULT;
2826 if (actual_rule_cnt < rule_cnt)
2827 rule_cnt = actual_rule_cnt;
2828 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2829 &rxnfc->rule_locs[0],
2830 rule_cnt * sizeof(u32)))
2831 return -EFAULT;
2832 }
2833 }
2834
2835 return 0;
7a229387
AB
2836}
2837
7a50a240
AB
2838static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2839{
2840 void __user *uptr;
2841 compat_uptr_t uptr32;
2842 struct ifreq __user *uifr;
2843
c6d409cf 2844 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2845 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2846 return -EFAULT;
2847
2848 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2849 return -EFAULT;
2850
2851 uptr = compat_ptr(uptr32);
2852
2853 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2854 return -EFAULT;
2855
2856 return dev_ioctl(net, SIOCWANDEV, uifr);
2857}
2858
6b96018b
AB
2859static int bond_ioctl(struct net *net, unsigned int cmd,
2860 struct compat_ifreq __user *ifr32)
7a229387
AB
2861{
2862 struct ifreq kifr;
2863 struct ifreq __user *uifr;
7a229387
AB
2864 mm_segment_t old_fs;
2865 int err;
2866 u32 data;
2867 void __user *datap;
2868
2869 switch (cmd) {
2870 case SIOCBONDENSLAVE:
2871 case SIOCBONDRELEASE:
2872 case SIOCBONDSETHWADDR:
2873 case SIOCBONDCHANGEACTIVE:
6b96018b 2874 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2875 return -EFAULT;
2876
2877 old_fs = get_fs();
c6d409cf 2878 set_fs(KERNEL_DS);
c3f52ae6 2879 err = dev_ioctl(net, cmd,
2880 (struct ifreq __user __force *) &kifr);
c6d409cf 2881 set_fs(old_fs);
7a229387
AB
2882
2883 return err;
2884 case SIOCBONDSLAVEINFOQUERY:
2885 case SIOCBONDINFOQUERY:
2886 uifr = compat_alloc_user_space(sizeof(*uifr));
2887 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2888 return -EFAULT;
2889
2890 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2891 return -EFAULT;
2892
2893 datap = compat_ptr(data);
2894 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2895 return -EFAULT;
2896
6b96018b 2897 return dev_ioctl(net, cmd, uifr);
7a229387 2898 default:
07d106d0 2899 return -ENOIOCTLCMD;
ccbd6a5a 2900 }
7a229387
AB
2901}
2902
6b96018b
AB
2903static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2904 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2905{
2906 struct ifreq __user *u_ifreq64;
7a229387
AB
2907 char tmp_buf[IFNAMSIZ];
2908 void __user *data64;
2909 u32 data32;
2910
2911 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2912 IFNAMSIZ))
2913 return -EFAULT;
2914 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2915 return -EFAULT;
2916 data64 = compat_ptr(data32);
2917
2918 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2919
2920 /* Don't check these user accesses, just let that get trapped
2921 * in the ioctl handler instead.
2922 */
2923 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2924 IFNAMSIZ))
2925 return -EFAULT;
2926 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
2927 return -EFAULT;
2928
6b96018b 2929 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2930}
2931
6b96018b
AB
2932static int dev_ifsioc(struct net *net, struct socket *sock,
2933 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2934{
a2116ed2 2935 struct ifreq __user *uifr;
7a229387
AB
2936 int err;
2937
a2116ed2
AB
2938 uifr = compat_alloc_user_space(sizeof(*uifr));
2939 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2940 return -EFAULT;
2941
2942 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2943
7a229387
AB
2944 if (!err) {
2945 switch (cmd) {
2946 case SIOCGIFFLAGS:
2947 case SIOCGIFMETRIC:
2948 case SIOCGIFMTU:
2949 case SIOCGIFMEM:
2950 case SIOCGIFHWADDR:
2951 case SIOCGIFINDEX:
2952 case SIOCGIFADDR:
2953 case SIOCGIFBRDADDR:
2954 case SIOCGIFDSTADDR:
2955 case SIOCGIFNETMASK:
fab2532b 2956 case SIOCGIFPFLAGS:
7a229387 2957 case SIOCGIFTXQLEN:
fab2532b
AB
2958 case SIOCGMIIPHY:
2959 case SIOCGMIIREG:
a2116ed2 2960 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2961 err = -EFAULT;
2962 break;
2963 }
2964 }
2965 return err;
2966}
2967
a2116ed2
AB
2968static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2969 struct compat_ifreq __user *uifr32)
2970{
2971 struct ifreq ifr;
2972 struct compat_ifmap __user *uifmap32;
2973 mm_segment_t old_fs;
2974 int err;
2975
2976 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2977 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
2978 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2979 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2980 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2981 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
2982 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
2983 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
2984 if (err)
2985 return -EFAULT;
2986
2987 old_fs = get_fs();
c6d409cf 2988 set_fs(KERNEL_DS);
c3f52ae6 2989 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2990 set_fs(old_fs);
a2116ed2
AB
2991
2992 if (cmd == SIOCGIFMAP && !err) {
2993 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
2994 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2995 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2996 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2997 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
2998 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
2999 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
3000 if (err)
3001 err = -EFAULT;
3002 }
3003 return err;
3004}
3005
3006static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
3007{
3008 void __user *uptr;
3009 compat_uptr_t uptr32;
3010 struct ifreq __user *uifr;
3011
c6d409cf 3012 uifr = compat_alloc_user_space(sizeof(*uifr));
a2116ed2
AB
3013 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
3014 return -EFAULT;
3015
3016 if (get_user(uptr32, &uifr32->ifr_data))
3017 return -EFAULT;
3018
3019 uptr = compat_ptr(uptr32);
3020
3021 if (put_user(uptr, &uifr->ifr_data))
3022 return -EFAULT;
3023
3024 return dev_ioctl(net, SIOCSHWTSTAMP, uifr);
3025}
3026
7a229387 3027struct rtentry32 {
c6d409cf 3028 u32 rt_pad1;
7a229387
AB
3029 struct sockaddr rt_dst; /* target address */
3030 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3031 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3032 unsigned short rt_flags;
3033 short rt_pad2;
3034 u32 rt_pad3;
3035 unsigned char rt_tos;
3036 unsigned char rt_class;
3037 short rt_pad4;
3038 short rt_metric; /* +1 for binary compatibility! */
7a229387 3039 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3040 u32 rt_mtu; /* per route MTU/Window */
3041 u32 rt_window; /* Window clamping */
7a229387
AB
3042 unsigned short rt_irtt; /* Initial RTT */
3043};
3044
3045struct in6_rtmsg32 {
3046 struct in6_addr rtmsg_dst;
3047 struct in6_addr rtmsg_src;
3048 struct in6_addr rtmsg_gateway;
3049 u32 rtmsg_type;
3050 u16 rtmsg_dst_len;
3051 u16 rtmsg_src_len;
3052 u32 rtmsg_metric;
3053 u32 rtmsg_info;
3054 u32 rtmsg_flags;
3055 s32 rtmsg_ifindex;
3056};
3057
6b96018b
AB
3058static int routing_ioctl(struct net *net, struct socket *sock,
3059 unsigned int cmd, void __user *argp)
7a229387
AB
3060{
3061 int ret;
3062 void *r = NULL;
3063 struct in6_rtmsg r6;
3064 struct rtentry r4;
3065 char devname[16];
3066 u32 rtdev;
3067 mm_segment_t old_fs = get_fs();
3068
6b96018b
AB
3069 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3070 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3071 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3072 3 * sizeof(struct in6_addr));
c6d409cf
ED
3073 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3074 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3075 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3076 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3077 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3078 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3079 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3080
3081 r = (void *) &r6;
3082 } else { /* ipv4 */
6b96018b 3083 struct rtentry32 __user *ur4 = argp;
c6d409cf 3084 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3085 3 * sizeof(struct sockaddr));
c6d409cf
ED
3086 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
3087 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
3088 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
3089 ret |= __get_user(r4.rt_window, &(ur4->rt_window));
3090 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
3091 ret |= __get_user(rtdev, &(ur4->rt_dev));
7a229387 3092 if (rtdev) {
c6d409cf 3093 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3094 r4.rt_dev = (char __user __force *)devname;
3095 devname[15] = 0;
7a229387
AB
3096 } else
3097 r4.rt_dev = NULL;
3098
3099 r = (void *) &r4;
3100 }
3101
3102 if (ret) {
3103 ret = -EFAULT;
3104 goto out;
3105 }
3106
c6d409cf 3107 set_fs(KERNEL_DS);
6b96018b 3108 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3109 set_fs(old_fs);
7a229387
AB
3110
3111out:
7a229387
AB
3112 return ret;
3113}
3114
3115/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3116 * for some operations; this forces use of the newer bridge-utils that
25985edc 3117 * use compatible ioctls
7a229387 3118 */
6b96018b 3119static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3120{
6b96018b 3121 compat_ulong_t tmp;
7a229387 3122
6b96018b 3123 if (get_user(tmp, argp))
7a229387
AB
3124 return -EFAULT;
3125 if (tmp == BRCTL_GET_VERSION)
3126 return BRCTL_VERSION + 1;
3127 return -EINVAL;
3128}
3129
6b96018b
AB
3130static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3131 unsigned int cmd, unsigned long arg)
3132{
3133 void __user *argp = compat_ptr(arg);
3134 struct sock *sk = sock->sk;
3135 struct net *net = sock_net(sk);
7a229387 3136
6b96018b
AB
3137 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3138 return siocdevprivate_ioctl(net, cmd, argp);
3139
3140 switch (cmd) {
3141 case SIOCSIFBR:
3142 case SIOCGIFBR:
3143 return old_bridge_ioctl(argp);
3144 case SIOCGIFNAME:
3145 return dev_ifname32(net, argp);
3146 case SIOCGIFCONF:
3147 return dev_ifconf(net, argp);
3148 case SIOCETHTOOL:
3149 return ethtool_ioctl(net, argp);
7a50a240
AB
3150 case SIOCWANDEV:
3151 return compat_siocwandev(net, argp);
a2116ed2
AB
3152 case SIOCGIFMAP:
3153 case SIOCSIFMAP:
3154 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3155 case SIOCBONDENSLAVE:
3156 case SIOCBONDRELEASE:
3157 case SIOCBONDSETHWADDR:
3158 case SIOCBONDSLAVEINFOQUERY:
3159 case SIOCBONDINFOQUERY:
3160 case SIOCBONDCHANGEACTIVE:
3161 return bond_ioctl(net, cmd, argp);
3162 case SIOCADDRT:
3163 case SIOCDELRT:
3164 return routing_ioctl(net, sock, cmd, argp);
3165 case SIOCGSTAMP:
3166 return do_siocgstamp(net, sock, cmd, argp);
3167 case SIOCGSTAMPNS:
3168 return do_siocgstampns(net, sock, cmd, argp);
a2116ed2
AB
3169 case SIOCSHWTSTAMP:
3170 return compat_siocshwtstamp(net, argp);
6b96018b
AB
3171
3172 case FIOSETOWN:
3173 case SIOCSPGRP:
3174 case FIOGETOWN:
3175 case SIOCGPGRP:
3176 case SIOCBRADDBR:
3177 case SIOCBRDELBR:
3178 case SIOCGIFVLAN:
3179 case SIOCSIFVLAN:
3180 case SIOCADDDLCI:
3181 case SIOCDELDLCI:
3182 return sock_ioctl(file, cmd, arg);
3183
3184 case SIOCGIFFLAGS:
3185 case SIOCSIFFLAGS:
3186 case SIOCGIFMETRIC:
3187 case SIOCSIFMETRIC:
3188 case SIOCGIFMTU:
3189 case SIOCSIFMTU:
3190 case SIOCGIFMEM:
3191 case SIOCSIFMEM:
3192 case SIOCGIFHWADDR:
3193 case SIOCSIFHWADDR:
3194 case SIOCADDMULTI:
3195 case SIOCDELMULTI:
3196 case SIOCGIFINDEX:
6b96018b
AB
3197 case SIOCGIFADDR:
3198 case SIOCSIFADDR:
3199 case SIOCSIFHWBROADCAST:
6b96018b 3200 case SIOCDIFADDR:
6b96018b
AB
3201 case SIOCGIFBRDADDR:
3202 case SIOCSIFBRDADDR:
3203 case SIOCGIFDSTADDR:
3204 case SIOCSIFDSTADDR:
3205 case SIOCGIFNETMASK:
3206 case SIOCSIFNETMASK:
3207 case SIOCSIFPFLAGS:
3208 case SIOCGIFPFLAGS:
3209 case SIOCGIFTXQLEN:
3210 case SIOCSIFTXQLEN:
3211 case SIOCBRADDIF:
3212 case SIOCBRDELIF:
9177efd3
AB
3213 case SIOCSIFNAME:
3214 case SIOCGMIIPHY:
3215 case SIOCGMIIREG:
3216 case SIOCSMIIREG:
6b96018b 3217 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3218
6b96018b
AB
3219 case SIOCSARP:
3220 case SIOCGARP:
3221 case SIOCDARP:
6b96018b 3222 case SIOCATMARK:
9177efd3
AB
3223 return sock_do_ioctl(net, sock, cmd, arg);
3224 }
3225
6b96018b
AB
3226 return -ENOIOCTLCMD;
3227}
7a229387 3228
95c96174 3229static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3230 unsigned long arg)
89bbfc95
SP
3231{
3232 struct socket *sock = file->private_data;
3233 int ret = -ENOIOCTLCMD;
87de87d5
DM
3234 struct sock *sk;
3235 struct net *net;
3236
3237 sk = sock->sk;
3238 net = sock_net(sk);
89bbfc95
SP
3239
3240 if (sock->ops->compat_ioctl)
3241 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3242
87de87d5
DM
3243 if (ret == -ENOIOCTLCMD &&
3244 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3245 ret = compat_wext_handle_ioctl(net, cmd, arg);
3246
6b96018b
AB
3247 if (ret == -ENOIOCTLCMD)
3248 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3249
89bbfc95
SP
3250 return ret;
3251}
3252#endif
3253
ac5a488e
SS
3254int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3255{
3256 return sock->ops->bind(sock, addr, addrlen);
3257}
c6d409cf 3258EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3259
3260int kernel_listen(struct socket *sock, int backlog)
3261{
3262 return sock->ops->listen(sock, backlog);
3263}
c6d409cf 3264EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3265
3266int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3267{
3268 struct sock *sk = sock->sk;
3269 int err;
3270
3271 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3272 newsock);
3273 if (err < 0)
3274 goto done;
3275
3276 err = sock->ops->accept(sock, *newsock, flags);
3277 if (err < 0) {
3278 sock_release(*newsock);
fa8705b0 3279 *newsock = NULL;
ac5a488e
SS
3280 goto done;
3281 }
3282
3283 (*newsock)->ops = sock->ops;
1b08534e 3284 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3285
3286done:
3287 return err;
3288}
c6d409cf 3289EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3290
3291int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3292 int flags)
ac5a488e
SS
3293{
3294 return sock->ops->connect(sock, addr, addrlen, flags);
3295}
c6d409cf 3296EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3297
3298int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3299 int *addrlen)
3300{
3301 return sock->ops->getname(sock, addr, addrlen, 0);
3302}
c6d409cf 3303EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3304
3305int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3306 int *addrlen)
3307{
3308 return sock->ops->getname(sock, addr, addrlen, 1);
3309}
c6d409cf 3310EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3311
3312int kernel_getsockopt(struct socket *sock, int level, int optname,
3313 char *optval, int *optlen)
3314{
3315 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3316 char __user *uoptval;
3317 int __user *uoptlen;
ac5a488e
SS
3318 int err;
3319
fb8621bb
NK
3320 uoptval = (char __user __force *) optval;
3321 uoptlen = (int __user __force *) optlen;
3322
ac5a488e
SS
3323 set_fs(KERNEL_DS);
3324 if (level == SOL_SOCKET)
fb8621bb 3325 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3326 else
fb8621bb
NK
3327 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3328 uoptlen);
ac5a488e
SS
3329 set_fs(oldfs);
3330 return err;
3331}
c6d409cf 3332EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3333
3334int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3335 char *optval, unsigned int optlen)
ac5a488e
SS
3336{
3337 mm_segment_t oldfs = get_fs();
fb8621bb 3338 char __user *uoptval;
ac5a488e
SS
3339 int err;
3340
fb8621bb
NK
3341 uoptval = (char __user __force *) optval;
3342
ac5a488e
SS
3343 set_fs(KERNEL_DS);
3344 if (level == SOL_SOCKET)
fb8621bb 3345 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3346 else
fb8621bb 3347 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3348 optlen);
3349 set_fs(oldfs);
3350 return err;
3351}
c6d409cf 3352EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3353
3354int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3355 size_t size, int flags)
3356{
f8451725
HX
3357 sock_update_classid(sock->sk);
3358
ac5a488e
SS
3359 if (sock->ops->sendpage)
3360 return sock->ops->sendpage(sock, page, offset, size, flags);
3361
3362 return sock_no_sendpage(sock, page, offset, size, flags);
3363}
c6d409cf 3364EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3365
3366int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3367{
3368 mm_segment_t oldfs = get_fs();
3369 int err;
3370
3371 set_fs(KERNEL_DS);
3372 err = sock->ops->ioctl(sock, cmd, arg);
3373 set_fs(oldfs);
3374
3375 return err;
3376}
c6d409cf 3377EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3378
91cf45f0
TM
3379int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3380{
3381 return sock->ops->shutdown(sock, how);
3382}
91cf45f0 3383EXPORT_SYMBOL(kernel_sock_shutdown);