Linux 3.2
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4
LT
72#include <linux/wanrouter.h>
73#include <linux/if_bridge.h>
20380731
ACM
74#include <linux/if_frad.h>
75#include <linux/if_vlan.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
1da177e4
LT
91
92#include <asm/uaccess.h>
93#include <asm/unistd.h>
94
95#include <net/compat.h>
87de87d5 96#include <net/wext.h>
f8451725 97#include <net/cls_cgroup.h>
1da177e4
LT
98
99#include <net/sock.h>
100#include <linux/netfilter.h>
101
6b96018b
AB
102#include <linux/if_tun.h>
103#include <linux/ipv6_route.h>
104#include <linux/route.h>
6b96018b
AB
105#include <linux/sockios.h>
106#include <linux/atalk.h>
107
1da177e4 108static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
109static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
110 unsigned long nr_segs, loff_t pos);
111static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
112 unsigned long nr_segs, loff_t pos);
89bddce5 113static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
114
115static int sock_close(struct inode *inode, struct file *file);
116static unsigned int sock_poll(struct file *file,
117 struct poll_table_struct *wait);
89bddce5 118static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
119#ifdef CONFIG_COMPAT
120static long compat_sock_ioctl(struct file *file,
89bddce5 121 unsigned int cmd, unsigned long arg);
89bbfc95 122#endif
1da177e4 123static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
124static ssize_t sock_sendpage(struct file *file, struct page *page,
125 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 126static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 127 struct pipe_inode_info *pipe, size_t len,
9c55e01c 128 unsigned int flags);
1da177e4 129
1da177e4
LT
130/*
131 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
132 * in the operation structures but are done directly via the socketcall() multiplexor.
133 */
134
da7071d7 135static const struct file_operations socket_file_ops = {
1da177e4
LT
136 .owner = THIS_MODULE,
137 .llseek = no_llseek,
138 .aio_read = sock_aio_read,
139 .aio_write = sock_aio_write,
140 .poll = sock_poll,
141 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
142#ifdef CONFIG_COMPAT
143 .compat_ioctl = compat_sock_ioctl,
144#endif
1da177e4
LT
145 .mmap = sock_mmap,
146 .open = sock_no_open, /* special open code to disallow open via /proc */
147 .release = sock_close,
148 .fasync = sock_fasync,
5274f052
JA
149 .sendpage = sock_sendpage,
150 .splice_write = generic_splice_sendpage,
9c55e01c 151 .splice_read = sock_splice_read,
1da177e4
LT
152};
153
154/*
155 * The protocol list. Each protocol is registered in here.
156 */
157
1da177e4 158static DEFINE_SPINLOCK(net_family_lock);
190683a9 159static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 160
1da177e4
LT
161/*
162 * Statistics counters of the socket lists
163 */
164
c6d409cf 165static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
166
167/*
89bddce5
SH
168 * Support routines.
169 * Move socket addresses back and forth across the kernel/user
170 * divide and look after the messy bits.
1da177e4
LT
171 */
172
1da177e4
LT
173/**
174 * move_addr_to_kernel - copy a socket address into kernel space
175 * @uaddr: Address in user space
176 * @kaddr: Address in kernel space
177 * @ulen: Length in user space
178 *
179 * The address is copied into kernel space. If the provided address is
180 * too long an error code of -EINVAL is returned. If the copy gives
181 * invalid addresses -EFAULT is returned. On a success 0 is returned.
182 */
183
230b1839 184int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
1da177e4 185{
230b1839 186 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 187 return -EINVAL;
89bddce5 188 if (ulen == 0)
1da177e4 189 return 0;
89bddce5 190 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 191 return -EFAULT;
3ec3b2fb 192 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
193}
194
195/**
196 * move_addr_to_user - copy an address to user space
197 * @kaddr: kernel space address
198 * @klen: length of address in kernel
199 * @uaddr: user space address
200 * @ulen: pointer to user length field
201 *
202 * The value pointed to by ulen on entry is the buffer length available.
203 * This is overwritten with the buffer space used. -EINVAL is returned
204 * if an overlong buffer is specified or a negative buffer size. -EFAULT
205 * is returned if either the buffer or the length field are not
206 * accessible.
207 * After copying the data up to the limit the user specifies, the true
208 * length of the data is written over the length limit the user
209 * specified. Zero is returned for a success.
210 */
89bddce5 211
11165f14 212static int move_addr_to_user(struct sockaddr *kaddr, int klen,
213 void __user *uaddr, int __user *ulen)
1da177e4
LT
214{
215 int err;
216 int len;
217
89bddce5
SH
218 err = get_user(len, ulen);
219 if (err)
1da177e4 220 return err;
89bddce5
SH
221 if (len > klen)
222 len = klen;
230b1839 223 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 224 return -EINVAL;
89bddce5 225 if (len) {
d6fe3945
SG
226 if (audit_sockaddr(klen, kaddr))
227 return -ENOMEM;
89bddce5 228 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
229 return -EFAULT;
230 }
231 /*
89bddce5
SH
232 * "fromlen shall refer to the value before truncation.."
233 * 1003.1g
1da177e4
LT
234 */
235 return __put_user(klen, ulen);
236}
237
e18b890b 238static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
239
240static struct inode *sock_alloc_inode(struct super_block *sb)
241{
242 struct socket_alloc *ei;
eaefd110 243 struct socket_wq *wq;
89bddce5 244
e94b1766 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
246 if (!ei)
247 return NULL;
eaefd110
ED
248 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
249 if (!wq) {
43815482
ED
250 kmem_cache_free(sock_inode_cachep, ei);
251 return NULL;
252 }
eaefd110
ED
253 init_waitqueue_head(&wq->wait);
254 wq->fasync_list = NULL;
255 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482 268 struct socket_alloc *ei;
eaefd110 269 struct socket_wq *wq;
43815482
ED
270
271 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 272 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 273 kfree_rcu(wq, rcu);
43815482 274 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
275}
276
51cc5068 277static void init_once(void *foo)
1da177e4 278{
89bddce5 279 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 280
a35afb83 281 inode_init_once(&ei->vfs_inode);
1da177e4 282}
89bddce5 283
1da177e4
LT
284static int init_inodecache(void)
285{
286 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
287 sizeof(struct socket_alloc),
288 0,
289 (SLAB_HWCACHE_ALIGN |
290 SLAB_RECLAIM_ACCOUNT |
291 SLAB_MEM_SPREAD),
20c2df83 292 init_once);
1da177e4
LT
293 if (sock_inode_cachep == NULL)
294 return -ENOMEM;
295 return 0;
296}
297
b87221de 298static const struct super_operations sockfs_ops = {
c6d409cf
ED
299 .alloc_inode = sock_alloc_inode,
300 .destroy_inode = sock_destroy_inode,
301 .statfs = simple_statfs,
1da177e4
LT
302};
303
c23fbb6b
ED
304/*
305 * sockfs_dname() is called from d_path().
306 */
307static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
308{
309 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
310 dentry->d_inode->i_ino);
311}
312
3ba13d17 313static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 314 .d_dname = sockfs_dname,
1da177e4
LT
315};
316
c74a1cbb
AV
317static struct dentry *sockfs_mount(struct file_system_type *fs_type,
318 int flags, const char *dev_name, void *data)
319{
320 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
321 &sockfs_dentry_operations, SOCKFS_MAGIC);
322}
323
324static struct vfsmount *sock_mnt __read_mostly;
325
326static struct file_system_type sock_fs_type = {
327 .name = "sockfs",
328 .mount = sockfs_mount,
329 .kill_sb = kill_anon_super,
330};
331
1da177e4
LT
332/*
333 * Obtains the first available file descriptor and sets it up for use.
334 *
39d8c1b6
DM
335 * These functions create file structures and maps them to fd space
336 * of the current process. On success it returns file descriptor
1da177e4
LT
337 * and file struct implicitly stored in sock->file.
338 * Note that another thread may close file descriptor before we return
339 * from this function. We use the fact that now we do not refer
340 * to socket after mapping. If one day we will need it, this
341 * function will increment ref. count on file by 1.
342 *
343 * In any case returned fd MAY BE not valid!
344 * This race condition is unavoidable
345 * with shared fd spaces, we cannot solve it inside kernel,
346 * but we take care of internal coherence yet.
347 */
348
7cbe66b6 349static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
1da177e4 350{
7cbe66b6 351 struct qstr name = { .name = "" };
2c48b9c4 352 struct path path;
7cbe66b6 353 struct file *file;
1da177e4 354 int fd;
1da177e4 355
a677a039 356 fd = get_unused_fd_flags(flags);
7cbe66b6
AV
357 if (unlikely(fd < 0))
358 return fd;
1da177e4 359
4b936885 360 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
2c48b9c4 361 if (unlikely(!path.dentry)) {
7cbe66b6 362 put_unused_fd(fd);
39d8c1b6 363 return -ENOMEM;
7cbe66b6 364 }
2c48b9c4 365 path.mnt = mntget(sock_mnt);
39d8c1b6 366
2c48b9c4 367 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 368 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 369
2c48b9c4 370 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 371 &socket_file_ops);
cc3808f8
AV
372 if (unlikely(!file)) {
373 /* drop dentry, keep inode */
7de9c6ee 374 ihold(path.dentry->d_inode);
2c48b9c4 375 path_put(&path);
cc3808f8
AV
376 put_unused_fd(fd);
377 return -ENFILE;
378 }
379
380 sock->file = file;
77d27200 381 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6
DM
382 file->f_pos = 0;
383 file->private_data = sock;
1da177e4 384
7cbe66b6
AV
385 *f = file;
386 return fd;
39d8c1b6
DM
387}
388
/*
 * Allocate a file for @sock with sock_alloc_file() and, if that worked,
 * install it into the descriptor table with fd_install().
 * Returns the descriptor number or the negative errno from
 * sock_alloc_file().
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int fd;

	fd = sock_alloc_file(sock, &newfile, flags);
	if (unlikely(fd < 0))
		return fd;

	fd_install(fd, newfile);
	return fd;
}
EXPORT_SYMBOL(sock_map_fd);
1da177e4 400
6cb153ca
BL
401static struct socket *sock_from_file(struct file *file, int *err)
402{
6cb153ca
BL
403 if (file->f_op == &socket_file_ops)
404 return file->private_data; /* set in sock_map_fd */
405
23bb80d2
ED
406 *err = -ENOTSOCK;
407 return NULL;
6cb153ca
BL
408}
409
1da177e4 410/**
c6d409cf 411 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
412 * @fd: file handle
413 * @err: pointer to an error code return
414 *
415 * The file handle passed in is locked and the socket it is bound
416 * too is returned. If an error occurs the err pointer is overwritten
417 * with a negative errno code and NULL is returned. The function checks
418 * for both invalid handles and passing a handle which is not a socket.
419 *
420 * On a success the socket object pointer is returned.
421 */
422
423struct socket *sockfd_lookup(int fd, int *err)
424{
425 struct file *file;
1da177e4
LT
426 struct socket *sock;
427
89bddce5
SH
428 file = fget(fd);
429 if (!file) {
1da177e4
LT
430 *err = -EBADF;
431 return NULL;
432 }
89bddce5 433
6cb153ca
BL
434 sock = sock_from_file(file, err);
435 if (!sock)
1da177e4 436 fput(file);
6cb153ca
BL
437 return sock;
438}
c6d409cf 439EXPORT_SYMBOL(sockfd_lookup);
1da177e4 440
6cb153ca
BL
441static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
442{
443 struct file *file;
444 struct socket *sock;
445
3672558c 446 *err = -EBADF;
6cb153ca
BL
447 file = fget_light(fd, fput_needed);
448 if (file) {
449 sock = sock_from_file(file, err);
450 if (sock)
451 return sock;
452 fput_light(file, *fput_needed);
1da177e4 453 }
6cb153ca 454 return NULL;
1da177e4
LT
455}
456
457/**
458 * sock_alloc - allocate a socket
89bddce5 459 *
1da177e4
LT
460 * Allocate a new inode and socket object. The two are bound together
461 * and initialised. The socket is then returned. If we are out of inodes
462 * NULL is returned.
463 */
464
465static struct socket *sock_alloc(void)
466{
89bddce5
SH
467 struct inode *inode;
468 struct socket *sock;
1da177e4 469
a209dfc7 470 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
471 if (!inode)
472 return NULL;
473
474 sock = SOCKET_I(inode);
475
29a020d3 476 kmemcheck_annotate_bitfield(sock, type);
85fe4025 477 inode->i_ino = get_next_ino();
89bddce5 478 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
479 inode->i_uid = current_fsuid();
480 inode->i_gid = current_fsgid();
1da177e4 481
4e69489a 482 percpu_add(sockets_in_use, 1);
1da177e4
LT
483 return sock;
484}
485
/*
 *	In theory you can't get an open on this inode, but /proc provides
 *	a back door. Remember to keep it shut otherwise you'll let the
 *	creepy crawlies in.
 *
 *	Both arguments are ignored; every open attempt fails with -ENXIO.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	const int refused = -ENXIO;

	return refused;
}
496
4b6f5d20 497const struct file_operations bad_sock_fops = {
1da177e4
LT
498 .owner = THIS_MODULE,
499 .open = sock_no_open,
6038f373 500 .llseek = noop_llseek,
1da177e4
LT
501};
502
503/**
504 * sock_release - close a socket
505 * @sock: socket to close
506 *
507 * The socket is released from the protocol stack if it has a release
508 * callback, and the inode is then released if the socket is bound to
89bddce5 509 * an inode not a file.
1da177e4 510 */
89bddce5 511
1da177e4
LT
512void sock_release(struct socket *sock)
513{
514 if (sock->ops) {
515 struct module *owner = sock->ops->owner;
516
517 sock->ops->release(sock);
518 sock->ops = NULL;
519 module_put(owner);
520 }
521
eaefd110 522 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
1da177e4
LT
523 printk(KERN_ERR "sock_release: fasync list not empty!\n");
524
4e69489a 525 percpu_sub(sockets_in_use, 1);
1da177e4
LT
526 if (!sock->file) {
527 iput(SOCK_INODE(sock));
528 return;
529 }
89bddce5 530 sock->file = NULL;
1da177e4 531}
c6d409cf 532EXPORT_SYMBOL(sock_release);
1da177e4 533
2244d07b 534int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
20d49473 535{
2244d07b 536 *tx_flags = 0;
20d49473 537 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
2244d07b 538 *tx_flags |= SKBTX_HW_TSTAMP;
20d49473 539 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
2244d07b 540 *tx_flags |= SKBTX_SW_TSTAMP;
20d49473
PO
541 return 0;
542}
543EXPORT_SYMBOL(sock_tx_timestamp);
544
228e548e
AB
545static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
546 struct msghdr *msg, size_t size)
1da177e4
LT
547{
548 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4 549
f8451725
HX
550 sock_update_classid(sock->sk);
551
1da177e4
LT
552 si->sock = sock;
553 si->scm = NULL;
554 si->msg = msg;
555 si->size = size;
556
1da177e4
LT
557 return sock->ops->sendmsg(iocb, sock, msg, size);
558}
559
228e548e
AB
560static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
561 struct msghdr *msg, size_t size)
562{
563 int err = security_socket_sendmsg(sock, msg, size);
564
565 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
566}
567
1da177e4
LT
568int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
569{
570 struct kiocb iocb;
571 struct sock_iocb siocb;
572 int ret;
573
574 init_sync_kiocb(&iocb, NULL);
575 iocb.private = &siocb;
576 ret = __sock_sendmsg(&iocb, sock, msg, size);
577 if (-EIOCBQUEUED == ret)
578 ret = wait_on_sync_kiocb(&iocb);
579 return ret;
580}
c6d409cf 581EXPORT_SYMBOL(sock_sendmsg);
1da177e4 582
894dc24c 583static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e
AB
584{
585 struct kiocb iocb;
586 struct sock_iocb siocb;
587 int ret;
588
589 init_sync_kiocb(&iocb, NULL);
590 iocb.private = &siocb;
591 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
592 if (-EIOCBQUEUED == ret)
593 ret = wait_on_sync_kiocb(&iocb);
594 return ret;
595}
596
1da177e4
LT
597int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
598 struct kvec *vec, size_t num, size_t size)
599{
600 mm_segment_t oldfs = get_fs();
601 int result;
602
603 set_fs(KERNEL_DS);
604 /*
605 * the following is safe, since for compiler definitions of kvec and
606 * iovec are identical, yielding the same in-core layout and alignment
607 */
89bddce5 608 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
609 msg->msg_iovlen = num;
610 result = sock_sendmsg(sock, msg, size);
611 set_fs(oldfs);
612 return result;
613}
c6d409cf 614EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 615
20d49473
PO
616static int ktime2ts(ktime_t kt, struct timespec *ts)
617{
618 if (kt.tv64) {
619 *ts = ktime_to_timespec(kt);
620 return 1;
621 } else {
622 return 0;
623 }
624}
625
92f37fd2
ED
626/*
627 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
628 */
629void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
630 struct sk_buff *skb)
631{
20d49473
PO
632 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
633 struct timespec ts[3];
634 int empty = 1;
635 struct skb_shared_hwtstamps *shhwtstamps =
636 skb_hwtstamps(skb);
637
638 /* Race occurred between timestamp enabling and packet
639 receiving. Fill in the current time for now. */
640 if (need_software_tstamp && skb->tstamp.tv64 == 0)
641 __net_timestamp(skb);
642
643 if (need_software_tstamp) {
644 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
645 struct timeval tv;
646 skb_get_timestamp(skb, &tv);
647 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
648 sizeof(tv), &tv);
649 } else {
842509b8 650 skb_get_timestampns(skb, &ts[0]);
20d49473 651 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 652 sizeof(ts[0]), &ts[0]);
20d49473
PO
653 }
654 }
655
656
657 memset(ts, 0, sizeof(ts));
658 if (skb->tstamp.tv64 &&
659 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
660 skb_get_timestampns(skb, ts + 0);
661 empty = 0;
662 }
663 if (shhwtstamps) {
664 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
665 ktime2ts(shhwtstamps->syststamp, ts + 1))
666 empty = 0;
667 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
668 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
669 empty = 0;
92f37fd2 670 }
20d49473
PO
671 if (!empty)
672 put_cmsg(msg, SOL_SOCKET,
673 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 674}
7c81fd8b
ACM
675EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
676
11165f14 677static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
678 struct sk_buff *skb)
3b885787
NH
679{
680 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
681 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
682 sizeof(__u32), &skb->dropcount);
683}
684
/*
 * Add both kinds of receive metadata for @skb to @msg: first the timestamp
 * (sock_recv_timestamp()), then the drop counter (sock_recv_drops()).
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);

	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 692
a2e27255
ACM
693static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
694 struct msghdr *msg, size_t size, int flags)
1da177e4 695{
1da177e4
LT
696 struct sock_iocb *si = kiocb_to_siocb(iocb);
697
f8451725
HX
698 sock_update_classid(sock->sk);
699
1da177e4
LT
700 si->sock = sock;
701 si->scm = NULL;
702 si->msg = msg;
703 si->size = size;
704 si->flags = flags;
705
1da177e4
LT
706 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
707}
708
a2e27255
ACM
709static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
710 struct msghdr *msg, size_t size, int flags)
711{
712 int err = security_socket_recvmsg(sock, msg, size, flags);
713
714 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
715}
716
89bddce5 717int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
718 size_t size, int flags)
719{
720 struct kiocb iocb;
721 struct sock_iocb siocb;
722 int ret;
723
89bddce5 724 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
725 iocb.private = &siocb;
726 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
727 if (-EIOCBQUEUED == ret)
728 ret = wait_on_sync_kiocb(&iocb);
729 return ret;
730}
c6d409cf 731EXPORT_SYMBOL(sock_recvmsg);
1da177e4 732
a2e27255
ACM
733static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
734 size_t size, int flags)
735{
736 struct kiocb iocb;
737 struct sock_iocb siocb;
738 int ret;
739
740 init_sync_kiocb(&iocb, NULL);
741 iocb.private = &siocb;
742 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
743 if (-EIOCBQUEUED == ret)
744 ret = wait_on_sync_kiocb(&iocb);
745 return ret;
746}
747
c1249c0a
ML
748/**
749 * kernel_recvmsg - Receive a message from a socket (kernel space)
750 * @sock: The socket to receive the message from
751 * @msg: Received message
752 * @vec: Input s/g array for message data
753 * @num: Size of input s/g array
754 * @size: Number of bytes to read
755 * @flags: Message flags (MSG_DONTWAIT, etc...)
756 *
757 * On return the msg structure contains the scatter/gather array passed in the
758 * vec argument. The array is modified so that it consists of the unfilled
759 * portion of the original array.
760 *
761 * The returned value is the total number of bytes received, or an error.
762 */
89bddce5
SH
763int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
764 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
765{
766 mm_segment_t oldfs = get_fs();
767 int result;
768
769 set_fs(KERNEL_DS);
770 /*
771 * the following is safe, since for compiler definitions of kvec and
772 * iovec are identical, yielding the same in-core layout and alignment
773 */
89bddce5 774 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
775 result = sock_recvmsg(sock, msg, size, flags);
776 set_fs(oldfs);
777 return result;
778}
c6d409cf 779EXPORT_SYMBOL(kernel_recvmsg);
1da177e4
LT
780
781static void sock_aio_dtor(struct kiocb *iocb)
782{
783 kfree(iocb->private);
784}
785
ce1d4d3e
CH
786static ssize_t sock_sendpage(struct file *file, struct page *page,
787 int offset, size_t size, loff_t *ppos, int more)
1da177e4 788{
1da177e4
LT
789 struct socket *sock;
790 int flags;
791
ce1d4d3e
CH
792 sock = file->private_data;
793
794 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
795 if (more)
796 flags |= MSG_MORE;
797
e6949583 798 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 799}
1da177e4 800
9c55e01c 801static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 802 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
803 unsigned int flags)
804{
805 struct socket *sock = file->private_data;
806
997b37da
RDC
807 if (unlikely(!sock->ops->splice_read))
808 return -EINVAL;
809
f8451725
HX
810 sock_update_classid(sock->sk);
811
9c55e01c
JA
812 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
813}
814
ce1d4d3e 815static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 816 struct sock_iocb *siocb)
ce1d4d3e
CH
817{
818 if (!is_sync_kiocb(iocb)) {
819 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
820 if (!siocb)
821 return NULL;
1da177e4
LT
822 iocb->ki_dtor = sock_aio_dtor;
823 }
1da177e4 824
ce1d4d3e 825 siocb->kiocb = iocb;
ce1d4d3e
CH
826 iocb->private = siocb;
827 return siocb;
1da177e4
LT
828}
829
ce1d4d3e 830static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
831 struct file *file, const struct iovec *iov,
832 unsigned long nr_segs)
ce1d4d3e
CH
833{
834 struct socket *sock = file->private_data;
835 size_t size = 0;
836 int i;
1da177e4 837
89bddce5
SH
838 for (i = 0; i < nr_segs; i++)
839 size += iov[i].iov_len;
1da177e4 840
ce1d4d3e
CH
841 msg->msg_name = NULL;
842 msg->msg_namelen = 0;
843 msg->msg_control = NULL;
844 msg->msg_controllen = 0;
89bddce5 845 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
846 msg->msg_iovlen = nr_segs;
847 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
848
849 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
850}
851
027445c3
BP
852static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
853 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
854{
855 struct sock_iocb siocb, *x;
856
1da177e4
LT
857 if (pos != 0)
858 return -ESPIPE;
027445c3
BP
859
860 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
861 return 0;
862
027445c3
BP
863
864 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
865 if (!x)
866 return -ENOMEM;
027445c3 867 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
868}
869
ce1d4d3e 870static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
871 struct file *file, const struct iovec *iov,
872 unsigned long nr_segs)
1da177e4 873{
ce1d4d3e
CH
874 struct socket *sock = file->private_data;
875 size_t size = 0;
876 int i;
1da177e4 877
89bddce5
SH
878 for (i = 0; i < nr_segs; i++)
879 size += iov[i].iov_len;
1da177e4 880
ce1d4d3e
CH
881 msg->msg_name = NULL;
882 msg->msg_namelen = 0;
883 msg->msg_control = NULL;
884 msg->msg_controllen = 0;
89bddce5 885 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
886 msg->msg_iovlen = nr_segs;
887 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
888 if (sock->type == SOCK_SEQPACKET)
889 msg->msg_flags |= MSG_EOR;
1da177e4 890
ce1d4d3e 891 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
892}
893
027445c3
BP
894static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
895 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
896{
897 struct sock_iocb siocb, *x;
1da177e4 898
ce1d4d3e
CH
899 if (pos != 0)
900 return -ESPIPE;
027445c3 901
027445c3 902 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
903 if (!x)
904 return -ENOMEM;
1da177e4 905
027445c3 906 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
907}
908
1da177e4
LT
909/*
910 * Atomic setting of ioctl hooks to avoid race
911 * with module unload.
912 */
913
4a3e2f71 914static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 915static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 916
881d966b 917void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 918{
4a3e2f71 919 mutex_lock(&br_ioctl_mutex);
1da177e4 920 br_ioctl_hook = hook;
4a3e2f71 921 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
922}
923EXPORT_SYMBOL(brioctl_set);
924
4a3e2f71 925static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 926static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 927
881d966b 928void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 929{
4a3e2f71 930 mutex_lock(&vlan_ioctl_mutex);
1da177e4 931 vlan_ioctl_hook = hook;
4a3e2f71 932 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
933}
934EXPORT_SYMBOL(vlan_ioctl_set);
935
4a3e2f71 936static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 937static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 938
89bddce5 939void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 940{
4a3e2f71 941 mutex_lock(&dlci_ioctl_mutex);
1da177e4 942 dlci_ioctl_hook = hook;
4a3e2f71 943 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
944}
945EXPORT_SYMBOL(dlci_ioctl_set);
946
6b96018b
AB
947static long sock_do_ioctl(struct net *net, struct socket *sock,
948 unsigned int cmd, unsigned long arg)
949{
950 int err;
951 void __user *argp = (void __user *)arg;
952
953 err = sock->ops->ioctl(sock, cmd, arg);
954
955 /*
956 * If this ioctl is unknown try to hand it down
957 * to the NIC driver.
958 */
959 if (err == -ENOIOCTLCMD)
960 err = dev_ioctl(net, cmd, argp);
961
962 return err;
963}
964
1da177e4
LT
965/*
966 * With an ioctl, arg may well be a user mode pointer, but we don't know
967 * what to do with it - that's up to the protocol still.
968 */
969
970static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
971{
972 struct socket *sock;
881d966b 973 struct sock *sk;
1da177e4
LT
974 void __user *argp = (void __user *)arg;
975 int pid, err;
881d966b 976 struct net *net;
1da177e4 977
b69aee04 978 sock = file->private_data;
881d966b 979 sk = sock->sk;
3b1e0a65 980 net = sock_net(sk);
1da177e4 981 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 982 err = dev_ioctl(net, cmd, argp);
1da177e4 983 } else
3d23e349 984#ifdef CONFIG_WEXT_CORE
1da177e4 985 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 986 err = dev_ioctl(net, cmd, argp);
1da177e4 987 } else
3d23e349 988#endif
89bddce5 989 switch (cmd) {
1da177e4
LT
990 case FIOSETOWN:
991 case SIOCSPGRP:
992 err = -EFAULT;
993 if (get_user(pid, (int __user *)argp))
994 break;
995 err = f_setown(sock->file, pid, 1);
996 break;
997 case FIOGETOWN:
998 case SIOCGPGRP:
609d7fa9 999 err = put_user(f_getown(sock->file),
89bddce5 1000 (int __user *)argp);
1da177e4
LT
1001 break;
1002 case SIOCGIFBR:
1003 case SIOCSIFBR:
1004 case SIOCBRADDBR:
1005 case SIOCBRDELBR:
1006 err = -ENOPKG;
1007 if (!br_ioctl_hook)
1008 request_module("bridge");
1009
4a3e2f71 1010 mutex_lock(&br_ioctl_mutex);
89bddce5 1011 if (br_ioctl_hook)
881d966b 1012 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1013 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1014 break;
1015 case SIOCGIFVLAN:
1016 case SIOCSIFVLAN:
1017 err = -ENOPKG;
1018 if (!vlan_ioctl_hook)
1019 request_module("8021q");
1020
4a3e2f71 1021 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1022 if (vlan_ioctl_hook)
881d966b 1023 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1024 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1025 break;
1da177e4
LT
1026 case SIOCADDDLCI:
1027 case SIOCDELDLCI:
1028 err = -ENOPKG;
1029 if (!dlci_ioctl_hook)
1030 request_module("dlci");
1031
7512cbf6
PE
1032 mutex_lock(&dlci_ioctl_mutex);
1033 if (dlci_ioctl_hook)
1da177e4 1034 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1035 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1036 break;
1037 default:
6b96018b 1038 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1039 break;
89bddce5 1040 }
1da177e4
LT
1041 return err;
1042}
1043
1044int sock_create_lite(int family, int type, int protocol, struct socket **res)
1045{
1046 int err;
1047 struct socket *sock = NULL;
89bddce5 1048
1da177e4
LT
1049 err = security_socket_create(family, type, protocol, 1);
1050 if (err)
1051 goto out;
1052
1053 sock = sock_alloc();
1054 if (!sock) {
1055 err = -ENOMEM;
1056 goto out;
1057 }
1058
1da177e4 1059 sock->type = type;
7420ed23
VY
1060 err = security_socket_post_create(sock, family, type, protocol, 1);
1061 if (err)
1062 goto out_release;
1063
1da177e4
LT
1064out:
1065 *res = sock;
1066 return err;
7420ed23
VY
1067out_release:
1068 sock_release(sock);
1069 sock = NULL;
1070 goto out;
1da177e4 1071}
c6d409cf 1072EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1073
1074/* No kernel lock held - perfect */
89bddce5 1075static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1076{
1077 struct socket *sock;
1078
1079 /*
89bddce5 1080 * We can't return errors to poll, so it's either yes or no.
1da177e4 1081 */
b69aee04 1082 sock = file->private_data;
1da177e4
LT
1083 return sock->ops->poll(file, sock, wait);
1084}
1085
89bddce5 1086static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1087{
b69aee04 1088 struct socket *sock = file->private_data;
1da177e4
LT
1089
1090 return sock->ops->mmap(file, sock, vma);
1091}
1092
20380731 1093static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1094{
1095 /*
89bddce5
SH
1096 * It was possible the inode is NULL we were
1097 * closing an unfinished socket.
1da177e4
LT
1098 */
1099
89bddce5 1100 if (!inode) {
1da177e4
LT
1101 printk(KERN_DEBUG "sock_close: NULL inode\n");
1102 return 0;
1103 }
1da177e4
LT
1104 sock_release(SOCKET_I(inode));
1105 return 0;
1106}
1107
1108/*
1109 * Update the socket async list
1110 *
1111 * Fasync_list locking strategy.
1112 *
1113 * 1. fasync_list is modified only under process context socket lock
1114 * i.e. under semaphore.
1115 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1116 * or under socket lock
1da177e4
LT
1117 */
1118
1119static int sock_fasync(int fd, struct file *filp, int on)
1120{
989a2979
ED
1121 struct socket *sock = filp->private_data;
1122 struct sock *sk = sock->sk;
eaefd110 1123 struct socket_wq *wq;
1da177e4 1124
989a2979 1125 if (sk == NULL)
1da177e4 1126 return -EINVAL;
1da177e4
LT
1127
1128 lock_sock(sk);
eaefd110
ED
1129 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1130 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1131
eaefd110 1132 if (!wq->fasync_list)
989a2979
ED
1133 sock_reset_flag(sk, SOCK_FASYNC);
1134 else
bcdce719 1135 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1136
989a2979 1137 release_sock(sk);
1da177e4
LT
1138 return 0;
1139}
1140
43815482 1141/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1142
1143int sock_wake_async(struct socket *sock, int how, int band)
1144{
43815482
ED
1145 struct socket_wq *wq;
1146
1147 if (!sock)
1148 return -1;
1149 rcu_read_lock();
1150 wq = rcu_dereference(sock->wq);
1151 if (!wq || !wq->fasync_list) {
1152 rcu_read_unlock();
1da177e4 1153 return -1;
43815482 1154 }
89bddce5 1155 switch (how) {
8d8ad9d7 1156 case SOCK_WAKE_WAITD:
1da177e4
LT
1157 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1158 break;
1159 goto call_kill;
8d8ad9d7 1160 case SOCK_WAKE_SPACE:
1da177e4
LT
1161 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1162 break;
1163 /* fall through */
8d8ad9d7 1164 case SOCK_WAKE_IO:
89bddce5 1165call_kill:
43815482 1166 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1167 break;
8d8ad9d7 1168 case SOCK_WAKE_URG:
43815482 1169 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1170 }
43815482 1171 rcu_read_unlock();
1da177e4
LT
1172 return 0;
1173}
c6d409cf 1174EXPORT_SYMBOL(sock_wake_async);
1da177e4 1175
721db93a 1176int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1177 struct socket **res, int kern)
1da177e4
LT
1178{
1179 int err;
1180 struct socket *sock;
55737fda 1181 const struct net_proto_family *pf;
1da177e4
LT
1182
1183 /*
89bddce5 1184 * Check protocol is in range
1da177e4
LT
1185 */
1186 if (family < 0 || family >= NPROTO)
1187 return -EAFNOSUPPORT;
1188 if (type < 0 || type >= SOCK_MAX)
1189 return -EINVAL;
1190
1191 /* Compatibility.
1192
1193 This uglymoron is moved from INET layer to here to avoid
1194 deadlock in module load.
1195 */
1196 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1197 static int warned;
1da177e4
LT
1198 if (!warned) {
1199 warned = 1;
89bddce5
SH
1200 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1201 current->comm);
1da177e4
LT
1202 }
1203 family = PF_PACKET;
1204 }
1205
1206 err = security_socket_create(family, type, protocol, kern);
1207 if (err)
1208 return err;
89bddce5 1209
55737fda
SH
1210 /*
1211 * Allocate the socket and allow the family to set things up. if
1212 * the protocol is 0, the family is instructed to select an appropriate
1213 * default.
1214 */
1215 sock = sock_alloc();
1216 if (!sock) {
1217 if (net_ratelimit())
1218 printk(KERN_WARNING "socket: no more sockets\n");
1219 return -ENFILE; /* Not exactly a match, but its the
1220 closest posix thing */
1221 }
1222
1223 sock->type = type;
1224
95a5afca 1225#ifdef CONFIG_MODULES
89bddce5
SH
1226 /* Attempt to load a protocol module if the find failed.
1227 *
1228 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1229 * requested real, full-featured networking support upon configuration.
1230 * Otherwise module support will break!
1231 */
190683a9 1232 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1233 request_module("net-pf-%d", family);
1da177e4
LT
1234#endif
1235
55737fda
SH
1236 rcu_read_lock();
1237 pf = rcu_dereference(net_families[family]);
1238 err = -EAFNOSUPPORT;
1239 if (!pf)
1240 goto out_release;
1da177e4
LT
1241
1242 /*
1243 * We will call the ->create function, that possibly is in a loadable
1244 * module, so we have to bump that loadable module refcnt first.
1245 */
55737fda 1246 if (!try_module_get(pf->owner))
1da177e4
LT
1247 goto out_release;
1248
55737fda
SH
1249 /* Now protected by module ref count */
1250 rcu_read_unlock();
1251
3f378b68 1252 err = pf->create(net, sock, protocol, kern);
55737fda 1253 if (err < 0)
1da177e4 1254 goto out_module_put;
a79af59e 1255
1da177e4
LT
1256 /*
1257 * Now to bump the refcnt of the [loadable] module that owns this
1258 * socket at sock_release time we decrement its refcnt.
1259 */
55737fda
SH
1260 if (!try_module_get(sock->ops->owner))
1261 goto out_module_busy;
1262
1da177e4
LT
1263 /*
1264 * Now that we're done with the ->create function, the [loadable]
1265 * module can have its refcnt decremented
1266 */
55737fda 1267 module_put(pf->owner);
7420ed23
VY
1268 err = security_socket_post_create(sock, family, type, protocol, kern);
1269 if (err)
3b185525 1270 goto out_sock_release;
55737fda 1271 *res = sock;
1da177e4 1272
55737fda
SH
1273 return 0;
1274
1275out_module_busy:
1276 err = -EAFNOSUPPORT;
1da177e4 1277out_module_put:
55737fda
SH
1278 sock->ops = NULL;
1279 module_put(pf->owner);
1280out_sock_release:
1da177e4 1281 sock_release(sock);
55737fda
SH
1282 return err;
1283
1284out_release:
1285 rcu_read_unlock();
1286 goto out_sock_release;
1da177e4 1287}
721db93a 1288EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1289
1290int sock_create(int family, int type, int protocol, struct socket **res)
1291{
1b8d7ae4 1292 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1293}
c6d409cf 1294EXPORT_SYMBOL(sock_create);
1da177e4
LT
1295
1296int sock_create_kern(int family, int type, int protocol, struct socket **res)
1297{
1b8d7ae4 1298 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1299}
c6d409cf 1300EXPORT_SYMBOL(sock_create_kern);
1da177e4 1301
3e0fa65f 1302SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1303{
1304 int retval;
1305 struct socket *sock;
a677a039
UD
1306 int flags;
1307
e38b36f3
UD
1308 /* Check the SOCK_* constants for consistency. */
1309 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1310 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1311 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1312 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1313
a677a039 1314 flags = type & ~SOCK_TYPE_MASK;
77d27200 1315 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1316 return -EINVAL;
1317 type &= SOCK_TYPE_MASK;
1da177e4 1318
aaca0bdc
UD
1319 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1320 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1321
1da177e4
LT
1322 retval = sock_create(family, type, protocol, &sock);
1323 if (retval < 0)
1324 goto out;
1325
77d27200 1326 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1327 if (retval < 0)
1328 goto out_release;
1329
1330out:
1331 /* It may be already another descriptor 8) Not kernel problem. */
1332 return retval;
1333
1334out_release:
1335 sock_release(sock);
1336 return retval;
1337}
1338
1339/*
1340 * Create a pair of connected sockets.
1341 */
1342
3e0fa65f
HC
1343SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1344 int __user *, usockvec)
1da177e4
LT
1345{
1346 struct socket *sock1, *sock2;
1347 int fd1, fd2, err;
db349509 1348 struct file *newfile1, *newfile2;
a677a039
UD
1349 int flags;
1350
1351 flags = type & ~SOCK_TYPE_MASK;
77d27200 1352 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1353 return -EINVAL;
1354 type &= SOCK_TYPE_MASK;
1da177e4 1355
aaca0bdc
UD
1356 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1357 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1358
1da177e4
LT
1359 /*
1360 * Obtain the first socket and check if the underlying protocol
1361 * supports the socketpair call.
1362 */
1363
1364 err = sock_create(family, type, protocol, &sock1);
1365 if (err < 0)
1366 goto out;
1367
1368 err = sock_create(family, type, protocol, &sock2);
1369 if (err < 0)
1370 goto out_release_1;
1371
1372 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1373 if (err < 0)
1da177e4
LT
1374 goto out_release_both;
1375
7cbe66b6 1376 fd1 = sock_alloc_file(sock1, &newfile1, flags);
bf3c23d1
DM
1377 if (unlikely(fd1 < 0)) {
1378 err = fd1;
db349509 1379 goto out_release_both;
bf3c23d1 1380 }
1da177e4 1381
7cbe66b6 1382 fd2 = sock_alloc_file(sock2, &newfile2, flags);
198de4d7
AV
1383 if (unlikely(fd2 < 0)) {
1384 err = fd2;
1385 fput(newfile1);
1386 put_unused_fd(fd1);
1387 sock_release(sock2);
1388 goto out;
db349509
AV
1389 }
1390
157cf649 1391 audit_fd_pair(fd1, fd2);
db349509
AV
1392 fd_install(fd1, newfile1);
1393 fd_install(fd2, newfile2);
1da177e4
LT
1394 /* fd1 and fd2 may be already another descriptors.
1395 * Not kernel problem.
1396 */
1397
89bddce5 1398 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1399 if (!err)
1400 err = put_user(fd2, &usockvec[1]);
1401 if (!err)
1402 return 0;
1403
1404 sys_close(fd2);
1405 sys_close(fd1);
1406 return err;
1407
1da177e4 1408out_release_both:
89bddce5 1409 sock_release(sock2);
1da177e4 1410out_release_1:
89bddce5 1411 sock_release(sock1);
1da177e4
LT
1412out:
1413 return err;
1414}
1415
1da177e4
LT
1416/*
1417 * Bind a name to a socket. Nothing much to do here since it's
1418 * the protocol's responsibility to handle the local address.
1419 *
1420 * We move the socket address to kernel space before we call
1421 * the protocol layer (having also checked the address is ok).
1422 */
1423
20f37034 1424SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1425{
1426 struct socket *sock;
230b1839 1427 struct sockaddr_storage address;
6cb153ca 1428 int err, fput_needed;
1da177e4 1429
89bddce5 1430 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1431 if (sock) {
230b1839 1432 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
89bddce5
SH
1433 if (err >= 0) {
1434 err = security_socket_bind(sock,
230b1839 1435 (struct sockaddr *)&address,
89bddce5 1436 addrlen);
6cb153ca
BL
1437 if (!err)
1438 err = sock->ops->bind(sock,
89bddce5 1439 (struct sockaddr *)
230b1839 1440 &address, addrlen);
1da177e4 1441 }
6cb153ca 1442 fput_light(sock->file, fput_needed);
89bddce5 1443 }
1da177e4
LT
1444 return err;
1445}
1446
1da177e4
LT
1447/*
1448 * Perform a listen. Basically, we allow the protocol to do anything
1449 * necessary for a listen, and if that works, we mark the socket as
1450 * ready for listening.
1451 */
1452
3e0fa65f 1453SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1454{
1455 struct socket *sock;
6cb153ca 1456 int err, fput_needed;
b8e1f9b5 1457 int somaxconn;
89bddce5
SH
1458
1459 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1460 if (sock) {
8efa6e93 1461 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
b8e1f9b5
PE
1462 if ((unsigned)backlog > somaxconn)
1463 backlog = somaxconn;
1da177e4
LT
1464
1465 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1466 if (!err)
1467 err = sock->ops->listen(sock, backlog);
1da177e4 1468
6cb153ca 1469 fput_light(sock->file, fput_needed);
1da177e4
LT
1470 }
1471 return err;
1472}
1473
1da177e4
LT
1474/*
1475 * For accept, we attempt to create a new socket, set up the link
1476 * with the client, wake up the client, then return the new
1477 * connected fd. We collect the address of the connector in kernel
1478 * space and move it to user at the very end. This is unclean because
1479 * we open the socket then return an error.
1480 *
1481 * 1003.1g adds the ability to recvmsg() to query connection pending
1482 * status to recvmsg. We need to add that support in a way thats
1483 * clean when we restucture accept also.
1484 */
1485
20f37034
HC
1486SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1487 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1488{
1489 struct socket *sock, *newsock;
39d8c1b6 1490 struct file *newfile;
6cb153ca 1491 int err, len, newfd, fput_needed;
230b1839 1492 struct sockaddr_storage address;
1da177e4 1493
77d27200 1494 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1495 return -EINVAL;
1496
1497 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1498 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1499
6cb153ca 1500 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1501 if (!sock)
1502 goto out;
1503
1504 err = -ENFILE;
c6d409cf
ED
1505 newsock = sock_alloc();
1506 if (!newsock)
1da177e4
LT
1507 goto out_put;
1508
1509 newsock->type = sock->type;
1510 newsock->ops = sock->ops;
1511
1da177e4
LT
1512 /*
1513 * We don't need try_module_get here, as the listening socket (sock)
1514 * has the protocol module (sock->ops->owner) held.
1515 */
1516 __module_get(newsock->ops->owner);
1517
7cbe66b6 1518 newfd = sock_alloc_file(newsock, &newfile, flags);
39d8c1b6
DM
1519 if (unlikely(newfd < 0)) {
1520 err = newfd;
9a1875e6
DM
1521 sock_release(newsock);
1522 goto out_put;
39d8c1b6
DM
1523 }
1524
a79af59e
FF
1525 err = security_socket_accept(sock, newsock);
1526 if (err)
39d8c1b6 1527 goto out_fd;
a79af59e 1528
1da177e4
LT
1529 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1530 if (err < 0)
39d8c1b6 1531 goto out_fd;
1da177e4
LT
1532
1533 if (upeer_sockaddr) {
230b1839 1534 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1535 &len, 2) < 0) {
1da177e4 1536 err = -ECONNABORTED;
39d8c1b6 1537 goto out_fd;
1da177e4 1538 }
230b1839
YH
1539 err = move_addr_to_user((struct sockaddr *)&address,
1540 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1541 if (err < 0)
39d8c1b6 1542 goto out_fd;
1da177e4
LT
1543 }
1544
1545 /* File flags are not inherited via accept() unlike another OSes. */
1546
39d8c1b6
DM
1547 fd_install(newfd, newfile);
1548 err = newfd;
1da177e4 1549
1da177e4 1550out_put:
6cb153ca 1551 fput_light(sock->file, fput_needed);
1da177e4
LT
1552out:
1553 return err;
39d8c1b6 1554out_fd:
9606a216 1555 fput(newfile);
39d8c1b6 1556 put_unused_fd(newfd);
1da177e4
LT
1557 goto out_put;
1558}
1559
20f37034
HC
1560SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1561 int __user *, upeer_addrlen)
aaca0bdc 1562{
de11defe 1563 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1564}
1565
1da177e4
LT
1566/*
1567 * Attempt to connect to a socket with the server address. The address
1568 * is in user space so we verify it is OK and move it to kernel space.
1569 *
1570 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1571 * break bindings
1572 *
1573 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1574 * other SEQPACKET protocols that take time to connect() as it doesn't
1575 * include the -EINPROGRESS status for such sockets.
1576 */
1577
20f37034
HC
1578SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1579 int, addrlen)
1da177e4
LT
1580{
1581 struct socket *sock;
230b1839 1582 struct sockaddr_storage address;
6cb153ca 1583 int err, fput_needed;
1da177e4 1584
6cb153ca 1585 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1586 if (!sock)
1587 goto out;
230b1839 1588 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
1da177e4
LT
1589 if (err < 0)
1590 goto out_put;
1591
89bddce5 1592 err =
230b1839 1593 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1594 if (err)
1595 goto out_put;
1596
230b1839 1597 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1598 sock->file->f_flags);
1599out_put:
6cb153ca 1600 fput_light(sock->file, fput_needed);
1da177e4
LT
1601out:
1602 return err;
1603}
1604
1605/*
1606 * Get the local address ('name') of a socket object. Move the obtained
1607 * name to user space.
1608 */
1609
20f37034
HC
1610SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1611 int __user *, usockaddr_len)
1da177e4
LT
1612{
1613 struct socket *sock;
230b1839 1614 struct sockaddr_storage address;
6cb153ca 1615 int len, err, fput_needed;
89bddce5 1616
6cb153ca 1617 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1618 if (!sock)
1619 goto out;
1620
1621 err = security_socket_getsockname(sock);
1622 if (err)
1623 goto out_put;
1624
230b1839 1625 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1626 if (err)
1627 goto out_put;
230b1839 1628 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1629
1630out_put:
6cb153ca 1631 fput_light(sock->file, fput_needed);
1da177e4
LT
1632out:
1633 return err;
1634}
1635
1636/*
1637 * Get the remote address ('name') of a socket object. Move the obtained
1638 * name to user space.
1639 */
1640
20f37034
HC
1641SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1642 int __user *, usockaddr_len)
1da177e4
LT
1643{
1644 struct socket *sock;
230b1839 1645 struct sockaddr_storage address;
6cb153ca 1646 int len, err, fput_needed;
1da177e4 1647
89bddce5
SH
1648 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1649 if (sock != NULL) {
1da177e4
LT
1650 err = security_socket_getpeername(sock);
1651 if (err) {
6cb153ca 1652 fput_light(sock->file, fput_needed);
1da177e4
LT
1653 return err;
1654 }
1655
89bddce5 1656 err =
230b1839 1657 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1658 1);
1da177e4 1659 if (!err)
230b1839 1660 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr,
89bddce5 1661 usockaddr_len);
6cb153ca 1662 fput_light(sock->file, fput_needed);
1da177e4
LT
1663 }
1664 return err;
1665}
1666
1667/*
1668 * Send a datagram to a given address. We move the address into kernel
1669 * space and check the user space data area is readable before invoking
1670 * the protocol.
1671 */
1672
3e0fa65f
HC
1673SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1674 unsigned, flags, struct sockaddr __user *, addr,
1675 int, addr_len)
1da177e4
LT
1676{
1677 struct socket *sock;
230b1839 1678 struct sockaddr_storage address;
1da177e4
LT
1679 int err;
1680 struct msghdr msg;
1681 struct iovec iov;
6cb153ca 1682 int fput_needed;
6cb153ca 1683
253eacc0
LT
1684 if (len > INT_MAX)
1685 len = INT_MAX;
de0fa95c
PE
1686 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1687 if (!sock)
4387ff75 1688 goto out;
6cb153ca 1689
89bddce5
SH
1690 iov.iov_base = buff;
1691 iov.iov_len = len;
1692 msg.msg_name = NULL;
1693 msg.msg_iov = &iov;
1694 msg.msg_iovlen = 1;
1695 msg.msg_control = NULL;
1696 msg.msg_controllen = 0;
1697 msg.msg_namelen = 0;
6cb153ca 1698 if (addr) {
230b1839 1699 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address);
1da177e4
LT
1700 if (err < 0)
1701 goto out_put;
230b1839 1702 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1703 msg.msg_namelen = addr_len;
1da177e4
LT
1704 }
1705 if (sock->file->f_flags & O_NONBLOCK)
1706 flags |= MSG_DONTWAIT;
1707 msg.msg_flags = flags;
1708 err = sock_sendmsg(sock, &msg, len);
1709
89bddce5 1710out_put:
de0fa95c 1711 fput_light(sock->file, fput_needed);
4387ff75 1712out:
1da177e4
LT
1713 return err;
1714}
1715
1716/*
89bddce5 1717 * Send a datagram down a socket.
1da177e4
LT
1718 */
1719
3e0fa65f
HC
1720SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
1721 unsigned, flags)
1da177e4
LT
1722{
1723 return sys_sendto(fd, buff, len, flags, NULL, 0);
1724}
1725
1726/*
89bddce5 1727 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1728 * sender. We verify the buffers are writable and if needed move the
1729 * sender address from kernel to user space.
1730 */
1731
3e0fa65f
HC
1732SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1733 unsigned, flags, struct sockaddr __user *, addr,
1734 int __user *, addr_len)
1da177e4
LT
1735{
1736 struct socket *sock;
1737 struct iovec iov;
1738 struct msghdr msg;
230b1839 1739 struct sockaddr_storage address;
89bddce5 1740 int err, err2;
6cb153ca
BL
1741 int fput_needed;
1742
253eacc0
LT
1743 if (size > INT_MAX)
1744 size = INT_MAX;
de0fa95c 1745 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1746 if (!sock)
de0fa95c 1747 goto out;
1da177e4 1748
89bddce5
SH
1749 msg.msg_control = NULL;
1750 msg.msg_controllen = 0;
1751 msg.msg_iovlen = 1;
1752 msg.msg_iov = &iov;
1753 iov.iov_len = size;
1754 iov.iov_base = ubuf;
230b1839
YH
1755 msg.msg_name = (struct sockaddr *)&address;
1756 msg.msg_namelen = sizeof(address);
1da177e4
LT
1757 if (sock->file->f_flags & O_NONBLOCK)
1758 flags |= MSG_DONTWAIT;
89bddce5 1759 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1760
89bddce5 1761 if (err >= 0 && addr != NULL) {
230b1839
YH
1762 err2 = move_addr_to_user((struct sockaddr *)&address,
1763 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1764 if (err2 < 0)
1765 err = err2;
1da177e4 1766 }
de0fa95c
PE
1767
1768 fput_light(sock->file, fput_needed);
4387ff75 1769out:
1da177e4
LT
1770 return err;
1771}
1772
1773/*
89bddce5 1774 * Receive a datagram from a socket.
1da177e4
LT
1775 */
1776
89bddce5
SH
1777asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1778 unsigned flags)
1da177e4
LT
1779{
1780 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1781}
1782
1783/*
1784 * Set a socket option. Because we don't know the option lengths we have
1785 * to pass the user mode parameter for the protocols to sort out.
1786 */
1787
20f37034
HC
1788SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1789 char __user *, optval, int, optlen)
1da177e4 1790{
6cb153ca 1791 int err, fput_needed;
1da177e4
LT
1792 struct socket *sock;
1793
1794 if (optlen < 0)
1795 return -EINVAL;
89bddce5
SH
1796
1797 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1798 if (sock != NULL) {
1799 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1800 if (err)
1801 goto out_put;
1da177e4
LT
1802
1803 if (level == SOL_SOCKET)
89bddce5
SH
1804 err =
1805 sock_setsockopt(sock, level, optname, optval,
1806 optlen);
1da177e4 1807 else
89bddce5
SH
1808 err =
1809 sock->ops->setsockopt(sock, level, optname, optval,
1810 optlen);
6cb153ca
BL
1811out_put:
1812 fput_light(sock->file, fput_needed);
1da177e4
LT
1813 }
1814 return err;
1815}
1816
1817/*
1818 * Get a socket option. Because we don't know the option lengths we have
1819 * to pass a user mode parameter for the protocols to sort out.
1820 */
1821
20f37034
HC
1822SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1823 char __user *, optval, int __user *, optlen)
1da177e4 1824{
6cb153ca 1825 int err, fput_needed;
1da177e4
LT
1826 struct socket *sock;
1827
89bddce5
SH
1828 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1829 if (sock != NULL) {
6cb153ca
BL
1830 err = security_socket_getsockopt(sock, level, optname);
1831 if (err)
1832 goto out_put;
1da177e4
LT
1833
1834 if (level == SOL_SOCKET)
89bddce5
SH
1835 err =
1836 sock_getsockopt(sock, level, optname, optval,
1837 optlen);
1da177e4 1838 else
89bddce5
SH
1839 err =
1840 sock->ops->getsockopt(sock, level, optname, optval,
1841 optlen);
6cb153ca
BL
1842out_put:
1843 fput_light(sock->file, fput_needed);
1da177e4
LT
1844 }
1845 return err;
1846}
1847
1da177e4
LT
1848/*
1849 * Shutdown a socket.
1850 */
1851
754fe8d2 1852SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1853{
6cb153ca 1854 int err, fput_needed;
1da177e4
LT
1855 struct socket *sock;
1856
89bddce5
SH
1857 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1858 if (sock != NULL) {
1da177e4 1859 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1860 if (!err)
1861 err = sock->ops->shutdown(sock, how);
1862 fput_light(sock->file, fput_needed);
1da177e4
LT
1863 }
1864 return err;
1865}
1866
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG() expects a variable named `flags' and, next to `msg', a
 * variable named `msg'_compat to be in scope at the point of use.
 */
#define COMPAT_MSG(msg, member)	\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1873
c71d8ebe
TH
1874struct used_address {
1875 struct sockaddr_storage name;
1876 unsigned int name_len;
1877};
1878
228e548e 1879static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
c71d8ebe
TH
1880 struct msghdr *msg_sys, unsigned flags,
1881 struct used_address *used_address)
1da177e4 1882{
89bddce5
SH
1883 struct compat_msghdr __user *msg_compat =
1884 (struct compat_msghdr __user *)msg;
230b1839 1885 struct sockaddr_storage address;
1da177e4 1886 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1887 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1888 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1889 /* 20 is size of ipv6_pktinfo */
1da177e4 1890 unsigned char *ctl_buf = ctl;
1da177e4 1891 int err, ctl_len, iov_size, total_len;
89bddce5 1892
1da177e4
LT
1893 err = -EFAULT;
1894 if (MSG_CMSG_COMPAT & flags) {
228e548e 1895 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 1896 return -EFAULT;
228e548e 1897 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1898 return -EFAULT;
1899
1da177e4
LT
1900 /* do not move before msg_sys is valid */
1901 err = -EMSGSIZE;
228e548e
AB
1902 if (msg_sys->msg_iovlen > UIO_MAXIOV)
1903 goto out;
1da177e4 1904
89bddce5 1905 /* Check whether to allocate the iovec area */
1da177e4 1906 err = -ENOMEM;
228e548e
AB
1907 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
1908 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1da177e4
LT
1909 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1910 if (!iov)
228e548e 1911 goto out;
1da177e4
LT
1912 }
1913
1914 /* This will also move the address data into kernel space */
1915 if (MSG_CMSG_COMPAT & flags) {
228e548e 1916 err = verify_compat_iovec(msg_sys, iov,
230b1839
YH
1917 (struct sockaddr *)&address,
1918 VERIFY_READ);
1da177e4 1919 } else
228e548e 1920 err = verify_iovec(msg_sys, iov,
230b1839
YH
1921 (struct sockaddr *)&address,
1922 VERIFY_READ);
89bddce5 1923 if (err < 0)
1da177e4
LT
1924 goto out_freeiov;
1925 total_len = err;
1926
1927 err = -ENOBUFS;
1928
228e548e 1929 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1930 goto out_freeiov;
228e548e 1931 ctl_len = msg_sys->msg_controllen;
1da177e4 1932 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1933 err =
228e548e 1934 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1935 sizeof(ctl));
1da177e4
LT
1936 if (err)
1937 goto out_freeiov;
228e548e
AB
1938 ctl_buf = msg_sys->msg_control;
1939 ctl_len = msg_sys->msg_controllen;
1da177e4 1940 } else if (ctl_len) {
89bddce5 1941 if (ctl_len > sizeof(ctl)) {
1da177e4 1942 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1943 if (ctl_buf == NULL)
1da177e4
LT
1944 goto out_freeiov;
1945 }
1946 err = -EFAULT;
1947 /*
228e548e 1948 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1949 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1950 * checking falls down on this.
1951 */
fb8621bb 1952 if (copy_from_user(ctl_buf,
228e548e 1953 (void __user __force *)msg_sys->msg_control,
89bddce5 1954 ctl_len))
1da177e4 1955 goto out_freectl;
228e548e 1956 msg_sys->msg_control = ctl_buf;
1da177e4 1957 }
228e548e 1958 msg_sys->msg_flags = flags;
1da177e4
LT
1959
1960 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1961 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1962 /*
1963 * If this is sendmmsg() and current destination address is same as
1964 * previously succeeded address, omit asking LSM's decision.
1965 * used_address->name_len is initialized to UINT_MAX so that the first
1966 * destination address never matches.
1967 */
bc909d9d
MD
1968 if (used_address && msg_sys->msg_name &&
1969 used_address->name_len == msg_sys->msg_namelen &&
1970 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
1971 used_address->name_len)) {
1972 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
1973 goto out_freectl;
1974 }
1975 err = sock_sendmsg(sock, msg_sys, total_len);
1976 /*
1977 * If this is sendmmsg() and sending to current destination address was
1978 * successful, remember it.
1979 */
1980 if (used_address && err >= 0) {
1981 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1982 if (msg_sys->msg_name)
1983 memcpy(&used_address->name, msg_sys->msg_name,
1984 used_address->name_len);
c71d8ebe 1985 }
1da177e4
LT
1986
1987out_freectl:
89bddce5 1988 if (ctl_buf != ctl)
1da177e4
LT
1989 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1990out_freeiov:
1991 if (iov != iovstack)
1992 sock_kfree_s(sock->sk, iov, iov_size);
228e548e
AB
1993out:
1994 return err;
1995}
1996
1997/*
1998 * BSD sendmsg interface
1999 */
2000
2001SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
2002{
2003 int fput_needed, err;
2004 struct msghdr msg_sys;
2005 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2006
2007 if (!sock)
2008 goto out;
2009
c71d8ebe 2010 err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2011
6cb153ca 2012 fput_light(sock->file, fput_needed);
89bddce5 2013out:
1da177e4
LT
2014 return err;
2015}
2016
228e548e
AB
2017/*
2018 * Linux sendmmsg interface
2019 */
2020
2021int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2022 unsigned int flags)
2023{
2024 int fput_needed, err, datagrams;
2025 struct socket *sock;
2026 struct mmsghdr __user *entry;
2027 struct compat_mmsghdr __user *compat_entry;
2028 struct msghdr msg_sys;
c71d8ebe 2029 struct used_address used_address;
228e548e 2030
98382f41
AB
2031 if (vlen > UIO_MAXIOV)
2032 vlen = UIO_MAXIOV;
228e548e
AB
2033
2034 datagrams = 0;
2035
2036 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2037 if (!sock)
2038 return err;
2039
c71d8ebe 2040 used_address.name_len = UINT_MAX;
228e548e
AB
2041 entry = mmsg;
2042 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2043 err = 0;
228e548e
AB
2044
2045 while (datagrams < vlen) {
228e548e
AB
2046 if (MSG_CMSG_COMPAT & flags) {
2047 err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
c71d8ebe 2048 &msg_sys, flags, &used_address);
228e548e
AB
2049 if (err < 0)
2050 break;
2051 err = __put_user(err, &compat_entry->msg_len);
2052 ++compat_entry;
2053 } else {
2054 err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
c71d8ebe 2055 &msg_sys, flags, &used_address);
228e548e
AB
2056 if (err < 0)
2057 break;
2058 err = put_user(err, &entry->msg_len);
2059 ++entry;
2060 }
2061
2062 if (err)
2063 break;
2064 ++datagrams;
2065 }
2066
228e548e
AB
2067 fput_light(sock->file, fput_needed);
2068
728ffb86
AB
2069 /* We only return an error if no datagrams were able to be sent */
2070 if (datagrams != 0)
228e548e
AB
2071 return datagrams;
2072
228e548e
AB
2073 return err;
2074}
2075
2076SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2077 unsigned int, vlen, unsigned int, flags)
2078{
2079 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2080}
2081
a2e27255
ACM
2082static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
2083 struct msghdr *msg_sys, unsigned flags, int nosec)
1da177e4 2084{
89bddce5
SH
2085 struct compat_msghdr __user *msg_compat =
2086 (struct compat_msghdr __user *)msg;
1da177e4 2087 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2088 struct iovec *iov = iovstack;
1da177e4
LT
2089 unsigned long cmsg_ptr;
2090 int err, iov_size, total_len, len;
2091
2092 /* kernel mode address */
230b1839 2093 struct sockaddr_storage addr;
1da177e4
LT
2094
2095 /* user mode address pointers */
2096 struct sockaddr __user *uaddr;
2097 int __user *uaddr_len;
89bddce5 2098
1da177e4 2099 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2100 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2101 return -EFAULT;
c6d409cf 2102 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
89bddce5 2103 return -EFAULT;
1da177e4 2104
1da177e4 2105 err = -EMSGSIZE;
a2e27255
ACM
2106 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2107 goto out;
89bddce5
SH
2108
2109 /* Check whether to allocate the iovec area */
1da177e4 2110 err = -ENOMEM;
a2e27255
ACM
2111 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
2112 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1da177e4
LT
2113 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
2114 if (!iov)
a2e27255 2115 goto out;
1da177e4
LT
2116 }
2117
2118 /*
89bddce5
SH
2119 * Save the user-mode address (verify_iovec will change the
2120 * kernel msghdr to use the kernel address space)
1da177e4 2121 */
89bddce5 2122
a2e27255 2123 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4
LT
2124 uaddr_len = COMPAT_NAMELEN(msg);
2125 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2126 err = verify_compat_iovec(msg_sys, iov,
230b1839
YH
2127 (struct sockaddr *)&addr,
2128 VERIFY_WRITE);
1da177e4 2129 } else
a2e27255 2130 err = verify_iovec(msg_sys, iov,
230b1839
YH
2131 (struct sockaddr *)&addr,
2132 VERIFY_WRITE);
1da177e4
LT
2133 if (err < 0)
2134 goto out_freeiov;
89bddce5 2135 total_len = err;
1da177e4 2136
a2e27255
ACM
2137 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2138 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2139
1da177e4
LT
2140 if (sock->file->f_flags & O_NONBLOCK)
2141 flags |= MSG_DONTWAIT;
a2e27255
ACM
2142 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2143 total_len, flags);
1da177e4
LT
2144 if (err < 0)
2145 goto out_freeiov;
2146 len = err;
2147
2148 if (uaddr != NULL) {
230b1839 2149 err = move_addr_to_user((struct sockaddr *)&addr,
a2e27255 2150 msg_sys->msg_namelen, uaddr,
89bddce5 2151 uaddr_len);
1da177e4
LT
2152 if (err < 0)
2153 goto out_freeiov;
2154 }
a2e27255 2155 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2156 COMPAT_FLAGS(msg));
1da177e4
LT
2157 if (err)
2158 goto out_freeiov;
2159 if (MSG_CMSG_COMPAT & flags)
a2e27255 2160 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2161 &msg_compat->msg_controllen);
2162 else
a2e27255 2163 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2164 &msg->msg_controllen);
2165 if (err)
2166 goto out_freeiov;
2167 err = len;
2168
2169out_freeiov:
2170 if (iov != iovstack)
2171 sock_kfree_s(sock->sk, iov, iov_size);
a2e27255
ACM
2172out:
2173 return err;
2174}
2175
2176/*
2177 * BSD recvmsg interface
2178 */
2179
2180SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2181 unsigned int, flags)
2182{
2183 int fput_needed, err;
2184 struct msghdr msg_sys;
2185 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2186
2187 if (!sock)
2188 goto out;
2189
2190 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2191
6cb153ca 2192 fput_light(sock->file, fput_needed);
1da177e4
LT
2193out:
2194 return err;
2195}
2196
a2e27255
ACM
2197/*
2198 * Linux recvmmsg interface
2199 */
2200
2201int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2202 unsigned int flags, struct timespec *timeout)
2203{
2204 int fput_needed, err, datagrams;
2205 struct socket *sock;
2206 struct mmsghdr __user *entry;
d7256d0e 2207 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2208 struct msghdr msg_sys;
2209 struct timespec end_time;
2210
2211 if (timeout &&
2212 poll_select_set_timeout(&end_time, timeout->tv_sec,
2213 timeout->tv_nsec))
2214 return -EINVAL;
2215
2216 datagrams = 0;
2217
2218 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2219 if (!sock)
2220 return err;
2221
2222 err = sock_error(sock->sk);
2223 if (err)
2224 goto out_put;
2225
2226 entry = mmsg;
d7256d0e 2227 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2228
2229 while (datagrams < vlen) {
2230 /*
2231 * No need to ask LSM for more than the first datagram.
2232 */
d7256d0e
JMG
2233 if (MSG_CMSG_COMPAT & flags) {
2234 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
b9eb8b87
AB
2235 &msg_sys, flags & ~MSG_WAITFORONE,
2236 datagrams);
d7256d0e
JMG
2237 if (err < 0)
2238 break;
2239 err = __put_user(err, &compat_entry->msg_len);
2240 ++compat_entry;
2241 } else {
2242 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
b9eb8b87
AB
2243 &msg_sys, flags & ~MSG_WAITFORONE,
2244 datagrams);
d7256d0e
JMG
2245 if (err < 0)
2246 break;
2247 err = put_user(err, &entry->msg_len);
2248 ++entry;
2249 }
2250
a2e27255
ACM
2251 if (err)
2252 break;
a2e27255
ACM
2253 ++datagrams;
2254
71c5c159
BB
2255 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2256 if (flags & MSG_WAITFORONE)
2257 flags |= MSG_DONTWAIT;
2258
a2e27255
ACM
2259 if (timeout) {
2260 ktime_get_ts(timeout);
2261 *timeout = timespec_sub(end_time, *timeout);
2262 if (timeout->tv_sec < 0) {
2263 timeout->tv_sec = timeout->tv_nsec = 0;
2264 break;
2265 }
2266
2267 /* Timeout, return less than vlen datagrams */
2268 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2269 break;
2270 }
2271
2272 /* Out of band data, return right away */
2273 if (msg_sys.msg_flags & MSG_OOB)
2274 break;
2275 }
2276
2277out_put:
2278 fput_light(sock->file, fput_needed);
1da177e4 2279
a2e27255
ACM
2280 if (err == 0)
2281 return datagrams;
2282
2283 if (datagrams != 0) {
2284 /*
2285 * We may return less entries than requested (vlen) if the
2286 * sock is non block and there aren't enough datagrams...
2287 */
2288 if (err != -EAGAIN) {
2289 /*
2290 * ... or if recvmsg returns an error after we
2291 * received some datagrams, where we record the
2292 * error to return on the next call or if the
2293 * app asks about it using getsockopt(SO_ERROR).
2294 */
2295 sock->sk->sk_err = -err;
2296 }
2297
2298 return datagrams;
2299 }
2300
2301 return err;
2302}
2303
2304SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2305 unsigned int, vlen, unsigned int, flags,
2306 struct timespec __user *, timeout)
2307{
2308 int datagrams;
2309 struct timespec timeout_sys;
2310
2311 if (!timeout)
2312 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2313
2314 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2315 return -EFAULT;
2316
2317 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2318
2319 if (datagrams > 0 &&
2320 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2321 datagrams = -EFAULT;
2322
2323 return datagrams;
2324}
2325
2326#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Size in bytes of the user-space argument vector of every sys_socketcall
 * sub-call, indexed by call number.  Slot 0 is unused.  The names in the
 * comments assume the SYS_* numbering of <linux/net.h> - confirm there.
 */
#define ARG_BYTES(count) ((count) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	ARG_BYTES(0),	/*  0: (unused)     */
	ARG_BYTES(3),	/*  1: socket       */
	ARG_BYTES(3),	/*  2: bind         */
	ARG_BYTES(3),	/*  3: connect      */
	ARG_BYTES(2),	/*  4: listen       */
	ARG_BYTES(3),	/*  5: accept       */
	ARG_BYTES(3),	/*  6: getsockname  */
	ARG_BYTES(3),	/*  7: getpeername  */
	ARG_BYTES(4),	/*  8: socketpair   */
	ARG_BYTES(4),	/*  9: send         */
	ARG_BYTES(4),	/* 10: recv         */
	ARG_BYTES(6),	/* 11: sendto       */
	ARG_BYTES(6),	/* 12: recvfrom     */
	ARG_BYTES(2),	/* 13: shutdown     */
	ARG_BYTES(5),	/* 14: setsockopt   */
	ARG_BYTES(5),	/* 15: getsockopt   */
	ARG_BYTES(3),	/* 16: sendmsg      */
	ARG_BYTES(3),	/* 17: recvmsg      */
	ARG_BYTES(4),	/* 18: accept4      */
	ARG_BYTES(5),	/* 19: recvmmsg     */
	ARG_BYTES(4)	/* 20: sendmmsg     */
};

#undef ARG_BYTES
2337
2338/*
89bddce5 2339 * System call vectors.
1da177e4
LT
2340 *
2341 * Argument checking cleaned up. Saved 20% in size.
2342 * This function doesn't need to set the kernel lock because
89bddce5 2343 * it is set by the callees.
1da177e4
LT
2344 */
2345
3e0fa65f 2346SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4
LT
2347{
2348 unsigned long a[6];
89bddce5 2349 unsigned long a0, a1;
1da177e4 2350 int err;
47379052 2351 unsigned int len;
1da177e4 2352
228e548e 2353 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2354 return -EINVAL;
2355
47379052
AV
2356 len = nargs[call];
2357 if (len > sizeof(a))
2358 return -EINVAL;
2359
1da177e4 2360 /* copy_from_user should be SMP safe. */
47379052 2361 if (copy_from_user(a, args, len))
1da177e4 2362 return -EFAULT;
3ec3b2fb 2363
f3298dc4 2364 audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb 2365
89bddce5
SH
2366 a0 = a[0];
2367 a1 = a[1];
2368
2369 switch (call) {
2370 case SYS_SOCKET:
2371 err = sys_socket(a0, a1, a[2]);
2372 break;
2373 case SYS_BIND:
2374 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2375 break;
2376 case SYS_CONNECT:
2377 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2378 break;
2379 case SYS_LISTEN:
2380 err = sys_listen(a0, a1);
2381 break;
2382 case SYS_ACCEPT:
de11defe
UD
2383 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2384 (int __user *)a[2], 0);
89bddce5
SH
2385 break;
2386 case SYS_GETSOCKNAME:
2387 err =
2388 sys_getsockname(a0, (struct sockaddr __user *)a1,
2389 (int __user *)a[2]);
2390 break;
2391 case SYS_GETPEERNAME:
2392 err =
2393 sys_getpeername(a0, (struct sockaddr __user *)a1,
2394 (int __user *)a[2]);
2395 break;
2396 case SYS_SOCKETPAIR:
2397 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2398 break;
2399 case SYS_SEND:
2400 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2401 break;
2402 case SYS_SENDTO:
2403 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2404 (struct sockaddr __user *)a[4], a[5]);
2405 break;
2406 case SYS_RECV:
2407 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2408 break;
2409 case SYS_RECVFROM:
2410 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2411 (struct sockaddr __user *)a[4],
2412 (int __user *)a[5]);
2413 break;
2414 case SYS_SHUTDOWN:
2415 err = sys_shutdown(a0, a1);
2416 break;
2417 case SYS_SETSOCKOPT:
2418 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2419 break;
2420 case SYS_GETSOCKOPT:
2421 err =
2422 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2423 (int __user *)a[4]);
2424 break;
2425 case SYS_SENDMSG:
2426 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2427 break;
228e548e
AB
2428 case SYS_SENDMMSG:
2429 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2430 break;
89bddce5
SH
2431 case SYS_RECVMSG:
2432 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2433 break;
a2e27255
ACM
2434 case SYS_RECVMMSG:
2435 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2436 (struct timespec __user *)a[4]);
2437 break;
de11defe
UD
2438 case SYS_ACCEPT4:
2439 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2440 (int __user *)a[2], a[3]);
aaca0bdc 2441 break;
89bddce5
SH
2442 default:
2443 err = -EINVAL;
2444 break;
1da177e4
LT
2445 }
2446 return err;
2447}
2448
89bddce5 2449#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2450
55737fda
SH
2451/**
2452 * sock_register - add a socket protocol handler
2453 * @ops: description of protocol
2454 *
1da177e4
LT
2455 * This function is called by a protocol handler that wants to
2456 * advertise its address family, and have it linked into the
55737fda
SH
2457 * socket interface. The value ops->family coresponds to the
2458 * socket system call protocol family.
1da177e4 2459 */
f0fd27d4 2460int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2461{
2462 int err;
2463
2464 if (ops->family >= NPROTO) {
89bddce5
SH
2465 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2466 NPROTO);
1da177e4
LT
2467 return -ENOBUFS;
2468 }
55737fda
SH
2469
2470 spin_lock(&net_family_lock);
190683a9
ED
2471 if (rcu_dereference_protected(net_families[ops->family],
2472 lockdep_is_held(&net_family_lock)))
55737fda
SH
2473 err = -EEXIST;
2474 else {
a9b3cd7f 2475 RCU_INIT_POINTER(net_families[ops->family], ops);
1da177e4
LT
2476 err = 0;
2477 }
55737fda
SH
2478 spin_unlock(&net_family_lock);
2479
89bddce5 2480 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2481 return err;
2482}
c6d409cf 2483EXPORT_SYMBOL(sock_register);
1da177e4 2484
55737fda
SH
2485/**
2486 * sock_unregister - remove a protocol handler
2487 * @family: protocol family to remove
2488 *
1da177e4
LT
2489 * This function is called by a protocol handler that wants to
2490 * remove its address family, and have it unlinked from the
55737fda
SH
2491 * new socket creation.
2492 *
2493 * If protocol handler is a module, then it can use module reference
2494 * counts to protect against new references. If protocol handler is not
2495 * a module then it needs to provide its own protection in
2496 * the ops->create routine.
1da177e4 2497 */
f0fd27d4 2498void sock_unregister(int family)
1da177e4 2499{
f0fd27d4 2500 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2501
55737fda 2502 spin_lock(&net_family_lock);
a9b3cd7f 2503 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2504 spin_unlock(&net_family_lock);
2505
2506 synchronize_rcu();
2507
89bddce5 2508 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4 2509}
c6d409cf 2510EXPORT_SYMBOL(sock_unregister);
1da177e4 2511
77d76ea3 2512static int __init sock_init(void)
1da177e4 2513{
b3e19d92
NP
2514 int err;
2515
1da177e4 2516 /*
89bddce5 2517 * Initialize sock SLAB cache.
1da177e4 2518 */
89bddce5 2519
1da177e4
LT
2520 sk_init();
2521
1da177e4 2522 /*
89bddce5 2523 * Initialize skbuff SLAB cache
1da177e4
LT
2524 */
2525 skb_init();
1da177e4
LT
2526
2527 /*
89bddce5 2528 * Initialize the protocols module.
1da177e4
LT
2529 */
2530
2531 init_inodecache();
b3e19d92
NP
2532
2533 err = register_filesystem(&sock_fs_type);
2534 if (err)
2535 goto out_fs;
1da177e4 2536 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2537 if (IS_ERR(sock_mnt)) {
2538 err = PTR_ERR(sock_mnt);
2539 goto out_mount;
2540 }
77d76ea3
AK
2541
2542 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2543 */
2544
2545#ifdef CONFIG_NETFILTER
2546 netfilter_init();
2547#endif
cbeb321a 2548
c1f19b51
RC
2549#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2550 skb_timestamping_init();
2551#endif
2552
b3e19d92
NP
2553out:
2554 return err;
2555
2556out_mount:
2557 unregister_filesystem(&sock_fs_type);
2558out_fs:
2559 goto out;
1da177e4
LT
2560}
2561
77d76ea3
AK
2562core_initcall(sock_init); /* early initcall */
2563
1da177e4
LT
2564#ifdef CONFIG_PROC_FS
2565void socket_seq_show(struct seq_file *seq)
2566{
2567 int cpu;
2568 int counter = 0;
2569
6f912042 2570 for_each_possible_cpu(cpu)
89bddce5 2571 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2572
2573 /* It can be negative, by the way. 8) */
2574 if (counter < 0)
2575 counter = 0;
2576
2577 seq_printf(seq, "sockets: used %d\n", counter);
2578}
89bddce5 2579#endif /* CONFIG_PROC_FS */
1da177e4 2580
89bbfc95 2581#ifdef CONFIG_COMPAT
6b96018b
AB
2582static int do_siocgstamp(struct net *net, struct socket *sock,
2583 unsigned int cmd, struct compat_timeval __user *up)
7a229387 2584{
7a229387
AB
2585 mm_segment_t old_fs = get_fs();
2586 struct timeval ktv;
2587 int err;
2588
2589 set_fs(KERNEL_DS);
6b96018b 2590 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387
AB
2591 set_fs(old_fs);
2592 if (!err) {
2593 err = put_user(ktv.tv_sec, &up->tv_sec);
2594 err |= __put_user(ktv.tv_usec, &up->tv_usec);
2595 }
2596 return err;
2597}
2598
6b96018b
AB
2599static int do_siocgstampns(struct net *net, struct socket *sock,
2600 unsigned int cmd, struct compat_timespec __user *up)
7a229387 2601{
7a229387
AB
2602 mm_segment_t old_fs = get_fs();
2603 struct timespec kts;
2604 int err;
2605
2606 set_fs(KERNEL_DS);
6b96018b 2607 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387
AB
2608 set_fs(old_fs);
2609 if (!err) {
2610 err = put_user(kts.tv_sec, &up->tv_sec);
2611 err |= __put_user(kts.tv_nsec, &up->tv_nsec);
2612 }
2613 return err;
2614}
2615
6b96018b 2616static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2617{
2618 struct ifreq __user *uifr;
2619 int err;
2620
2621 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2622 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2623 return -EFAULT;
2624
6b96018b 2625 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2626 if (err)
2627 return err;
2628
6b96018b 2629 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2630 return -EFAULT;
2631
2632 return 0;
2633}
2634
6b96018b 2635static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2636{
6b96018b 2637 struct compat_ifconf ifc32;
7a229387
AB
2638 struct ifconf ifc;
2639 struct ifconf __user *uifc;
6b96018b 2640 struct compat_ifreq __user *ifr32;
7a229387
AB
2641 struct ifreq __user *ifr;
2642 unsigned int i, j;
2643 int err;
2644
6b96018b 2645 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2646 return -EFAULT;
2647
2648 if (ifc32.ifcbuf == 0) {
2649 ifc32.ifc_len = 0;
2650 ifc.ifc_len = 0;
2651 ifc.ifc_req = NULL;
2652 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2653 } else {
c6d409cf
ED
2654 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2655 sizeof(struct ifreq);
7a229387
AB
2656 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2657 ifc.ifc_len = len;
2658 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2659 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2660 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2661 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2662 return -EFAULT;
2663 ifr++;
2664 ifr32++;
2665 }
2666 }
2667 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2668 return -EFAULT;
2669
6b96018b 2670 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2671 if (err)
2672 return err;
2673
2674 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2675 return -EFAULT;
2676
2677 ifr = ifc.ifc_req;
2678 ifr32 = compat_ptr(ifc32.ifcbuf);
2679 for (i = 0, j = 0;
c6d409cf
ED
2680 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2681 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2682 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2683 return -EFAULT;
2684 ifr32++;
2685 ifr++;
2686 }
2687
2688 if (ifc32.ifcbuf == 0) {
2689 /* Translate from 64-bit structure multiple to
2690 * a 32-bit one.
2691 */
2692 i = ifc.ifc_len;
6b96018b 2693 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2694 ifc32.ifc_len = i;
2695 } else {
2696 ifc32.ifc_len = i;
2697 }
6b96018b 2698 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2699 return -EFAULT;
2700
2701 return 0;
2702}
2703
6b96018b 2704static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2705{
3a7da39d
BH
2706 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2707 bool convert_in = false, convert_out = false;
2708 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2709 struct ethtool_rxnfc __user *rxnfc;
7a229387 2710 struct ifreq __user *ifr;
3a7da39d
BH
2711 u32 rule_cnt = 0, actual_rule_cnt;
2712 u32 ethcmd;
7a229387 2713 u32 data;
3a7da39d 2714 int ret;
7a229387 2715
3a7da39d
BH
2716 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2717 return -EFAULT;
7a229387 2718
3a7da39d
BH
2719 compat_rxnfc = compat_ptr(data);
2720
2721 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2722 return -EFAULT;
2723
3a7da39d
BH
2724 /* Most ethtool structures are defined without padding.
2725 * Unfortunately struct ethtool_rxnfc is an exception.
2726 */
2727 switch (ethcmd) {
2728 default:
2729 break;
2730 case ETHTOOL_GRXCLSRLALL:
2731 /* Buffer size is variable */
2732 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2733 return -EFAULT;
2734 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2735 return -ENOMEM;
2736 buf_size += rule_cnt * sizeof(u32);
2737 /* fall through */
2738 case ETHTOOL_GRXRINGS:
2739 case ETHTOOL_GRXCLSRLCNT:
2740 case ETHTOOL_GRXCLSRULE:
2741 convert_out = true;
2742 /* fall through */
2743 case ETHTOOL_SRXCLSRLDEL:
2744 case ETHTOOL_SRXCLSRLINS:
2745 buf_size += sizeof(struct ethtool_rxnfc);
2746 convert_in = true;
2747 break;
2748 }
2749
2750 ifr = compat_alloc_user_space(buf_size);
2751 rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8);
2752
2753 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2754 return -EFAULT;
2755
3a7da39d
BH
2756 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2757 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2758 return -EFAULT;
2759
3a7da39d 2760 if (convert_in) {
127fe533 2761 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2762 * fs.ring_cookie and at the end of fs, but nowhere else.
2763 */
127fe533
AD
2764 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2765 sizeof(compat_rxnfc->fs.m_ext) !=
2766 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2767 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2768 BUILD_BUG_ON(
2769 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2770 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2771 offsetof(struct ethtool_rxnfc, fs.location) -
2772 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2773
2774 if (copy_in_user(rxnfc, compat_rxnfc,
127fe533 2775 (void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2776 (void *)rxnfc) ||
2777 copy_in_user(&rxnfc->fs.ring_cookie,
2778 &compat_rxnfc->fs.ring_cookie,
2779 (void *)(&rxnfc->fs.location + 1) -
2780 (void *)&rxnfc->fs.ring_cookie) ||
2781 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2782 sizeof(rxnfc->rule_cnt)))
2783 return -EFAULT;
2784 }
2785
2786 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2787 if (ret)
2788 return ret;
2789
2790 if (convert_out) {
2791 if (copy_in_user(compat_rxnfc, rxnfc,
127fe533 2792 (const void *)(&rxnfc->fs.m_ext + 1) -
3a7da39d
BH
2793 (const void *)rxnfc) ||
2794 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2795 &rxnfc->fs.ring_cookie,
2796 (const void *)(&rxnfc->fs.location + 1) -
2797 (const void *)&rxnfc->fs.ring_cookie) ||
2798 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2799 sizeof(rxnfc->rule_cnt)))
2800 return -EFAULT;
2801
2802 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2803 /* As an optimisation, we only copy the actual
2804 * number of rules that the underlying
2805 * function returned. Since Mallory might
2806 * change the rule count in user memory, we
2807 * check that it is less than the rule count
2808 * originally given (as the user buffer size),
2809 * which has been range-checked.
2810 */
2811 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2812 return -EFAULT;
2813 if (actual_rule_cnt < rule_cnt)
2814 rule_cnt = actual_rule_cnt;
2815 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2816 &rxnfc->rule_locs[0],
2817 rule_cnt * sizeof(u32)))
2818 return -EFAULT;
2819 }
2820 }
2821
2822 return 0;
7a229387
AB
2823}
2824
7a50a240
AB
2825static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2826{
2827 void __user *uptr;
2828 compat_uptr_t uptr32;
2829 struct ifreq __user *uifr;
2830
c6d409cf 2831 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2832 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2833 return -EFAULT;
2834
2835 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2836 return -EFAULT;
2837
2838 uptr = compat_ptr(uptr32);
2839
2840 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2841 return -EFAULT;
2842
2843 return dev_ioctl(net, SIOCWANDEV, uifr);
2844}
2845
6b96018b
AB
2846static int bond_ioctl(struct net *net, unsigned int cmd,
2847 struct compat_ifreq __user *ifr32)
7a229387
AB
2848{
2849 struct ifreq kifr;
2850 struct ifreq __user *uifr;
7a229387
AB
2851 mm_segment_t old_fs;
2852 int err;
2853 u32 data;
2854 void __user *datap;
2855
2856 switch (cmd) {
2857 case SIOCBONDENSLAVE:
2858 case SIOCBONDRELEASE:
2859 case SIOCBONDSETHWADDR:
2860 case SIOCBONDCHANGEACTIVE:
6b96018b 2861 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2862 return -EFAULT;
2863
2864 old_fs = get_fs();
c6d409cf 2865 set_fs(KERNEL_DS);
c3f52ae6 2866 err = dev_ioctl(net, cmd,
2867 (struct ifreq __user __force *) &kifr);
c6d409cf 2868 set_fs(old_fs);
7a229387
AB
2869
2870 return err;
2871 case SIOCBONDSLAVEINFOQUERY:
2872 case SIOCBONDINFOQUERY:
2873 uifr = compat_alloc_user_space(sizeof(*uifr));
2874 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2875 return -EFAULT;
2876
2877 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2878 return -EFAULT;
2879
2880 datap = compat_ptr(data);
2881 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2882 return -EFAULT;
2883
6b96018b 2884 return dev_ioctl(net, cmd, uifr);
7a229387
AB
2885 default:
2886 return -EINVAL;
ccbd6a5a 2887 }
7a229387
AB
2888}
2889
6b96018b
AB
2890static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2891 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2892{
2893 struct ifreq __user *u_ifreq64;
7a229387
AB
2894 char tmp_buf[IFNAMSIZ];
2895 void __user *data64;
2896 u32 data32;
2897
2898 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2899 IFNAMSIZ))
2900 return -EFAULT;
2901 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2902 return -EFAULT;
2903 data64 = compat_ptr(data32);
2904
2905 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2906
2907 /* Don't check these user accesses, just let that get trapped
2908 * in the ioctl handler instead.
2909 */
2910 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2911 IFNAMSIZ))
2912 return -EFAULT;
2913 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
2914 return -EFAULT;
2915
6b96018b 2916 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2917}
2918
6b96018b
AB
2919static int dev_ifsioc(struct net *net, struct socket *sock,
2920 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2921{
a2116ed2 2922 struct ifreq __user *uifr;
7a229387
AB
2923 int err;
2924
a2116ed2
AB
2925 uifr = compat_alloc_user_space(sizeof(*uifr));
2926 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2927 return -EFAULT;
2928
2929 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2930
7a229387
AB
2931 if (!err) {
2932 switch (cmd) {
2933 case SIOCGIFFLAGS:
2934 case SIOCGIFMETRIC:
2935 case SIOCGIFMTU:
2936 case SIOCGIFMEM:
2937 case SIOCGIFHWADDR:
2938 case SIOCGIFINDEX:
2939 case SIOCGIFADDR:
2940 case SIOCGIFBRDADDR:
2941 case SIOCGIFDSTADDR:
2942 case SIOCGIFNETMASK:
fab2532b 2943 case SIOCGIFPFLAGS:
7a229387 2944 case SIOCGIFTXQLEN:
fab2532b
AB
2945 case SIOCGMIIPHY:
2946 case SIOCGMIIREG:
a2116ed2 2947 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2948 err = -EFAULT;
2949 break;
2950 }
2951 }
2952 return err;
2953}
2954
a2116ed2
AB
2955static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2956 struct compat_ifreq __user *uifr32)
2957{
2958 struct ifreq ifr;
2959 struct compat_ifmap __user *uifmap32;
2960 mm_segment_t old_fs;
2961 int err;
2962
2963 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2964 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
2965 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2966 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2967 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2968 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
2969 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
2970 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
2971 if (err)
2972 return -EFAULT;
2973
2974 old_fs = get_fs();
c6d409cf 2975 set_fs(KERNEL_DS);
c3f52ae6 2976 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2977 set_fs(old_fs);
a2116ed2
AB
2978
2979 if (cmd == SIOCGIFMAP && !err) {
2980 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
2981 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2982 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2983 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2984 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
2985 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
2986 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
2987 if (err)
2988 err = -EFAULT;
2989 }
2990 return err;
2991}
2992
2993static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
2994{
2995 void __user *uptr;
2996 compat_uptr_t uptr32;
2997 struct ifreq __user *uifr;
2998
c6d409cf 2999 uifr = compat_alloc_user_space(sizeof(*uifr));
a2116ed2
AB
3000 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
3001 return -EFAULT;
3002
3003 if (get_user(uptr32, &uifr32->ifr_data))
3004 return -EFAULT;
3005
3006 uptr = compat_ptr(uptr32);
3007
3008 if (put_user(uptr, &uifr->ifr_data))
3009 return -EFAULT;
3010
3011 return dev_ioctl(net, SIOCSHWTSTAMP, uifr);
3012}
3013
7a229387 3014struct rtentry32 {
c6d409cf 3015 u32 rt_pad1;
7a229387
AB
3016 struct sockaddr rt_dst; /* target address */
3017 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3018 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3019 unsigned short rt_flags;
3020 short rt_pad2;
3021 u32 rt_pad3;
3022 unsigned char rt_tos;
3023 unsigned char rt_class;
3024 short rt_pad4;
3025 short rt_metric; /* +1 for binary compatibility! */
7a229387 3026 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3027 u32 rt_mtu; /* per route MTU/Window */
3028 u32 rt_window; /* Window clamping */
7a229387
AB
3029 unsigned short rt_irtt; /* Initial RTT */
3030};
3031
3032struct in6_rtmsg32 {
3033 struct in6_addr rtmsg_dst;
3034 struct in6_addr rtmsg_src;
3035 struct in6_addr rtmsg_gateway;
3036 u32 rtmsg_type;
3037 u16 rtmsg_dst_len;
3038 u16 rtmsg_src_len;
3039 u32 rtmsg_metric;
3040 u32 rtmsg_info;
3041 u32 rtmsg_flags;
3042 s32 rtmsg_ifindex;
3043};
3044
6b96018b
AB
3045static int routing_ioctl(struct net *net, struct socket *sock,
3046 unsigned int cmd, void __user *argp)
7a229387
AB
3047{
3048 int ret;
3049 void *r = NULL;
3050 struct in6_rtmsg r6;
3051 struct rtentry r4;
3052 char devname[16];
3053 u32 rtdev;
3054 mm_segment_t old_fs = get_fs();
3055
6b96018b
AB
3056 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3057 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3058 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3059 3 * sizeof(struct in6_addr));
c6d409cf
ED
3060 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3061 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3062 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3063 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3064 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3065 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3066 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3067
3068 r = (void *) &r6;
3069 } else { /* ipv4 */
6b96018b 3070 struct rtentry32 __user *ur4 = argp;
c6d409cf 3071 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3072 3 * sizeof(struct sockaddr));
c6d409cf
ED
3073 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
3074 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
3075 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
3076 ret |= __get_user(r4.rt_window, &(ur4->rt_window));
3077 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
3078 ret |= __get_user(rtdev, &(ur4->rt_dev));
7a229387 3079 if (rtdev) {
c6d409cf 3080 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3081 r4.rt_dev = (char __user __force *)devname;
3082 devname[15] = 0;
7a229387
AB
3083 } else
3084 r4.rt_dev = NULL;
3085
3086 r = (void *) &r4;
3087 }
3088
3089 if (ret) {
3090 ret = -EFAULT;
3091 goto out;
3092 }
3093
c6d409cf 3094 set_fs(KERNEL_DS);
6b96018b 3095 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3096 set_fs(old_fs);
7a229387
AB
3097
3098out:
7a229387
AB
3099 return ret;
3100}
3101
3102/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3103 * for some operations; this forces use of the newer bridge-utils that
25985edc 3104 * use compatible ioctls
7a229387 3105 */
6b96018b 3106static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3107{
6b96018b 3108 compat_ulong_t tmp;
7a229387 3109
6b96018b 3110 if (get_user(tmp, argp))
7a229387
AB
3111 return -EFAULT;
3112 if (tmp == BRCTL_GET_VERSION)
3113 return BRCTL_VERSION + 1;
3114 return -EINVAL;
3115}
3116
6b96018b
AB
3117static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3118 unsigned int cmd, unsigned long arg)
3119{
3120 void __user *argp = compat_ptr(arg);
3121 struct sock *sk = sock->sk;
3122 struct net *net = sock_net(sk);
7a229387 3123
6b96018b
AB
3124 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3125 return siocdevprivate_ioctl(net, cmd, argp);
3126
3127 switch (cmd) {
3128 case SIOCSIFBR:
3129 case SIOCGIFBR:
3130 return old_bridge_ioctl(argp);
3131 case SIOCGIFNAME:
3132 return dev_ifname32(net, argp);
3133 case SIOCGIFCONF:
3134 return dev_ifconf(net, argp);
3135 case SIOCETHTOOL:
3136 return ethtool_ioctl(net, argp);
7a50a240
AB
3137 case SIOCWANDEV:
3138 return compat_siocwandev(net, argp);
a2116ed2
AB
3139 case SIOCGIFMAP:
3140 case SIOCSIFMAP:
3141 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3142 case SIOCBONDENSLAVE:
3143 case SIOCBONDRELEASE:
3144 case SIOCBONDSETHWADDR:
3145 case SIOCBONDSLAVEINFOQUERY:
3146 case SIOCBONDINFOQUERY:
3147 case SIOCBONDCHANGEACTIVE:
3148 return bond_ioctl(net, cmd, argp);
3149 case SIOCADDRT:
3150 case SIOCDELRT:
3151 return routing_ioctl(net, sock, cmd, argp);
3152 case SIOCGSTAMP:
3153 return do_siocgstamp(net, sock, cmd, argp);
3154 case SIOCGSTAMPNS:
3155 return do_siocgstampns(net, sock, cmd, argp);
a2116ed2
AB
3156 case SIOCSHWTSTAMP:
3157 return compat_siocshwtstamp(net, argp);
6b96018b
AB
3158
3159 case FIOSETOWN:
3160 case SIOCSPGRP:
3161 case FIOGETOWN:
3162 case SIOCGPGRP:
3163 case SIOCBRADDBR:
3164 case SIOCBRDELBR:
3165 case SIOCGIFVLAN:
3166 case SIOCSIFVLAN:
3167 case SIOCADDDLCI:
3168 case SIOCDELDLCI:
3169 return sock_ioctl(file, cmd, arg);
3170
3171 case SIOCGIFFLAGS:
3172 case SIOCSIFFLAGS:
3173 case SIOCGIFMETRIC:
3174 case SIOCSIFMETRIC:
3175 case SIOCGIFMTU:
3176 case SIOCSIFMTU:
3177 case SIOCGIFMEM:
3178 case SIOCSIFMEM:
3179 case SIOCGIFHWADDR:
3180 case SIOCSIFHWADDR:
3181 case SIOCADDMULTI:
3182 case SIOCDELMULTI:
3183 case SIOCGIFINDEX:
6b96018b
AB
3184 case SIOCGIFADDR:
3185 case SIOCSIFADDR:
3186 case SIOCSIFHWBROADCAST:
6b96018b 3187 case SIOCDIFADDR:
6b96018b
AB
3188 case SIOCGIFBRDADDR:
3189 case SIOCSIFBRDADDR:
3190 case SIOCGIFDSTADDR:
3191 case SIOCSIFDSTADDR:
3192 case SIOCGIFNETMASK:
3193 case SIOCSIFNETMASK:
3194 case SIOCSIFPFLAGS:
3195 case SIOCGIFPFLAGS:
3196 case SIOCGIFTXQLEN:
3197 case SIOCSIFTXQLEN:
3198 case SIOCBRADDIF:
3199 case SIOCBRDELIF:
9177efd3
AB
3200 case SIOCSIFNAME:
3201 case SIOCGMIIPHY:
3202 case SIOCGMIIREG:
3203 case SIOCSMIIREG:
6b96018b 3204 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3205
6b96018b
AB
3206 case SIOCSARP:
3207 case SIOCGARP:
3208 case SIOCDARP:
6b96018b 3209 case SIOCATMARK:
9177efd3
AB
3210 return sock_do_ioctl(net, sock, cmd, arg);
3211 }
3212
3213 /* Prevent warning from compat_sys_ioctl, these always
3214 * result in -EINVAL in the native case anyway. */
3215 switch (cmd) {
3216 case SIOCRTMSG:
3217 case SIOCGIFCOUNT:
6b96018b
AB
3218 case SIOCSRARP:
3219 case SIOCGRARP:
3220 case SIOCDRARP:
9177efd3
AB
3221 case SIOCSIFLINK:
3222 case SIOCGIFSLAVE:
3223 case SIOCSIFSLAVE:
3224 return -EINVAL;
6b96018b
AB
3225 }
3226
3227 return -ENOIOCTLCMD;
3228}
7a229387 3229
89bbfc95 3230static long compat_sock_ioctl(struct file *file, unsigned cmd,
89bddce5 3231 unsigned long arg)
89bbfc95
SP
3232{
3233 struct socket *sock = file->private_data;
3234 int ret = -ENOIOCTLCMD;
87de87d5
DM
3235 struct sock *sk;
3236 struct net *net;
3237
3238 sk = sock->sk;
3239 net = sock_net(sk);
89bbfc95
SP
3240
3241 if (sock->ops->compat_ioctl)
3242 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3243
87de87d5
DM
3244 if (ret == -ENOIOCTLCMD &&
3245 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3246 ret = compat_wext_handle_ioctl(net, cmd, arg);
3247
6b96018b
AB
3248 if (ret == -ENOIOCTLCMD)
3249 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3250
89bbfc95
SP
3251 return ret;
3252}
3253#endif
3254
ac5a488e
SS
3255int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3256{
3257 return sock->ops->bind(sock, addr, addrlen);
3258}
c6d409cf 3259EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3260
3261int kernel_listen(struct socket *sock, int backlog)
3262{
3263 return sock->ops->listen(sock, backlog);
3264}
c6d409cf 3265EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3266
3267int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3268{
3269 struct sock *sk = sock->sk;
3270 int err;
3271
3272 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3273 newsock);
3274 if (err < 0)
3275 goto done;
3276
3277 err = sock->ops->accept(sock, *newsock, flags);
3278 if (err < 0) {
3279 sock_release(*newsock);
fa8705b0 3280 *newsock = NULL;
ac5a488e
SS
3281 goto done;
3282 }
3283
3284 (*newsock)->ops = sock->ops;
1b08534e 3285 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3286
3287done:
3288 return err;
3289}
c6d409cf 3290EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3291
3292int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3293 int flags)
ac5a488e
SS
3294{
3295 return sock->ops->connect(sock, addr, addrlen, flags);
3296}
c6d409cf 3297EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3298
3299int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3300 int *addrlen)
3301{
3302 return sock->ops->getname(sock, addr, addrlen, 0);
3303}
c6d409cf 3304EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3305
3306int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3307 int *addrlen)
3308{
3309 return sock->ops->getname(sock, addr, addrlen, 1);
3310}
c6d409cf 3311EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3312
3313int kernel_getsockopt(struct socket *sock, int level, int optname,
3314 char *optval, int *optlen)
3315{
3316 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3317 char __user *uoptval;
3318 int __user *uoptlen;
ac5a488e
SS
3319 int err;
3320
fb8621bb
NK
3321 uoptval = (char __user __force *) optval;
3322 uoptlen = (int __user __force *) optlen;
3323
ac5a488e
SS
3324 set_fs(KERNEL_DS);
3325 if (level == SOL_SOCKET)
fb8621bb 3326 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3327 else
fb8621bb
NK
3328 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3329 uoptlen);
ac5a488e
SS
3330 set_fs(oldfs);
3331 return err;
3332}
c6d409cf 3333EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3334
3335int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3336 char *optval, unsigned int optlen)
ac5a488e
SS
3337{
3338 mm_segment_t oldfs = get_fs();
fb8621bb 3339 char __user *uoptval;
ac5a488e
SS
3340 int err;
3341
fb8621bb
NK
3342 uoptval = (char __user __force *) optval;
3343
ac5a488e
SS
3344 set_fs(KERNEL_DS);
3345 if (level == SOL_SOCKET)
fb8621bb 3346 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3347 else
fb8621bb 3348 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3349 optlen);
3350 set_fs(oldfs);
3351 return err;
3352}
c6d409cf 3353EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3354
3355int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3356 size_t size, int flags)
3357{
f8451725
HX
3358 sock_update_classid(sock->sk);
3359
ac5a488e
SS
3360 if (sock->ops->sendpage)
3361 return sock->ops->sendpage(sock, page, offset, size, flags);
3362
3363 return sock_no_sendpage(sock, page, offset, size, flags);
3364}
c6d409cf 3365EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3366
3367int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3368{
3369 mm_segment_t oldfs = get_fs();
3370 int err;
3371
3372 set_fs(KERNEL_DS);
3373 err = sock->ops->ioctl(sock, cmd, arg);
3374 set_fs(oldfs);
3375
3376 return err;
3377}
c6d409cf 3378EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3379
91cf45f0
TM
3380int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3381{
3382 return sock->ops->shutdown(sock, how);
3383}
91cf45f0 3384EXPORT_SYMBOL(kernel_sock_shutdown);