DM9000: Wake on LAN support
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4
LT
72#include <linux/wanrouter.h>
73#include <linux/if_bridge.h>
20380731
ACM
74#include <linux/if_frad.h>
75#include <linux/if_vlan.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
1da177e4
LT
90
91#include <asm/uaccess.h>
92#include <asm/unistd.h>
93
94#include <net/compat.h>
87de87d5 95#include <net/wext.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
103#include <linux/atmdev.h>
104#include <linux/atmarp.h>
105#include <linux/atmsvc.h>
106#include <linux/atmlec.h>
107#include <linux/atmclip.h>
108#include <linux/atmmpc.h>
109#include <linux/atm_tcp.h>
110#include <linux/sonet.h>
111#include <linux/sockios.h>
112#include <linux/atalk.h>
113
1da177e4 114static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
115static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
116 unsigned long nr_segs, loff_t pos);
117static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
118 unsigned long nr_segs, loff_t pos);
89bddce5 119static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
120
121static int sock_close(struct inode *inode, struct file *file);
122static unsigned int sock_poll(struct file *file,
123 struct poll_table_struct *wait);
89bddce5 124static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
125#ifdef CONFIG_COMPAT
126static long compat_sock_ioctl(struct file *file,
89bddce5 127 unsigned int cmd, unsigned long arg);
89bbfc95 128#endif
1da177e4 129static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
130static ssize_t sock_sendpage(struct file *file, struct page *page,
131 int offset, size_t size, loff_t *ppos, int more);
9c55e01c
JA
132static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
133 struct pipe_inode_info *pipe, size_t len,
134 unsigned int flags);
1da177e4 135
1da177e4
LT
136/*
137 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
138 * in the operation structures but are done directly via the socketcall() multiplexor.
139 */
140
da7071d7 141static const struct file_operations socket_file_ops = {
1da177e4
LT
142 .owner = THIS_MODULE,
143 .llseek = no_llseek,
144 .aio_read = sock_aio_read,
145 .aio_write = sock_aio_write,
146 .poll = sock_poll,
147 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
148#ifdef CONFIG_COMPAT
149 .compat_ioctl = compat_sock_ioctl,
150#endif
1da177e4
LT
151 .mmap = sock_mmap,
152 .open = sock_no_open, /* special open code to disallow open via /proc */
153 .release = sock_close,
154 .fasync = sock_fasync,
5274f052
JA
155 .sendpage = sock_sendpage,
156 .splice_write = generic_splice_sendpage,
9c55e01c 157 .splice_read = sock_splice_read,
1da177e4
LT
158};
159
160/*
161 * The protocol list. Each protocol is registered in here.
162 */
163
1da177e4 164static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 165static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 166
1da177e4
LT
167/*
168 * Statistics counters of the socket lists
169 */
170
171static DEFINE_PER_CPU(int, sockets_in_use) = 0;
172
173/*
89bddce5
SH
174 * Support routines.
175 * Move socket addresses back and forth across the kernel/user
176 * divide and look after the messy bits.
1da177e4
LT
177 */
178
89bddce5 179#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
180 16 for IP, 16 for IPX,
181 24 for IPv6,
89bddce5 182 about 80 for AX.25
1da177e4
LT
183 must be at least one bigger than
184 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 185 :unix_mkname()).
1da177e4 186 */
89bddce5 187
1da177e4
LT
188/**
189 * move_addr_to_kernel - copy a socket address into kernel space
190 * @uaddr: Address in user space
191 * @kaddr: Address in kernel space
192 * @ulen: Length in user space
193 *
194 * The address is copied into kernel space. If the provided address is
195 * too long an error code of -EINVAL is returned. If the copy gives
196 * invalid addresses -EFAULT is returned. On a success 0 is returned.
197 */
198
230b1839 199int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
1da177e4 200{
230b1839 201 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 202 return -EINVAL;
89bddce5 203 if (ulen == 0)
1da177e4 204 return 0;
89bddce5 205 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 206 return -EFAULT;
3ec3b2fb 207 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
208}
209
210/**
211 * move_addr_to_user - copy an address to user space
212 * @kaddr: kernel space address
213 * @klen: length of address in kernel
214 * @uaddr: user space address
215 * @ulen: pointer to user length field
216 *
217 * The value pointed to by ulen on entry is the buffer length available.
218 * This is overwritten with the buffer space used. -EINVAL is returned
219 * if an overlong buffer is specified or a negative buffer size. -EFAULT
220 * is returned if either the buffer or the length field are not
221 * accessible.
222 * After copying the data up to the limit the user specifies, the true
223 * length of the data is written over the length limit the user
224 * specified. Zero is returned for a success.
225 */
89bddce5 226
230b1839 227int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr,
89bddce5 228 int __user *ulen)
1da177e4
LT
229{
230 int err;
231 int len;
232
89bddce5
SH
233 err = get_user(len, ulen);
234 if (err)
1da177e4 235 return err;
89bddce5
SH
236 if (len > klen)
237 len = klen;
230b1839 238 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 239 return -EINVAL;
89bddce5 240 if (len) {
d6fe3945
SG
241 if (audit_sockaddr(klen, kaddr))
242 return -ENOMEM;
89bddce5 243 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
244 return -EFAULT;
245 }
246 /*
89bddce5
SH
247 * "fromlen shall refer to the value before truncation.."
248 * 1003.1g
1da177e4
LT
249 */
250 return __put_user(klen, ulen);
251}
252
e18b890b 253static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
254
255static struct inode *sock_alloc_inode(struct super_block *sb)
256{
257 struct socket_alloc *ei;
89bddce5 258
e94b1766 259 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
260 if (!ei)
261 return NULL;
262 init_waitqueue_head(&ei->socket.wait);
89bddce5 263
1da177e4
LT
264 ei->socket.fasync_list = NULL;
265 ei->socket.state = SS_UNCONNECTED;
266 ei->socket.flags = 0;
267 ei->socket.ops = NULL;
268 ei->socket.sk = NULL;
269 ei->socket.file = NULL;
1da177e4
LT
270
271 return &ei->vfs_inode;
272}
273
274static void sock_destroy_inode(struct inode *inode)
275{
276 kmem_cache_free(sock_inode_cachep,
277 container_of(inode, struct socket_alloc, vfs_inode));
278}
279
51cc5068 280static void init_once(void *foo)
1da177e4 281{
89bddce5 282 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 283
a35afb83 284 inode_init_once(&ei->vfs_inode);
1da177e4 285}
89bddce5 286
1da177e4
LT
287static int init_inodecache(void)
288{
289 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
290 sizeof(struct socket_alloc),
291 0,
292 (SLAB_HWCACHE_ALIGN |
293 SLAB_RECLAIM_ACCOUNT |
294 SLAB_MEM_SPREAD),
20c2df83 295 init_once);
1da177e4
LT
296 if (sock_inode_cachep == NULL)
297 return -ENOMEM;
298 return 0;
299}
300
b87221de 301static const struct super_operations sockfs_ops = {
1da177e4
LT
302 .alloc_inode = sock_alloc_inode,
303 .destroy_inode =sock_destroy_inode,
304 .statfs = simple_statfs,
305};
306
454e2398 307static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
308 int flags, const char *dev_name, void *data,
309 struct vfsmount *mnt)
1da177e4 310{
454e2398
DH
311 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
312 mnt);
1da177e4
LT
313}
314
ba89966c 315static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
316
317static struct file_system_type sock_fs_type = {
318 .name = "sockfs",
319 .get_sb = sockfs_get_sb,
320 .kill_sb = kill_anon_super,
321};
89bddce5 322
1da177e4
LT
323static int sockfs_delete_dentry(struct dentry *dentry)
324{
304e61e6
ED
325 /*
326 * At creation time, we pretended this dentry was hashed
327 * (by clearing DCACHE_UNHASHED bit in d_flags)
328 * At delete time, we restore the truth : not hashed.
329 * (so that dput() can proceed correctly)
330 */
331 dentry->d_flags |= DCACHE_UNHASHED;
332 return 0;
1da177e4 333}
c23fbb6b
ED
334
335/*
336 * sockfs_dname() is called from d_path().
337 */
338static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
339{
340 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
341 dentry->d_inode->i_ino);
342}
343
3ba13d17 344static const struct dentry_operations sockfs_dentry_operations = {
89bddce5 345 .d_delete = sockfs_delete_dentry,
c23fbb6b 346 .d_dname = sockfs_dname,
1da177e4
LT
347};
348
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success the file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we no longer
 *	refer to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable with shared fd spaces; we cannot
 *	solve it inside the kernel, but we still take care of internal
 *	coherence.
 */
365
a677a039 366static int sock_alloc_fd(struct file **filep, int flags)
1da177e4
LT
367{
368 int fd;
1da177e4 369
a677a039 370 fd = get_unused_fd_flags(flags);
39d8c1b6 371 if (likely(fd >= 0)) {
1da177e4
LT
372 struct file *file = get_empty_filp();
373
39d8c1b6
DM
374 *filep = file;
375 if (unlikely(!file)) {
1da177e4 376 put_unused_fd(fd);
39d8c1b6 377 return -ENFILE;
1da177e4 378 }
39d8c1b6
DM
379 } else
380 *filep = NULL;
381 return fd;
382}
1da177e4 383
77d27200 384static int sock_attach_fd(struct socket *sock, struct file *file, int flags)
39d8c1b6 385{
ce8d2cdf 386 struct dentry *dentry;
c23fbb6b 387 struct qstr name = { .name = "" };
39d8c1b6 388
ce8d2cdf
DH
389 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
390 if (unlikely(!dentry))
39d8c1b6
DM
391 return -ENOMEM;
392
ce8d2cdf 393 dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
394 /*
395 * We dont want to push this dentry into global dentry hash table.
396 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
397 * This permits a working /proc/$pid/fd/XXX on sockets
398 */
ce8d2cdf
DH
399 dentry->d_flags &= ~DCACHE_UNHASHED;
400 d_instantiate(dentry, SOCK_INODE(sock));
39d8c1b6
DM
401
402 sock->file = file;
ce8d2cdf
DH
403 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
404 &socket_file_ops);
405 SOCK_INODE(sock)->i_fop = &socket_file_ops;
77d27200 406 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6
DM
407 file->f_pos = 0;
408 file->private_data = sock;
1da177e4 409
39d8c1b6
DM
410 return 0;
411}
412
/*
 *	Allocate a descriptor and file for @sock, attach them and install
 *	the file in the descriptor table.
 *
 *	Returns the new descriptor, or a negative errno from
 *	sock_alloc_fd() or sock_attach_fd().  When attaching fails, both
 *	the file and the descriptor are released again.
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile, flags);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile, flags);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
430
6cb153ca
BL
431static struct socket *sock_from_file(struct file *file, int *err)
432{
6cb153ca
BL
433 if (file->f_op == &socket_file_ops)
434 return file->private_data; /* set in sock_map_fd */
435
23bb80d2
ED
436 *err = -ENOTSOCK;
437 return NULL;
6cb153ca
BL
438}
439
1da177e4
LT
440/**
441 * sockfd_lookup - Go from a file number to its socket slot
442 * @fd: file handle
443 * @err: pointer to an error code return
444 *
445 * The file handle passed in is locked and the socket it is bound
446 * too is returned. If an error occurs the err pointer is overwritten
447 * with a negative errno code and NULL is returned. The function checks
448 * for both invalid handles and passing a handle which is not a socket.
449 *
450 * On a success the socket object pointer is returned.
451 */
452
453struct socket *sockfd_lookup(int fd, int *err)
454{
455 struct file *file;
1da177e4
LT
456 struct socket *sock;
457
89bddce5
SH
458 file = fget(fd);
459 if (!file) {
1da177e4
LT
460 *err = -EBADF;
461 return NULL;
462 }
89bddce5 463
6cb153ca
BL
464 sock = sock_from_file(file, err);
465 if (!sock)
1da177e4 466 fput(file);
6cb153ca
BL
467 return sock;
468}
1da177e4 469
6cb153ca
BL
470static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
471{
472 struct file *file;
473 struct socket *sock;
474
3672558c 475 *err = -EBADF;
6cb153ca
BL
476 file = fget_light(fd, fput_needed);
477 if (file) {
478 sock = sock_from_file(file, err);
479 if (sock)
480 return sock;
481 fput_light(file, *fput_needed);
1da177e4 482 }
6cb153ca 483 return NULL;
1da177e4
LT
484}
485
486/**
487 * sock_alloc - allocate a socket
89bddce5 488 *
1da177e4
LT
489 * Allocate a new inode and socket object. The two are bound together
490 * and initialised. The socket is then returned. If we are out of inodes
491 * NULL is returned.
492 */
493
494static struct socket *sock_alloc(void)
495{
89bddce5
SH
496 struct inode *inode;
497 struct socket *sock;
1da177e4
LT
498
499 inode = new_inode(sock_mnt->mnt_sb);
500 if (!inode)
501 return NULL;
502
503 sock = SOCKET_I(inode);
504
29a020d3 505 kmemcheck_annotate_bitfield(sock, type);
89bddce5 506 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
507 inode->i_uid = current_fsuid();
508 inode->i_gid = current_fsgid();
1da177e4 509
4e69489a 510 percpu_add(sockets_in_use, 1);
1da177e4
LT
511 return sock;
512}
513
514/*
515 * In theory you can't get an open on this inode, but /proc provides
516 * a back door. Remember to keep it shut otherwise you'll let the
517 * creepy crawlies in.
518 */
89bddce5 519
1da177e4
LT
520static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
521{
522 return -ENXIO;
523}
524
4b6f5d20 525const struct file_operations bad_sock_fops = {
1da177e4
LT
526 .owner = THIS_MODULE,
527 .open = sock_no_open,
528};
529
530/**
531 * sock_release - close a socket
532 * @sock: socket to close
533 *
534 * The socket is released from the protocol stack if it has a release
535 * callback, and the inode is then released if the socket is bound to
89bddce5 536 * an inode not a file.
1da177e4 537 */
89bddce5 538
1da177e4
LT
539void sock_release(struct socket *sock)
540{
541 if (sock->ops) {
542 struct module *owner = sock->ops->owner;
543
544 sock->ops->release(sock);
545 sock->ops = NULL;
546 module_put(owner);
547 }
548
549 if (sock->fasync_list)
550 printk(KERN_ERR "sock_release: fasync list not empty!\n");
551
4e69489a 552 percpu_sub(sockets_in_use, 1);
1da177e4
LT
553 if (!sock->file) {
554 iput(SOCK_INODE(sock));
555 return;
556 }
89bddce5 557 sock->file = NULL;
1da177e4
LT
558}
559
20d49473
PO
560int sock_tx_timestamp(struct msghdr *msg, struct sock *sk,
561 union skb_shared_tx *shtx)
562{
563 shtx->flags = 0;
564 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
565 shtx->hardware = 1;
566 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
567 shtx->software = 1;
568 return 0;
569}
570EXPORT_SYMBOL(sock_tx_timestamp);
571
89bddce5 572static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
573 struct msghdr *msg, size_t size)
574{
575 struct sock_iocb *si = kiocb_to_siocb(iocb);
576 int err;
577
578 si->sock = sock;
579 si->scm = NULL;
580 si->msg = msg;
581 si->size = size;
582
583 err = security_socket_sendmsg(sock, msg, size);
584 if (err)
585 return err;
586
587 return sock->ops->sendmsg(iocb, sock, msg, size);
588}
589
590int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
591{
592 struct kiocb iocb;
593 struct sock_iocb siocb;
594 int ret;
595
596 init_sync_kiocb(&iocb, NULL);
597 iocb.private = &siocb;
598 ret = __sock_sendmsg(&iocb, sock, msg, size);
599 if (-EIOCBQUEUED == ret)
600 ret = wait_on_sync_kiocb(&iocb);
601 return ret;
602}
603
604int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
605 struct kvec *vec, size_t num, size_t size)
606{
607 mm_segment_t oldfs = get_fs();
608 int result;
609
610 set_fs(KERNEL_DS);
611 /*
612 * the following is safe, since for compiler definitions of kvec and
613 * iovec are identical, yielding the same in-core layout and alignment
614 */
89bddce5 615 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
616 msg->msg_iovlen = num;
617 result = sock_sendmsg(sock, msg, size);
618 set_fs(oldfs);
619 return result;
620}
621
20d49473
PO
622static int ktime2ts(ktime_t kt, struct timespec *ts)
623{
624 if (kt.tv64) {
625 *ts = ktime_to_timespec(kt);
626 return 1;
627 } else {
628 return 0;
629 }
630}
631
92f37fd2
ED
632/*
633 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
634 */
635void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
636 struct sk_buff *skb)
637{
20d49473
PO
638 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
639 struct timespec ts[3];
640 int empty = 1;
641 struct skb_shared_hwtstamps *shhwtstamps =
642 skb_hwtstamps(skb);
643
644 /* Race occurred between timestamp enabling and packet
645 receiving. Fill in the current time for now. */
646 if (need_software_tstamp && skb->tstamp.tv64 == 0)
647 __net_timestamp(skb);
648
649 if (need_software_tstamp) {
650 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
651 struct timeval tv;
652 skb_get_timestamp(skb, &tv);
653 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
654 sizeof(tv), &tv);
655 } else {
656 struct timespec ts;
657 skb_get_timestampns(skb, &ts);
658 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
659 sizeof(ts), &ts);
660 }
661 }
662
663
664 memset(ts, 0, sizeof(ts));
665 if (skb->tstamp.tv64 &&
666 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
667 skb_get_timestampns(skb, ts + 0);
668 empty = 0;
669 }
670 if (shhwtstamps) {
671 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
672 ktime2ts(shhwtstamps->syststamp, ts + 1))
673 empty = 0;
674 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
675 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
676 empty = 0;
92f37fd2 677 }
20d49473
PO
678 if (!empty)
679 put_cmsg(msg, SOL_SOCKET,
680 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2
ED
681}
682
7c81fd8b
ACM
683EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
684
3b885787
NH
685inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
686{
687 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
688 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
689 sizeof(__u32), &skb->dropcount);
690}
691
/*
 *	Convenience wrapper: calls sock_recv_timestamp() and then
 *	sock_recv_drops() for the same message, socket and skb.
 */
void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			    struct sk_buff *skb)
{
	/* Timestamps first, drop counter second. */
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops);
699
a2e27255
ACM
700static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
701 struct msghdr *msg, size_t size, int flags)
1da177e4 702{
1da177e4
LT
703 struct sock_iocb *si = kiocb_to_siocb(iocb);
704
705 si->sock = sock;
706 si->scm = NULL;
707 si->msg = msg;
708 si->size = size;
709 si->flags = flags;
710
1da177e4
LT
711 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
712}
713
a2e27255
ACM
714static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
715 struct msghdr *msg, size_t size, int flags)
716{
717 int err = security_socket_recvmsg(sock, msg, size, flags);
718
719 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
720}
721
89bddce5 722int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
723 size_t size, int flags)
724{
725 struct kiocb iocb;
726 struct sock_iocb siocb;
727 int ret;
728
89bddce5 729 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
730 iocb.private = &siocb;
731 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
732 if (-EIOCBQUEUED == ret)
733 ret = wait_on_sync_kiocb(&iocb);
734 return ret;
735}
736
a2e27255
ACM
737static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
738 size_t size, int flags)
739{
740 struct kiocb iocb;
741 struct sock_iocb siocb;
742 int ret;
743
744 init_sync_kiocb(&iocb, NULL);
745 iocb.private = &siocb;
746 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
747 if (-EIOCBQUEUED == ret)
748 ret = wait_on_sync_kiocb(&iocb);
749 return ret;
750}
751
89bddce5
SH
752int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
753 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
754{
755 mm_segment_t oldfs = get_fs();
756 int result;
757
758 set_fs(KERNEL_DS);
759 /*
760 * the following is safe, since for compiler definitions of kvec and
761 * iovec are identical, yielding the same in-core layout and alignment
762 */
89bddce5 763 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
764 result = sock_recvmsg(sock, msg, size, flags);
765 set_fs(oldfs);
766 return result;
767}
768
769static void sock_aio_dtor(struct kiocb *iocb)
770{
771 kfree(iocb->private);
772}
773
ce1d4d3e
CH
774static ssize_t sock_sendpage(struct file *file, struct page *page,
775 int offset, size_t size, loff_t *ppos, int more)
1da177e4 776{
1da177e4
LT
777 struct socket *sock;
778 int flags;
779
ce1d4d3e
CH
780 sock = file->private_data;
781
782 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
783 if (more)
784 flags |= MSG_MORE;
785
e6949583 786 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 787}
1da177e4 788
9c55e01c
JA
789static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
790 struct pipe_inode_info *pipe, size_t len,
791 unsigned int flags)
792{
793 struct socket *sock = file->private_data;
794
997b37da
RDC
795 if (unlikely(!sock->ops->splice_read))
796 return -EINVAL;
797
9c55e01c
JA
798 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
799}
800
ce1d4d3e 801static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 802 struct sock_iocb *siocb)
ce1d4d3e
CH
803{
804 if (!is_sync_kiocb(iocb)) {
805 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
806 if (!siocb)
807 return NULL;
1da177e4
LT
808 iocb->ki_dtor = sock_aio_dtor;
809 }
1da177e4 810
ce1d4d3e 811 siocb->kiocb = iocb;
ce1d4d3e
CH
812 iocb->private = siocb;
813 return siocb;
1da177e4
LT
814}
815
ce1d4d3e 816static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
817 struct file *file, const struct iovec *iov,
818 unsigned long nr_segs)
ce1d4d3e
CH
819{
820 struct socket *sock = file->private_data;
821 size_t size = 0;
822 int i;
1da177e4 823
89bddce5
SH
824 for (i = 0; i < nr_segs; i++)
825 size += iov[i].iov_len;
1da177e4 826
ce1d4d3e
CH
827 msg->msg_name = NULL;
828 msg->msg_namelen = 0;
829 msg->msg_control = NULL;
830 msg->msg_controllen = 0;
89bddce5 831 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
832 msg->msg_iovlen = nr_segs;
833 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
834
835 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
836}
837
027445c3
BP
838static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
839 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
840{
841 struct sock_iocb siocb, *x;
842
1da177e4
LT
843 if (pos != 0)
844 return -ESPIPE;
027445c3
BP
845
846 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
847 return 0;
848
027445c3
BP
849
850 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
851 if (!x)
852 return -ENOMEM;
027445c3 853 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
854}
855
ce1d4d3e 856static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
857 struct file *file, const struct iovec *iov,
858 unsigned long nr_segs)
1da177e4 859{
ce1d4d3e
CH
860 struct socket *sock = file->private_data;
861 size_t size = 0;
862 int i;
1da177e4 863
89bddce5
SH
864 for (i = 0; i < nr_segs; i++)
865 size += iov[i].iov_len;
1da177e4 866
ce1d4d3e
CH
867 msg->msg_name = NULL;
868 msg->msg_namelen = 0;
869 msg->msg_control = NULL;
870 msg->msg_controllen = 0;
89bddce5 871 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
872 msg->msg_iovlen = nr_segs;
873 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
874 if (sock->type == SOCK_SEQPACKET)
875 msg->msg_flags |= MSG_EOR;
1da177e4 876
ce1d4d3e 877 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
878}
879
027445c3
BP
880static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
881 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
882{
883 struct sock_iocb siocb, *x;
1da177e4 884
ce1d4d3e
CH
885 if (pos != 0)
886 return -ESPIPE;
027445c3 887
027445c3 888 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
889 if (!x)
890 return -ENOMEM;
1da177e4 891
027445c3 892 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
893}
894
1da177e4
LT
895/*
896 * Atomic setting of ioctl hooks to avoid race
897 * with module unload.
898 */
899
4a3e2f71 900static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 901static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 902
881d966b 903void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 904{
4a3e2f71 905 mutex_lock(&br_ioctl_mutex);
1da177e4 906 br_ioctl_hook = hook;
4a3e2f71 907 mutex_unlock(&br_ioctl_mutex);
1da177e4 908}
89bddce5 909
1da177e4
LT
910EXPORT_SYMBOL(brioctl_set);
911
4a3e2f71 912static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 913static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 914
881d966b 915void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 916{
4a3e2f71 917 mutex_lock(&vlan_ioctl_mutex);
1da177e4 918 vlan_ioctl_hook = hook;
4a3e2f71 919 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 920}
89bddce5 921
1da177e4
LT
922EXPORT_SYMBOL(vlan_ioctl_set);
923
4a3e2f71 924static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 925static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 926
89bddce5 927void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 928{
4a3e2f71 929 mutex_lock(&dlci_ioctl_mutex);
1da177e4 930 dlci_ioctl_hook = hook;
4a3e2f71 931 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 932}
89bddce5 933
1da177e4
LT
934EXPORT_SYMBOL(dlci_ioctl_set);
935
6b96018b
AB
936static long sock_do_ioctl(struct net *net, struct socket *sock,
937 unsigned int cmd, unsigned long arg)
938{
939 int err;
940 void __user *argp = (void __user *)arg;
941
942 err = sock->ops->ioctl(sock, cmd, arg);
943
944 /*
945 * If this ioctl is unknown try to hand it down
946 * to the NIC driver.
947 */
948 if (err == -ENOIOCTLCMD)
949 err = dev_ioctl(net, cmd, argp);
950
951 return err;
952}
953
1da177e4
LT
954/*
955 * With an ioctl, arg may well be a user mode pointer, but we don't know
956 * what to do with it - that's up to the protocol still.
957 */
958
959static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
960{
961 struct socket *sock;
881d966b 962 struct sock *sk;
1da177e4
LT
963 void __user *argp = (void __user *)arg;
964 int pid, err;
881d966b 965 struct net *net;
1da177e4 966
b69aee04 967 sock = file->private_data;
881d966b 968 sk = sock->sk;
3b1e0a65 969 net = sock_net(sk);
1da177e4 970 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 971 err = dev_ioctl(net, cmd, argp);
1da177e4 972 } else
3d23e349 973#ifdef CONFIG_WEXT_CORE
1da177e4 974 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 975 err = dev_ioctl(net, cmd, argp);
1da177e4 976 } else
3d23e349 977#endif
89bddce5 978 switch (cmd) {
1da177e4
LT
979 case FIOSETOWN:
980 case SIOCSPGRP:
981 err = -EFAULT;
982 if (get_user(pid, (int __user *)argp))
983 break;
984 err = f_setown(sock->file, pid, 1);
985 break;
986 case FIOGETOWN:
987 case SIOCGPGRP:
609d7fa9 988 err = put_user(f_getown(sock->file),
89bddce5 989 (int __user *)argp);
1da177e4
LT
990 break;
991 case SIOCGIFBR:
992 case SIOCSIFBR:
993 case SIOCBRADDBR:
994 case SIOCBRDELBR:
995 err = -ENOPKG;
996 if (!br_ioctl_hook)
997 request_module("bridge");
998
4a3e2f71 999 mutex_lock(&br_ioctl_mutex);
89bddce5 1000 if (br_ioctl_hook)
881d966b 1001 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1002 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1003 break;
1004 case SIOCGIFVLAN:
1005 case SIOCSIFVLAN:
1006 err = -ENOPKG;
1007 if (!vlan_ioctl_hook)
1008 request_module("8021q");
1009
4a3e2f71 1010 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1011 if (vlan_ioctl_hook)
881d966b 1012 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1013 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1014 break;
1da177e4
LT
1015 case SIOCADDDLCI:
1016 case SIOCDELDLCI:
1017 err = -ENOPKG;
1018 if (!dlci_ioctl_hook)
1019 request_module("dlci");
1020
7512cbf6
PE
1021 mutex_lock(&dlci_ioctl_mutex);
1022 if (dlci_ioctl_hook)
1da177e4 1023 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1024 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1025 break;
1026 default:
6b96018b 1027 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1028 break;
89bddce5 1029 }
1da177e4
LT
1030 return err;
1031}
1032
1033int sock_create_lite(int family, int type, int protocol, struct socket **res)
1034{
1035 int err;
1036 struct socket *sock = NULL;
89bddce5 1037
1da177e4
LT
1038 err = security_socket_create(family, type, protocol, 1);
1039 if (err)
1040 goto out;
1041
1042 sock = sock_alloc();
1043 if (!sock) {
1044 err = -ENOMEM;
1045 goto out;
1046 }
1047
1da177e4 1048 sock->type = type;
7420ed23
VY
1049 err = security_socket_post_create(sock, family, type, protocol, 1);
1050 if (err)
1051 goto out_release;
1052
1da177e4
LT
1053out:
1054 *res = sock;
1055 return err;
7420ed23
VY
1056out_release:
1057 sock_release(sock);
1058 sock = NULL;
1059 goto out;
1da177e4
LT
1060}
1061
1062/* No kernel lock held - perfect */
89bddce5 1063static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1064{
1065 struct socket *sock;
1066
1067 /*
89bddce5 1068 * We can't return errors to poll, so it's either yes or no.
1da177e4 1069 */
b69aee04 1070 sock = file->private_data;
1da177e4
LT
1071 return sock->ops->poll(file, sock, wait);
1072}
1073
89bddce5 1074static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1075{
b69aee04 1076 struct socket *sock = file->private_data;
1da177e4
LT
1077
1078 return sock->ops->mmap(file, sock, vma);
1079}
1080
20380731 1081static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1082{
1083 /*
89bddce5
SH
1084 * It was possible the inode is NULL we were
1085 * closing an unfinished socket.
1da177e4
LT
1086 */
1087
89bddce5 1088 if (!inode) {
1da177e4
LT
1089 printk(KERN_DEBUG "sock_close: NULL inode\n");
1090 return 0;
1091 }
1da177e4
LT
1092 sock_release(SOCKET_I(inode));
1093 return 0;
1094}
1095
1096/*
1097 * Update the socket async list
1098 *
1099 * Fasync_list locking strategy.
1100 *
1101 * 1. fasync_list is modified only under process context socket lock
1102 * i.e. under semaphore.
1103 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1104 * or under socket lock.
1105 * 3. fasync_list can be used from softirq context, so that
1106 * modification under socket lock have to be enhanced with
1107 * write_lock_bh(&sk->sk_callback_lock).
1108 * --ANK (990710)
1109 */
1110
1111static int sock_fasync(int fd, struct file *filp, int on)
1112{
89bddce5 1113 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1114 struct socket *sock;
1115 struct sock *sk;
1116
89bddce5 1117 if (on) {
8b3a7005 1118 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1119 if (fna == NULL)
1da177e4
LT
1120 return -ENOMEM;
1121 }
1122
b69aee04 1123 sock = filp->private_data;
1da177e4 1124
89bddce5
SH
1125 sk = sock->sk;
1126 if (sk == NULL) {
1da177e4
LT
1127 kfree(fna);
1128 return -EINVAL;
1129 }
1130
1131 lock_sock(sk);
1132
76398425
JC
1133 spin_lock(&filp->f_lock);
1134 if (on)
1135 filp->f_flags |= FASYNC;
1136 else
1137 filp->f_flags &= ~FASYNC;
1138 spin_unlock(&filp->f_lock);
1139
89bddce5 1140 prev = &(sock->fasync_list);
1da177e4 1141
89bddce5
SH
1142 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1143 if (fa->fa_file == filp)
1da177e4
LT
1144 break;
1145
89bddce5
SH
1146 if (on) {
1147 if (fa != NULL) {
1da177e4 1148 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1149 fa->fa_fd = fd;
1da177e4
LT
1150 write_unlock_bh(&sk->sk_callback_lock);
1151
1152 kfree(fna);
1153 goto out;
1154 }
89bddce5
SH
1155 fna->fa_file = filp;
1156 fna->fa_fd = fd;
1157 fna->magic = FASYNC_MAGIC;
1158 fna->fa_next = sock->fasync_list;
1da177e4 1159 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1160 sock->fasync_list = fna;
bcdce719 1161 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1162 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1163 } else {
1164 if (fa != NULL) {
1da177e4 1165 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1166 *prev = fa->fa_next;
bcdce719
ED
1167 if (!sock->fasync_list)
1168 sock_reset_flag(sk, SOCK_FASYNC);
1da177e4
LT
1169 write_unlock_bh(&sk->sk_callback_lock);
1170 kfree(fa);
1171 }
1172 }
1173
1174out:
1175 release_sock(sock->sk);
1176 return 0;
1177}
1178
1179/* This function may be called only under socket lock or callback_lock */
1180
1181int sock_wake_async(struct socket *sock, int how, int band)
1182{
1183 if (!sock || !sock->fasync_list)
1184 return -1;
89bddce5 1185 switch (how) {
8d8ad9d7 1186 case SOCK_WAKE_WAITD:
1da177e4
LT
1187 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1188 break;
1189 goto call_kill;
8d8ad9d7 1190 case SOCK_WAKE_SPACE:
1da177e4
LT
1191 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1192 break;
1193 /* fall through */
8d8ad9d7 1194 case SOCK_WAKE_IO:
89bddce5 1195call_kill:
1da177e4
LT
1196 __kill_fasync(sock->fasync_list, SIGIO, band);
1197 break;
8d8ad9d7 1198 case SOCK_WAKE_URG:
1da177e4
LT
1199 __kill_fasync(sock->fasync_list, SIGURG, band);
1200 }
1201 return 0;
1202}
1203
1b8d7ae4 1204static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1205 struct socket **res, int kern)
1da177e4
LT
1206{
1207 int err;
1208 struct socket *sock;
55737fda 1209 const struct net_proto_family *pf;
1da177e4
LT
1210
1211 /*
89bddce5 1212 * Check protocol is in range
1da177e4
LT
1213 */
1214 if (family < 0 || family >= NPROTO)
1215 return -EAFNOSUPPORT;
1216 if (type < 0 || type >= SOCK_MAX)
1217 return -EINVAL;
1218
1219 /* Compatibility.
1220
1221 This uglymoron is moved from INET layer to here to avoid
1222 deadlock in module load.
1223 */
1224 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1225 static int warned;
1da177e4
LT
1226 if (!warned) {
1227 warned = 1;
89bddce5
SH
1228 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1229 current->comm);
1da177e4
LT
1230 }
1231 family = PF_PACKET;
1232 }
1233
1234 err = security_socket_create(family, type, protocol, kern);
1235 if (err)
1236 return err;
89bddce5 1237
55737fda
SH
1238 /*
1239 * Allocate the socket and allow the family to set things up. if
1240 * the protocol is 0, the family is instructed to select an appropriate
1241 * default.
1242 */
1243 sock = sock_alloc();
1244 if (!sock) {
1245 if (net_ratelimit())
1246 printk(KERN_WARNING "socket: no more sockets\n");
1247 return -ENFILE; /* Not exactly a match, but its the
1248 closest posix thing */
1249 }
1250
1251 sock->type = type;
1252
95a5afca 1253#ifdef CONFIG_MODULES
89bddce5
SH
1254 /* Attempt to load a protocol module if the find failed.
1255 *
1256 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1257 * requested real, full-featured networking support upon configuration.
1258 * Otherwise module support will break!
1259 */
55737fda 1260 if (net_families[family] == NULL)
89bddce5 1261 request_module("net-pf-%d", family);
1da177e4
LT
1262#endif
1263
55737fda
SH
1264 rcu_read_lock();
1265 pf = rcu_dereference(net_families[family]);
1266 err = -EAFNOSUPPORT;
1267 if (!pf)
1268 goto out_release;
1da177e4
LT
1269
1270 /*
1271 * We will call the ->create function, that possibly is in a loadable
1272 * module, so we have to bump that loadable module refcnt first.
1273 */
55737fda 1274 if (!try_module_get(pf->owner))
1da177e4
LT
1275 goto out_release;
1276
55737fda
SH
1277 /* Now protected by module ref count */
1278 rcu_read_unlock();
1279
3f378b68 1280 err = pf->create(net, sock, protocol, kern);
55737fda 1281 if (err < 0)
1da177e4 1282 goto out_module_put;
a79af59e 1283
1da177e4
LT
1284 /*
1285 * Now to bump the refcnt of the [loadable] module that owns this
1286 * socket at sock_release time we decrement its refcnt.
1287 */
55737fda
SH
1288 if (!try_module_get(sock->ops->owner))
1289 goto out_module_busy;
1290
1da177e4
LT
1291 /*
1292 * Now that we're done with the ->create function, the [loadable]
1293 * module can have its refcnt decremented
1294 */
55737fda 1295 module_put(pf->owner);
7420ed23
VY
1296 err = security_socket_post_create(sock, family, type, protocol, kern);
1297 if (err)
3b185525 1298 goto out_sock_release;
55737fda 1299 *res = sock;
1da177e4 1300
55737fda
SH
1301 return 0;
1302
1303out_module_busy:
1304 err = -EAFNOSUPPORT;
1da177e4 1305out_module_put:
55737fda
SH
1306 sock->ops = NULL;
1307 module_put(pf->owner);
1308out_sock_release:
1da177e4 1309 sock_release(sock);
55737fda
SH
1310 return err;
1311
1312out_release:
1313 rcu_read_unlock();
1314 goto out_sock_release;
1da177e4
LT
1315}
1316
1317int sock_create(int family, int type, int protocol, struct socket **res)
1318{
1b8d7ae4 1319 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1320}
1321
1322int sock_create_kern(int family, int type, int protocol, struct socket **res)
1323{
1b8d7ae4 1324 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1325}
1326
3e0fa65f 1327SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1328{
1329 int retval;
1330 struct socket *sock;
a677a039
UD
1331 int flags;
1332
e38b36f3
UD
1333 /* Check the SOCK_* constants for consistency. */
1334 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1335 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1336 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1337 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1338
a677a039 1339 flags = type & ~SOCK_TYPE_MASK;
77d27200 1340 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1341 return -EINVAL;
1342 type &= SOCK_TYPE_MASK;
1da177e4 1343
aaca0bdc
UD
1344 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1345 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1346
1da177e4
LT
1347 retval = sock_create(family, type, protocol, &sock);
1348 if (retval < 0)
1349 goto out;
1350
77d27200 1351 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1352 if (retval < 0)
1353 goto out_release;
1354
1355out:
1356 /* It may be already another descriptor 8) Not kernel problem. */
1357 return retval;
1358
1359out_release:
1360 sock_release(sock);
1361 return retval;
1362}
1363
1364/*
1365 * Create a pair of connected sockets.
1366 */
1367
3e0fa65f
HC
1368SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1369 int __user *, usockvec)
1da177e4
LT
1370{
1371 struct socket *sock1, *sock2;
1372 int fd1, fd2, err;
db349509 1373 struct file *newfile1, *newfile2;
a677a039
UD
1374 int flags;
1375
1376 flags = type & ~SOCK_TYPE_MASK;
77d27200 1377 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1378 return -EINVAL;
1379 type &= SOCK_TYPE_MASK;
1da177e4 1380
aaca0bdc
UD
1381 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1382 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1383
1da177e4
LT
1384 /*
1385 * Obtain the first socket and check if the underlying protocol
1386 * supports the socketpair call.
1387 */
1388
1389 err = sock_create(family, type, protocol, &sock1);
1390 if (err < 0)
1391 goto out;
1392
1393 err = sock_create(family, type, protocol, &sock2);
1394 if (err < 0)
1395 goto out_release_1;
1396
1397 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1398 if (err < 0)
1da177e4
LT
1399 goto out_release_both;
1400
a677a039 1401 fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC);
bf3c23d1
DM
1402 if (unlikely(fd1 < 0)) {
1403 err = fd1;
db349509 1404 goto out_release_both;
bf3c23d1 1405 }
1da177e4 1406
a677a039 1407 fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC);
db349509 1408 if (unlikely(fd2 < 0)) {
bf3c23d1 1409 err = fd2;
db349509
AV
1410 put_filp(newfile1);
1411 put_unused_fd(fd1);
1da177e4 1412 goto out_release_both;
db349509 1413 }
1da177e4 1414
77d27200 1415 err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK);
db349509
AV
1416 if (unlikely(err < 0)) {
1417 goto out_fd2;
1418 }
1419
77d27200 1420 err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK);
db349509
AV
1421 if (unlikely(err < 0)) {
1422 fput(newfile1);
1423 goto out_fd1;
1424 }
1425
157cf649 1426 audit_fd_pair(fd1, fd2);
db349509
AV
1427 fd_install(fd1, newfile1);
1428 fd_install(fd2, newfile2);
1da177e4
LT
1429 /* fd1 and fd2 may be already another descriptors.
1430 * Not kernel problem.
1431 */
1432
89bddce5 1433 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1434 if (!err)
1435 err = put_user(fd2, &usockvec[1]);
1436 if (!err)
1437 return 0;
1438
1439 sys_close(fd2);
1440 sys_close(fd1);
1441 return err;
1442
1da177e4 1443out_release_both:
89bddce5 1444 sock_release(sock2);
1da177e4 1445out_release_1:
89bddce5 1446 sock_release(sock1);
1da177e4
LT
1447out:
1448 return err;
db349509
AV
1449
1450out_fd2:
1451 put_filp(newfile1);
1452 sock_release(sock1);
1453out_fd1:
1454 put_filp(newfile2);
1455 sock_release(sock2);
db349509
AV
1456 put_unused_fd(fd1);
1457 put_unused_fd(fd2);
1458 goto out;
1da177e4
LT
1459}
1460
1da177e4
LT
1461/*
1462 * Bind a name to a socket. Nothing much to do here since it's
1463 * the protocol's responsibility to handle the local address.
1464 *
1465 * We move the socket address to kernel space before we call
1466 * the protocol layer (having also checked the address is ok).
1467 */
1468
20f37034 1469SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1470{
1471 struct socket *sock;
230b1839 1472 struct sockaddr_storage address;
6cb153ca 1473 int err, fput_needed;
1da177e4 1474
89bddce5 1475 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1476 if (sock) {
230b1839 1477 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
89bddce5
SH
1478 if (err >= 0) {
1479 err = security_socket_bind(sock,
230b1839 1480 (struct sockaddr *)&address,
89bddce5 1481 addrlen);
6cb153ca
BL
1482 if (!err)
1483 err = sock->ops->bind(sock,
89bddce5 1484 (struct sockaddr *)
230b1839 1485 &address, addrlen);
1da177e4 1486 }
6cb153ca 1487 fput_light(sock->file, fput_needed);
89bddce5 1488 }
1da177e4
LT
1489 return err;
1490}
1491
1da177e4
LT
1492/*
1493 * Perform a listen. Basically, we allow the protocol to do anything
1494 * necessary for a listen, and if that works, we mark the socket as
1495 * ready for listening.
1496 */
1497
3e0fa65f 1498SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1499{
1500 struct socket *sock;
6cb153ca 1501 int err, fput_needed;
b8e1f9b5 1502 int somaxconn;
89bddce5
SH
1503
1504 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1505 if (sock) {
8efa6e93 1506 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
b8e1f9b5
PE
1507 if ((unsigned)backlog > somaxconn)
1508 backlog = somaxconn;
1da177e4
LT
1509
1510 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1511 if (!err)
1512 err = sock->ops->listen(sock, backlog);
1da177e4 1513
6cb153ca 1514 fput_light(sock->file, fput_needed);
1da177e4
LT
1515 }
1516 return err;
1517}
1518
1da177e4
LT
1519/*
1520 * For accept, we attempt to create a new socket, set up the link
1521 * with the client, wake up the client, then return the new
1522 * connected fd. We collect the address of the connector in kernel
1523 * space and move it to user at the very end. This is unclean because
1524 * we open the socket then return an error.
1525 *
1526 * 1003.1g adds the ability to recvmsg() to query connection pending
1527 * status to recvmsg. We need to add that support in a way thats
1528 * clean when we restucture accept also.
1529 */
1530
20f37034
HC
1531SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1532 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1533{
1534 struct socket *sock, *newsock;
39d8c1b6 1535 struct file *newfile;
6cb153ca 1536 int err, len, newfd, fput_needed;
230b1839 1537 struct sockaddr_storage address;
1da177e4 1538
77d27200 1539 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1540 return -EINVAL;
1541
1542 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1543 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1544
6cb153ca 1545 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1546 if (!sock)
1547 goto out;
1548
1549 err = -ENFILE;
89bddce5 1550 if (!(newsock = sock_alloc()))
1da177e4
LT
1551 goto out_put;
1552
1553 newsock->type = sock->type;
1554 newsock->ops = sock->ops;
1555
1da177e4
LT
1556 /*
1557 * We don't need try_module_get here, as the listening socket (sock)
1558 * has the protocol module (sock->ops->owner) held.
1559 */
1560 __module_get(newsock->ops->owner);
1561
aaca0bdc 1562 newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC);
39d8c1b6
DM
1563 if (unlikely(newfd < 0)) {
1564 err = newfd;
9a1875e6
DM
1565 sock_release(newsock);
1566 goto out_put;
39d8c1b6
DM
1567 }
1568
77d27200 1569 err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK);
39d8c1b6 1570 if (err < 0)
79f4f642 1571 goto out_fd_simple;
39d8c1b6 1572
a79af59e
FF
1573 err = security_socket_accept(sock, newsock);
1574 if (err)
39d8c1b6 1575 goto out_fd;
a79af59e 1576
1da177e4
LT
1577 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1578 if (err < 0)
39d8c1b6 1579 goto out_fd;
1da177e4
LT
1580
1581 if (upeer_sockaddr) {
230b1839 1582 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1583 &len, 2) < 0) {
1da177e4 1584 err = -ECONNABORTED;
39d8c1b6 1585 goto out_fd;
1da177e4 1586 }
230b1839
YH
1587 err = move_addr_to_user((struct sockaddr *)&address,
1588 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1589 if (err < 0)
39d8c1b6 1590 goto out_fd;
1da177e4
LT
1591 }
1592
1593 /* File flags are not inherited via accept() unlike another OSes. */
1594
39d8c1b6
DM
1595 fd_install(newfd, newfile);
1596 err = newfd;
1da177e4 1597
1da177e4 1598out_put:
6cb153ca 1599 fput_light(sock->file, fput_needed);
1da177e4
LT
1600out:
1601 return err;
79f4f642
AD
1602out_fd_simple:
1603 sock_release(newsock);
1604 put_filp(newfile);
1605 put_unused_fd(newfd);
1606 goto out_put;
39d8c1b6 1607out_fd:
9606a216 1608 fput(newfile);
39d8c1b6 1609 put_unused_fd(newfd);
1da177e4
LT
1610 goto out_put;
1611}
1612
20f37034
HC
1613SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1614 int __user *, upeer_addrlen)
aaca0bdc 1615{
de11defe 1616 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1617}
1618
1da177e4
LT
1619/*
1620 * Attempt to connect to a socket with the server address. The address
1621 * is in user space so we verify it is OK and move it to kernel space.
1622 *
1623 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1624 * break bindings
1625 *
1626 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1627 * other SEQPACKET protocols that take time to connect() as it doesn't
1628 * include the -EINPROGRESS status for such sockets.
1629 */
1630
20f37034
HC
1631SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1632 int, addrlen)
1da177e4
LT
1633{
1634 struct socket *sock;
230b1839 1635 struct sockaddr_storage address;
6cb153ca 1636 int err, fput_needed;
1da177e4 1637
6cb153ca 1638 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1639 if (!sock)
1640 goto out;
230b1839 1641 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
1da177e4
LT
1642 if (err < 0)
1643 goto out_put;
1644
89bddce5 1645 err =
230b1839 1646 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1647 if (err)
1648 goto out_put;
1649
230b1839 1650 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1651 sock->file->f_flags);
1652out_put:
6cb153ca 1653 fput_light(sock->file, fput_needed);
1da177e4
LT
1654out:
1655 return err;
1656}
1657
1658/*
1659 * Get the local address ('name') of a socket object. Move the obtained
1660 * name to user space.
1661 */
1662
20f37034
HC
1663SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1664 int __user *, usockaddr_len)
1da177e4
LT
1665{
1666 struct socket *sock;
230b1839 1667 struct sockaddr_storage address;
6cb153ca 1668 int len, err, fput_needed;
89bddce5 1669
6cb153ca 1670 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1671 if (!sock)
1672 goto out;
1673
1674 err = security_socket_getsockname(sock);
1675 if (err)
1676 goto out_put;
1677
230b1839 1678 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1679 if (err)
1680 goto out_put;
230b1839 1681 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1682
1683out_put:
6cb153ca 1684 fput_light(sock->file, fput_needed);
1da177e4
LT
1685out:
1686 return err;
1687}
1688
1689/*
1690 * Get the remote address ('name') of a socket object. Move the obtained
1691 * name to user space.
1692 */
1693
20f37034
HC
1694SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1695 int __user *, usockaddr_len)
1da177e4
LT
1696{
1697 struct socket *sock;
230b1839 1698 struct sockaddr_storage address;
6cb153ca 1699 int len, err, fput_needed;
1da177e4 1700
89bddce5
SH
1701 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1702 if (sock != NULL) {
1da177e4
LT
1703 err = security_socket_getpeername(sock);
1704 if (err) {
6cb153ca 1705 fput_light(sock->file, fput_needed);
1da177e4
LT
1706 return err;
1707 }
1708
89bddce5 1709 err =
230b1839 1710 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1711 1);
1da177e4 1712 if (!err)
230b1839 1713 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr,
89bddce5 1714 usockaddr_len);
6cb153ca 1715 fput_light(sock->file, fput_needed);
1da177e4
LT
1716 }
1717 return err;
1718}
1719
1720/*
1721 * Send a datagram to a given address. We move the address into kernel
1722 * space and check the user space data area is readable before invoking
1723 * the protocol.
1724 */
1725
3e0fa65f
HC
1726SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1727 unsigned, flags, struct sockaddr __user *, addr,
1728 int, addr_len)
1da177e4
LT
1729{
1730 struct socket *sock;
230b1839 1731 struct sockaddr_storage address;
1da177e4
LT
1732 int err;
1733 struct msghdr msg;
1734 struct iovec iov;
6cb153ca 1735 int fput_needed;
6cb153ca 1736
de0fa95c
PE
1737 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1738 if (!sock)
4387ff75 1739 goto out;
6cb153ca 1740
89bddce5
SH
1741 iov.iov_base = buff;
1742 iov.iov_len = len;
1743 msg.msg_name = NULL;
1744 msg.msg_iov = &iov;
1745 msg.msg_iovlen = 1;
1746 msg.msg_control = NULL;
1747 msg.msg_controllen = 0;
1748 msg.msg_namelen = 0;
6cb153ca 1749 if (addr) {
230b1839 1750 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address);
1da177e4
LT
1751 if (err < 0)
1752 goto out_put;
230b1839 1753 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1754 msg.msg_namelen = addr_len;
1da177e4
LT
1755 }
1756 if (sock->file->f_flags & O_NONBLOCK)
1757 flags |= MSG_DONTWAIT;
1758 msg.msg_flags = flags;
1759 err = sock_sendmsg(sock, &msg, len);
1760
89bddce5 1761out_put:
de0fa95c 1762 fput_light(sock->file, fput_needed);
4387ff75 1763out:
1da177e4
LT
1764 return err;
1765}
1766
1767/*
89bddce5 1768 * Send a datagram down a socket.
1da177e4
LT
1769 */
1770
3e0fa65f
HC
1771SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
1772 unsigned, flags)
1da177e4
LT
1773{
1774 return sys_sendto(fd, buff, len, flags, NULL, 0);
1775}
1776
1777/*
89bddce5 1778 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1779 * sender. We verify the buffers are writable and if needed move the
1780 * sender address from kernel to user space.
1781 */
1782
3e0fa65f
HC
1783SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1784 unsigned, flags, struct sockaddr __user *, addr,
1785 int __user *, addr_len)
1da177e4
LT
1786{
1787 struct socket *sock;
1788 struct iovec iov;
1789 struct msghdr msg;
230b1839 1790 struct sockaddr_storage address;
89bddce5 1791 int err, err2;
6cb153ca
BL
1792 int fput_needed;
1793
de0fa95c 1794 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1795 if (!sock)
de0fa95c 1796 goto out;
1da177e4 1797
89bddce5
SH
1798 msg.msg_control = NULL;
1799 msg.msg_controllen = 0;
1800 msg.msg_iovlen = 1;
1801 msg.msg_iov = &iov;
1802 iov.iov_len = size;
1803 iov.iov_base = ubuf;
230b1839
YH
1804 msg.msg_name = (struct sockaddr *)&address;
1805 msg.msg_namelen = sizeof(address);
1da177e4
LT
1806 if (sock->file->f_flags & O_NONBLOCK)
1807 flags |= MSG_DONTWAIT;
89bddce5 1808 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1809
89bddce5 1810 if (err >= 0 && addr != NULL) {
230b1839
YH
1811 err2 = move_addr_to_user((struct sockaddr *)&address,
1812 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1813 if (err2 < 0)
1814 err = err2;
1da177e4 1815 }
de0fa95c
PE
1816
1817 fput_light(sock->file, fput_needed);
4387ff75 1818out:
1da177e4
LT
1819 return err;
1820}
1821
1822/*
89bddce5 1823 * Receive a datagram from a socket.
1da177e4
LT
1824 */
1825
89bddce5
SH
1826asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1827 unsigned flags)
1da177e4
LT
1828{
1829 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1830}
1831
1832/*
1833 * Set a socket option. Because we don't know the option lengths we have
1834 * to pass the user mode parameter for the protocols to sort out.
1835 */
1836
20f37034
HC
1837SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1838 char __user *, optval, int, optlen)
1da177e4 1839{
6cb153ca 1840 int err, fput_needed;
1da177e4
LT
1841 struct socket *sock;
1842
1843 if (optlen < 0)
1844 return -EINVAL;
89bddce5
SH
1845
1846 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1847 if (sock != NULL) {
1848 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1849 if (err)
1850 goto out_put;
1da177e4
LT
1851
1852 if (level == SOL_SOCKET)
89bddce5
SH
1853 err =
1854 sock_setsockopt(sock, level, optname, optval,
1855 optlen);
1da177e4 1856 else
89bddce5
SH
1857 err =
1858 sock->ops->setsockopt(sock, level, optname, optval,
1859 optlen);
6cb153ca
BL
1860out_put:
1861 fput_light(sock->file, fput_needed);
1da177e4
LT
1862 }
1863 return err;
1864}
1865
1866/*
1867 * Get a socket option. Because we don't know the option lengths we have
1868 * to pass a user mode parameter for the protocols to sort out.
1869 */
1870
20f37034
HC
1871SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1872 char __user *, optval, int __user *, optlen)
1da177e4 1873{
6cb153ca 1874 int err, fput_needed;
1da177e4
LT
1875 struct socket *sock;
1876
89bddce5
SH
1877 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1878 if (sock != NULL) {
6cb153ca
BL
1879 err = security_socket_getsockopt(sock, level, optname);
1880 if (err)
1881 goto out_put;
1da177e4
LT
1882
1883 if (level == SOL_SOCKET)
89bddce5
SH
1884 err =
1885 sock_getsockopt(sock, level, optname, optval,
1886 optlen);
1da177e4 1887 else
89bddce5
SH
1888 err =
1889 sock->ops->getsockopt(sock, level, optname, optval,
1890 optlen);
6cb153ca
BL
1891out_put:
1892 fput_light(sock->file, fput_needed);
1da177e4
LT
1893 }
1894 return err;
1895}
1896
1da177e4
LT
1897/*
1898 * Shutdown a socket.
1899 */
1900
754fe8d2 1901SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1902{
6cb153ca 1903 int err, fput_needed;
1da177e4
LT
1904 struct socket *sock;
1905
89bddce5
SH
1906 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1907 if (sock != NULL) {
1da177e4 1908 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1909 if (!err)
1910 err = sock->ops->shutdown(sock, how);
1911 fput_light(sock->file, fput_needed);
1da177e4
LT
1912 }
1913 return err;
1914}
1915
89bddce5 1916/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1917 * fields which are the same type (int / unsigned) on our platforms.
1918 */
1919#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
1920#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1921#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1922
1da177e4
LT
1923/*
1924 * BSD sendmsg interface
1925 */
1926
3e0fa65f 1927SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1da177e4 1928{
89bddce5
SH
1929 struct compat_msghdr __user *msg_compat =
1930 (struct compat_msghdr __user *)msg;
1da177e4 1931 struct socket *sock;
230b1839 1932 struct sockaddr_storage address;
1da177e4 1933 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1934 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1935 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1936 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1937 unsigned char *ctl_buf = ctl;
1938 struct msghdr msg_sys;
1939 int err, ctl_len, iov_size, total_len;
6cb153ca 1940 int fput_needed;
89bddce5 1941
1da177e4
LT
1942 err = -EFAULT;
1943 if (MSG_CMSG_COMPAT & flags) {
1944 if (get_compat_msghdr(&msg_sys, msg_compat))
1945 return -EFAULT;
89bddce5
SH
1946 }
1947 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1948 return -EFAULT;
1949
6cb153ca 1950 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1951 if (!sock)
1da177e4
LT
1952 goto out;
1953
1954 /* do not move before msg_sys is valid */
1955 err = -EMSGSIZE;
1956 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1957 goto out_put;
1958
89bddce5 1959 /* Check whether to allocate the iovec area */
1da177e4
LT
1960 err = -ENOMEM;
1961 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1962 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1963 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1964 if (!iov)
1965 goto out_put;
1966 }
1967
1968 /* This will also move the address data into kernel space */
1969 if (MSG_CMSG_COMPAT & flags) {
230b1839
YH
1970 err = verify_compat_iovec(&msg_sys, iov,
1971 (struct sockaddr *)&address,
1972 VERIFY_READ);
1da177e4 1973 } else
230b1839
YH
1974 err = verify_iovec(&msg_sys, iov,
1975 (struct sockaddr *)&address,
1976 VERIFY_READ);
89bddce5 1977 if (err < 0)
1da177e4
LT
1978 goto out_freeiov;
1979 total_len = err;
1980
1981 err = -ENOBUFS;
1982
1983 if (msg_sys.msg_controllen > INT_MAX)
1984 goto out_freeiov;
89bddce5 1985 ctl_len = msg_sys.msg_controllen;
1da177e4 1986 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1987 err =
1988 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1989 sizeof(ctl));
1da177e4
LT
1990 if (err)
1991 goto out_freeiov;
1992 ctl_buf = msg_sys.msg_control;
8920e8f9 1993 ctl_len = msg_sys.msg_controllen;
1da177e4 1994 } else if (ctl_len) {
89bddce5 1995 if (ctl_len > sizeof(ctl)) {
1da177e4 1996 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1997 if (ctl_buf == NULL)
1da177e4
LT
1998 goto out_freeiov;
1999 }
2000 err = -EFAULT;
2001 /*
2002 * Careful! Before this, msg_sys.msg_control contains a user pointer.
2003 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2004 * checking falls down on this.
2005 */
89bddce5
SH
2006 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
2007 ctl_len))
1da177e4
LT
2008 goto out_freectl;
2009 msg_sys.msg_control = ctl_buf;
2010 }
2011 msg_sys.msg_flags = flags;
2012
2013 if (sock->file->f_flags & O_NONBLOCK)
2014 msg_sys.msg_flags |= MSG_DONTWAIT;
2015 err = sock_sendmsg(sock, &msg_sys, total_len);
2016
2017out_freectl:
89bddce5 2018 if (ctl_buf != ctl)
1da177e4
LT
2019 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2020out_freeiov:
2021 if (iov != iovstack)
2022 sock_kfree_s(sock->sk, iov, iov_size);
2023out_put:
6cb153ca 2024 fput_light(sock->file, fput_needed);
89bddce5 2025out:
1da177e4
LT
2026 return err;
2027}
2028
a2e27255
ACM
2029static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
2030 struct msghdr *msg_sys, unsigned flags, int nosec)
1da177e4 2031{
89bddce5
SH
2032 struct compat_msghdr __user *msg_compat =
2033 (struct compat_msghdr __user *)msg;
1da177e4 2034 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2035 struct iovec *iov = iovstack;
1da177e4
LT
2036 unsigned long cmsg_ptr;
2037 int err, iov_size, total_len, len;
2038
2039 /* kernel mode address */
230b1839 2040 struct sockaddr_storage addr;
1da177e4
LT
2041
2042 /* user mode address pointers */
2043 struct sockaddr __user *uaddr;
2044 int __user *uaddr_len;
89bddce5 2045
1da177e4 2046 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2047 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2048 return -EFAULT;
89bddce5 2049 }
a2e27255 2050 else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
89bddce5 2051 return -EFAULT;
1da177e4 2052
1da177e4 2053 err = -EMSGSIZE;
a2e27255
ACM
2054 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2055 goto out;
89bddce5
SH
2056
2057 /* Check whether to allocate the iovec area */
1da177e4 2058 err = -ENOMEM;
a2e27255
ACM
2059 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
2060 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1da177e4
LT
2061 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
2062 if (!iov)
a2e27255 2063 goto out;
1da177e4
LT
2064 }
2065
2066 /*
89bddce5
SH
2067 * Save the user-mode address (verify_iovec will change the
2068 * kernel msghdr to use the kernel address space)
1da177e4 2069 */
89bddce5 2070
a2e27255 2071 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4
LT
2072 uaddr_len = COMPAT_NAMELEN(msg);
2073 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2074 err = verify_compat_iovec(msg_sys, iov,
230b1839
YH
2075 (struct sockaddr *)&addr,
2076 VERIFY_WRITE);
1da177e4 2077 } else
a2e27255 2078 err = verify_iovec(msg_sys, iov,
230b1839
YH
2079 (struct sockaddr *)&addr,
2080 VERIFY_WRITE);
1da177e4
LT
2081 if (err < 0)
2082 goto out_freeiov;
89bddce5 2083 total_len = err;
1da177e4 2084
a2e27255
ACM
2085 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2086 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2087
1da177e4
LT
2088 if (sock->file->f_flags & O_NONBLOCK)
2089 flags |= MSG_DONTWAIT;
a2e27255
ACM
2090 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2091 total_len, flags);
1da177e4
LT
2092 if (err < 0)
2093 goto out_freeiov;
2094 len = err;
2095
2096 if (uaddr != NULL) {
230b1839 2097 err = move_addr_to_user((struct sockaddr *)&addr,
a2e27255 2098 msg_sys->msg_namelen, uaddr,
89bddce5 2099 uaddr_len);
1da177e4
LT
2100 if (err < 0)
2101 goto out_freeiov;
2102 }
a2e27255 2103 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2104 COMPAT_FLAGS(msg));
1da177e4
LT
2105 if (err)
2106 goto out_freeiov;
2107 if (MSG_CMSG_COMPAT & flags)
a2e27255 2108 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2109 &msg_compat->msg_controllen);
2110 else
a2e27255 2111 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2112 &msg->msg_controllen);
2113 if (err)
2114 goto out_freeiov;
2115 err = len;
2116
2117out_freeiov:
2118 if (iov != iovstack)
2119 sock_kfree_s(sock->sk, iov, iov_size);
a2e27255
ACM
2120out:
2121 return err;
2122}
2123
2124/*
2125 * BSD recvmsg interface
2126 */
2127
2128SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2129 unsigned int, flags)
2130{
2131 int fput_needed, err;
2132 struct msghdr msg_sys;
2133 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2134
2135 if (!sock)
2136 goto out;
2137
2138 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2139
6cb153ca 2140 fput_light(sock->file, fput_needed);
1da177e4
LT
2141out:
2142 return err;
2143}
2144
a2e27255
ACM
2145/*
2146 * Linux recvmmsg interface
2147 */
2148
2149int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2150 unsigned int flags, struct timespec *timeout)
2151{
2152 int fput_needed, err, datagrams;
2153 struct socket *sock;
2154 struct mmsghdr __user *entry;
2155 struct msghdr msg_sys;
2156 struct timespec end_time;
2157
2158 if (timeout &&
2159 poll_select_set_timeout(&end_time, timeout->tv_sec,
2160 timeout->tv_nsec))
2161 return -EINVAL;
2162
2163 datagrams = 0;
2164
2165 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2166 if (!sock)
2167 return err;
2168
2169 err = sock_error(sock->sk);
2170 if (err)
2171 goto out_put;
2172
2173 entry = mmsg;
2174
2175 while (datagrams < vlen) {
2176 /*
2177 * No need to ask LSM for more than the first datagram.
2178 */
2179 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
2180 &msg_sys, flags, datagrams);
2181 if (err < 0)
2182 break;
2183 err = put_user(err, &entry->msg_len);
2184 if (err)
2185 break;
2186 ++entry;
2187 ++datagrams;
2188
2189 if (timeout) {
2190 ktime_get_ts(timeout);
2191 *timeout = timespec_sub(end_time, *timeout);
2192 if (timeout->tv_sec < 0) {
2193 timeout->tv_sec = timeout->tv_nsec = 0;
2194 break;
2195 }
2196
2197 /* Timeout, return less than vlen datagrams */
2198 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2199 break;
2200 }
2201
2202 /* Out of band data, return right away */
2203 if (msg_sys.msg_flags & MSG_OOB)
2204 break;
2205 }
2206
2207out_put:
2208 fput_light(sock->file, fput_needed);
1da177e4 2209
a2e27255
ACM
2210 if (err == 0)
2211 return datagrams;
2212
2213 if (datagrams != 0) {
2214 /*
2215 * We may return less entries than requested (vlen) if the
2216 * sock is non block and there aren't enough datagrams...
2217 */
2218 if (err != -EAGAIN) {
2219 /*
2220 * ... or if recvmsg returns an error after we
2221 * received some datagrams, where we record the
2222 * error to return on the next call or if the
2223 * app asks about it using getsockopt(SO_ERROR).
2224 */
2225 sock->sk->sk_err = -err;
2226 }
2227
2228 return datagrams;
2229 }
2230
2231 return err;
2232}
2233
2234SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2235 unsigned int, vlen, unsigned int, flags,
2236 struct timespec __user *, timeout)
2237{
2238 int datagrams;
2239 struct timespec timeout_sys;
2240
2241 if (!timeout)
2242 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2243
2244 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2245 return -EFAULT;
2246
2247 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2248
2249 if (datagrams > 0 &&
2250 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2251 datagrams = -EFAULT;
2252
2253 return datagrams;
2254}
2255
2256#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Size in bytes of the argument vector for each sys_socketcall sub-call,
 * indexed by call number.  AL(n) is the size of n unsigned longs.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[20] = {
	/* calls  0 -  5 */
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	/* calls  6 - 11 */
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	/* calls 12 - 17 */
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	/* calls 18 - 19 */
	AL(4), AL(5)
};

#undef AL
2267
2268/*
89bddce5 2269 * System call vectors.
1da177e4
LT
2270 *
2271 * Argument checking cleaned up. Saved 20% in size.
2272 * This function doesn't need to set the kernel lock because
89bddce5 2273 * it is set by the callees.
1da177e4
LT
2274 */
2275
3e0fa65f 2276SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4
LT
2277{
2278 unsigned long a[6];
89bddce5 2279 unsigned long a0, a1;
1da177e4 2280 int err;
47379052 2281 unsigned int len;
1da177e4 2282
a2e27255 2283 if (call < 1 || call > SYS_RECVMMSG)
1da177e4
LT
2284 return -EINVAL;
2285
47379052
AV
2286 len = nargs[call];
2287 if (len > sizeof(a))
2288 return -EINVAL;
2289
1da177e4 2290 /* copy_from_user should be SMP safe. */
47379052 2291 if (copy_from_user(a, args, len))
1da177e4 2292 return -EFAULT;
3ec3b2fb 2293
f3298dc4 2294 audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb 2295
89bddce5
SH
2296 a0 = a[0];
2297 a1 = a[1];
2298
2299 switch (call) {
2300 case SYS_SOCKET:
2301 err = sys_socket(a0, a1, a[2]);
2302 break;
2303 case SYS_BIND:
2304 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2305 break;
2306 case SYS_CONNECT:
2307 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2308 break;
2309 case SYS_LISTEN:
2310 err = sys_listen(a0, a1);
2311 break;
2312 case SYS_ACCEPT:
de11defe
UD
2313 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2314 (int __user *)a[2], 0);
89bddce5
SH
2315 break;
2316 case SYS_GETSOCKNAME:
2317 err =
2318 sys_getsockname(a0, (struct sockaddr __user *)a1,
2319 (int __user *)a[2]);
2320 break;
2321 case SYS_GETPEERNAME:
2322 err =
2323 sys_getpeername(a0, (struct sockaddr __user *)a1,
2324 (int __user *)a[2]);
2325 break;
2326 case SYS_SOCKETPAIR:
2327 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2328 break;
2329 case SYS_SEND:
2330 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2331 break;
2332 case SYS_SENDTO:
2333 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2334 (struct sockaddr __user *)a[4], a[5]);
2335 break;
2336 case SYS_RECV:
2337 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2338 break;
2339 case SYS_RECVFROM:
2340 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2341 (struct sockaddr __user *)a[4],
2342 (int __user *)a[5]);
2343 break;
2344 case SYS_SHUTDOWN:
2345 err = sys_shutdown(a0, a1);
2346 break;
2347 case SYS_SETSOCKOPT:
2348 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2349 break;
2350 case SYS_GETSOCKOPT:
2351 err =
2352 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2353 (int __user *)a[4]);
2354 break;
2355 case SYS_SENDMSG:
2356 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2357 break;
2358 case SYS_RECVMSG:
2359 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2360 break;
a2e27255
ACM
2361 case SYS_RECVMMSG:
2362 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2363 (struct timespec __user *)a[4]);
2364 break;
de11defe
UD
2365 case SYS_ACCEPT4:
2366 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2367 (int __user *)a[2], a[3]);
aaca0bdc 2368 break;
89bddce5
SH
2369 default:
2370 err = -EINVAL;
2371 break;
1da177e4
LT
2372 }
2373 return err;
2374}
2375
89bddce5 2376#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2377
55737fda
SH
2378/**
2379 * sock_register - add a socket protocol handler
2380 * @ops: description of protocol
2381 *
1da177e4
LT
2382 * This function is called by a protocol handler that wants to
2383 * advertise its address family, and have it linked into the
55737fda
SH
2384 * socket interface. The value ops->family coresponds to the
2385 * socket system call protocol family.
1da177e4 2386 */
f0fd27d4 2387int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2388{
2389 int err;
2390
2391 if (ops->family >= NPROTO) {
89bddce5
SH
2392 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2393 NPROTO);
1da177e4
LT
2394 return -ENOBUFS;
2395 }
55737fda
SH
2396
2397 spin_lock(&net_family_lock);
2398 if (net_families[ops->family])
2399 err = -EEXIST;
2400 else {
89bddce5 2401 net_families[ops->family] = ops;
1da177e4
LT
2402 err = 0;
2403 }
55737fda
SH
2404 spin_unlock(&net_family_lock);
2405
89bddce5 2406 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2407 return err;
2408}
2409
55737fda
SH
2410/**
2411 * sock_unregister - remove a protocol handler
2412 * @family: protocol family to remove
2413 *
1da177e4
LT
2414 * This function is called by a protocol handler that wants to
2415 * remove its address family, and have it unlinked from the
55737fda
SH
2416 * new socket creation.
2417 *
2418 * If protocol handler is a module, then it can use module reference
2419 * counts to protect against new references. If protocol handler is not
2420 * a module then it needs to provide its own protection in
2421 * the ops->create routine.
1da177e4 2422 */
f0fd27d4 2423void sock_unregister(int family)
1da177e4 2424{
f0fd27d4 2425 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2426
55737fda 2427 spin_lock(&net_family_lock);
89bddce5 2428 net_families[family] = NULL;
55737fda
SH
2429 spin_unlock(&net_family_lock);
2430
2431 synchronize_rcu();
2432
89bddce5 2433 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2434}
2435
77d76ea3 2436static int __init sock_init(void)
1da177e4
LT
2437{
2438 /*
89bddce5 2439 * Initialize sock SLAB cache.
1da177e4 2440 */
89bddce5 2441
1da177e4
LT
2442 sk_init();
2443
1da177e4 2444 /*
89bddce5 2445 * Initialize skbuff SLAB cache
1da177e4
LT
2446 */
2447 skb_init();
1da177e4
LT
2448
2449 /*
89bddce5 2450 * Initialize the protocols module.
1da177e4
LT
2451 */
2452
2453 init_inodecache();
2454 register_filesystem(&sock_fs_type);
2455 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2456
2457 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2458 */
2459
2460#ifdef CONFIG_NETFILTER
2461 netfilter_init();
2462#endif
cbeb321a
DM
2463
2464 return 0;
1da177e4
LT
2465}
2466
77d76ea3
AK
2467core_initcall(sock_init); /* early initcall */
2468
1da177e4
LT
2469#ifdef CONFIG_PROC_FS
2470void socket_seq_show(struct seq_file *seq)
2471{
2472 int cpu;
2473 int counter = 0;
2474
6f912042 2475 for_each_possible_cpu(cpu)
89bddce5 2476 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2477
2478 /* It can be negative, by the way. 8) */
2479 if (counter < 0)
2480 counter = 0;
2481
2482 seq_printf(seq, "sockets: used %d\n", counter);
2483}
89bddce5 2484#endif /* CONFIG_PROC_FS */
1da177e4 2485
89bbfc95 2486#ifdef CONFIG_COMPAT
6b96018b
AB
2487static int do_siocgstamp(struct net *net, struct socket *sock,
2488 unsigned int cmd, struct compat_timeval __user *up)
7a229387 2489{
7a229387
AB
2490 mm_segment_t old_fs = get_fs();
2491 struct timeval ktv;
2492 int err;
2493
2494 set_fs(KERNEL_DS);
6b96018b 2495 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387
AB
2496 set_fs(old_fs);
2497 if (!err) {
2498 err = put_user(ktv.tv_sec, &up->tv_sec);
2499 err |= __put_user(ktv.tv_usec, &up->tv_usec);
2500 }
2501 return err;
2502}
2503
6b96018b
AB
2504static int do_siocgstampns(struct net *net, struct socket *sock,
2505 unsigned int cmd, struct compat_timespec __user *up)
7a229387 2506{
7a229387
AB
2507 mm_segment_t old_fs = get_fs();
2508 struct timespec kts;
2509 int err;
2510
2511 set_fs(KERNEL_DS);
6b96018b 2512 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387
AB
2513 set_fs(old_fs);
2514 if (!err) {
2515 err = put_user(kts.tv_sec, &up->tv_sec);
2516 err |= __put_user(kts.tv_nsec, &up->tv_nsec);
2517 }
2518 return err;
2519}
2520
6b96018b 2521static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2522{
2523 struct ifreq __user *uifr;
2524 int err;
2525
2526 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2527 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2528 return -EFAULT;
2529
6b96018b 2530 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2531 if (err)
2532 return err;
2533
6b96018b 2534 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2535 return -EFAULT;
2536
2537 return 0;
2538}
2539
6b96018b 2540static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2541{
6b96018b 2542 struct compat_ifconf ifc32;
7a229387
AB
2543 struct ifconf ifc;
2544 struct ifconf __user *uifc;
6b96018b 2545 struct compat_ifreq __user *ifr32;
7a229387
AB
2546 struct ifreq __user *ifr;
2547 unsigned int i, j;
2548 int err;
2549
6b96018b 2550 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2551 return -EFAULT;
2552
2553 if (ifc32.ifcbuf == 0) {
2554 ifc32.ifc_len = 0;
2555 ifc.ifc_len = 0;
2556 ifc.ifc_req = NULL;
2557 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2558 } else {
6b96018b 2559 size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) *
7a229387
AB
2560 sizeof (struct ifreq);
2561 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2562 ifc.ifc_len = len;
2563 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2564 ifr32 = compat_ptr(ifc32.ifcbuf);
6b96018b
AB
2565 for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) {
2566 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2567 return -EFAULT;
2568 ifr++;
2569 ifr32++;
2570 }
2571 }
2572 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2573 return -EFAULT;
2574
6b96018b 2575 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2576 if (err)
2577 return err;
2578
2579 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2580 return -EFAULT;
2581
2582 ifr = ifc.ifc_req;
2583 ifr32 = compat_ptr(ifc32.ifcbuf);
2584 for (i = 0, j = 0;
6b96018b
AB
2585 i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2586 i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) {
2587 if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq)))
7a229387
AB
2588 return -EFAULT;
2589 ifr32++;
2590 ifr++;
2591 }
2592
2593 if (ifc32.ifcbuf == 0) {
2594 /* Translate from 64-bit structure multiple to
2595 * a 32-bit one.
2596 */
2597 i = ifc.ifc_len;
6b96018b 2598 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2599 ifc32.ifc_len = i;
2600 } else {
2601 ifc32.ifc_len = i;
2602 }
6b96018b 2603 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2604 return -EFAULT;
2605
2606 return 0;
2607}
2608
6b96018b 2609static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387
AB
2610{
2611 struct ifreq __user *ifr;
7a229387
AB
2612 u32 data;
2613 void __user *datap;
2614
2615 ifr = compat_alloc_user_space(sizeof(*ifr));
7a229387
AB
2616
2617 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2618 return -EFAULT;
2619
2620 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2621 return -EFAULT;
2622
2623 datap = compat_ptr(data);
2624 if (put_user(datap, &ifr->ifr_ifru.ifru_data))
2625 return -EFAULT;
2626
6b96018b 2627 return dev_ioctl(net, SIOCETHTOOL, ifr);
7a229387
AB
2628}
2629
7a50a240
AB
2630static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2631{
2632 void __user *uptr;
2633 compat_uptr_t uptr32;
2634 struct ifreq __user *uifr;
2635
2636 uifr = compat_alloc_user_space(sizeof (*uifr));
2637 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2638 return -EFAULT;
2639
2640 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2641 return -EFAULT;
2642
2643 uptr = compat_ptr(uptr32);
2644
2645 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2646 return -EFAULT;
2647
2648 return dev_ioctl(net, SIOCWANDEV, uifr);
2649}
2650
6b96018b
AB
2651static int bond_ioctl(struct net *net, unsigned int cmd,
2652 struct compat_ifreq __user *ifr32)
7a229387
AB
2653{
2654 struct ifreq kifr;
2655 struct ifreq __user *uifr;
7a229387
AB
2656 mm_segment_t old_fs;
2657 int err;
2658 u32 data;
2659 void __user *datap;
2660
2661 switch (cmd) {
2662 case SIOCBONDENSLAVE:
2663 case SIOCBONDRELEASE:
2664 case SIOCBONDSETHWADDR:
2665 case SIOCBONDCHANGEACTIVE:
6b96018b 2666 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2667 return -EFAULT;
2668
2669 old_fs = get_fs();
2670 set_fs (KERNEL_DS);
6b96018b 2671 err = dev_ioctl(net, cmd, &kifr);
7a229387
AB
2672 set_fs (old_fs);
2673
2674 return err;
2675 case SIOCBONDSLAVEINFOQUERY:
2676 case SIOCBONDINFOQUERY:
2677 uifr = compat_alloc_user_space(sizeof(*uifr));
2678 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2679 return -EFAULT;
2680
2681 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2682 return -EFAULT;
2683
2684 datap = compat_ptr(data);
2685 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2686 return -EFAULT;
2687
6b96018b 2688 return dev_ioctl(net, cmd, uifr);
7a229387
AB
2689 default:
2690 return -EINVAL;
2691 };
2692}
2693
6b96018b
AB
2694static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2695 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2696{
2697 struct ifreq __user *u_ifreq64;
7a229387
AB
2698 char tmp_buf[IFNAMSIZ];
2699 void __user *data64;
2700 u32 data32;
2701
2702 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2703 IFNAMSIZ))
2704 return -EFAULT;
2705 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2706 return -EFAULT;
2707 data64 = compat_ptr(data32);
2708
2709 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2710
2711 /* Don't check these user accesses, just let that get trapped
2712 * in the ioctl handler instead.
2713 */
2714 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2715 IFNAMSIZ))
2716 return -EFAULT;
2717 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
2718 return -EFAULT;
2719
6b96018b 2720 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2721}
2722
6b96018b
AB
2723static int dev_ifsioc(struct net *net, struct socket *sock,
2724 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387
AB
2725{
2726 struct ifreq ifr;
6b96018b 2727 struct compat_ifmap __user *uifmap32;
7a229387
AB
2728 mm_segment_t old_fs;
2729 int err;
2730
7a229387
AB
2731 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2732 switch (cmd) {
2733 case SIOCSIFMAP:
2734 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
2735 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2736 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2737 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2738 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
2739 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
2740 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
2741 if (err)
2742 return -EFAULT;
2743 break;
2744 case SIOCSHWTSTAMP:
2745 if (copy_from_user(&ifr, uifr32, sizeof(*uifr32)))
2746 return -EFAULT;
2747 ifr.ifr_data = compat_ptr(uifr32->ifr_ifru.ifru_data);
2748 break;
2749 default:
2750 if (copy_from_user(&ifr, uifr32, sizeof(*uifr32)))
2751 return -EFAULT;
2752 break;
2753 }
2754 old_fs = get_fs();
2755 set_fs (KERNEL_DS);
6b96018b 2756 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ifr);
7a229387
AB
2757 set_fs (old_fs);
2758 if (!err) {
2759 switch (cmd) {
2760 case SIOCGIFFLAGS:
2761 case SIOCGIFMETRIC:
2762 case SIOCGIFMTU:
2763 case SIOCGIFMEM:
2764 case SIOCGIFHWADDR:
2765 case SIOCGIFINDEX:
2766 case SIOCGIFADDR:
2767 case SIOCGIFBRDADDR:
2768 case SIOCGIFDSTADDR:
2769 case SIOCGIFNETMASK:
fab2532b 2770 case SIOCGIFPFLAGS:
7a229387 2771 case SIOCGIFTXQLEN:
fab2532b
AB
2772 case SIOCGMIIPHY:
2773 case SIOCGMIIREG:
7a229387
AB
2774 if (copy_to_user(uifr32, &ifr, sizeof(*uifr32)))
2775 return -EFAULT;
2776 break;
2777 case SIOCGIFMAP:
2778 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
2779 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2780 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2781 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2782 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
2783 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
2784 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
2785 if (err)
2786 err = -EFAULT;
2787 break;
2788 }
2789 }
2790 return err;
2791}
2792
2793struct rtentry32 {
2794 u32 rt_pad1;
2795 struct sockaddr rt_dst; /* target address */
2796 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2797 struct sockaddr rt_genmask; /* target network mask (IP) */
2798 unsigned short rt_flags;
2799 short rt_pad2;
2800 u32 rt_pad3;
2801 unsigned char rt_tos;
2802 unsigned char rt_class;
2803 short rt_pad4;
2804 short rt_metric; /* +1 for binary compatibility! */
2805 /* char * */ u32 rt_dev; /* forcing the device at add */
2806 u32 rt_mtu; /* per route MTU/Window */
2807 u32 rt_window; /* Window clamping */
2808 unsigned short rt_irtt; /* Initial RTT */
2809};
2810
2811struct in6_rtmsg32 {
2812 struct in6_addr rtmsg_dst;
2813 struct in6_addr rtmsg_src;
2814 struct in6_addr rtmsg_gateway;
2815 u32 rtmsg_type;
2816 u16 rtmsg_dst_len;
2817 u16 rtmsg_src_len;
2818 u32 rtmsg_metric;
2819 u32 rtmsg_info;
2820 u32 rtmsg_flags;
2821 s32 rtmsg_ifindex;
2822};
2823
6b96018b
AB
2824static int routing_ioctl(struct net *net, struct socket *sock,
2825 unsigned int cmd, void __user *argp)
7a229387
AB
2826{
2827 int ret;
2828 void *r = NULL;
2829 struct in6_rtmsg r6;
2830 struct rtentry r4;
2831 char devname[16];
2832 u32 rtdev;
2833 mm_segment_t old_fs = get_fs();
2834
6b96018b
AB
2835 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2836 struct in6_rtmsg32 __user *ur6 = argp;
7a229387
AB
2837 ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst),
2838 3 * sizeof(struct in6_addr));
2839 ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type));
2840 ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
2841 ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
2842 ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric));
2843 ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info));
2844 ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags));
2845 ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
2846
2847 r = (void *) &r6;
2848 } else { /* ipv4 */
6b96018b 2849 struct rtentry32 __user *ur4 = argp;
7a229387
AB
2850 ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst),
2851 3 * sizeof(struct sockaddr));
2852 ret |= __get_user (r4.rt_flags, &(ur4->rt_flags));
2853 ret |= __get_user (r4.rt_metric, &(ur4->rt_metric));
2854 ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu));
2855 ret |= __get_user (r4.rt_window, &(ur4->rt_window));
2856 ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt));
2857 ret |= __get_user (rtdev, &(ur4->rt_dev));
2858 if (rtdev) {
2859 ret |= copy_from_user (devname, compat_ptr(rtdev), 15);
2860 r4.rt_dev = devname; devname[15] = 0;
2861 } else
2862 r4.rt_dev = NULL;
2863
2864 r = (void *) &r4;
2865 }
2866
2867 if (ret) {
2868 ret = -EFAULT;
2869 goto out;
2870 }
2871
2872 set_fs (KERNEL_DS);
6b96018b 2873 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
7a229387
AB
2874 set_fs (old_fs);
2875
2876out:
7a229387
AB
2877 return ret;
2878}
2879
2880/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
2881 * for some operations; this forces use of the newer bridge-utils that
2882 * use compatiable ioctls
2883 */
6b96018b 2884static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 2885{
6b96018b 2886 compat_ulong_t tmp;
7a229387 2887
6b96018b 2888 if (get_user(tmp, argp))
7a229387
AB
2889 return -EFAULT;
2890 if (tmp == BRCTL_GET_VERSION)
2891 return BRCTL_VERSION + 1;
2892 return -EINVAL;
2893}
2894
2895struct atmif_sioc32 {
2896 compat_int_t number;
2897 compat_int_t length;
2898 compat_caddr_t arg;
2899};
2900
2901struct atm_iobuf32 {
2902 compat_int_t length;
2903 compat_caddr_t buffer;
2904};
2905
/*
 * ATM ioctl numbers encoded with the size of the compat argument
 * structures; atm_ioctl_map[] pairs each with its native command.
 */
#define ATM_GETLINKRATE32	_IOW('a', ATMIOC_ITF+1, struct atmif_sioc32)
#define ATM_GETNAMES32		_IOW('a', ATMIOC_ITF+3, struct atm_iobuf32)
#define ATM_GETTYPE32		_IOW('a', ATMIOC_ITF+4, struct atmif_sioc32)
#define ATM_GETESI32		_IOW('a', ATMIOC_ITF+5, struct atmif_sioc32)
#define ATM_GETADDR32		_IOW('a', ATMIOC_ITF+6, struct atmif_sioc32)
#define ATM_RSTADDR32		_IOW('a', ATMIOC_ITF+7, struct atmif_sioc32)
#define ATM_ADDADDR32		_IOW('a', ATMIOC_ITF+8, struct atmif_sioc32)
#define ATM_DELADDR32		_IOW('a', ATMIOC_ITF+9, struct atmif_sioc32)
#define ATM_GETCIRANGE32	_IOW('a', ATMIOC_ITF+10, struct atmif_sioc32)
#define ATM_SETCIRANGE32	_IOW('a', ATMIOC_ITF+11, struct atmif_sioc32)
#define ATM_SETESI32		_IOW('a', ATMIOC_ITF+12, struct atmif_sioc32)
#define ATM_SETESIF32		_IOW('a', ATMIOC_ITF+13, struct atmif_sioc32)
#define ATM_GETSTAT32		_IOW('a', ATMIOC_SARCOM+0, struct atmif_sioc32)
#define ATM_GETSTATZ32		_IOW('a', ATMIOC_SARCOM+1, struct atmif_sioc32)
#define ATM_GETLOOP32		_IOW('a', ATMIOC_SARCOM+2, struct atmif_sioc32)
#define ATM_SETLOOP32		_IOW('a', ATMIOC_SARCOM+3, struct atmif_sioc32)
#define ATM_QUERYLOOP32		_IOW('a', ATMIOC_SARCOM+4, struct atmif_sioc32)
2923
2924static struct {
2925 unsigned int cmd32;
2926 unsigned int cmd;
2927} atm_ioctl_map[] = {
2928 { ATM_GETLINKRATE32, ATM_GETLINKRATE },
2929 { ATM_GETNAMES32, ATM_GETNAMES },
2930 { ATM_GETTYPE32, ATM_GETTYPE },
2931 { ATM_GETESI32, ATM_GETESI },
2932 { ATM_GETADDR32, ATM_GETADDR },
2933 { ATM_RSTADDR32, ATM_RSTADDR },
2934 { ATM_ADDADDR32, ATM_ADDADDR },
2935 { ATM_DELADDR32, ATM_DELADDR },
2936 { ATM_GETCIRANGE32, ATM_GETCIRANGE },
2937 { ATM_SETCIRANGE32, ATM_SETCIRANGE },
2938 { ATM_SETESI32, ATM_SETESI },
2939 { ATM_SETESIF32, ATM_SETESIF },
2940 { ATM_GETSTAT32, ATM_GETSTAT },
2941 { ATM_GETSTATZ32, ATM_GETSTATZ },
2942 { ATM_GETLOOP32, ATM_GETLOOP },
2943 { ATM_SETLOOP32, ATM_SETLOOP },
2944 { ATM_QUERYLOOP32, ATM_QUERYLOOP }
2945};
2946
2947#define NR_ATM_IOCTL ARRAY_SIZE(atm_ioctl_map)
2948
6b96018b
AB
2949static int do_atm_iobuf(struct net *net, struct socket *sock,
2950 unsigned int cmd, unsigned long arg)
7a229387
AB
2951{
2952 struct atm_iobuf __user *iobuf;
2953 struct atm_iobuf32 __user *iobuf32;
2954 u32 data;
2955 void __user *datap;
2956 int len, err;
2957
2958 iobuf = compat_alloc_user_space(sizeof(*iobuf));
2959 iobuf32 = compat_ptr(arg);
2960
2961 if (get_user(len, &iobuf32->length) ||
2962 get_user(data, &iobuf32->buffer))
2963 return -EFAULT;
2964 datap = compat_ptr(data);
2965 if (put_user(len, &iobuf->length) ||
2966 put_user(datap, &iobuf->buffer))
2967 return -EFAULT;
2968
6b96018b 2969 err = sock_do_ioctl(net, sock, cmd, (unsigned long)iobuf);
7a229387
AB
2970
2971 if (!err) {
2972 if (copy_in_user(&iobuf32->length, &iobuf->length,
2973 sizeof(int)))
2974 err = -EFAULT;
2975 }
2976
2977 return err;
2978}
2979
6b96018b
AB
2980static int do_atmif_sioc(struct net *net, struct socket *sock,
2981 unsigned int cmd, unsigned long arg)
7a229387
AB
2982{
2983 struct atmif_sioc __user *sioc;
2984 struct atmif_sioc32 __user *sioc32;
2985 u32 data;
2986 void __user *datap;
2987 int err;
2988
2989 sioc = compat_alloc_user_space(sizeof(*sioc));
2990 sioc32 = compat_ptr(arg);
2991
2992 if (copy_in_user(&sioc->number, &sioc32->number, 2 * sizeof(int)) ||
2993 get_user(data, &sioc32->arg))
2994 return -EFAULT;
2995 datap = compat_ptr(data);
2996 if (put_user(datap, &sioc->arg))
2997 return -EFAULT;
2998
6b96018b 2999 err = sock_do_ioctl(net, sock, cmd, (unsigned long) sioc);
7a229387
AB
3000
3001 if (!err) {
3002 if (copy_in_user(&sioc32->length, &sioc->length,
3003 sizeof(int)))
3004 err = -EFAULT;
3005 }
3006 return err;
3007}
3008
6b96018b
AB
3009static int do_atm_ioctl(struct net *net, struct socket *sock,
3010 unsigned int cmd32, unsigned long arg)
7a229387
AB
3011{
3012 int i;
3013 unsigned int cmd = 0;
3014
3015 switch (cmd32) {
3016 case SONET_GETSTAT:
3017 case SONET_GETSTATZ:
3018 case SONET_GETDIAG:
3019 case SONET_SETDIAG:
3020 case SONET_CLRDIAG:
3021 case SONET_SETFRAMING:
3022 case SONET_GETFRAMING:
3023 case SONET_GETFRSENSE:
6b96018b 3024 return do_atmif_sioc(net, sock, cmd32, arg);
7a229387
AB
3025 }
3026
3027 for (i = 0; i < NR_ATM_IOCTL; i++) {
3028 if (cmd32 == atm_ioctl_map[i].cmd32) {
3029 cmd = atm_ioctl_map[i].cmd;
3030 break;
3031 }
3032 }
3033 if (i == NR_ATM_IOCTL)
3034 return -EINVAL;
3035
3036 switch (cmd) {
3037 case ATM_GETNAMES:
6b96018b 3038 return do_atm_iobuf(net, sock, cmd, arg);
7a229387
AB
3039
3040 case ATM_GETLINKRATE:
3041 case ATM_GETTYPE:
3042 case ATM_GETESI:
3043 case ATM_GETADDR:
3044 case ATM_RSTADDR:
3045 case ATM_ADDADDR:
3046 case ATM_DELADDR:
3047 case ATM_GETCIRANGE:
3048 case ATM_SETCIRANGE:
3049 case ATM_SETESI:
3050 case ATM_SETESIF:
3051 case ATM_GETSTAT:
3052 case ATM_GETSTATZ:
3053 case ATM_GETLOOP:
3054 case ATM_SETLOOP:
3055 case ATM_QUERYLOOP:
6b96018b 3056 return do_atmif_sioc(net, sock, cmd, arg);
7a229387
AB
3057 }
3058
3059 return -EINVAL;
3060}
3061
6b96018b
AB
3062static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3063 unsigned int cmd, unsigned long arg)
3064{
3065 void __user *argp = compat_ptr(arg);
3066 struct sock *sk = sock->sk;
3067 struct net *net = sock_net(sk);
7a229387 3068
6b96018b
AB
3069 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3070 return siocdevprivate_ioctl(net, cmd, argp);
3071
3072 switch (cmd) {
3073 case SIOCSIFBR:
3074 case SIOCGIFBR:
3075 return old_bridge_ioctl(argp);
3076 case SIOCGIFNAME:
3077 return dev_ifname32(net, argp);
3078 case SIOCGIFCONF:
3079 return dev_ifconf(net, argp);
3080 case SIOCETHTOOL:
3081 return ethtool_ioctl(net, argp);
7a50a240
AB
3082 case SIOCWANDEV:
3083 return compat_siocwandev(net, argp);
6b96018b
AB
3084 case SIOCBONDENSLAVE:
3085 case SIOCBONDRELEASE:
3086 case SIOCBONDSETHWADDR:
3087 case SIOCBONDSLAVEINFOQUERY:
3088 case SIOCBONDINFOQUERY:
3089 case SIOCBONDCHANGEACTIVE:
3090 return bond_ioctl(net, cmd, argp);
3091 case SIOCADDRT:
3092 case SIOCDELRT:
3093 return routing_ioctl(net, sock, cmd, argp);
3094 case SIOCGSTAMP:
3095 return do_siocgstamp(net, sock, cmd, argp);
3096 case SIOCGSTAMPNS:
3097 return do_siocgstampns(net, sock, cmd, argp);
6b96018b
AB
3098
3099 case FIOSETOWN:
3100 case SIOCSPGRP:
3101 case FIOGETOWN:
3102 case SIOCGPGRP:
3103 case SIOCBRADDBR:
3104 case SIOCBRDELBR:
3105 case SIOCGIFVLAN:
3106 case SIOCSIFVLAN:
3107 case SIOCADDDLCI:
3108 case SIOCDELDLCI:
3109 return sock_ioctl(file, cmd, arg);
3110
3111 case SIOCGIFFLAGS:
3112 case SIOCSIFFLAGS:
3113 case SIOCGIFMETRIC:
3114 case SIOCSIFMETRIC:
3115 case SIOCGIFMTU:
3116 case SIOCSIFMTU:
3117 case SIOCGIFMEM:
3118 case SIOCSIFMEM:
3119 case SIOCGIFHWADDR:
3120 case SIOCSIFHWADDR:
3121 case SIOCADDMULTI:
3122 case SIOCDELMULTI:
3123 case SIOCGIFINDEX:
3124 case SIOCGIFMAP:
3125 case SIOCSIFMAP:
3126 case SIOCGIFADDR:
3127 case SIOCSIFADDR:
3128 case SIOCSIFHWBROADCAST:
3129 case SIOCSHWTSTAMP:
3130 case SIOCDIFADDR:
6b96018b
AB
3131 case SIOCGIFBRDADDR:
3132 case SIOCSIFBRDADDR:
3133 case SIOCGIFDSTADDR:
3134 case SIOCSIFDSTADDR:
3135 case SIOCGIFNETMASK:
3136 case SIOCSIFNETMASK:
3137 case SIOCSIFPFLAGS:
3138 case SIOCGIFPFLAGS:
3139 case SIOCGIFTXQLEN:
3140 case SIOCSIFTXQLEN:
3141 case SIOCBRADDIF:
3142 case SIOCBRDELIF:
9177efd3
AB
3143 case SIOCSIFNAME:
3144 case SIOCGMIIPHY:
3145 case SIOCGMIIREG:
3146 case SIOCSMIIREG:
6b96018b 3147 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3148
6b96018b
AB
3149 case ATM_GETLINKRATE32:
3150 case ATM_GETNAMES32:
3151 case ATM_GETTYPE32:
3152 case ATM_GETESI32:
3153 case ATM_GETADDR32:
3154 case ATM_RSTADDR32:
3155 case ATM_ADDADDR32:
3156 case ATM_DELADDR32:
3157 case ATM_GETCIRANGE32:
3158 case ATM_SETCIRANGE32:
3159 case ATM_SETESI32:
3160 case ATM_SETESIF32:
3161 case ATM_GETSTAT32:
3162 case ATM_GETSTATZ32:
3163 case ATM_GETLOOP32:
3164 case ATM_SETLOOP32:
3165 case ATM_QUERYLOOP32:
3166 case SONET_GETSTAT:
3167 case SONET_GETSTATZ:
3168 case SONET_GETDIAG:
3169 case SONET_SETDIAG:
3170 case SONET_CLRDIAG:
3171 case SONET_SETFRAMING:
3172 case SONET_GETFRAMING:
3173 case SONET_GETFRSENSE:
3174 return do_atm_ioctl(net, sock, cmd, arg);
3175
3176 case ATMSIGD_CTRL:
3177 case ATMARPD_CTRL:
3178 case ATMLEC_CTRL:
3179 case ATMLEC_MCAST:
3180 case ATMLEC_DATA:
3181 case ATM_SETSC:
3182 case SIOCSIFATMTCP:
3183 case SIOCMKCLIP:
3184 case ATMARP_MKIP:
3185 case ATMARP_SETENTRY:
3186 case ATMARP_ENCAP:
3187 case ATMTCP_CREATE:
3188 case ATMTCP_REMOVE:
3189 case ATMMPC_CTRL:
3190 case ATMMPC_DATA:
3191
3192 case SIOCSARP:
3193 case SIOCGARP:
3194 case SIOCDARP:
6b96018b 3195 case SIOCATMARK:
9177efd3
AB
3196 return sock_do_ioctl(net, sock, cmd, arg);
3197 }
3198
3199 /* Prevent warning from compat_sys_ioctl, these always
3200 * result in -EINVAL in the native case anyway. */
3201 switch (cmd) {
3202 case SIOCRTMSG:
3203 case SIOCGIFCOUNT:
6b96018b
AB
3204 case SIOCSRARP:
3205 case SIOCGRARP:
3206 case SIOCDRARP:
9177efd3
AB
3207 case SIOCSIFLINK:
3208 case SIOCGIFSLAVE:
3209 case SIOCSIFSLAVE:
3210 return -EINVAL;
6b96018b
AB
3211 }
3212
3213 return -ENOIOCTLCMD;
3214}
7a229387 3215
89bbfc95 3216static long compat_sock_ioctl(struct file *file, unsigned cmd,
89bddce5 3217 unsigned long arg)
89bbfc95
SP
3218{
3219 struct socket *sock = file->private_data;
3220 int ret = -ENOIOCTLCMD;
87de87d5
DM
3221 struct sock *sk;
3222 struct net *net;
3223
3224 sk = sock->sk;
3225 net = sock_net(sk);
89bbfc95
SP
3226
3227 if (sock->ops->compat_ioctl)
3228 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3229
87de87d5
DM
3230 if (ret == -ENOIOCTLCMD &&
3231 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3232 ret = compat_wext_handle_ioctl(net, cmd, arg);
3233
6b96018b
AB
3234 if (ret == -ENOIOCTLCMD)
3235 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3236
89bbfc95
SP
3237 return ret;
3238}
3239#endif
3240
ac5a488e
SS
3241int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3242{
3243 return sock->ops->bind(sock, addr, addrlen);
3244}
3245
3246int kernel_listen(struct socket *sock, int backlog)
3247{
3248 return sock->ops->listen(sock, backlog);
3249}
3250
3251int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3252{
3253 struct sock *sk = sock->sk;
3254 int err;
3255
3256 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3257 newsock);
3258 if (err < 0)
3259 goto done;
3260
3261 err = sock->ops->accept(sock, *newsock, flags);
3262 if (err < 0) {
3263 sock_release(*newsock);
fa8705b0 3264 *newsock = NULL;
ac5a488e
SS
3265 goto done;
3266 }
3267
3268 (*newsock)->ops = sock->ops;
1b08534e 3269 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3270
3271done:
3272 return err;
3273}
3274
3275int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3276 int flags)
ac5a488e
SS
3277{
3278 return sock->ops->connect(sock, addr, addrlen, flags);
3279}
3280
3281int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3282 int *addrlen)
3283{
3284 return sock->ops->getname(sock, addr, addrlen, 0);
3285}
3286
3287int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3288 int *addrlen)
3289{
3290 return sock->ops->getname(sock, addr, addrlen, 1);
3291}
3292
3293int kernel_getsockopt(struct socket *sock, int level, int optname,
3294 char *optval, int *optlen)
3295{
3296 mm_segment_t oldfs = get_fs();
3297 int err;
3298
3299 set_fs(KERNEL_DS);
3300 if (level == SOL_SOCKET)
3301 err = sock_getsockopt(sock, level, optname, optval, optlen);
3302 else
3303 err = sock->ops->getsockopt(sock, level, optname, optval,
3304 optlen);
3305 set_fs(oldfs);
3306 return err;
3307}
3308
3309int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3310 char *optval, unsigned int optlen)
ac5a488e
SS
3311{
3312 mm_segment_t oldfs = get_fs();
3313 int err;
3314
3315 set_fs(KERNEL_DS);
3316 if (level == SOL_SOCKET)
3317 err = sock_setsockopt(sock, level, optname, optval, optlen);
3318 else
3319 err = sock->ops->setsockopt(sock, level, optname, optval,
3320 optlen);
3321 set_fs(oldfs);
3322 return err;
3323}
3324
3325int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3326 size_t size, int flags)
3327{
3328 if (sock->ops->sendpage)
3329 return sock->ops->sendpage(sock, page, offset, size, flags);
3330
3331 return sock_no_sendpage(sock, page, offset, size, flags);
3332}
3333
3334int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3335{
3336 mm_segment_t oldfs = get_fs();
3337 int err;
3338
3339 set_fs(KERNEL_DS);
3340 err = sock->ops->ioctl(sock, cmd, arg);
3341 set_fs(oldfs);
3342
3343 return err;
3344}
3345
91cf45f0
TM
3346int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3347{
3348 return sock->ops->shutdown(sock, how);
3349}
3350
1da177e4
LT
/*
 * Symbol exports for the socket layer: the sock_* and sockfd_lookup
 * entry points first, followed by the kernel_* wrappers.
 */
3351EXPORT_SYMBOL(sock_create);
3352EXPORT_SYMBOL(sock_create_kern);
3353EXPORT_SYMBOL(sock_create_lite);
3354EXPORT_SYMBOL(sock_map_fd);
3355EXPORT_SYMBOL(sock_recvmsg);
3356EXPORT_SYMBOL(sock_register);
3357EXPORT_SYMBOL(sock_release);
3358EXPORT_SYMBOL(sock_sendmsg);
3359EXPORT_SYMBOL(sock_unregister);
3360EXPORT_SYMBOL(sock_wake_async);
3361EXPORT_SYMBOL(sockfd_lookup);
3362EXPORT_SYMBOL(kernel_sendmsg);
3363EXPORT_SYMBOL(kernel_recvmsg);
ac5a488e
SS
3364EXPORT_SYMBOL(kernel_bind);
3365EXPORT_SYMBOL(kernel_listen);
3366EXPORT_SYMBOL(kernel_accept);
3367EXPORT_SYMBOL(kernel_connect);
3368EXPORT_SYMBOL(kernel_getsockname);
3369EXPORT_SYMBOL(kernel_getpeername);
3370EXPORT_SYMBOL(kernel_getsockopt);
3371EXPORT_SYMBOL(kernel_setsockopt);
3372EXPORT_SYMBOL(kernel_sendpage);
3373EXPORT_SYMBOL(kernel_sock_ioctl);
91cf45f0 3374EXPORT_SYMBOL(kernel_sock_shutdown);