tcp: Mark v6 response packets as CHECKSUM_PARTIAL
[linux-2.6-block.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4
LT
72#include <linux/wanrouter.h>
73#include <linux/if_bridge.h>
20380731
ACM
74#include <linux/if_frad.h>
75#include <linux/if_vlan.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
1da177e4
LT
91
92#include <asm/uaccess.h>
93#include <asm/unistd.h>
94
95#include <net/compat.h>
87de87d5 96#include <net/wext.h>
1da177e4
LT
97
98#include <net/sock.h>
99#include <linux/netfilter.h>
100
6b96018b
AB
101#include <linux/if_tun.h>
102#include <linux/ipv6_route.h>
103#include <linux/route.h>
6b96018b
AB
104#include <linux/sockios.h>
105#include <linux/atalk.h>
106
1da177e4 107static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
108static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
109 unsigned long nr_segs, loff_t pos);
110static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
111 unsigned long nr_segs, loff_t pos);
89bddce5 112static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
113
114static int sock_close(struct inode *inode, struct file *file);
115static unsigned int sock_poll(struct file *file,
116 struct poll_table_struct *wait);
89bddce5 117static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
118#ifdef CONFIG_COMPAT
119static long compat_sock_ioctl(struct file *file,
89bddce5 120 unsigned int cmd, unsigned long arg);
89bbfc95 121#endif
1da177e4 122static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
123static ssize_t sock_sendpage(struct file *file, struct page *page,
124 int offset, size_t size, loff_t *ppos, int more);
9c55e01c
JA
125static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
126 struct pipe_inode_info *pipe, size_t len,
127 unsigned int flags);
1da177e4 128
1da177e4
LT
129/*
130 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
131 * in the operation structures but are done directly via the socketcall() multiplexor.
132 */
133
da7071d7 134static const struct file_operations socket_file_ops = {
1da177e4
LT
135 .owner = THIS_MODULE,
136 .llseek = no_llseek,
137 .aio_read = sock_aio_read,
138 .aio_write = sock_aio_write,
139 .poll = sock_poll,
140 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
141#ifdef CONFIG_COMPAT
142 .compat_ioctl = compat_sock_ioctl,
143#endif
1da177e4
LT
144 .mmap = sock_mmap,
145 .open = sock_no_open, /* special open code to disallow open via /proc */
146 .release = sock_close,
147 .fasync = sock_fasync,
5274f052
JA
148 .sendpage = sock_sendpage,
149 .splice_write = generic_splice_sendpage,
9c55e01c 150 .splice_read = sock_splice_read,
1da177e4
LT
151};
152
153/*
154 * The protocol list. Each protocol is registered in here.
155 */
156
1da177e4 157static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 158static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 159
1da177e4
LT
160/*
161 * Statistics counters of the socket lists
162 */
163
164static DEFINE_PER_CPU(int, sockets_in_use) = 0;
165
166/*
89bddce5
SH
167 * Support routines.
168 * Move socket addresses back and forth across the kernel/user
169 * divide and look after the messy bits.
1da177e4
LT
170 */
171
89bddce5 172#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
173 16 for IP, 16 for IPX,
174 24 for IPv6,
89bddce5 175 about 80 for AX.25
1da177e4
LT
176 must be at least one bigger than
177 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 178 :unix_mkname()).
1da177e4 179 */
89bddce5 180
1da177e4
LT
181/**
182 * move_addr_to_kernel - copy a socket address into kernel space
183 * @uaddr: Address in user space
184 * @kaddr: Address in kernel space
185 * @ulen: Length in user space
186 *
187 * The address is copied into kernel space. If the provided address is
188 * too long an error code of -EINVAL is returned. If the copy gives
189 * invalid addresses -EFAULT is returned. On a success 0 is returned.
190 */
191
230b1839 192int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
1da177e4 193{
230b1839 194 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 195 return -EINVAL;
89bddce5 196 if (ulen == 0)
1da177e4 197 return 0;
89bddce5 198 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 199 return -EFAULT;
3ec3b2fb 200 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
201}
202
203/**
204 * move_addr_to_user - copy an address to user space
205 * @kaddr: kernel space address
206 * @klen: length of address in kernel
207 * @uaddr: user space address
208 * @ulen: pointer to user length field
209 *
210 * The value pointed to by ulen on entry is the buffer length available.
211 * This is overwritten with the buffer space used. -EINVAL is returned
212 * if an overlong buffer is specified or a negative buffer size. -EFAULT
213 * is returned if either the buffer or the length field are not
214 * accessible.
215 * After copying the data up to the limit the user specifies, the true
216 * length of the data is written over the length limit the user
217 * specified. Zero is returned for a success.
218 */
89bddce5 219
230b1839 220int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr,
89bddce5 221 int __user *ulen)
1da177e4
LT
222{
223 int err;
224 int len;
225
89bddce5
SH
226 err = get_user(len, ulen);
227 if (err)
1da177e4 228 return err;
89bddce5
SH
229 if (len > klen)
230 len = klen;
230b1839 231 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 232 return -EINVAL;
89bddce5 233 if (len) {
d6fe3945
SG
234 if (audit_sockaddr(klen, kaddr))
235 return -ENOMEM;
89bddce5 236 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
237 return -EFAULT;
238 }
239 /*
89bddce5
SH
240 * "fromlen shall refer to the value before truncation.."
241 * 1003.1g
1da177e4
LT
242 */
243 return __put_user(klen, ulen);
244}
245
e18b890b 246static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
247
248static struct inode *sock_alloc_inode(struct super_block *sb)
249{
250 struct socket_alloc *ei;
89bddce5 251
e94b1766 252 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
253 if (!ei)
254 return NULL;
255 init_waitqueue_head(&ei->socket.wait);
89bddce5 256
1da177e4
LT
257 ei->socket.fasync_list = NULL;
258 ei->socket.state = SS_UNCONNECTED;
259 ei->socket.flags = 0;
260 ei->socket.ops = NULL;
261 ei->socket.sk = NULL;
262 ei->socket.file = NULL;
1da177e4
LT
263
264 return &ei->vfs_inode;
265}
266
267static void sock_destroy_inode(struct inode *inode)
268{
269 kmem_cache_free(sock_inode_cachep,
270 container_of(inode, struct socket_alloc, vfs_inode));
271}
272
51cc5068 273static void init_once(void *foo)
1da177e4 274{
89bddce5 275 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 276
a35afb83 277 inode_init_once(&ei->vfs_inode);
1da177e4 278}
89bddce5 279
1da177e4
LT
280static int init_inodecache(void)
281{
282 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
283 sizeof(struct socket_alloc),
284 0,
285 (SLAB_HWCACHE_ALIGN |
286 SLAB_RECLAIM_ACCOUNT |
287 SLAB_MEM_SPREAD),
20c2df83 288 init_once);
1da177e4
LT
289 if (sock_inode_cachep == NULL)
290 return -ENOMEM;
291 return 0;
292}
293
b87221de 294static const struct super_operations sockfs_ops = {
1da177e4
LT
295 .alloc_inode = sock_alloc_inode,
296 .destroy_inode =sock_destroy_inode,
297 .statfs = simple_statfs,
298};
299
454e2398 300static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
301 int flags, const char *dev_name, void *data,
302 struct vfsmount *mnt)
1da177e4 303{
454e2398
DH
304 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
305 mnt);
1da177e4
LT
306}
307
ba89966c 308static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
309
310static struct file_system_type sock_fs_type = {
311 .name = "sockfs",
312 .get_sb = sockfs_get_sb,
313 .kill_sb = kill_anon_super,
314};
89bddce5 315
c23fbb6b
ED
316/*
317 * sockfs_dname() is called from d_path().
318 */
319static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
320{
321 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
322 dentry->d_inode->i_ino);
323}
324
3ba13d17 325static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 326 .d_dname = sockfs_dname,
1da177e4
LT
327};
328
329/*
330 * Obtains the first available file descriptor and sets it up for use.
331 *
39d8c1b6
DM
332 * These functions create file structures and maps them to fd space
333 * of the current process. On success it returns file descriptor
1da177e4
LT
334 * and file struct implicitly stored in sock->file.
335 * Note that another thread may close file descriptor before we return
336 * from this function. We use the fact that now we do not refer
337 * to socket after mapping. If one day we will need it, this
338 * function will increment ref. count on file by 1.
339 *
340 * In any case returned fd MAY BE not valid!
341 * This race condition is unavoidable
342 * with shared fd spaces, we cannot solve it inside kernel,
343 * but we take care of internal coherence yet.
344 */
345
7cbe66b6 346static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
1da177e4 347{
7cbe66b6 348 struct qstr name = { .name = "" };
2c48b9c4 349 struct path path;
7cbe66b6 350 struct file *file;
1da177e4 351 int fd;
1da177e4 352
a677a039 353 fd = get_unused_fd_flags(flags);
7cbe66b6
AV
354 if (unlikely(fd < 0))
355 return fd;
1da177e4 356
2c48b9c4
AV
357 path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
358 if (unlikely(!path.dentry)) {
7cbe66b6 359 put_unused_fd(fd);
39d8c1b6 360 return -ENOMEM;
7cbe66b6 361 }
2c48b9c4 362 path.mnt = mntget(sock_mnt);
39d8c1b6 363
2c48b9c4 364 path.dentry->d_op = &sockfs_dentry_operations;
2c48b9c4 365 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 366 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 367
2c48b9c4 368 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 369 &socket_file_ops);
cc3808f8
AV
370 if (unlikely(!file)) {
371 /* drop dentry, keep inode */
372 atomic_inc(&path.dentry->d_inode->i_count);
2c48b9c4 373 path_put(&path);
cc3808f8
AV
374 put_unused_fd(fd);
375 return -ENFILE;
376 }
377
378 sock->file = file;
77d27200 379 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6
DM
380 file->f_pos = 0;
381 file->private_data = sock;
1da177e4 382
7cbe66b6
AV
383 *f = file;
384 return fd;
39d8c1b6
DM
385}
386
/*
 *	Allocate a file for @sock and install it in the current process's
 *	fd table. Returns the new fd, or the negative errno from
 *	sock_alloc_file().
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int fd;

	fd = sock_alloc_file(sock, &newfile, flags);
	if (unlikely(fd < 0))
		return fd;

	fd_install(fd, newfile);
	return fd;
}
397
6cb153ca
BL
398static struct socket *sock_from_file(struct file *file, int *err)
399{
6cb153ca
BL
400 if (file->f_op == &socket_file_ops)
401 return file->private_data; /* set in sock_map_fd */
402
23bb80d2
ED
403 *err = -ENOTSOCK;
404 return NULL;
6cb153ca
BL
405}
406
1da177e4
LT
407/**
408 * sockfd_lookup - Go from a file number to its socket slot
409 * @fd: file handle
410 * @err: pointer to an error code return
411 *
412 * The file handle passed in is locked and the socket it is bound
413 * too is returned. If an error occurs the err pointer is overwritten
414 * with a negative errno code and NULL is returned. The function checks
415 * for both invalid handles and passing a handle which is not a socket.
416 *
417 * On a success the socket object pointer is returned.
418 */
419
420struct socket *sockfd_lookup(int fd, int *err)
421{
422 struct file *file;
1da177e4
LT
423 struct socket *sock;
424
89bddce5
SH
425 file = fget(fd);
426 if (!file) {
1da177e4
LT
427 *err = -EBADF;
428 return NULL;
429 }
89bddce5 430
6cb153ca
BL
431 sock = sock_from_file(file, err);
432 if (!sock)
1da177e4 433 fput(file);
6cb153ca
BL
434 return sock;
435}
1da177e4 436
6cb153ca
BL
437static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
438{
439 struct file *file;
440 struct socket *sock;
441
3672558c 442 *err = -EBADF;
6cb153ca
BL
443 file = fget_light(fd, fput_needed);
444 if (file) {
445 sock = sock_from_file(file, err);
446 if (sock)
447 return sock;
448 fput_light(file, *fput_needed);
1da177e4 449 }
6cb153ca 450 return NULL;
1da177e4
LT
451}
452
453/**
454 * sock_alloc - allocate a socket
89bddce5 455 *
1da177e4
LT
456 * Allocate a new inode and socket object. The two are bound together
457 * and initialised. The socket is then returned. If we are out of inodes
458 * NULL is returned.
459 */
460
461static struct socket *sock_alloc(void)
462{
89bddce5
SH
463 struct inode *inode;
464 struct socket *sock;
1da177e4
LT
465
466 inode = new_inode(sock_mnt->mnt_sb);
467 if (!inode)
468 return NULL;
469
470 sock = SOCKET_I(inode);
471
29a020d3 472 kmemcheck_annotate_bitfield(sock, type);
89bddce5 473 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
474 inode->i_uid = current_fsuid();
475 inode->i_gid = current_fsgid();
1da177e4 476
4e69489a 477 percpu_add(sockets_in_use, 1);
1da177e4
LT
478 return sock;
479}
480
481/*
482 * In theory you can't get an open on this inode, but /proc provides
483 * a back door. Remember to keep it shut otherwise you'll let the
484 * creepy crawlies in.
485 */
89bddce5 486
1da177e4
LT
487static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
488{
489 return -ENXIO;
490}
491
4b6f5d20 492const struct file_operations bad_sock_fops = {
1da177e4
LT
493 .owner = THIS_MODULE,
494 .open = sock_no_open,
495};
496
497/**
498 * sock_release - close a socket
499 * @sock: socket to close
500 *
501 * The socket is released from the protocol stack if it has a release
502 * callback, and the inode is then released if the socket is bound to
89bddce5 503 * an inode not a file.
1da177e4 504 */
89bddce5 505
1da177e4
LT
506void sock_release(struct socket *sock)
507{
508 if (sock->ops) {
509 struct module *owner = sock->ops->owner;
510
511 sock->ops->release(sock);
512 sock->ops = NULL;
513 module_put(owner);
514 }
515
516 if (sock->fasync_list)
517 printk(KERN_ERR "sock_release: fasync list not empty!\n");
518
4e69489a 519 percpu_sub(sockets_in_use, 1);
1da177e4
LT
520 if (!sock->file) {
521 iput(SOCK_INODE(sock));
522 return;
523 }
89bddce5 524 sock->file = NULL;
1da177e4
LT
525}
526
20d49473
PO
527int sock_tx_timestamp(struct msghdr *msg, struct sock *sk,
528 union skb_shared_tx *shtx)
529{
530 shtx->flags = 0;
531 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
532 shtx->hardware = 1;
533 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
534 shtx->software = 1;
535 return 0;
536}
537EXPORT_SYMBOL(sock_tx_timestamp);
538
89bddce5 539static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
540 struct msghdr *msg, size_t size)
541{
542 struct sock_iocb *si = kiocb_to_siocb(iocb);
543 int err;
544
545 si->sock = sock;
546 si->scm = NULL;
547 si->msg = msg;
548 si->size = size;
549
550 err = security_socket_sendmsg(sock, msg, size);
551 if (err)
552 return err;
553
554 return sock->ops->sendmsg(iocb, sock, msg, size);
555}
556
557int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
558{
559 struct kiocb iocb;
560 struct sock_iocb siocb;
561 int ret;
562
563 init_sync_kiocb(&iocb, NULL);
564 iocb.private = &siocb;
565 ret = __sock_sendmsg(&iocb, sock, msg, size);
566 if (-EIOCBQUEUED == ret)
567 ret = wait_on_sync_kiocb(&iocb);
568 return ret;
569}
570
571int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
572 struct kvec *vec, size_t num, size_t size)
573{
574 mm_segment_t oldfs = get_fs();
575 int result;
576
577 set_fs(KERNEL_DS);
578 /*
579 * the following is safe, since for compiler definitions of kvec and
580 * iovec are identical, yielding the same in-core layout and alignment
581 */
89bddce5 582 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
583 msg->msg_iovlen = num;
584 result = sock_sendmsg(sock, msg, size);
585 set_fs(oldfs);
586 return result;
587}
588
20d49473
PO
589static int ktime2ts(ktime_t kt, struct timespec *ts)
590{
591 if (kt.tv64) {
592 *ts = ktime_to_timespec(kt);
593 return 1;
594 } else {
595 return 0;
596 }
597}
598
92f37fd2
ED
599/*
600 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
601 */
602void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
603 struct sk_buff *skb)
604{
20d49473
PO
605 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
606 struct timespec ts[3];
607 int empty = 1;
608 struct skb_shared_hwtstamps *shhwtstamps =
609 skb_hwtstamps(skb);
610
611 /* Race occurred between timestamp enabling and packet
612 receiving. Fill in the current time for now. */
613 if (need_software_tstamp && skb->tstamp.tv64 == 0)
614 __net_timestamp(skb);
615
616 if (need_software_tstamp) {
617 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
618 struct timeval tv;
619 skb_get_timestamp(skb, &tv);
620 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
621 sizeof(tv), &tv);
622 } else {
842509b8 623 skb_get_timestampns(skb, &ts[0]);
20d49473 624 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 625 sizeof(ts[0]), &ts[0]);
20d49473
PO
626 }
627 }
628
629
630 memset(ts, 0, sizeof(ts));
631 if (skb->tstamp.tv64 &&
632 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
633 skb_get_timestampns(skb, ts + 0);
634 empty = 0;
635 }
636 if (shhwtstamps) {
637 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
638 ktime2ts(shhwtstamps->syststamp, ts + 1))
639 empty = 0;
640 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
641 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
642 empty = 0;
92f37fd2 643 }
20d49473
PO
644 if (!empty)
645 put_cmsg(msg, SOL_SOCKET,
646 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2
ED
647}
648
7c81fd8b
ACM
649EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
650
3b885787
NH
651inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
652{
653 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
654 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
655 sizeof(__u32), &skb->dropcount);
656}
657
/*
 *	Convenience wrapper: emit the receive timestamp control message(s)
 *	followed by the drop-count control message for @skb.
 */
void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			    struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops);
665
a2e27255
ACM
666static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
667 struct msghdr *msg, size_t size, int flags)
1da177e4 668{
1da177e4
LT
669 struct sock_iocb *si = kiocb_to_siocb(iocb);
670
671 si->sock = sock;
672 si->scm = NULL;
673 si->msg = msg;
674 si->size = size;
675 si->flags = flags;
676
1da177e4
LT
677 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
678}
679
a2e27255
ACM
680static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
681 struct msghdr *msg, size_t size, int flags)
682{
683 int err = security_socket_recvmsg(sock, msg, size, flags);
684
685 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
686}
687
89bddce5 688int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
689 size_t size, int flags)
690{
691 struct kiocb iocb;
692 struct sock_iocb siocb;
693 int ret;
694
89bddce5 695 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
696 iocb.private = &siocb;
697 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
698 if (-EIOCBQUEUED == ret)
699 ret = wait_on_sync_kiocb(&iocb);
700 return ret;
701}
702
a2e27255
ACM
703static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
704 size_t size, int flags)
705{
706 struct kiocb iocb;
707 struct sock_iocb siocb;
708 int ret;
709
710 init_sync_kiocb(&iocb, NULL);
711 iocb.private = &siocb;
712 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
713 if (-EIOCBQUEUED == ret)
714 ret = wait_on_sync_kiocb(&iocb);
715 return ret;
716}
717
89bddce5
SH
718int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
719 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
720{
721 mm_segment_t oldfs = get_fs();
722 int result;
723
724 set_fs(KERNEL_DS);
725 /*
726 * the following is safe, since for compiler definitions of kvec and
727 * iovec are identical, yielding the same in-core layout and alignment
728 */
89bddce5 729 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
730 result = sock_recvmsg(sock, msg, size, flags);
731 set_fs(oldfs);
732 return result;
733}
734
735static void sock_aio_dtor(struct kiocb *iocb)
736{
737 kfree(iocb->private);
738}
739
ce1d4d3e
CH
740static ssize_t sock_sendpage(struct file *file, struct page *page,
741 int offset, size_t size, loff_t *ppos, int more)
1da177e4 742{
1da177e4
LT
743 struct socket *sock;
744 int flags;
745
ce1d4d3e
CH
746 sock = file->private_data;
747
748 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
749 if (more)
750 flags |= MSG_MORE;
751
e6949583 752 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 753}
1da177e4 754
9c55e01c
JA
755static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
756 struct pipe_inode_info *pipe, size_t len,
757 unsigned int flags)
758{
759 struct socket *sock = file->private_data;
760
997b37da
RDC
761 if (unlikely(!sock->ops->splice_read))
762 return -EINVAL;
763
9c55e01c
JA
764 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
765}
766
ce1d4d3e 767static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 768 struct sock_iocb *siocb)
ce1d4d3e
CH
769{
770 if (!is_sync_kiocb(iocb)) {
771 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
772 if (!siocb)
773 return NULL;
1da177e4
LT
774 iocb->ki_dtor = sock_aio_dtor;
775 }
1da177e4 776
ce1d4d3e 777 siocb->kiocb = iocb;
ce1d4d3e
CH
778 iocb->private = siocb;
779 return siocb;
1da177e4
LT
780}
781
ce1d4d3e 782static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
783 struct file *file, const struct iovec *iov,
784 unsigned long nr_segs)
ce1d4d3e
CH
785{
786 struct socket *sock = file->private_data;
787 size_t size = 0;
788 int i;
1da177e4 789
89bddce5
SH
790 for (i = 0; i < nr_segs; i++)
791 size += iov[i].iov_len;
1da177e4 792
ce1d4d3e
CH
793 msg->msg_name = NULL;
794 msg->msg_namelen = 0;
795 msg->msg_control = NULL;
796 msg->msg_controllen = 0;
89bddce5 797 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
798 msg->msg_iovlen = nr_segs;
799 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
800
801 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
802}
803
027445c3
BP
804static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
805 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
806{
807 struct sock_iocb siocb, *x;
808
1da177e4
LT
809 if (pos != 0)
810 return -ESPIPE;
027445c3
BP
811
812 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
813 return 0;
814
027445c3
BP
815
816 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
817 if (!x)
818 return -ENOMEM;
027445c3 819 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
820}
821
ce1d4d3e 822static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
823 struct file *file, const struct iovec *iov,
824 unsigned long nr_segs)
1da177e4 825{
ce1d4d3e
CH
826 struct socket *sock = file->private_data;
827 size_t size = 0;
828 int i;
1da177e4 829
89bddce5
SH
830 for (i = 0; i < nr_segs; i++)
831 size += iov[i].iov_len;
1da177e4 832
ce1d4d3e
CH
833 msg->msg_name = NULL;
834 msg->msg_namelen = 0;
835 msg->msg_control = NULL;
836 msg->msg_controllen = 0;
89bddce5 837 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
838 msg->msg_iovlen = nr_segs;
839 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
840 if (sock->type == SOCK_SEQPACKET)
841 msg->msg_flags |= MSG_EOR;
1da177e4 842
ce1d4d3e 843 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
844}
845
027445c3
BP
846static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
847 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
848{
849 struct sock_iocb siocb, *x;
1da177e4 850
ce1d4d3e
CH
851 if (pos != 0)
852 return -ESPIPE;
027445c3 853
027445c3 854 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
855 if (!x)
856 return -ENOMEM;
1da177e4 857
027445c3 858 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
859}
860
1da177e4
LT
861/*
862 * Atomic setting of ioctl hooks to avoid race
863 * with module unload.
864 */
865
4a3e2f71 866static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 867static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 868
881d966b 869void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 870{
4a3e2f71 871 mutex_lock(&br_ioctl_mutex);
1da177e4 872 br_ioctl_hook = hook;
4a3e2f71 873 mutex_unlock(&br_ioctl_mutex);
1da177e4 874}
89bddce5 875
1da177e4
LT
876EXPORT_SYMBOL(brioctl_set);
877
4a3e2f71 878static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 879static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 880
881d966b 881void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 882{
4a3e2f71 883 mutex_lock(&vlan_ioctl_mutex);
1da177e4 884 vlan_ioctl_hook = hook;
4a3e2f71 885 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 886}
89bddce5 887
1da177e4
LT
888EXPORT_SYMBOL(vlan_ioctl_set);
889
4a3e2f71 890static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 891static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 892
89bddce5 893void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 894{
4a3e2f71 895 mutex_lock(&dlci_ioctl_mutex);
1da177e4 896 dlci_ioctl_hook = hook;
4a3e2f71 897 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 898}
89bddce5 899
1da177e4
LT
900EXPORT_SYMBOL(dlci_ioctl_set);
901
6b96018b
AB
902static long sock_do_ioctl(struct net *net, struct socket *sock,
903 unsigned int cmd, unsigned long arg)
904{
905 int err;
906 void __user *argp = (void __user *)arg;
907
908 err = sock->ops->ioctl(sock, cmd, arg);
909
910 /*
911 * If this ioctl is unknown try to hand it down
912 * to the NIC driver.
913 */
914 if (err == -ENOIOCTLCMD)
915 err = dev_ioctl(net, cmd, argp);
916
917 return err;
918}
919
1da177e4
LT
920/*
921 * With an ioctl, arg may well be a user mode pointer, but we don't know
922 * what to do with it - that's up to the protocol still.
923 */
924
925static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
926{
927 struct socket *sock;
881d966b 928 struct sock *sk;
1da177e4
LT
929 void __user *argp = (void __user *)arg;
930 int pid, err;
881d966b 931 struct net *net;
1da177e4 932
b69aee04 933 sock = file->private_data;
881d966b 934 sk = sock->sk;
3b1e0a65 935 net = sock_net(sk);
1da177e4 936 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 937 err = dev_ioctl(net, cmd, argp);
1da177e4 938 } else
3d23e349 939#ifdef CONFIG_WEXT_CORE
1da177e4 940 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 941 err = dev_ioctl(net, cmd, argp);
1da177e4 942 } else
3d23e349 943#endif
89bddce5 944 switch (cmd) {
1da177e4
LT
945 case FIOSETOWN:
946 case SIOCSPGRP:
947 err = -EFAULT;
948 if (get_user(pid, (int __user *)argp))
949 break;
950 err = f_setown(sock->file, pid, 1);
951 break;
952 case FIOGETOWN:
953 case SIOCGPGRP:
609d7fa9 954 err = put_user(f_getown(sock->file),
89bddce5 955 (int __user *)argp);
1da177e4
LT
956 break;
957 case SIOCGIFBR:
958 case SIOCSIFBR:
959 case SIOCBRADDBR:
960 case SIOCBRDELBR:
961 err = -ENOPKG;
962 if (!br_ioctl_hook)
963 request_module("bridge");
964
4a3e2f71 965 mutex_lock(&br_ioctl_mutex);
89bddce5 966 if (br_ioctl_hook)
881d966b 967 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 968 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
969 break;
970 case SIOCGIFVLAN:
971 case SIOCSIFVLAN:
972 err = -ENOPKG;
973 if (!vlan_ioctl_hook)
974 request_module("8021q");
975
4a3e2f71 976 mutex_lock(&vlan_ioctl_mutex);
1da177e4 977 if (vlan_ioctl_hook)
881d966b 978 err = vlan_ioctl_hook(net, argp);
4a3e2f71 979 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 980 break;
1da177e4
LT
981 case SIOCADDDLCI:
982 case SIOCDELDLCI:
983 err = -ENOPKG;
984 if (!dlci_ioctl_hook)
985 request_module("dlci");
986
7512cbf6
PE
987 mutex_lock(&dlci_ioctl_mutex);
988 if (dlci_ioctl_hook)
1da177e4 989 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 990 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
991 break;
992 default:
6b96018b 993 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 994 break;
89bddce5 995 }
1da177e4
LT
996 return err;
997}
998
999int sock_create_lite(int family, int type, int protocol, struct socket **res)
1000{
1001 int err;
1002 struct socket *sock = NULL;
89bddce5 1003
1da177e4
LT
1004 err = security_socket_create(family, type, protocol, 1);
1005 if (err)
1006 goto out;
1007
1008 sock = sock_alloc();
1009 if (!sock) {
1010 err = -ENOMEM;
1011 goto out;
1012 }
1013
1da177e4 1014 sock->type = type;
7420ed23
VY
1015 err = security_socket_post_create(sock, family, type, protocol, 1);
1016 if (err)
1017 goto out_release;
1018
1da177e4
LT
1019out:
1020 *res = sock;
1021 return err;
7420ed23
VY
1022out_release:
1023 sock_release(sock);
1024 sock = NULL;
1025 goto out;
1da177e4
LT
1026}
1027
1028/* No kernel lock held - perfect */
89bddce5 1029static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1030{
1031 struct socket *sock;
1032
1033 /*
89bddce5 1034 * We can't return errors to poll, so it's either yes or no.
1da177e4 1035 */
b69aee04 1036 sock = file->private_data;
1da177e4
LT
1037 return sock->ops->poll(file, sock, wait);
1038}
1039
89bddce5 1040static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1041{
b69aee04 1042 struct socket *sock = file->private_data;
1da177e4
LT
1043
1044 return sock->ops->mmap(file, sock, vma);
1045}
1046
20380731 1047static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1048{
1049 /*
89bddce5
SH
1050 * It was possible the inode is NULL we were
1051 * closing an unfinished socket.
1da177e4
LT
1052 */
1053
89bddce5 1054 if (!inode) {
1da177e4
LT
1055 printk(KERN_DEBUG "sock_close: NULL inode\n");
1056 return 0;
1057 }
1da177e4
LT
1058 sock_release(SOCKET_I(inode));
1059 return 0;
1060}
1061
1062/*
1063 * Update the socket async list
1064 *
1065 * Fasync_list locking strategy.
1066 *
1067 * 1. fasync_list is modified only under process context socket lock
1068 * i.e. under semaphore.
1069 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1070 * or under socket lock.
1071 * 3. fasync_list can be used from softirq context, so that
1072 * modification under socket lock have to be enhanced with
1073 * write_lock_bh(&sk->sk_callback_lock).
1074 * --ANK (990710)
1075 */
1076
1077static int sock_fasync(int fd, struct file *filp, int on)
1078{
89bddce5 1079 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1080 struct socket *sock;
1081 struct sock *sk;
1082
89bddce5 1083 if (on) {
8b3a7005 1084 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1085 if (fna == NULL)
1da177e4
LT
1086 return -ENOMEM;
1087 }
1088
b69aee04 1089 sock = filp->private_data;
1da177e4 1090
89bddce5
SH
1091 sk = sock->sk;
1092 if (sk == NULL) {
1da177e4
LT
1093 kfree(fna);
1094 return -EINVAL;
1095 }
1096
1097 lock_sock(sk);
1098
76398425
JC
1099 spin_lock(&filp->f_lock);
1100 if (on)
1101 filp->f_flags |= FASYNC;
1102 else
1103 filp->f_flags &= ~FASYNC;
1104 spin_unlock(&filp->f_lock);
1105
89bddce5 1106 prev = &(sock->fasync_list);
1da177e4 1107
89bddce5
SH
1108 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1109 if (fa->fa_file == filp)
1da177e4
LT
1110 break;
1111
89bddce5
SH
1112 if (on) {
1113 if (fa != NULL) {
1da177e4 1114 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1115 fa->fa_fd = fd;
1da177e4
LT
1116 write_unlock_bh(&sk->sk_callback_lock);
1117
1118 kfree(fna);
1119 goto out;
1120 }
89bddce5
SH
1121 fna->fa_file = filp;
1122 fna->fa_fd = fd;
1123 fna->magic = FASYNC_MAGIC;
1124 fna->fa_next = sock->fasync_list;
1da177e4 1125 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1126 sock->fasync_list = fna;
bcdce719 1127 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1128 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1129 } else {
1130 if (fa != NULL) {
1da177e4 1131 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1132 *prev = fa->fa_next;
bcdce719
ED
1133 if (!sock->fasync_list)
1134 sock_reset_flag(sk, SOCK_FASYNC);
1da177e4
LT
1135 write_unlock_bh(&sk->sk_callback_lock);
1136 kfree(fa);
1137 }
1138 }
1139
1140out:
1141 release_sock(sock->sk);
1142 return 0;
1143}
1144
1145/* This function may be called only under socket lock or callback_lock */
1146
1147int sock_wake_async(struct socket *sock, int how, int band)
1148{
1149 if (!sock || !sock->fasync_list)
1150 return -1;
89bddce5 1151 switch (how) {
8d8ad9d7 1152 case SOCK_WAKE_WAITD:
1da177e4
LT
1153 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1154 break;
1155 goto call_kill;
8d8ad9d7 1156 case SOCK_WAKE_SPACE:
1da177e4
LT
1157 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1158 break;
1159 /* fall through */
8d8ad9d7 1160 case SOCK_WAKE_IO:
89bddce5 1161call_kill:
1da177e4
LT
1162 __kill_fasync(sock->fasync_list, SIGIO, band);
1163 break;
8d8ad9d7 1164 case SOCK_WAKE_URG:
1da177e4
LT
1165 __kill_fasync(sock->fasync_list, SIGURG, band);
1166 }
1167 return 0;
1168}
1169
1b8d7ae4 1170static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1171 struct socket **res, int kern)
1da177e4
LT
1172{
1173 int err;
1174 struct socket *sock;
55737fda 1175 const struct net_proto_family *pf;
1da177e4
LT
1176
1177 /*
89bddce5 1178 * Check protocol is in range
1da177e4
LT
1179 */
1180 if (family < 0 || family >= NPROTO)
1181 return -EAFNOSUPPORT;
1182 if (type < 0 || type >= SOCK_MAX)
1183 return -EINVAL;
1184
1185 /* Compatibility.
1186
1187 This uglymoron is moved from INET layer to here to avoid
1188 deadlock in module load.
1189 */
1190 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1191 static int warned;
1da177e4
LT
1192 if (!warned) {
1193 warned = 1;
89bddce5
SH
1194 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1195 current->comm);
1da177e4
LT
1196 }
1197 family = PF_PACKET;
1198 }
1199
1200 err = security_socket_create(family, type, protocol, kern);
1201 if (err)
1202 return err;
89bddce5 1203
55737fda
SH
1204 /*
1205 * Allocate the socket and allow the family to set things up. if
1206 * the protocol is 0, the family is instructed to select an appropriate
1207 * default.
1208 */
1209 sock = sock_alloc();
1210 if (!sock) {
1211 if (net_ratelimit())
1212 printk(KERN_WARNING "socket: no more sockets\n");
1213 return -ENFILE; /* Not exactly a match, but its the
1214 closest posix thing */
1215 }
1216
1217 sock->type = type;
1218
95a5afca 1219#ifdef CONFIG_MODULES
89bddce5
SH
1220 /* Attempt to load a protocol module if the find failed.
1221 *
1222 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1223 * requested real, full-featured networking support upon configuration.
1224 * Otherwise module support will break!
1225 */
55737fda 1226 if (net_families[family] == NULL)
89bddce5 1227 request_module("net-pf-%d", family);
1da177e4
LT
1228#endif
1229
55737fda
SH
1230 rcu_read_lock();
1231 pf = rcu_dereference(net_families[family]);
1232 err = -EAFNOSUPPORT;
1233 if (!pf)
1234 goto out_release;
1da177e4
LT
1235
1236 /*
1237 * We will call the ->create function, that possibly is in a loadable
1238 * module, so we have to bump that loadable module refcnt first.
1239 */
55737fda 1240 if (!try_module_get(pf->owner))
1da177e4
LT
1241 goto out_release;
1242
55737fda
SH
1243 /* Now protected by module ref count */
1244 rcu_read_unlock();
1245
3f378b68 1246 err = pf->create(net, sock, protocol, kern);
55737fda 1247 if (err < 0)
1da177e4 1248 goto out_module_put;
a79af59e 1249
1da177e4
LT
1250 /*
1251 * Now to bump the refcnt of the [loadable] module that owns this
1252 * socket at sock_release time we decrement its refcnt.
1253 */
55737fda
SH
1254 if (!try_module_get(sock->ops->owner))
1255 goto out_module_busy;
1256
1da177e4
LT
1257 /*
1258 * Now that we're done with the ->create function, the [loadable]
1259 * module can have its refcnt decremented
1260 */
55737fda 1261 module_put(pf->owner);
7420ed23
VY
1262 err = security_socket_post_create(sock, family, type, protocol, kern);
1263 if (err)
3b185525 1264 goto out_sock_release;
55737fda 1265 *res = sock;
1da177e4 1266
55737fda
SH
1267 return 0;
1268
1269out_module_busy:
1270 err = -EAFNOSUPPORT;
1da177e4 1271out_module_put:
55737fda
SH
1272 sock->ops = NULL;
1273 module_put(pf->owner);
1274out_sock_release:
1da177e4 1275 sock_release(sock);
55737fda
SH
1276 return err;
1277
1278out_release:
1279 rcu_read_unlock();
1280 goto out_sock_release;
1da177e4
LT
1281}
1282
1283int sock_create(int family, int type, int protocol, struct socket **res)
1284{
1b8d7ae4 1285 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1286}
1287
1288int sock_create_kern(int family, int type, int protocol, struct socket **res)
1289{
1b8d7ae4 1290 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1291}
1292
3e0fa65f 1293SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1294{
1295 int retval;
1296 struct socket *sock;
a677a039
UD
1297 int flags;
1298
e38b36f3
UD
1299 /* Check the SOCK_* constants for consistency. */
1300 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1301 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1302 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1303 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1304
a677a039 1305 flags = type & ~SOCK_TYPE_MASK;
77d27200 1306 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1307 return -EINVAL;
1308 type &= SOCK_TYPE_MASK;
1da177e4 1309
aaca0bdc
UD
1310 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1311 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1312
1da177e4
LT
1313 retval = sock_create(family, type, protocol, &sock);
1314 if (retval < 0)
1315 goto out;
1316
77d27200 1317 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1318 if (retval < 0)
1319 goto out_release;
1320
1321out:
1322 /* It may be already another descriptor 8) Not kernel problem. */
1323 return retval;
1324
1325out_release:
1326 sock_release(sock);
1327 return retval;
1328}
1329
1330/*
1331 * Create a pair of connected sockets.
1332 */
1333
3e0fa65f
HC
1334SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1335 int __user *, usockvec)
1da177e4
LT
1336{
1337 struct socket *sock1, *sock2;
1338 int fd1, fd2, err;
db349509 1339 struct file *newfile1, *newfile2;
a677a039
UD
1340 int flags;
1341
1342 flags = type & ~SOCK_TYPE_MASK;
77d27200 1343 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1344 return -EINVAL;
1345 type &= SOCK_TYPE_MASK;
1da177e4 1346
aaca0bdc
UD
1347 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1348 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1349
1da177e4
LT
1350 /*
1351 * Obtain the first socket and check if the underlying protocol
1352 * supports the socketpair call.
1353 */
1354
1355 err = sock_create(family, type, protocol, &sock1);
1356 if (err < 0)
1357 goto out;
1358
1359 err = sock_create(family, type, protocol, &sock2);
1360 if (err < 0)
1361 goto out_release_1;
1362
1363 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1364 if (err < 0)
1da177e4
LT
1365 goto out_release_both;
1366
7cbe66b6 1367 fd1 = sock_alloc_file(sock1, &newfile1, flags);
bf3c23d1
DM
1368 if (unlikely(fd1 < 0)) {
1369 err = fd1;
db349509 1370 goto out_release_both;
bf3c23d1 1371 }
1da177e4 1372
7cbe66b6 1373 fd2 = sock_alloc_file(sock2, &newfile2, flags);
198de4d7
AV
1374 if (unlikely(fd2 < 0)) {
1375 err = fd2;
1376 fput(newfile1);
1377 put_unused_fd(fd1);
1378 sock_release(sock2);
1379 goto out;
db349509
AV
1380 }
1381
157cf649 1382 audit_fd_pair(fd1, fd2);
db349509
AV
1383 fd_install(fd1, newfile1);
1384 fd_install(fd2, newfile2);
1da177e4
LT
1385 /* fd1 and fd2 may be already another descriptors.
1386 * Not kernel problem.
1387 */
1388
89bddce5 1389 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1390 if (!err)
1391 err = put_user(fd2, &usockvec[1]);
1392 if (!err)
1393 return 0;
1394
1395 sys_close(fd2);
1396 sys_close(fd1);
1397 return err;
1398
1da177e4 1399out_release_both:
89bddce5 1400 sock_release(sock2);
1da177e4 1401out_release_1:
89bddce5 1402 sock_release(sock1);
1da177e4
LT
1403out:
1404 return err;
1405}
1406
1da177e4
LT
1407/*
1408 * Bind a name to a socket. Nothing much to do here since it's
1409 * the protocol's responsibility to handle the local address.
1410 *
1411 * We move the socket address to kernel space before we call
1412 * the protocol layer (having also checked the address is ok).
1413 */
1414
20f37034 1415SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1416{
1417 struct socket *sock;
230b1839 1418 struct sockaddr_storage address;
6cb153ca 1419 int err, fput_needed;
1da177e4 1420
89bddce5 1421 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1422 if (sock) {
230b1839 1423 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
89bddce5
SH
1424 if (err >= 0) {
1425 err = security_socket_bind(sock,
230b1839 1426 (struct sockaddr *)&address,
89bddce5 1427 addrlen);
6cb153ca
BL
1428 if (!err)
1429 err = sock->ops->bind(sock,
89bddce5 1430 (struct sockaddr *)
230b1839 1431 &address, addrlen);
1da177e4 1432 }
6cb153ca 1433 fput_light(sock->file, fput_needed);
89bddce5 1434 }
1da177e4
LT
1435 return err;
1436}
1437
1da177e4
LT
1438/*
1439 * Perform a listen. Basically, we allow the protocol to do anything
1440 * necessary for a listen, and if that works, we mark the socket as
1441 * ready for listening.
1442 */
1443
3e0fa65f 1444SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1445{
1446 struct socket *sock;
6cb153ca 1447 int err, fput_needed;
b8e1f9b5 1448 int somaxconn;
89bddce5
SH
1449
1450 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1451 if (sock) {
8efa6e93 1452 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
b8e1f9b5
PE
1453 if ((unsigned)backlog > somaxconn)
1454 backlog = somaxconn;
1da177e4
LT
1455
1456 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1457 if (!err)
1458 err = sock->ops->listen(sock, backlog);
1da177e4 1459
6cb153ca 1460 fput_light(sock->file, fput_needed);
1da177e4
LT
1461 }
1462 return err;
1463}
1464
1da177e4
LT
1465/*
1466 * For accept, we attempt to create a new socket, set up the link
1467 * with the client, wake up the client, then return the new
1468 * connected fd. We collect the address of the connector in kernel
1469 * space and move it to user at the very end. This is unclean because
1470 * we open the socket then return an error.
1471 *
1472 * 1003.1g adds the ability to recvmsg() to query connection pending
1473 * status to recvmsg. We need to add that support in a way that's
1474 * clean when we restructure accept also.
1475 */
1476
20f37034
HC
1477SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1478 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1479{
1480 struct socket *sock, *newsock;
39d8c1b6 1481 struct file *newfile;
6cb153ca 1482 int err, len, newfd, fput_needed;
230b1839 1483 struct sockaddr_storage address;
1da177e4 1484
77d27200 1485 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1486 return -EINVAL;
1487
1488 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1489 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1490
6cb153ca 1491 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1492 if (!sock)
1493 goto out;
1494
1495 err = -ENFILE;
89bddce5 1496 if (!(newsock = sock_alloc()))
1da177e4
LT
1497 goto out_put;
1498
1499 newsock->type = sock->type;
1500 newsock->ops = sock->ops;
1501
1da177e4
LT
1502 /*
1503 * We don't need try_module_get here, as the listening socket (sock)
1504 * has the protocol module (sock->ops->owner) held.
1505 */
1506 __module_get(newsock->ops->owner);
1507
7cbe66b6 1508 newfd = sock_alloc_file(newsock, &newfile, flags);
39d8c1b6
DM
1509 if (unlikely(newfd < 0)) {
1510 err = newfd;
9a1875e6
DM
1511 sock_release(newsock);
1512 goto out_put;
39d8c1b6
DM
1513 }
1514
a79af59e
FF
1515 err = security_socket_accept(sock, newsock);
1516 if (err)
39d8c1b6 1517 goto out_fd;
a79af59e 1518
1da177e4
LT
1519 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1520 if (err < 0)
39d8c1b6 1521 goto out_fd;
1da177e4
LT
1522
1523 if (upeer_sockaddr) {
230b1839 1524 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1525 &len, 2) < 0) {
1da177e4 1526 err = -ECONNABORTED;
39d8c1b6 1527 goto out_fd;
1da177e4 1528 }
230b1839
YH
1529 err = move_addr_to_user((struct sockaddr *)&address,
1530 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1531 if (err < 0)
39d8c1b6 1532 goto out_fd;
1da177e4
LT
1533 }
1534
1535 /* File flags are not inherited via accept() unlike another OSes. */
1536
39d8c1b6
DM
1537 fd_install(newfd, newfile);
1538 err = newfd;
1da177e4 1539
1da177e4 1540out_put:
6cb153ca 1541 fput_light(sock->file, fput_needed);
1da177e4
LT
1542out:
1543 return err;
39d8c1b6 1544out_fd:
9606a216 1545 fput(newfile);
39d8c1b6 1546 put_unused_fd(newfd);
1da177e4
LT
1547 goto out_put;
1548}
1549
20f37034
HC
1550SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1551 int __user *, upeer_addrlen)
aaca0bdc 1552{
de11defe 1553 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1554}
1555
1da177e4
LT
1556/*
1557 * Attempt to connect to a socket with the server address. The address
1558 * is in user space so we verify it is OK and move it to kernel space.
1559 *
1560 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1561 * break bindings
1562 *
1563 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1564 * other SEQPACKET protocols that take time to connect() as it doesn't
1565 * include the -EINPROGRESS status for such sockets.
1566 */
1567
20f37034
HC
1568SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1569 int, addrlen)
1da177e4
LT
1570{
1571 struct socket *sock;
230b1839 1572 struct sockaddr_storage address;
6cb153ca 1573 int err, fput_needed;
1da177e4 1574
6cb153ca 1575 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1576 if (!sock)
1577 goto out;
230b1839 1578 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
1da177e4
LT
1579 if (err < 0)
1580 goto out_put;
1581
89bddce5 1582 err =
230b1839 1583 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1584 if (err)
1585 goto out_put;
1586
230b1839 1587 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1588 sock->file->f_flags);
1589out_put:
6cb153ca 1590 fput_light(sock->file, fput_needed);
1da177e4
LT
1591out:
1592 return err;
1593}
1594
1595/*
1596 * Get the local address ('name') of a socket object. Move the obtained
1597 * name to user space.
1598 */
1599
20f37034
HC
1600SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1601 int __user *, usockaddr_len)
1da177e4
LT
1602{
1603 struct socket *sock;
230b1839 1604 struct sockaddr_storage address;
6cb153ca 1605 int len, err, fput_needed;
89bddce5 1606
6cb153ca 1607 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1608 if (!sock)
1609 goto out;
1610
1611 err = security_socket_getsockname(sock);
1612 if (err)
1613 goto out_put;
1614
230b1839 1615 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1616 if (err)
1617 goto out_put;
230b1839 1618 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1619
1620out_put:
6cb153ca 1621 fput_light(sock->file, fput_needed);
1da177e4
LT
1622out:
1623 return err;
1624}
1625
1626/*
1627 * Get the remote address ('name') of a socket object. Move the obtained
1628 * name to user space.
1629 */
1630
20f37034
HC
1631SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1632 int __user *, usockaddr_len)
1da177e4
LT
1633{
1634 struct socket *sock;
230b1839 1635 struct sockaddr_storage address;
6cb153ca 1636 int len, err, fput_needed;
1da177e4 1637
89bddce5
SH
1638 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1639 if (sock != NULL) {
1da177e4
LT
1640 err = security_socket_getpeername(sock);
1641 if (err) {
6cb153ca 1642 fput_light(sock->file, fput_needed);
1da177e4
LT
1643 return err;
1644 }
1645
89bddce5 1646 err =
230b1839 1647 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1648 1);
1da177e4 1649 if (!err)
230b1839 1650 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr,
89bddce5 1651 usockaddr_len);
6cb153ca 1652 fput_light(sock->file, fput_needed);
1da177e4
LT
1653 }
1654 return err;
1655}
1656
1657/*
1658 * Send a datagram to a given address. We move the address into kernel
1659 * space and check the user space data area is readable before invoking
1660 * the protocol.
1661 */
1662
3e0fa65f
HC
1663SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1664 unsigned, flags, struct sockaddr __user *, addr,
1665 int, addr_len)
1da177e4
LT
1666{
1667 struct socket *sock;
230b1839 1668 struct sockaddr_storage address;
1da177e4
LT
1669 int err;
1670 struct msghdr msg;
1671 struct iovec iov;
6cb153ca 1672 int fput_needed;
6cb153ca 1673
de0fa95c
PE
1674 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1675 if (!sock)
4387ff75 1676 goto out;
6cb153ca 1677
89bddce5
SH
1678 iov.iov_base = buff;
1679 iov.iov_len = len;
1680 msg.msg_name = NULL;
1681 msg.msg_iov = &iov;
1682 msg.msg_iovlen = 1;
1683 msg.msg_control = NULL;
1684 msg.msg_controllen = 0;
1685 msg.msg_namelen = 0;
6cb153ca 1686 if (addr) {
230b1839 1687 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address);
1da177e4
LT
1688 if (err < 0)
1689 goto out_put;
230b1839 1690 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1691 msg.msg_namelen = addr_len;
1da177e4
LT
1692 }
1693 if (sock->file->f_flags & O_NONBLOCK)
1694 flags |= MSG_DONTWAIT;
1695 msg.msg_flags = flags;
1696 err = sock_sendmsg(sock, &msg, len);
1697
89bddce5 1698out_put:
de0fa95c 1699 fput_light(sock->file, fput_needed);
4387ff75 1700out:
1da177e4
LT
1701 return err;
1702}
1703
1704/*
89bddce5 1705 * Send a datagram down a socket.
1da177e4
LT
1706 */
1707
3e0fa65f
HC
1708SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
1709 unsigned, flags)
1da177e4
LT
1710{
1711 return sys_sendto(fd, buff, len, flags, NULL, 0);
1712}
1713
1714/*
89bddce5 1715 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1716 * sender. We verify the buffers are writable and if needed move the
1717 * sender address from kernel to user space.
1718 */
1719
3e0fa65f
HC
1720SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1721 unsigned, flags, struct sockaddr __user *, addr,
1722 int __user *, addr_len)
1da177e4
LT
1723{
1724 struct socket *sock;
1725 struct iovec iov;
1726 struct msghdr msg;
230b1839 1727 struct sockaddr_storage address;
89bddce5 1728 int err, err2;
6cb153ca
BL
1729 int fput_needed;
1730
de0fa95c 1731 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1732 if (!sock)
de0fa95c 1733 goto out;
1da177e4 1734
89bddce5
SH
1735 msg.msg_control = NULL;
1736 msg.msg_controllen = 0;
1737 msg.msg_iovlen = 1;
1738 msg.msg_iov = &iov;
1739 iov.iov_len = size;
1740 iov.iov_base = ubuf;
230b1839
YH
1741 msg.msg_name = (struct sockaddr *)&address;
1742 msg.msg_namelen = sizeof(address);
1da177e4
LT
1743 if (sock->file->f_flags & O_NONBLOCK)
1744 flags |= MSG_DONTWAIT;
89bddce5 1745 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1746
89bddce5 1747 if (err >= 0 && addr != NULL) {
230b1839
YH
1748 err2 = move_addr_to_user((struct sockaddr *)&address,
1749 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1750 if (err2 < 0)
1751 err = err2;
1da177e4 1752 }
de0fa95c
PE
1753
1754 fput_light(sock->file, fput_needed);
4387ff75 1755out:
1da177e4
LT
1756 return err;
1757}
1758
1759/*
89bddce5 1760 * Receive a datagram from a socket.
1da177e4
LT
1761 */
1762
89bddce5
SH
1763asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1764 unsigned flags)
1da177e4
LT
1765{
1766 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1767}
1768
1769/*
1770 * Set a socket option. Because we don't know the option lengths we have
1771 * to pass the user mode parameter for the protocols to sort out.
1772 */
1773
20f37034
HC
1774SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1775 char __user *, optval, int, optlen)
1da177e4 1776{
6cb153ca 1777 int err, fput_needed;
1da177e4
LT
1778 struct socket *sock;
1779
1780 if (optlen < 0)
1781 return -EINVAL;
89bddce5
SH
1782
1783 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1784 if (sock != NULL) {
1785 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1786 if (err)
1787 goto out_put;
1da177e4
LT
1788
1789 if (level == SOL_SOCKET)
89bddce5
SH
1790 err =
1791 sock_setsockopt(sock, level, optname, optval,
1792 optlen);
1da177e4 1793 else
89bddce5
SH
1794 err =
1795 sock->ops->setsockopt(sock, level, optname, optval,
1796 optlen);
6cb153ca
BL
1797out_put:
1798 fput_light(sock->file, fput_needed);
1da177e4
LT
1799 }
1800 return err;
1801}
1802
1803/*
1804 * Get a socket option. Because we don't know the option lengths we have
1805 * to pass a user mode parameter for the protocols to sort out.
1806 */
1807
20f37034
HC
1808SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1809 char __user *, optval, int __user *, optlen)
1da177e4 1810{
6cb153ca 1811 int err, fput_needed;
1da177e4
LT
1812 struct socket *sock;
1813
89bddce5
SH
1814 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1815 if (sock != NULL) {
6cb153ca
BL
1816 err = security_socket_getsockopt(sock, level, optname);
1817 if (err)
1818 goto out_put;
1da177e4
LT
1819
1820 if (level == SOL_SOCKET)
89bddce5
SH
1821 err =
1822 sock_getsockopt(sock, level, optname, optval,
1823 optlen);
1da177e4 1824 else
89bddce5
SH
1825 err =
1826 sock->ops->getsockopt(sock, level, optname, optval,
1827 optlen);
6cb153ca
BL
1828out_put:
1829 fput_light(sock->file, fput_needed);
1da177e4
LT
1830 }
1831 return err;
1832}
1833
1da177e4
LT
1834/*
1835 * Shutdown a socket.
1836 */
1837
754fe8d2 1838SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1839{
6cb153ca 1840 int err, fput_needed;
1da177e4
LT
1841 struct socket *sock;
1842
89bddce5
SH
1843 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1844 if (sock != NULL) {
1da177e4 1845 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1846 if (!err)
1847 err = sock->ops->shutdown(sock, how);
1848 fput_light(sock->file, fput_needed);
1da177e4
LT
1849 }
1850 return err;
1851}
1852
/*
 * Helpers yielding the address of a msghdr field that has the same type
 * (int / unsigned) in the 32-bit compat and the native layout on our
 * platforms.  They expand to references to `flags' and `<msg>_compat',
 * so both names must be in scope wherever the macros are used.
 */
#define COMPAT_MSG(msg, member)	\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1859
1da177e4
LT
1860/*
1861 * BSD sendmsg interface
1862 */
1863
3e0fa65f 1864SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1da177e4 1865{
89bddce5
SH
1866 struct compat_msghdr __user *msg_compat =
1867 (struct compat_msghdr __user *)msg;
1da177e4 1868 struct socket *sock;
230b1839 1869 struct sockaddr_storage address;
1da177e4 1870 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1871 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1872 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1873 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1874 unsigned char *ctl_buf = ctl;
1875 struct msghdr msg_sys;
1876 int err, ctl_len, iov_size, total_len;
6cb153ca 1877 int fput_needed;
89bddce5 1878
1da177e4
LT
1879 err = -EFAULT;
1880 if (MSG_CMSG_COMPAT & flags) {
1881 if (get_compat_msghdr(&msg_sys, msg_compat))
1882 return -EFAULT;
89bddce5
SH
1883 }
1884 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1885 return -EFAULT;
1886
6cb153ca 1887 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1888 if (!sock)
1da177e4
LT
1889 goto out;
1890
1891 /* do not move before msg_sys is valid */
1892 err = -EMSGSIZE;
1893 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1894 goto out_put;
1895
89bddce5 1896 /* Check whether to allocate the iovec area */
1da177e4
LT
1897 err = -ENOMEM;
1898 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1899 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1900 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1901 if (!iov)
1902 goto out_put;
1903 }
1904
1905 /* This will also move the address data into kernel space */
1906 if (MSG_CMSG_COMPAT & flags) {
230b1839
YH
1907 err = verify_compat_iovec(&msg_sys, iov,
1908 (struct sockaddr *)&address,
1909 VERIFY_READ);
1da177e4 1910 } else
230b1839
YH
1911 err = verify_iovec(&msg_sys, iov,
1912 (struct sockaddr *)&address,
1913 VERIFY_READ);
89bddce5 1914 if (err < 0)
1da177e4
LT
1915 goto out_freeiov;
1916 total_len = err;
1917
1918 err = -ENOBUFS;
1919
1920 if (msg_sys.msg_controllen > INT_MAX)
1921 goto out_freeiov;
89bddce5 1922 ctl_len = msg_sys.msg_controllen;
1da177e4 1923 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1924 err =
1925 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1926 sizeof(ctl));
1da177e4
LT
1927 if (err)
1928 goto out_freeiov;
1929 ctl_buf = msg_sys.msg_control;
8920e8f9 1930 ctl_len = msg_sys.msg_controllen;
1da177e4 1931 } else if (ctl_len) {
89bddce5 1932 if (ctl_len > sizeof(ctl)) {
1da177e4 1933 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1934 if (ctl_buf == NULL)
1da177e4
LT
1935 goto out_freeiov;
1936 }
1937 err = -EFAULT;
1938 /*
1939 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1940 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1941 * checking falls down on this.
1942 */
89bddce5
SH
1943 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1944 ctl_len))
1da177e4
LT
1945 goto out_freectl;
1946 msg_sys.msg_control = ctl_buf;
1947 }
1948 msg_sys.msg_flags = flags;
1949
1950 if (sock->file->f_flags & O_NONBLOCK)
1951 msg_sys.msg_flags |= MSG_DONTWAIT;
1952 err = sock_sendmsg(sock, &msg_sys, total_len);
1953
1954out_freectl:
89bddce5 1955 if (ctl_buf != ctl)
1da177e4
LT
1956 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1957out_freeiov:
1958 if (iov != iovstack)
1959 sock_kfree_s(sock->sk, iov, iov_size);
1960out_put:
6cb153ca 1961 fput_light(sock->file, fput_needed);
89bddce5 1962out:
1da177e4
LT
1963 return err;
1964}
1965
a2e27255
ACM
1966static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
1967 struct msghdr *msg_sys, unsigned flags, int nosec)
1da177e4 1968{
89bddce5
SH
1969 struct compat_msghdr __user *msg_compat =
1970 (struct compat_msghdr __user *)msg;
1da177e4 1971 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1972 struct iovec *iov = iovstack;
1da177e4
LT
1973 unsigned long cmsg_ptr;
1974 int err, iov_size, total_len, len;
1975
1976 /* kernel mode address */
230b1839 1977 struct sockaddr_storage addr;
1da177e4
LT
1978
1979 /* user mode address pointers */
1980 struct sockaddr __user *uaddr;
1981 int __user *uaddr_len;
89bddce5 1982
1da177e4 1983 if (MSG_CMSG_COMPAT & flags) {
a2e27255 1984 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 1985 return -EFAULT;
89bddce5 1986 }
a2e27255 1987 else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
89bddce5 1988 return -EFAULT;
1da177e4 1989
1da177e4 1990 err = -EMSGSIZE;
a2e27255
ACM
1991 if (msg_sys->msg_iovlen > UIO_MAXIOV)
1992 goto out;
89bddce5
SH
1993
1994 /* Check whether to allocate the iovec area */
1da177e4 1995 err = -ENOMEM;
a2e27255
ACM
1996 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
1997 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1da177e4
LT
1998 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1999 if (!iov)
a2e27255 2000 goto out;
1da177e4
LT
2001 }
2002
2003 /*
89bddce5
SH
2004 * Save the user-mode address (verify_iovec will change the
2005 * kernel msghdr to use the kernel address space)
1da177e4 2006 */
89bddce5 2007
a2e27255 2008 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4
LT
2009 uaddr_len = COMPAT_NAMELEN(msg);
2010 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2011 err = verify_compat_iovec(msg_sys, iov,
230b1839
YH
2012 (struct sockaddr *)&addr,
2013 VERIFY_WRITE);
1da177e4 2014 } else
a2e27255 2015 err = verify_iovec(msg_sys, iov,
230b1839
YH
2016 (struct sockaddr *)&addr,
2017 VERIFY_WRITE);
1da177e4
LT
2018 if (err < 0)
2019 goto out_freeiov;
89bddce5 2020 total_len = err;
1da177e4 2021
a2e27255
ACM
2022 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2023 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2024
1da177e4
LT
2025 if (sock->file->f_flags & O_NONBLOCK)
2026 flags |= MSG_DONTWAIT;
a2e27255
ACM
2027 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2028 total_len, flags);
1da177e4
LT
2029 if (err < 0)
2030 goto out_freeiov;
2031 len = err;
2032
2033 if (uaddr != NULL) {
230b1839 2034 err = move_addr_to_user((struct sockaddr *)&addr,
a2e27255 2035 msg_sys->msg_namelen, uaddr,
89bddce5 2036 uaddr_len);
1da177e4
LT
2037 if (err < 0)
2038 goto out_freeiov;
2039 }
a2e27255 2040 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2041 COMPAT_FLAGS(msg));
1da177e4
LT
2042 if (err)
2043 goto out_freeiov;
2044 if (MSG_CMSG_COMPAT & flags)
a2e27255 2045 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2046 &msg_compat->msg_controllen);
2047 else
a2e27255 2048 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2049 &msg->msg_controllen);
2050 if (err)
2051 goto out_freeiov;
2052 err = len;
2053
2054out_freeiov:
2055 if (iov != iovstack)
2056 sock_kfree_s(sock->sk, iov, iov_size);
a2e27255
ACM
2057out:
2058 return err;
2059}
2060
2061/*
2062 * BSD recvmsg interface
2063 */
2064
2065SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2066 unsigned int, flags)
2067{
2068 int fput_needed, err;
2069 struct msghdr msg_sys;
2070 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2071
2072 if (!sock)
2073 goto out;
2074
2075 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2076
6cb153ca 2077 fput_light(sock->file, fput_needed);
1da177e4
LT
2078out:
2079 return err;
2080}
2081
a2e27255
ACM
2082/*
2083 * Linux recvmmsg interface
2084 */
2085
2086int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2087 unsigned int flags, struct timespec *timeout)
2088{
2089 int fput_needed, err, datagrams;
2090 struct socket *sock;
2091 struct mmsghdr __user *entry;
d7256d0e 2092 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2093 struct msghdr msg_sys;
2094 struct timespec end_time;
2095
2096 if (timeout &&
2097 poll_select_set_timeout(&end_time, timeout->tv_sec,
2098 timeout->tv_nsec))
2099 return -EINVAL;
2100
2101 datagrams = 0;
2102
2103 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2104 if (!sock)
2105 return err;
2106
2107 err = sock_error(sock->sk);
2108 if (err)
2109 goto out_put;
2110
2111 entry = mmsg;
d7256d0e 2112 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2113
2114 while (datagrams < vlen) {
2115 /*
2116 * No need to ask LSM for more than the first datagram.
2117 */
d7256d0e
JMG
2118 if (MSG_CMSG_COMPAT & flags) {
2119 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
2120 &msg_sys, flags, datagrams);
2121 if (err < 0)
2122 break;
2123 err = __put_user(err, &compat_entry->msg_len);
2124 ++compat_entry;
2125 } else {
2126 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
2127 &msg_sys, flags, datagrams);
2128 if (err < 0)
2129 break;
2130 err = put_user(err, &entry->msg_len);
2131 ++entry;
2132 }
2133
a2e27255
ACM
2134 if (err)
2135 break;
a2e27255
ACM
2136 ++datagrams;
2137
71c5c159
BB
2138 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2139 if (flags & MSG_WAITFORONE)
2140 flags |= MSG_DONTWAIT;
2141
a2e27255
ACM
2142 if (timeout) {
2143 ktime_get_ts(timeout);
2144 *timeout = timespec_sub(end_time, *timeout);
2145 if (timeout->tv_sec < 0) {
2146 timeout->tv_sec = timeout->tv_nsec = 0;
2147 break;
2148 }
2149
2150 /* Timeout, return less than vlen datagrams */
2151 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2152 break;
2153 }
2154
2155 /* Out of band data, return right away */
2156 if (msg_sys.msg_flags & MSG_OOB)
2157 break;
2158 }
2159
2160out_put:
2161 fput_light(sock->file, fput_needed);
1da177e4 2162
a2e27255
ACM
2163 if (err == 0)
2164 return datagrams;
2165
2166 if (datagrams != 0) {
2167 /*
2168 * We may return less entries than requested (vlen) if the
2169 * sock is non block and there aren't enough datagrams...
2170 */
2171 if (err != -EAGAIN) {
2172 /*
2173 * ... or if recvmsg returns an error after we
2174 * received some datagrams, where we record the
2175 * error to return on the next call or if the
2176 * app asks about it using getsockopt(SO_ERROR).
2177 */
2178 sock->sk->sk_err = -err;
2179 }
2180
2181 return datagrams;
2182 }
2183
2184 return err;
2185}
2186
2187SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2188 unsigned int, vlen, unsigned int, flags,
2189 struct timespec __user *, timeout)
2190{
2191 int datagrams;
2192 struct timespec timeout_sys;
2193
2194 if (!timeout)
2195 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2196
2197 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2198 return -EFAULT;
2199
2200 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2201
2202 if (datagrams > 0 &&
2203 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2204 datagrams = -EFAULT;
2205
2206 return datagrams;
2207}
2208
2209#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by the
 * socketcall call number.  Slot 0 is unused (call numbers start at 1).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[20] = {
	AL(0),	/*  0 */
	AL(3),	/*  1 */
	AL(3),	/*  2 */
	AL(3),	/*  3 */
	AL(2),	/*  4 */
	AL(3),	/*  5 */
	AL(3),	/*  6 */
	AL(3),	/*  7 */
	AL(4),	/*  8 */
	AL(4),	/*  9 */
	AL(4),	/* 10 */
	AL(6),	/* 11 */
	AL(6),	/* 12 */
	AL(2),	/* 13 */
	AL(5),	/* 14 */
	AL(5),	/* 15 */
	AL(3),	/* 16 */
	AL(3),	/* 17 */
	AL(4),	/* 18 */
	AL(5),	/* 19 */
};

#undef AL
2220
2221/*
89bddce5 2222 * System call vectors.
1da177e4
LT
2223 *
2224 * Argument checking cleaned up. Saved 20% in size.
2225 * This function doesn't need to set the kernel lock because
89bddce5 2226 * it is set by the callees.
1da177e4
LT
2227 */
2228
3e0fa65f 2229SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4
LT
2230{
2231 unsigned long a[6];
89bddce5 2232 unsigned long a0, a1;
1da177e4 2233 int err;
47379052 2234 unsigned int len;
1da177e4 2235
a2e27255 2236 if (call < 1 || call > SYS_RECVMMSG)
1da177e4
LT
2237 return -EINVAL;
2238
47379052
AV
2239 len = nargs[call];
2240 if (len > sizeof(a))
2241 return -EINVAL;
2242
1da177e4 2243 /* copy_from_user should be SMP safe. */
47379052 2244 if (copy_from_user(a, args, len))
1da177e4 2245 return -EFAULT;
3ec3b2fb 2246
f3298dc4 2247 audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb 2248
89bddce5
SH
2249 a0 = a[0];
2250 a1 = a[1];
2251
2252 switch (call) {
2253 case SYS_SOCKET:
2254 err = sys_socket(a0, a1, a[2]);
2255 break;
2256 case SYS_BIND:
2257 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2258 break;
2259 case SYS_CONNECT:
2260 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2261 break;
2262 case SYS_LISTEN:
2263 err = sys_listen(a0, a1);
2264 break;
2265 case SYS_ACCEPT:
de11defe
UD
2266 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2267 (int __user *)a[2], 0);
89bddce5
SH
2268 break;
2269 case SYS_GETSOCKNAME:
2270 err =
2271 sys_getsockname(a0, (struct sockaddr __user *)a1,
2272 (int __user *)a[2]);
2273 break;
2274 case SYS_GETPEERNAME:
2275 err =
2276 sys_getpeername(a0, (struct sockaddr __user *)a1,
2277 (int __user *)a[2]);
2278 break;
2279 case SYS_SOCKETPAIR:
2280 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2281 break;
2282 case SYS_SEND:
2283 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2284 break;
2285 case SYS_SENDTO:
2286 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2287 (struct sockaddr __user *)a[4], a[5]);
2288 break;
2289 case SYS_RECV:
2290 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2291 break;
2292 case SYS_RECVFROM:
2293 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2294 (struct sockaddr __user *)a[4],
2295 (int __user *)a[5]);
2296 break;
2297 case SYS_SHUTDOWN:
2298 err = sys_shutdown(a0, a1);
2299 break;
2300 case SYS_SETSOCKOPT:
2301 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2302 break;
2303 case SYS_GETSOCKOPT:
2304 err =
2305 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2306 (int __user *)a[4]);
2307 break;
2308 case SYS_SENDMSG:
2309 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2310 break;
2311 case SYS_RECVMSG:
2312 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2313 break;
a2e27255
ACM
2314 case SYS_RECVMMSG:
2315 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2316 (struct timespec __user *)a[4]);
2317 break;
de11defe
UD
2318 case SYS_ACCEPT4:
2319 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2320 (int __user *)a[2], a[3]);
aaca0bdc 2321 break;
89bddce5
SH
2322 default:
2323 err = -EINVAL;
2324 break;
1da177e4
LT
2325 }
2326 return err;
2327}
2328
89bddce5 2329#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2330
55737fda
SH
2331/**
2332 * sock_register - add a socket protocol handler
2333 * @ops: description of protocol
2334 *
1da177e4
LT
2335 * This function is called by a protocol handler that wants to
2336 * advertise its address family, and have it linked into the
55737fda
SH
2337 * socket interface. The value ops->family coresponds to the
2338 * socket system call protocol family.
1da177e4 2339 */
f0fd27d4 2340int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2341{
2342 int err;
2343
2344 if (ops->family >= NPROTO) {
89bddce5
SH
2345 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2346 NPROTO);
1da177e4
LT
2347 return -ENOBUFS;
2348 }
55737fda
SH
2349
2350 spin_lock(&net_family_lock);
2351 if (net_families[ops->family])
2352 err = -EEXIST;
2353 else {
89bddce5 2354 net_families[ops->family] = ops;
1da177e4
LT
2355 err = 0;
2356 }
55737fda
SH
2357 spin_unlock(&net_family_lock);
2358
89bddce5 2359 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2360 return err;
2361}
2362
55737fda
SH
2363/**
2364 * sock_unregister - remove a protocol handler
2365 * @family: protocol family to remove
2366 *
1da177e4
LT
2367 * This function is called by a protocol handler that wants to
2368 * remove its address family, and have it unlinked from the
55737fda
SH
2369 * new socket creation.
2370 *
2371 * If protocol handler is a module, then it can use module reference
2372 * counts to protect against new references. If protocol handler is not
2373 * a module then it needs to provide its own protection in
2374 * the ops->create routine.
1da177e4 2375 */
f0fd27d4 2376void sock_unregister(int family)
1da177e4 2377{
f0fd27d4 2378 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2379
55737fda 2380 spin_lock(&net_family_lock);
89bddce5 2381 net_families[family] = NULL;
55737fda
SH
2382 spin_unlock(&net_family_lock);
2383
2384 synchronize_rcu();
2385
89bddce5 2386 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2387}
2388
77d76ea3 2389static int __init sock_init(void)
1da177e4
LT
2390{
2391 /*
89bddce5 2392 * Initialize sock SLAB cache.
1da177e4 2393 */
89bddce5 2394
1da177e4
LT
2395 sk_init();
2396
1da177e4 2397 /*
89bddce5 2398 * Initialize skbuff SLAB cache
1da177e4
LT
2399 */
2400 skb_init();
1da177e4
LT
2401
2402 /*
89bddce5 2403 * Initialize the protocols module.
1da177e4
LT
2404 */
2405
2406 init_inodecache();
2407 register_filesystem(&sock_fs_type);
2408 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2409
2410 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2411 */
2412
2413#ifdef CONFIG_NETFILTER
2414 netfilter_init();
2415#endif
cbeb321a
DM
2416
2417 return 0;
1da177e4
LT
2418}
2419
77d76ea3
AK
2420core_initcall(sock_init); /* early initcall */
2421
1da177e4
LT
2422#ifdef CONFIG_PROC_FS
2423void socket_seq_show(struct seq_file *seq)
2424{
2425 int cpu;
2426 int counter = 0;
2427
6f912042 2428 for_each_possible_cpu(cpu)
89bddce5 2429 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2430
2431 /* It can be negative, by the way. 8) */
2432 if (counter < 0)
2433 counter = 0;
2434
2435 seq_printf(seq, "sockets: used %d\n", counter);
2436}
89bddce5 2437#endif /* CONFIG_PROC_FS */
1da177e4 2438
89bbfc95 2439#ifdef CONFIG_COMPAT
6b96018b
AB
2440static int do_siocgstamp(struct net *net, struct socket *sock,
2441 unsigned int cmd, struct compat_timeval __user *up)
7a229387 2442{
7a229387
AB
2443 mm_segment_t old_fs = get_fs();
2444 struct timeval ktv;
2445 int err;
2446
2447 set_fs(KERNEL_DS);
6b96018b 2448 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387
AB
2449 set_fs(old_fs);
2450 if (!err) {
2451 err = put_user(ktv.tv_sec, &up->tv_sec);
2452 err |= __put_user(ktv.tv_usec, &up->tv_usec);
2453 }
2454 return err;
2455}
2456
6b96018b
AB
2457static int do_siocgstampns(struct net *net, struct socket *sock,
2458 unsigned int cmd, struct compat_timespec __user *up)
7a229387 2459{
7a229387
AB
2460 mm_segment_t old_fs = get_fs();
2461 struct timespec kts;
2462 int err;
2463
2464 set_fs(KERNEL_DS);
6b96018b 2465 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387
AB
2466 set_fs(old_fs);
2467 if (!err) {
2468 err = put_user(kts.tv_sec, &up->tv_sec);
2469 err |= __put_user(kts.tv_nsec, &up->tv_nsec);
2470 }
2471 return err;
2472}
2473
6b96018b 2474static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2475{
2476 struct ifreq __user *uifr;
2477 int err;
2478
2479 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2480 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2481 return -EFAULT;
2482
6b96018b 2483 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2484 if (err)
2485 return err;
2486
6b96018b 2487 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2488 return -EFAULT;
2489
2490 return 0;
2491}
2492
6b96018b 2493static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2494{
6b96018b 2495 struct compat_ifconf ifc32;
7a229387
AB
2496 struct ifconf ifc;
2497 struct ifconf __user *uifc;
6b96018b 2498 struct compat_ifreq __user *ifr32;
7a229387
AB
2499 struct ifreq __user *ifr;
2500 unsigned int i, j;
2501 int err;
2502
6b96018b 2503 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2504 return -EFAULT;
2505
2506 if (ifc32.ifcbuf == 0) {
2507 ifc32.ifc_len = 0;
2508 ifc.ifc_len = 0;
2509 ifc.ifc_req = NULL;
2510 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2511 } else {
6b96018b 2512 size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) *
7a229387
AB
2513 sizeof (struct ifreq);
2514 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2515 ifc.ifc_len = len;
2516 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2517 ifr32 = compat_ptr(ifc32.ifcbuf);
6b96018b
AB
2518 for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) {
2519 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2520 return -EFAULT;
2521 ifr++;
2522 ifr32++;
2523 }
2524 }
2525 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2526 return -EFAULT;
2527
6b96018b 2528 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2529 if (err)
2530 return err;
2531
2532 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2533 return -EFAULT;
2534
2535 ifr = ifc.ifc_req;
2536 ifr32 = compat_ptr(ifc32.ifcbuf);
2537 for (i = 0, j = 0;
6b96018b
AB
2538 i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2539 i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) {
2540 if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq)))
7a229387
AB
2541 return -EFAULT;
2542 ifr32++;
2543 ifr++;
2544 }
2545
2546 if (ifc32.ifcbuf == 0) {
2547 /* Translate from 64-bit structure multiple to
2548 * a 32-bit one.
2549 */
2550 i = ifc.ifc_len;
6b96018b 2551 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2552 ifc32.ifc_len = i;
2553 } else {
2554 ifc32.ifc_len = i;
2555 }
6b96018b 2556 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2557 return -EFAULT;
2558
2559 return 0;
2560}
2561
6b96018b 2562static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387
AB
2563{
2564 struct ifreq __user *ifr;
7a229387
AB
2565 u32 data;
2566 void __user *datap;
2567
2568 ifr = compat_alloc_user_space(sizeof(*ifr));
7a229387
AB
2569
2570 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2571 return -EFAULT;
2572
2573 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2574 return -EFAULT;
2575
2576 datap = compat_ptr(data);
2577 if (put_user(datap, &ifr->ifr_ifru.ifru_data))
2578 return -EFAULT;
2579
6b96018b 2580 return dev_ioctl(net, SIOCETHTOOL, ifr);
7a229387
AB
2581}
2582
7a50a240
AB
2583static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2584{
2585 void __user *uptr;
2586 compat_uptr_t uptr32;
2587 struct ifreq __user *uifr;
2588
2589 uifr = compat_alloc_user_space(sizeof (*uifr));
2590 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2591 return -EFAULT;
2592
2593 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2594 return -EFAULT;
2595
2596 uptr = compat_ptr(uptr32);
2597
2598 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2599 return -EFAULT;
2600
2601 return dev_ioctl(net, SIOCWANDEV, uifr);
2602}
2603
6b96018b
AB
2604static int bond_ioctl(struct net *net, unsigned int cmd,
2605 struct compat_ifreq __user *ifr32)
7a229387
AB
2606{
2607 struct ifreq kifr;
2608 struct ifreq __user *uifr;
7a229387
AB
2609 mm_segment_t old_fs;
2610 int err;
2611 u32 data;
2612 void __user *datap;
2613
2614 switch (cmd) {
2615 case SIOCBONDENSLAVE:
2616 case SIOCBONDRELEASE:
2617 case SIOCBONDSETHWADDR:
2618 case SIOCBONDCHANGEACTIVE:
6b96018b 2619 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2620 return -EFAULT;
2621
2622 old_fs = get_fs();
2623 set_fs (KERNEL_DS);
6b96018b 2624 err = dev_ioctl(net, cmd, &kifr);
7a229387
AB
2625 set_fs (old_fs);
2626
2627 return err;
2628 case SIOCBONDSLAVEINFOQUERY:
2629 case SIOCBONDINFOQUERY:
2630 uifr = compat_alloc_user_space(sizeof(*uifr));
2631 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2632 return -EFAULT;
2633
2634 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2635 return -EFAULT;
2636
2637 datap = compat_ptr(data);
2638 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2639 return -EFAULT;
2640
6b96018b 2641 return dev_ioctl(net, cmd, uifr);
7a229387
AB
2642 default:
2643 return -EINVAL;
2644 };
2645}
2646
6b96018b
AB
2647static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2648 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2649{
2650 struct ifreq __user *u_ifreq64;
7a229387
AB
2651 char tmp_buf[IFNAMSIZ];
2652 void __user *data64;
2653 u32 data32;
2654
2655 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2656 IFNAMSIZ))
2657 return -EFAULT;
2658 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2659 return -EFAULT;
2660 data64 = compat_ptr(data32);
2661
2662 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2663
2664 /* Don't check these user accesses, just let that get trapped
2665 * in the ioctl handler instead.
2666 */
2667 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2668 IFNAMSIZ))
2669 return -EFAULT;
2670 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
2671 return -EFAULT;
2672
6b96018b 2673 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2674}
2675
6b96018b
AB
2676static int dev_ifsioc(struct net *net, struct socket *sock,
2677 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2678{
a2116ed2 2679 struct ifreq __user *uifr;
7a229387
AB
2680 int err;
2681
a2116ed2
AB
2682 uifr = compat_alloc_user_space(sizeof(*uifr));
2683 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2684 return -EFAULT;
2685
2686 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2687
7a229387
AB
2688 if (!err) {
2689 switch (cmd) {
2690 case SIOCGIFFLAGS:
2691 case SIOCGIFMETRIC:
2692 case SIOCGIFMTU:
2693 case SIOCGIFMEM:
2694 case SIOCGIFHWADDR:
2695 case SIOCGIFINDEX:
2696 case SIOCGIFADDR:
2697 case SIOCGIFBRDADDR:
2698 case SIOCGIFDSTADDR:
2699 case SIOCGIFNETMASK:
fab2532b 2700 case SIOCGIFPFLAGS:
7a229387 2701 case SIOCGIFTXQLEN:
fab2532b
AB
2702 case SIOCGMIIPHY:
2703 case SIOCGMIIREG:
a2116ed2 2704 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2705 err = -EFAULT;
2706 break;
2707 }
2708 }
2709 return err;
2710}
2711
a2116ed2
AB
2712static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2713 struct compat_ifreq __user *uifr32)
2714{
2715 struct ifreq ifr;
2716 struct compat_ifmap __user *uifmap32;
2717 mm_segment_t old_fs;
2718 int err;
2719
2720 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2721 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
2722 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2723 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2724 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2725 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
2726 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
2727 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
2728 if (err)
2729 return -EFAULT;
2730
2731 old_fs = get_fs();
2732 set_fs (KERNEL_DS);
2733 err = dev_ioctl(net, cmd, (void __user *)&ifr);
2734 set_fs (old_fs);
2735
2736 if (cmd == SIOCGIFMAP && !err) {
2737 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
2738 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2739 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2740 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2741 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
2742 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
2743 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
2744 if (err)
2745 err = -EFAULT;
2746 }
2747 return err;
2748}
2749
2750static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
2751{
2752 void __user *uptr;
2753 compat_uptr_t uptr32;
2754 struct ifreq __user *uifr;
2755
2756 uifr = compat_alloc_user_space(sizeof (*uifr));
2757 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2758 return -EFAULT;
2759
2760 if (get_user(uptr32, &uifr32->ifr_data))
2761 return -EFAULT;
2762
2763 uptr = compat_ptr(uptr32);
2764
2765 if (put_user(uptr, &uifr->ifr_data))
2766 return -EFAULT;
2767
2768 return dev_ioctl(net, SIOCSHWTSTAMP, uifr);
2769}
2770
7a229387
AB
2771struct rtentry32 {
2772 u32 rt_pad1;
2773 struct sockaddr rt_dst; /* target address */
2774 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2775 struct sockaddr rt_genmask; /* target network mask (IP) */
2776 unsigned short rt_flags;
2777 short rt_pad2;
2778 u32 rt_pad3;
2779 unsigned char rt_tos;
2780 unsigned char rt_class;
2781 short rt_pad4;
2782 short rt_metric; /* +1 for binary compatibility! */
2783 /* char * */ u32 rt_dev; /* forcing the device at add */
2784 u32 rt_mtu; /* per route MTU/Window */
2785 u32 rt_window; /* Window clamping */
2786 unsigned short rt_irtt; /* Initial RTT */
2787};
2788
2789struct in6_rtmsg32 {
2790 struct in6_addr rtmsg_dst;
2791 struct in6_addr rtmsg_src;
2792 struct in6_addr rtmsg_gateway;
2793 u32 rtmsg_type;
2794 u16 rtmsg_dst_len;
2795 u16 rtmsg_src_len;
2796 u32 rtmsg_metric;
2797 u32 rtmsg_info;
2798 u32 rtmsg_flags;
2799 s32 rtmsg_ifindex;
2800};
2801
6b96018b
AB
2802static int routing_ioctl(struct net *net, struct socket *sock,
2803 unsigned int cmd, void __user *argp)
7a229387
AB
2804{
2805 int ret;
2806 void *r = NULL;
2807 struct in6_rtmsg r6;
2808 struct rtentry r4;
2809 char devname[16];
2810 u32 rtdev;
2811 mm_segment_t old_fs = get_fs();
2812
6b96018b
AB
2813 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2814 struct in6_rtmsg32 __user *ur6 = argp;
7a229387
AB
2815 ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst),
2816 3 * sizeof(struct in6_addr));
2817 ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type));
2818 ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
2819 ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
2820 ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric));
2821 ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info));
2822 ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags));
2823 ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
2824
2825 r = (void *) &r6;
2826 } else { /* ipv4 */
6b96018b 2827 struct rtentry32 __user *ur4 = argp;
7a229387
AB
2828 ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst),
2829 3 * sizeof(struct sockaddr));
2830 ret |= __get_user (r4.rt_flags, &(ur4->rt_flags));
2831 ret |= __get_user (r4.rt_metric, &(ur4->rt_metric));
2832 ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu));
2833 ret |= __get_user (r4.rt_window, &(ur4->rt_window));
2834 ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt));
2835 ret |= __get_user (rtdev, &(ur4->rt_dev));
2836 if (rtdev) {
2837 ret |= copy_from_user (devname, compat_ptr(rtdev), 15);
2838 r4.rt_dev = devname; devname[15] = 0;
2839 } else
2840 r4.rt_dev = NULL;
2841
2842 r = (void *) &r4;
2843 }
2844
2845 if (ret) {
2846 ret = -EFAULT;
2847 goto out;
2848 }
2849
2850 set_fs (KERNEL_DS);
6b96018b 2851 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
7a229387
AB
2852 set_fs (old_fs);
2853
2854out:
7a229387
AB
2855 return ret;
2856}
2857
2858/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
2859 * for some operations; this forces use of the newer bridge-utils that
2860 * use compatiable ioctls
2861 */
6b96018b 2862static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 2863{
6b96018b 2864 compat_ulong_t tmp;
7a229387 2865
6b96018b 2866 if (get_user(tmp, argp))
7a229387
AB
2867 return -EFAULT;
2868 if (tmp == BRCTL_GET_VERSION)
2869 return BRCTL_VERSION + 1;
2870 return -EINVAL;
2871}
2872
6b96018b
AB
2873static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
2874 unsigned int cmd, unsigned long arg)
2875{
2876 void __user *argp = compat_ptr(arg);
2877 struct sock *sk = sock->sk;
2878 struct net *net = sock_net(sk);
7a229387 2879
6b96018b
AB
2880 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
2881 return siocdevprivate_ioctl(net, cmd, argp);
2882
2883 switch (cmd) {
2884 case SIOCSIFBR:
2885 case SIOCGIFBR:
2886 return old_bridge_ioctl(argp);
2887 case SIOCGIFNAME:
2888 return dev_ifname32(net, argp);
2889 case SIOCGIFCONF:
2890 return dev_ifconf(net, argp);
2891 case SIOCETHTOOL:
2892 return ethtool_ioctl(net, argp);
7a50a240
AB
2893 case SIOCWANDEV:
2894 return compat_siocwandev(net, argp);
a2116ed2
AB
2895 case SIOCGIFMAP:
2896 case SIOCSIFMAP:
2897 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
2898 case SIOCBONDENSLAVE:
2899 case SIOCBONDRELEASE:
2900 case SIOCBONDSETHWADDR:
2901 case SIOCBONDSLAVEINFOQUERY:
2902 case SIOCBONDINFOQUERY:
2903 case SIOCBONDCHANGEACTIVE:
2904 return bond_ioctl(net, cmd, argp);
2905 case SIOCADDRT:
2906 case SIOCDELRT:
2907 return routing_ioctl(net, sock, cmd, argp);
2908 case SIOCGSTAMP:
2909 return do_siocgstamp(net, sock, cmd, argp);
2910 case SIOCGSTAMPNS:
2911 return do_siocgstampns(net, sock, cmd, argp);
a2116ed2
AB
2912 case SIOCSHWTSTAMP:
2913 return compat_siocshwtstamp(net, argp);
6b96018b
AB
2914
2915 case FIOSETOWN:
2916 case SIOCSPGRP:
2917 case FIOGETOWN:
2918 case SIOCGPGRP:
2919 case SIOCBRADDBR:
2920 case SIOCBRDELBR:
2921 case SIOCGIFVLAN:
2922 case SIOCSIFVLAN:
2923 case SIOCADDDLCI:
2924 case SIOCDELDLCI:
2925 return sock_ioctl(file, cmd, arg);
2926
2927 case SIOCGIFFLAGS:
2928 case SIOCSIFFLAGS:
2929 case SIOCGIFMETRIC:
2930 case SIOCSIFMETRIC:
2931 case SIOCGIFMTU:
2932 case SIOCSIFMTU:
2933 case SIOCGIFMEM:
2934 case SIOCSIFMEM:
2935 case SIOCGIFHWADDR:
2936 case SIOCSIFHWADDR:
2937 case SIOCADDMULTI:
2938 case SIOCDELMULTI:
2939 case SIOCGIFINDEX:
6b96018b
AB
2940 case SIOCGIFADDR:
2941 case SIOCSIFADDR:
2942 case SIOCSIFHWBROADCAST:
6b96018b 2943 case SIOCDIFADDR:
6b96018b
AB
2944 case SIOCGIFBRDADDR:
2945 case SIOCSIFBRDADDR:
2946 case SIOCGIFDSTADDR:
2947 case SIOCSIFDSTADDR:
2948 case SIOCGIFNETMASK:
2949 case SIOCSIFNETMASK:
2950 case SIOCSIFPFLAGS:
2951 case SIOCGIFPFLAGS:
2952 case SIOCGIFTXQLEN:
2953 case SIOCSIFTXQLEN:
2954 case SIOCBRADDIF:
2955 case SIOCBRDELIF:
9177efd3
AB
2956 case SIOCSIFNAME:
2957 case SIOCGMIIPHY:
2958 case SIOCGMIIREG:
2959 case SIOCSMIIREG:
6b96018b 2960 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 2961
6b96018b
AB
2962 case SIOCSARP:
2963 case SIOCGARP:
2964 case SIOCDARP:
6b96018b 2965 case SIOCATMARK:
9177efd3
AB
2966 return sock_do_ioctl(net, sock, cmd, arg);
2967 }
2968
2969 /* Prevent warning from compat_sys_ioctl, these always
2970 * result in -EINVAL in the native case anyway. */
2971 switch (cmd) {
2972 case SIOCRTMSG:
2973 case SIOCGIFCOUNT:
6b96018b
AB
2974 case SIOCSRARP:
2975 case SIOCGRARP:
2976 case SIOCDRARP:
9177efd3
AB
2977 case SIOCSIFLINK:
2978 case SIOCGIFSLAVE:
2979 case SIOCSIFSLAVE:
2980 return -EINVAL;
6b96018b
AB
2981 }
2982
2983 return -ENOIOCTLCMD;
2984}
7a229387 2985
89bbfc95 2986static long compat_sock_ioctl(struct file *file, unsigned cmd,
89bddce5 2987 unsigned long arg)
89bbfc95
SP
2988{
2989 struct socket *sock = file->private_data;
2990 int ret = -ENOIOCTLCMD;
87de87d5
DM
2991 struct sock *sk;
2992 struct net *net;
2993
2994 sk = sock->sk;
2995 net = sock_net(sk);
89bbfc95
SP
2996
2997 if (sock->ops->compat_ioctl)
2998 ret = sock->ops->compat_ioctl(sock, cmd, arg);
2999
87de87d5
DM
3000 if (ret == -ENOIOCTLCMD &&
3001 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3002 ret = compat_wext_handle_ioctl(net, cmd, arg);
3003
6b96018b
AB
3004 if (ret == -ENOIOCTLCMD)
3005 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3006
89bbfc95
SP
3007 return ret;
3008}
3009#endif
3010
ac5a488e
SS
3011int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3012{
3013 return sock->ops->bind(sock, addr, addrlen);
3014}
3015
3016int kernel_listen(struct socket *sock, int backlog)
3017{
3018 return sock->ops->listen(sock, backlog);
3019}
3020
3021int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3022{
3023 struct sock *sk = sock->sk;
3024 int err;
3025
3026 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3027 newsock);
3028 if (err < 0)
3029 goto done;
3030
3031 err = sock->ops->accept(sock, *newsock, flags);
3032 if (err < 0) {
3033 sock_release(*newsock);
fa8705b0 3034 *newsock = NULL;
ac5a488e
SS
3035 goto done;
3036 }
3037
3038 (*newsock)->ops = sock->ops;
1b08534e 3039 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3040
3041done:
3042 return err;
3043}
3044
3045int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3046 int flags)
ac5a488e
SS
3047{
3048 return sock->ops->connect(sock, addr, addrlen, flags);
3049}
3050
3051int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3052 int *addrlen)
3053{
3054 return sock->ops->getname(sock, addr, addrlen, 0);
3055}
3056
3057int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3058 int *addrlen)
3059{
3060 return sock->ops->getname(sock, addr, addrlen, 1);
3061}
3062
3063int kernel_getsockopt(struct socket *sock, int level, int optname,
3064 char *optval, int *optlen)
3065{
3066 mm_segment_t oldfs = get_fs();
3067 int err;
3068
3069 set_fs(KERNEL_DS);
3070 if (level == SOL_SOCKET)
3071 err = sock_getsockopt(sock, level, optname, optval, optlen);
3072 else
3073 err = sock->ops->getsockopt(sock, level, optname, optval,
3074 optlen);
3075 set_fs(oldfs);
3076 return err;
3077}
3078
3079int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3080 char *optval, unsigned int optlen)
ac5a488e
SS
3081{
3082 mm_segment_t oldfs = get_fs();
3083 int err;
3084
3085 set_fs(KERNEL_DS);
3086 if (level == SOL_SOCKET)
3087 err = sock_setsockopt(sock, level, optname, optval, optlen);
3088 else
3089 err = sock->ops->setsockopt(sock, level, optname, optval,
3090 optlen);
3091 set_fs(oldfs);
3092 return err;
3093}
3094
3095int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3096 size_t size, int flags)
3097{
3098 if (sock->ops->sendpage)
3099 return sock->ops->sendpage(sock, page, offset, size, flags);
3100
3101 return sock_no_sendpage(sock, page, offset, size, flags);
3102}
3103
3104int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3105{
3106 mm_segment_t oldfs = get_fs();
3107 int err;
3108
3109 set_fs(KERNEL_DS);
3110 err = sock->ops->ioctl(sock, cmd, arg);
3111 set_fs(oldfs);
3112
3113 return err;
3114}
3115
91cf45f0
TM
3116int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3117{
3118 return sock->ops->shutdown(sock, how);
3119}
3120
1da177e4
LT
/* Socket-layer symbols made available to the rest of the kernel. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() helpers for in-kernel socket users. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);