lockdep: various fixes
[linux-2.6-block.git] / net / core / sock.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *		Pauline Middelink :	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo :	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV"     ,
  "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
};
static const char *af_family_slock_key_strings[AF_MAX+1] = {
  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
  "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
  "slock-27"       , "slock-28"          , "slock-29"          ,
  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
  "slock-AF_RXRPC" , "slock-AF_MAX"
};
#endif

/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

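/*
 * For illustration (commentary added here, not in the original): with
 * _SK_MEM_PACKETS = 256 the defaults amount to 256 * (sizeof(struct
 * sk_buff) + 256) bytes, i.e. a 64 KiB payload allowance plus 256
 * sk_buffs' worth of overhead, whatever that happens to be on the
 * platform at hand.
 */
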
/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
        struct timeval tv;

        if (optlen < sizeof(tv))
                return -EINVAL;
        if (copy_from_user(&tv, optval, sizeof(tv)))
                return -EFAULT;
        if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
                return -EDOM;

        if (tv.tv_sec < 0) {
                static int warned __read_mostly;

                *timeo_p = 0;
                if (warned < 10 && net_ratelimit()) {
                        warned++;
                        printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
                               "tries to set negative timeout\n",
                               current->comm, current->pid);
                }
                return 0;
        }
        *timeo_p = MAX_SCHEDULE_TIMEOUT;
        if (tv.tv_sec == 0 && tv.tv_usec == 0)
                return 0;
        if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
                *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
        return 0;
}
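
/*
 * Worked example (illustrative, not part of the original source): with
 * HZ == 1000, a timeval of { .tv_sec = 2, .tv_usec = 500000 } yields
 *
 *      2 * 1000 + (500000 + 999) / 1000 = 2500 jiffies,
 *
 * i.e. the microsecond part is rounded up to the next whole tick.
 */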

static void sock_warn_obsolete_bsdism(const char *name)
{
        static int warned;
        static char warncomm[TASK_COMM_LEN];
        if (strcmp(warncomm, current->comm) && warned < 5) {
                strcpy(warncomm, current->comm);
                printk(KERN_WARNING "process `%s' is using obsolete "
                       "%s SO_BSDCOMPAT\n", warncomm, name);
                warned++;
        }
}

static void sock_disable_timestamp(struct sock *sk)
{
        if (sock_flag(sk, SOCK_TIMESTAMP)) {
                sock_reset_flag(sk, SOCK_TIMESTAMP);
                net_disable_timestamp();
        }
}


int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
        int err = 0;
        int skb_len;

        /* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
           number of warnings when compiling with -W --ANK
         */
        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf) {
                err = -ENOMEM;
                goto out;
        }

        err = sk_filter(sk, skb);
        if (err)
                goto out;

        skb->dev = NULL;
        skb_set_owner_r(skb, sk);

        /* Cache the SKB length before we tack it onto the receive
         * queue.  Once it is added it no longer belongs to us and
         * may be freed by other threads of control pulling packets
         * from the queue.
         */
        skb_len = skb->len;

        skb_queue_tail(&sk->sk_receive_queue, skb);

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_data_ready(sk, skb_len);
out:
        return err;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
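
/*
 * Usage sketch (illustrative): a datagram protocol that has looked up
 * `sk' for an incoming packet would typically do
 *
 *      if (sock_queue_rcv_skb(sk, skb) < 0)
 *              kfree_skb(skb);
 *
 * since on failure the skb still belongs to the caller.
 */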

int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
        int rc = NET_RX_SUCCESS;

        if (sk_filter(sk, skb))
                goto discard_and_relse;

        skb->dev = NULL;

        if (nested)
                bh_lock_sock_nested(sk);
        else
                bh_lock_sock(sk);
        if (!sock_owned_by_user(sk)) {
                /*
                 * trylock + unlock semantics:
                 */
                mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

                rc = sk->sk_backlog_rcv(sk, skb);

                mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
        } else
                sk_add_backlog(sk, skb);
        bh_unlock_sock(sk);
out:
        sock_put(sk);
        return rc;
discard_and_relse:
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
        struct dst_entry *dst = sk->sk_dst_cache;

        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                sk->sk_dst_cache = NULL;
                dst_release(dst);
                return NULL;
        }

        return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
        struct dst_entry *dst = sk_dst_get(sk);

        if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
                sk_dst_reset(sk);
                dst_release(dst);
                return NULL;
        }

        return dst;
}
EXPORT_SYMBOL(sk_dst_check);

/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;
        struct sk_filter *filter;
        int val;
        int valbool;
        struct linger ling;
        int ret = 0;

        /*
         *	Options without arguments
         */

#ifdef SO_DONTLINGER		/* Compatibility item... */
        if (optname == SO_DONTLINGER) {
                lock_sock(sk);
                sock_reset_flag(sk, SOCK_LINGER);
                release_sock(sk);
                return 0;
        }
#endif

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        valbool = val ? 1 : 0;

        lock_sock(sk);

        switch (optname) {
        case SO_DEBUG:
                if (val && !capable(CAP_NET_ADMIN)) {
                        ret = -EACCES;
                }
                else if (valbool)
                        sock_set_flag(sk, SOCK_DBG);
                else
                        sock_reset_flag(sk, SOCK_DBG);
                break;
        case SO_REUSEADDR:
                sk->sk_reuse = valbool;
                break;
        case SO_TYPE:
        case SO_ERROR:
                ret = -ENOPROTOOPT;
                break;
        case SO_DONTROUTE:
                if (valbool)
                        sock_set_flag(sk, SOCK_LOCALROUTE);
                else
                        sock_reset_flag(sk, SOCK_LOCALROUTE);
                break;
        case SO_BROADCAST:
                sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
                break;
        case SO_SNDBUF:
                /* Don't error on this: BSD doesn't, and if you think
                   about it this is right.  Otherwise apps have to
                   play 'guess the biggest size' games.  RCVBUF/SNDBUF
                   are treated in BSD as hints */

                if (val > sysctl_wmem_max)
                        val = sysctl_wmem_max;
set_sndbuf:
                sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
                if ((val * 2) < SOCK_MIN_SNDBUF)
                        sk->sk_sndbuf = SOCK_MIN_SNDBUF;
                else
                        sk->sk_sndbuf = val * 2;

                /*
                 *	Wake up sending tasks if we
                 *	upped the value.
                 */
                sk->sk_write_space(sk);
                break;

        case SO_SNDBUFFORCE:
                if (!capable(CAP_NET_ADMIN)) {
                        ret = -EPERM;
                        break;
                }
                goto set_sndbuf;

        case SO_RCVBUF:
                /* Don't error on this: BSD doesn't, and if you think
                   about it this is right.  Otherwise apps have to
                   play 'guess the biggest size' games.  RCVBUF/SNDBUF
                   are treated in BSD as hints */

                if (val > sysctl_rmem_max)
                        val = sysctl_rmem_max;
set_rcvbuf:
                sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
                /*
                 * We double it on the way in to account for
                 * "struct sk_buff" etc. overhead.   Applications
                 * assume that the SO_RCVBUF setting they make will
                 * allow that much actual data to be received on that
                 * socket.
                 *
                 * Applications are unaware that "struct sk_buff" and
                 * other overheads allocate from the receive buffer
                 * during socket buffer allocation.
                 *
                 * And after considering the possible alternatives,
                 * returning the value we actually used in getsockopt
                 * is the most desirable behavior.
                 */
                if ((val * 2) < SOCK_MIN_RCVBUF)
                        sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
                else
                        sk->sk_rcvbuf = val * 2;
                break;

        case SO_RCVBUFFORCE:
                if (!capable(CAP_NET_ADMIN)) {
                        ret = -EPERM;
                        break;
                }
                goto set_rcvbuf;

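        /*
         * Illustrative note (not in the original source): a userspace call
         * such as
         *
         *      int val = 65536;
         *      setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
         *
         * leaves sk_rcvbuf at 131072 (assuming val is within
         * sysctl_rmem_max), and getsockopt() then reports the doubled
         * value, per the comment above.
         */
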
        case SO_KEEPALIVE:
#ifdef CONFIG_INET
                if (sk->sk_protocol == IPPROTO_TCP)
                        tcp_set_keepalive(sk, valbool);
#endif
                sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
                break;

        case SO_OOBINLINE:
                sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
                break;

        case SO_NO_CHECK:
                sk->sk_no_check = valbool;
                break;

        case SO_PRIORITY:
                if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
                        sk->sk_priority = val;
                else
                        ret = -EPERM;
                break;

        case SO_LINGER:
                if (optlen < sizeof(ling)) {
                        ret = -EINVAL;	/* 1003.1g */
                        break;
                }
                if (copy_from_user(&ling, optval, sizeof(ling))) {
                        ret = -EFAULT;
                        break;
                }
                if (!ling.l_onoff)
                        sock_reset_flag(sk, SOCK_LINGER);
                else {
#if (BITS_PER_LONG == 32)
                        if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
                                sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
                        else
#endif
                                sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
                        sock_set_flag(sk, SOCK_LINGER);
                }
                break;

        case SO_BSDCOMPAT:
                sock_warn_obsolete_bsdism("setsockopt");
                break;

        case SO_PASSCRED:
                if (valbool)
                        set_bit(SOCK_PASSCRED, &sock->flags);
                else
                        clear_bit(SOCK_PASSCRED, &sock->flags);
                break;

        case SO_TIMESTAMP:
        case SO_TIMESTAMPNS:
                if (valbool) {
                        if (optname == SO_TIMESTAMP)
                                sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
                        else
                                sock_set_flag(sk, SOCK_RCVTSTAMPNS);
                        sock_set_flag(sk, SOCK_RCVTSTAMP);
                        sock_enable_timestamp(sk);
                } else {
                        sock_reset_flag(sk, SOCK_RCVTSTAMP);
                        sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
                }
                break;

        case SO_RCVLOWAT:
                if (val < 0)
                        val = INT_MAX;
                sk->sk_rcvlowat = val ? : 1;
                break;

        case SO_RCVTIMEO:
                ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
                break;

        case SO_SNDTIMEO:
                ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
                break;

#ifdef CONFIG_NETDEVICES
        case SO_BINDTODEVICE:
        {
                char devname[IFNAMSIZ];

                /* Sorry... */
                if (!capable(CAP_NET_RAW)) {
                        ret = -EPERM;
                        break;
                }

                /* Bind this socket to a particular device like "eth0",
                 * as specified in the passed interface name. If the
                 * name is "" or the option length is zero the socket
                 * is not bound.
                 */

                if (!valbool) {
                        sk->sk_bound_dev_if = 0;
                } else {
                        if (optlen > IFNAMSIZ - 1)
                                optlen = IFNAMSIZ - 1;
                        memset(devname, 0, sizeof(devname));
                        if (copy_from_user(devname, optval, optlen)) {
                                ret = -EFAULT;
                                break;
                        }

                        /* Remove any cached route for this socket. */
                        sk_dst_reset(sk);

                        if (devname[0] == '\0') {
                                sk->sk_bound_dev_if = 0;
                        } else {
                                struct net_device *dev = dev_get_by_name(devname);
                                if (!dev) {
                                        ret = -ENODEV;
                                        break;
                                }
                                sk->sk_bound_dev_if = dev->ifindex;
                                dev_put(dev);
                        }
                }
                break;
        }
#endif


        case SO_ATTACH_FILTER:
                ret = -EINVAL;
                if (optlen == sizeof(struct sock_fprog)) {
                        struct sock_fprog fprog;

                        ret = -EFAULT;
                        if (copy_from_user(&fprog, optval, sizeof(fprog)))
                                break;

                        ret = sk_attach_filter(&fprog, sk);
                }
                break;

        case SO_DETACH_FILTER:
                rcu_read_lock_bh();
                filter = rcu_dereference(sk->sk_filter);
                if (filter) {
                        rcu_assign_pointer(sk->sk_filter, NULL);
                        sk_filter_release(sk, filter);
                        rcu_read_unlock_bh();
                        break;
                }
                rcu_read_unlock_bh();
                ret = -ENONET;
                break;

        case SO_PASSSEC:
                if (valbool)
                        set_bit(SOCK_PASSSEC, &sock->flags);
                else
                        clear_bit(SOCK_PASSSEC, &sock->flags);
                break;

                /* We implement the SO_SNDLOWAT etc to
                   not be settable (1003.1g 5.3) */
        default:
                ret = -ENOPROTOOPT;
                break;
        }
        release_sock(sk);
        return ret;
}
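
/*
 * Userspace view (illustrative sketch): sys_setsockopt() routes
 * SOL_SOCKET-level options straight to this function, so e.g.
 *
 *      int one = 1;
 *      setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
 *
 * lands in the SO_REUSEADDR case above and sets sk->sk_reuse.
 */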

int sock_getsockopt(struct socket *sock, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        union {
                int val;
                struct linger ling;
                struct timeval tm;
        } v;

        unsigned int lv = sizeof(int);
        int len;

        if (get_user(len, optlen))
                return -EFAULT;
        if (len < 0)
                return -EINVAL;

        switch (optname) {
        case SO_DEBUG:
                v.val = sock_flag(sk, SOCK_DBG);
                break;

        case SO_DONTROUTE:
                v.val = sock_flag(sk, SOCK_LOCALROUTE);
                break;

        case SO_BROADCAST:
                v.val = !!sock_flag(sk, SOCK_BROADCAST);
                break;

        case SO_SNDBUF:
                v.val = sk->sk_sndbuf;
                break;

        case SO_RCVBUF:
                v.val = sk->sk_rcvbuf;
                break;

        case SO_REUSEADDR:
                v.val = sk->sk_reuse;
                break;

        case SO_KEEPALIVE:
                v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
                break;

        case SO_TYPE:
                v.val = sk->sk_type;
                break;

        case SO_ERROR:
                v.val = -sock_error(sk);
                if (v.val == 0)
                        v.val = xchg(&sk->sk_err_soft, 0);
                break;

        case SO_OOBINLINE:
                v.val = !!sock_flag(sk, SOCK_URGINLINE);
                break;

        case SO_NO_CHECK:
                v.val = sk->sk_no_check;
                break;

        case SO_PRIORITY:
                v.val = sk->sk_priority;
                break;

        case SO_LINGER:
                lv = sizeof(v.ling);
                v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER);
                v.ling.l_linger = sk->sk_lingertime / HZ;
                break;

        case SO_BSDCOMPAT:
                sock_warn_obsolete_bsdism("getsockopt");
                break;

        case SO_TIMESTAMP:
                v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
                        !sock_flag(sk, SOCK_RCVTSTAMPNS);
                break;

        case SO_TIMESTAMPNS:
                v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
                break;

        case SO_RCVTIMEO:
                lv = sizeof(struct timeval);
                if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
                        v.tm.tv_sec = 0;
                        v.tm.tv_usec = 0;
                } else {
                        v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
                        v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
                }
                break;

        case SO_SNDTIMEO:
                lv = sizeof(struct timeval);
                if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
                        v.tm.tv_sec = 0;
                        v.tm.tv_usec = 0;
                } else {
                        v.tm.tv_sec = sk->sk_sndtimeo / HZ;
                        v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
                }
                break;

        case SO_RCVLOWAT:
                v.val = sk->sk_rcvlowat;
                break;

        case SO_SNDLOWAT:
                v.val = 1;
                break;

        case SO_PASSCRED:
                v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
                break;

        case SO_PEERCRED:
                if (len > sizeof(sk->sk_peercred))
                        len = sizeof(sk->sk_peercred);
                if (copy_to_user(optval, &sk->sk_peercred, len))
                        return -EFAULT;
                goto lenout;

        case SO_PEERNAME:
        {
                char address[128];

                if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
                        return -ENOTCONN;
                if (lv < len)
                        return -EINVAL;
                if (copy_to_user(optval, address, len))
                        return -EFAULT;
                goto lenout;
        }

        /* Dubious BSD thing... Probably nobody even uses it, but
         * the UNIX standard wants it for whatever reason... -DaveM
         */
        case SO_ACCEPTCONN:
                v.val = sk->sk_state == TCP_LISTEN;
                break;

        case SO_PASSSEC:
                v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
                break;

        case SO_PEERSEC:
                return security_socket_getpeersec_stream(sock, optval, optlen, len);

        default:
                return -ENOPROTOOPT;
        }

        if (len > lv)
                len = lv;
        if (copy_to_user(optval, &v, len))
                return -EFAULT;
lenout:
        if (put_user(len, optlen))
                return -EFAULT;
        return 0;
}
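
/*
 * Typical userspace use of SO_ERROR (illustrative sketch): after a
 * non-blocking connect() signals writability, the pending error is
 * collected with
 *
 *      int err;
 *      socklen_t len = sizeof(err);
 *      getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
 *
 * which maps to the SO_ERROR case above: sock_error() plus any soft
 * error left in sk_err_soft.
 */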

/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
        sock_lock_init_class_and_name(sk,
                        af_family_slock_key_strings[sk->sk_family],
                        af_family_slock_keys + sk->sk_family,
                        af_family_key_strings[sk->sk_family],
                        af_family_keys + sk->sk_family);
}

/**
 *	sk_alloc - All socket objects are allocated here
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(int family, gfp_t priority,
                      struct proto *prot, int zero_it)
{
        struct sock *sk = NULL;
        struct kmem_cache *slab = prot->slab;

        if (slab != NULL)
                sk = kmem_cache_alloc(slab, priority);
        else
                sk = kmalloc(prot->obj_size, priority);

        if (sk) {
                if (zero_it) {
                        memset(sk, 0, prot->obj_size);
                        sk->sk_family = family;
                        /*
                         * See comment in struct sock definition to understand
                         * why we need sk_prot_creator -acme
                         */
                        sk->sk_prot = sk->sk_prot_creator = prot;
                        sock_lock_init(sk);
                }

                if (security_sk_alloc(sk, family, priority))
                        goto out_free;

                if (!try_module_get(prot->owner))
                        goto out_free;
        }
        return sk;

out_free:
        if (slab != NULL)
                kmem_cache_free(slab, sk);
        else
                kfree(sk);
        return NULL;
}

void sk_free(struct sock *sk)
{
        struct sk_filter *filter;
        struct module *owner = sk->sk_prot_creator->owner;

        if (sk->sk_destruct)
                sk->sk_destruct(sk);

        filter = rcu_dereference(sk->sk_filter);
        if (filter) {
                sk_filter_release(sk, filter);
                rcu_assign_pointer(sk->sk_filter, NULL);
        }

        sock_disable_timestamp(sk);

        if (atomic_read(&sk->sk_omem_alloc))
                printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
                       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

        security_sk_free(sk);
        if (sk->sk_prot_creator->slab != NULL)
                kmem_cache_free(sk->sk_prot_creator->slab, sk);
        else
                kfree(sk);
        module_put(owner);
}

struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
        struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);

        if (newsk != NULL) {
                struct sk_filter *filter;

                sock_copy(newsk, sk);

                /* SANITY */
                sk_node_init(&newsk->sk_node);
                sock_lock_init(newsk);
                bh_lock_sock(newsk);
                newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;

                atomic_set(&newsk->sk_rmem_alloc, 0);
                atomic_set(&newsk->sk_wmem_alloc, 0);
                atomic_set(&newsk->sk_omem_alloc, 0);
                skb_queue_head_init(&newsk->sk_receive_queue);
                skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
                skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

                rwlock_init(&newsk->sk_dst_lock);
                rwlock_init(&newsk->sk_callback_lock);
                lockdep_set_class(&newsk->sk_callback_lock,
                                  af_callback_keys + newsk->sk_family);

                newsk->sk_dst_cache = NULL;
                newsk->sk_wmem_queued = 0;
                newsk->sk_forward_alloc = 0;
                newsk->sk_send_head = NULL;
                newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

                sock_reset_flag(newsk, SOCK_DONE);
                skb_queue_head_init(&newsk->sk_error_queue);

                filter = newsk->sk_filter;
                if (filter != NULL)
                        sk_filter_charge(newsk, filter);

                if (unlikely(xfrm_sk_clone_policy(newsk))) {
                        /* It is still raw copy of parent, so invalidate
                         * destructor and make plain sk_free() */
                        newsk->sk_destruct = NULL;
                        sk_free(newsk);
                        newsk = NULL;
                        goto out;
                }

                newsk->sk_err = 0;
                newsk->sk_priority = 0;
                atomic_set(&newsk->sk_refcnt, 2);

                /*
                 * Increment the counter in the same struct proto as the master
                 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
                 * is the same as sk->sk_prot->socks, as this field was copied
                 * with memcpy).
                 *
                 * This _changes_ the previous behaviour, where
                 * tcp_create_openreq_child always was incrementing the
                 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
                 * to be taken into account in all callers. -acme
                 */
                sk_refcnt_debug_inc(newsk);
                newsk->sk_socket = NULL;
                newsk->sk_sleep = NULL;

                if (newsk->sk_prot->sockets_allocated)
                        atomic_inc(newsk->sk_prot->sockets_allocated);
        }
out:
        return newsk;
}

EXPORT_SYMBOL_GPL(sk_clone);

void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
        __sk_dst_set(sk, dst);
        sk->sk_route_caps = dst->dev->features;
        if (sk->sk_route_caps & NETIF_F_GSO)
                sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
        if (sk_can_gso(sk)) {
                if (dst->header_len)
                        sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
                else
                        sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
        }
}
EXPORT_SYMBOL_GPL(sk_setup_caps);

void __init sk_init(void)
{
        if (num_physpages <= 4096) {
                sysctl_wmem_max = 32767;
                sysctl_rmem_max = 32767;
                sysctl_wmem_default = 32767;
                sysctl_rmem_default = 32767;
        } else if (num_physpages >= 131072) {
                sysctl_wmem_max = 131071;
                sysctl_rmem_max = 131071;
        }
}

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        /* In case it might be waiting for more memory. */
        atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
        if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
                sk->sk_write_space(sk);
        sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;

        atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}


int sock_i_uid(struct sock *sk)
{
        int uid;

        read_lock(&sk->sk_callback_lock);
        uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
        read_unlock(&sk->sk_callback_lock);
        return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
        unsigned long ino;

        read_lock(&sk->sk_callback_lock);
        ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
        read_unlock(&sk->sk_callback_lock);
        return ino;
}

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
{
        if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
                struct sk_buff *skb = alloc_skb(size, priority);
                if (skb) {
                        skb_set_owner_w(skb, sk);
                        return skb;
                }
        }
        return NULL;
}

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
                             gfp_t priority)
{
        if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
                struct sk_buff *skb = alloc_skb(size, priority);
                if (skb) {
                        skb_set_owner_r(skb, sk);
                        return skb;
                }
        }
        return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
        if ((unsigned)size <= sysctl_optmem_max &&
            atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
                void *mem;
                /* First do the add, to avoid the race if kmalloc
                 * might sleep.
                 */
                atomic_add(size, &sk->sk_omem_alloc);
                mem = kmalloc(size, priority);
                if (mem)
                        return mem;
                atomic_sub(size, &sk->sk_omem_alloc);
        }
        return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
        kfree(mem);
        atomic_sub(size, &sk->sk_omem_alloc);
}
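
/*
 * Usage sketch (illustrative): allocation and free must quote the same
 * size so sk_omem_alloc stays balanced, e.g.
 *
 *      opt = sock_kmalloc(sk, optlen, GFP_KERNEL);
 *      if (opt != NULL) {
 *              ...
 *              sock_kfree_s(sk, opt, optlen);
 *      }
 */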

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think, these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
        DEFINE_WAIT(wait);

        clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
        for (;;) {
                if (!timeo)
                        break;
                if (signal_pending(current))
                        break;
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
                        break;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        break;
                if (sk->sk_err)
                        break;
                timeo = schedule_timeout(timeo);
        }
        finish_wait(sk->sk_sleep, &wait);
        return timeo;
}


/*
 *	Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
                                            unsigned long header_len,
                                            unsigned long data_len,
                                            int noblock, int *errcode)
{
        struct sk_buff *skb;
        gfp_t gfp_mask;
        long timeo;
        int err;

        gfp_mask = sk->sk_allocation;
        if (gfp_mask & __GFP_WAIT)
                gfp_mask |= __GFP_REPEAT;

        timeo = sock_sndtimeo(sk, noblock);
        while (1) {
                err = sock_error(sk);
                if (err != 0)
                        goto failure;

                err = -EPIPE;
                if (sk->sk_shutdown & SEND_SHUTDOWN)
                        goto failure;

                if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
                        skb = alloc_skb(header_len, gfp_mask);
                        if (skb) {
                                int npages;
                                int i;

                                /* No pages, we're done... */
                                if (!data_len)
                                        break;

                                npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
                                skb->truesize += data_len;
                                skb_shinfo(skb)->nr_frags = npages;
                                for (i = 0; i < npages; i++) {
                                        struct page *page;
                                        skb_frag_t *frag;

                                        page = alloc_pages(sk->sk_allocation, 0);
                                        if (!page) {
                                                err = -ENOBUFS;
                                                skb_shinfo(skb)->nr_frags = i;
                                                kfree_skb(skb);
                                                goto failure;
                                        }

                                        frag = &skb_shinfo(skb)->frags[i];
                                        frag->page = page;
                                        frag->page_offset = 0;
                                        frag->size = (data_len >= PAGE_SIZE ?
                                                      PAGE_SIZE :
                                                      data_len);
                                        data_len -= PAGE_SIZE;
                                }

                                /* Full success... */
                                break;
                        }
                        err = -ENOBUFS;
                        goto failure;
                }
                set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                err = -EAGAIN;
                if (!timeo)
                        goto failure;
                if (signal_pending(current))
                        goto interrupted;
                timeo = sock_wait_for_wmem(sk, timeo);
        }

        skb_set_owner_w(skb, sk);
        return skb;

interrupted:
        err = sock_intr_errno(timeo);
failure:
        *errcode = err;
        return NULL;
}

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
                                    int noblock, int *errcode)
{
        return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
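
/*
 * Protocol-side usage sketch (illustrative): allocate a send buffer
 * that honours sk_sndbuf and the socket's send timeout, then fill it:
 *
 *      skb = sock_alloc_send_skb(sk, len + hlen, noblock, &err);
 *      if (skb == NULL)
 *              return err;
 *
 * On failure the reason (e.g. -EAGAIN, -EPIPE, or a restart errno after
 * a signal) has already been stored in err by this helper.
 */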

static void __lock_sock(struct sock *sk)
{
        DEFINE_WAIT(wait);

        for (;;) {
                prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
                                          TASK_UNINTERRUPTIBLE);
                spin_unlock_bh(&sk->sk_lock.slock);
                schedule();
                spin_lock_bh(&sk->sk_lock.slock);
                if (!sock_owned_by_user(sk))
                        break;
        }
        finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
        struct sk_buff *skb = sk->sk_backlog.head;

        do {
                sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
                bh_unlock_sock(sk);

                do {
                        struct sk_buff *next = skb->next;

                        skb->next = NULL;
                        sk->sk_backlog_rcv(sk, skb);

                        /*
                         * We are in process context here with softirqs
                         * disabled, use cond_resched_softirq() to preempt.
                         * This is safe to do because we've taken the backlog
                         * queue private:
                         */
                        cond_resched_softirq();

                        skb = next;
                } while (skb != NULL);

                bh_lock_sock(sk);
        } while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
        int rc;
        DEFINE_WAIT(wait);

        prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
        set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
        clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        finish_wait(sk->sk_sleep, &wait);
        return rc;
}

EXPORT_SYMBOL(sk_wait_data);

/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
        return -EOPNOTSUPP;
}

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
                    int len, int flags)
{
        return -EOPNOTSUPP;
}

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
        return -EOPNOTSUPP;
}

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
        return -EOPNOTSUPP;
}

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
                    int *len, int peer)
{
        return -EOPNOTSUPP;
}

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
        return 0;
}

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
        return -EOPNOTSUPP;
}

int sock_no_listen(struct socket *sock, int backlog)
{
        return -EOPNOTSUPP;
}

int sock_no_shutdown(struct socket *sock, int how)
{
        return -EOPNOTSUPP;
}

int sock_no_setsockopt(struct socket *sock, int level, int optname,
                       char __user *optval, int optlen)
{
        return -EOPNOTSUPP;
}

int sock_no_getsockopt(struct socket *sock, int level, int optname,
                       char __user *optval, int __user *optlen)
{
        return -EOPNOTSUPP;
}

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
                    size_t len)
{
        return -EOPNOTSUPP;
}

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
                    size_t len, int flags)
{
        return -EOPNOTSUPP;
}

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
        /* Mirror missing mmap method error code */
        return -ENODEV;
}

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
        ssize_t res;
        struct msghdr msg = { .msg_flags = flags };
        struct kvec iov;
        char *kaddr = kmap(page);
        iov.iov_base = kaddr + offset;
        iov.iov_len = size;
        res = kernel_sendmsg(sock, &msg, &iov, 1, size);
        kunmap(page);
        return res;
}

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
        read_lock(&sk->sk_callback_lock);
        if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
                wake_up_interruptible_all(sk->sk_sleep);
        read_unlock(&sk->sk_callback_lock);
}

static void sock_def_error_report(struct sock *sk)
{
        read_lock(&sk->sk_callback_lock);
        if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
                wake_up_interruptible(sk->sk_sleep);
        sk_wake_async(sk, 0, POLL_ERR);
        read_unlock(&sk->sk_callback_lock);
}

static void sock_def_readable(struct sock *sk, int len)
{
        read_lock(&sk->sk_callback_lock);
        if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
                wake_up_interruptible(sk->sk_sleep);
        sk_wake_async(sk, 1, POLL_IN);
        read_unlock(&sk->sk_callback_lock);
}

static void sock_def_write_space(struct sock *sk)
{
        read_lock(&sk->sk_callback_lock);

        /* Do not wake up a writer until he can make "significant"
         * progress.  --DaveM
         */
        if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
                if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
                        wake_up_interruptible(sk->sk_sleep);

                /* Should agree with poll, otherwise some programs break */
                if (sock_writeable(sk))
                        sk_wake_async(sk, 2, POLL_OUT);
        }

        read_unlock(&sk->sk_callback_lock);
}

static void sock_def_destruct(struct sock *sk)
{
        kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
        if (sk->sk_socket && sk->sk_socket->file)
                if (send_sigurg(&sk->sk_socket->file->f_owner))
                        sk_wake_async(sk, 3, POLL_PRI);
}

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
                    unsigned long expires)
{
        if (!mod_timer(timer, expires))
                sock_hold(sk);
}

EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
        if (timer_pending(timer) && del_timer(timer))
                __sock_put(sk);
}

EXPORT_SYMBOL(sk_stop_timer);

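/*
 * Commentary (added): sk_reset_timer() takes a sock reference only when
 * it arms a timer that was not already pending, and sk_stop_timer()
 * drops one only when it actually cancels a pending timer; the timer
 * handler itself is expected to drop its reference with sock_put() when
 * it runs, keeping sk_refcnt balanced.
 */
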
void sock_init_data(struct socket *sock, struct sock *sk)
{
        skb_queue_head_init(&sk->sk_receive_queue);
        skb_queue_head_init(&sk->sk_write_queue);
        skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
        skb_queue_head_init(&sk->sk_async_wait_queue);
#endif

        sk->sk_send_head = NULL;

        init_timer(&sk->sk_timer);

        sk->sk_allocation = GFP_KERNEL;
        sk->sk_rcvbuf = sysctl_rmem_default;
        sk->sk_sndbuf = sysctl_wmem_default;
        sk->sk_state = TCP_CLOSE;
        sk->sk_socket = sock;

        sock_set_flag(sk, SOCK_ZAPPED);

        if (sock) {
                sk->sk_type = sock->type;
                sk->sk_sleep = &sock->wait;
                sock->sk = sk;
        } else
                sk->sk_sleep = NULL;

        rwlock_init(&sk->sk_dst_lock);
        rwlock_init(&sk->sk_callback_lock);
        lockdep_set_class(&sk->sk_callback_lock,
                          af_callback_keys + sk->sk_family);

        sk->sk_state_change = sock_def_wakeup;
        sk->sk_data_ready = sock_def_readable;
        sk->sk_write_space = sock_def_write_space;
        sk->sk_error_report = sock_def_error_report;
        sk->sk_destruct = sock_def_destruct;

        sk->sk_sndmsg_page = NULL;
        sk->sk_sndmsg_off = 0;

        sk->sk_peercred.pid = 0;
        sk->sk_peercred.uid = -1;
        sk->sk_peercred.gid = -1;
        sk->sk_write_pending = 0;
        sk->sk_rcvlowat = 1;
        sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
        sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;

        sk->sk_stamp = ktime_set(-1L, -1L);

        atomic_set(&sk->sk_refcnt, 1);
}

void fastcall lock_sock_nested(struct sock *sk, int subclass)
{
        might_sleep();
        spin_lock_bh(&sk->sk_lock.slock);
        if (sk->sk_lock.owner)
                __lock_sock(sk);
        sk->sk_lock.owner = (void *)1;
        spin_unlock(&sk->sk_lock.slock);
        /*
         * The sk_lock has mutex_lock() semantics here:
         */
        mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
        local_bh_enable();
}

EXPORT_SYMBOL(lock_sock_nested);

void fastcall release_sock(struct sock *sk)
{
        /*
         * The sk_lock has mutex_unlock() semantics:
         */
        mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

        spin_lock_bh(&sk->sk_lock.slock);
        if (sk->sk_backlog.tail)
                __release_sock(sk);
        sk->sk_lock.owner = NULL;
        if (waitqueue_active(&sk->sk_lock.wq))
                wake_up(&sk->sk_lock.wq);
        spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
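
/*
 * Usage sketch (illustrative): process-context code brackets socket
 * state changes with this pair, while softirq receive paths that find
 * the lock owned queue packets to the backlog for __release_sock():
 *
 *      lock_sock(sk);
 *      ... modify socket state ...
 *      release_sock(sk);
 */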

int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
        struct timeval tv;
        if (!sock_flag(sk, SOCK_TIMESTAMP))
                sock_enable_timestamp(sk);
        tv = ktime_to_timeval(sk->sk_stamp);
        if (tv.tv_sec == -1)
                return -ENOENT;
        if (tv.tv_sec == 0) {
                sk->sk_stamp = ktime_get_real();
                tv = ktime_to_timeval(sk->sk_stamp);
        }
        return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
        struct timespec ts;
        if (!sock_flag(sk, SOCK_TIMESTAMP))
                sock_enable_timestamp(sk);
        ts = ktime_to_timespec(sk->sk_stamp);
        if (ts.tv_sec == -1)
                return -ENOENT;
        if (ts.tv_sec == 0) {
                sk->sk_stamp = ktime_get_real();
                ts = ktime_to_timespec(sk->sk_stamp);
        }
        return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);

void sock_enable_timestamp(struct sock *sk)
{
        if (!sock_flag(sk, SOCK_TIMESTAMP)) {
                sock_set_flag(sk, SOCK_TIMESTAMP);
                net_enable_timestamp();
        }
}
EXPORT_SYMBOL(sock_enable_timestamp);

/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_getsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
                                  char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        if (sk->sk_prot->compat_getsockopt != NULL)
                return sk->sk_prot->compat_getsockopt(sk, level, optname,
                                                      optval, optlen);
        return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
                        struct msghdr *msg, size_t size, int flags)
{
        struct sock *sk = sock->sk;
        int addr_len = 0;
        int err;

        err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
                                   flags & ~MSG_DONTWAIT, &addr_len);
        if (err >= 0)
                msg->msg_namelen = addr_len;
        return err;
}

EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *	Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
                           char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;

        return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_setsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
                                  char __user *optval, int optlen)
{
        struct sock *sk = sock->sk;

        if (sk->sk_prot->compat_setsockopt != NULL)
                return sk->sk_prot->compat_setsockopt(sk, level, optname,
                                                      optval, optlen);
        return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif

void sk_common_release(struct sock *sk)
{
        if (sk->sk_prot->destroy)
                sk->sk_prot->destroy(sk);

        /*
         * Observation: when sk_common_release is called, processes have
         * no access to the socket, but the net still has.
         *
         * Step one, detach it from networking:
         *
         * A. Remove from hash tables.
         */

        sk->sk_prot->unhash(sk);

        /*
         * At this point the socket cannot receive new packets, but it is
         * possible that some packets are in flight because some CPU runs
         * the receiver and did a hash table lookup before we unhashed the
         * socket. They will reach the receive queue and will be purged by
         * the socket destructor.
         *
         * Also we still have packets pending on the receive queue and
         * probably our own packets waiting in device queues. sock_destroy
         * will drain the receive queue, but transmitted packets will delay
         * socket destruction until the last reference is released.
         */

        sock_orphan(sk);

        xfrm_sk_free_policy(sk);

        sk_refcnt_debug_release(sk);
        sock_put(sk);
}

EXPORT_SYMBOL(sk_common_release);

static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);

int proto_register(struct proto *prot, int alloc_slab)
{
        char *request_sock_slab_name = NULL;
        char *timewait_sock_slab_name;
        int rc = -ENOBUFS;

        if (alloc_slab) {
                prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
                                               SLAB_HWCACHE_ALIGN, NULL, NULL);

                if (prot->slab == NULL) {
                        printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
                               prot->name);
                        goto out;
                }

                if (prot->rsk_prot != NULL) {
                        static const char mask[] = "request_sock_%s";

                        request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
                        if (request_sock_slab_name == NULL)
                                goto out_free_sock_slab;

                        sprintf(request_sock_slab_name, mask, prot->name);
                        prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
                                                                 prot->rsk_prot->obj_size, 0,
                                                                 SLAB_HWCACHE_ALIGN, NULL, NULL);

                        if (prot->rsk_prot->slab == NULL) {
                                printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
                                       prot->name);
                                goto out_free_request_sock_slab_name;
                        }
                }

                if (prot->twsk_prot != NULL) {
                        static const char mask[] = "tw_sock_%s";

                        timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);

                        if (timewait_sock_slab_name == NULL)
                                goto out_free_request_sock_slab;

                        sprintf(timewait_sock_slab_name, mask, prot->name);
                        prot->twsk_prot->twsk_slab =
                                kmem_cache_create(timewait_sock_slab_name,
                                                  prot->twsk_prot->twsk_obj_size,
                                                  0, SLAB_HWCACHE_ALIGN,
                                                  NULL, NULL);
                        if (prot->twsk_prot->twsk_slab == NULL)
                                goto out_free_timewait_sock_slab_name;
                }
        }

        write_lock(&proto_list_lock);
        list_add(&prot->node, &proto_list);
        write_unlock(&proto_list_lock);
        rc = 0;
out:
        return rc;
out_free_timewait_sock_slab_name:
        kfree(timewait_sock_slab_name);
out_free_request_sock_slab:
        if (prot->rsk_prot && prot->rsk_prot->slab) {
                kmem_cache_destroy(prot->rsk_prot->slab);
                prot->rsk_prot->slab = NULL;
        }
out_free_request_sock_slab_name:
        kfree(request_sock_slab_name);
out_free_sock_slab:
        kmem_cache_destroy(prot->slab);
        prot->slab = NULL;
        goto out;
}

EXPORT_SYMBOL(proto_register);
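
/*
 * Registration sketch (illustrative; struct foo_prot and struct foo_sock
 * are hypothetical): a protocol typically registers from its module init
 * and unregisters on exit:
 *
 *      static struct proto foo_prot = {
 *              .name     = "FOO",
 *              .owner    = THIS_MODULE,
 *              .obj_size = sizeof(struct foo_sock),
 *      };
 *
 *      err = proto_register(&foo_prot, 1);   (1 => allocate a slab)
 *      ...
 *      proto_unregister(&foo_prot);
 */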

void proto_unregister(struct proto *prot)
{
        write_lock(&proto_list_lock);
        list_del(&prot->node);
        write_unlock(&proto_list_lock);

        if (prot->slab != NULL) {
                kmem_cache_destroy(prot->slab);
                prot->slab = NULL;
        }

        if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
                const char *name = kmem_cache_name(prot->rsk_prot->slab);

                kmem_cache_destroy(prot->rsk_prot->slab);
                kfree(name);
                prot->rsk_prot->slab = NULL;
        }

        if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
                const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);

                kmem_cache_destroy(prot->twsk_prot->twsk_slab);
                kfree(name);
                prot->twsk_prot->twsk_slab = NULL;
        }
}

EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
{
        read_lock(&proto_list_lock);
        return seq_list_start_head(&proto_list, *pos);
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        return seq_list_next(v, &proto_list, pos);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
{
        read_unlock(&proto_list_lock);
}

static char proto_method_implemented(const void *method)
{
        return method == NULL ? 'n' : 'y';
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
        seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
                        "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
                   proto->name,
                   proto->obj_size,
                   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
                   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
                   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
                   proto->max_header,
                   proto->slab == NULL ? "no" : "yes",
                   module_name(proto->owner),
                   proto_method_implemented(proto->close),
                   proto_method_implemented(proto->connect),
                   proto_method_implemented(proto->disconnect),
                   proto_method_implemented(proto->accept),
                   proto_method_implemented(proto->ioctl),
                   proto_method_implemented(proto->init),
                   proto_method_implemented(proto->destroy),
                   proto_method_implemented(proto->shutdown),
                   proto_method_implemented(proto->setsockopt),
                   proto_method_implemented(proto->getsockopt),
                   proto_method_implemented(proto->sendmsg),
                   proto_method_implemented(proto->recvmsg),
                   proto_method_implemented(proto->sendpage),
                   proto_method_implemented(proto->bind),
                   proto_method_implemented(proto->backlog_rcv),
                   proto_method_implemented(proto->hash),
                   proto_method_implemented(proto->unhash),
                   proto_method_implemented(proto->get_port),
                   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
        if (v == &proto_list)
                seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
                           "protocol",
                           "size",
                           "sockets",
                           "memory",
                           "press",
                           "maxhdr",
                           "slab",
                           "module",
                           "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
        else
                proto_seq_printf(seq, list_entry(v, struct proto, node));
        return 0;
}

static const struct seq_operations proto_seq_ops = {
        .start  = proto_seq_start,
        .next   = proto_seq_next,
        .stop   = proto_seq_stop,
        .show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &proto_seq_ops);
}

static const struct file_operations proto_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = proto_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};

static int __init proto_init(void)
{
        /* register /proc/net/protocols */
        return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
}

subsys_initcall(proto_init);

#endif /* PROC_FS */

EXPORT_SYMBOL(sk_alloc);
EXPORT_SYMBOL(sk_free);
EXPORT_SYMBOL(sk_send_sigurg);
EXPORT_SYMBOL(sock_alloc_send_skb);
EXPORT_SYMBOL(sock_init_data);
EXPORT_SYMBOL(sock_kfree_s);
EXPORT_SYMBOL(sock_kmalloc);
EXPORT_SYMBOL(sock_no_accept);
EXPORT_SYMBOL(sock_no_bind);
EXPORT_SYMBOL(sock_no_connect);
EXPORT_SYMBOL(sock_no_getname);
EXPORT_SYMBOL(sock_no_getsockopt);
EXPORT_SYMBOL(sock_no_ioctl);
EXPORT_SYMBOL(sock_no_listen);
EXPORT_SYMBOL(sock_no_mmap);
EXPORT_SYMBOL(sock_no_poll);
EXPORT_SYMBOL(sock_no_recvmsg);
EXPORT_SYMBOL(sock_no_sendmsg);
EXPORT_SYMBOL(sock_no_sendpage);
EXPORT_SYMBOL(sock_no_setsockopt);
EXPORT_SYMBOL(sock_no_shutdown);
EXPORT_SYMBOL(sock_no_socketpair);
EXPORT_SYMBOL(sock_rfree);
EXPORT_SYMBOL(sock_setsockopt);
EXPORT_SYMBOL(sock_wfree);
EXPORT_SYMBOL(sock_wmalloc);
EXPORT_SYMBOL(sock_i_uid);
EXPORT_SYMBOL(sock_i_ino);
EXPORT_SYMBOL(sysctl_optmem_max);
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_rmem_max);
EXPORT_SYMBOL(sysctl_wmem_max);
#endif