[NET] Cleanup: Use sock_owned_by_user() macro
[linux-2.6-block.git] / net / core / sock.c
CommitLineData
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Generic socket support routines. Memory allocators, socket lock/release
7 * handler for protocols to use and generic option handler.
8 *
9 *
10 * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
11 *
02c30a84 12 * Authors: Ross Biro
1da177e4
LT
13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Florian La Roche, <flla@stud.uni-sb.de>
15 * Alan Cox, <A.Cox@swansea.ac.uk>
16 *
17 * Fixes:
18 * Alan Cox : Numerous verify_area() problems
19 * Alan Cox : Connecting on a connecting socket
20 * now returns an error for tcp.
21 * Alan Cox : sock->protocol is set correctly.
22 * and is not sometimes left as 0.
23 * Alan Cox : connect handles icmp errors on a
24 * connect properly. Unfortunately there
25 * is a restart syscall nasty there. I
26 * can't match BSD without hacking the C
27 * library. Ideas urgently sought!
28 * Alan Cox : Disallow bind() to addresses that are
29 * not ours - especially broadcast ones!!
30 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
31 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
32 * instead they leave that for the DESTROY timer.
33 * Alan Cox : Clean up error flag in accept
34 * Alan Cox : TCP ack handling is buggy, the DESTROY timer
35 * was buggy. Put a remove_sock() in the handler
36 * for memory when we hit 0. Also altered the timer
4ec93edb 37 * code. The ACK stuff can wait and needs major
1da177e4
LT
38 * TCP layer surgery.
39 * Alan Cox : Fixed TCP ack bug, removed remove sock
40 * and fixed timer/inet_bh race.
41 * Alan Cox : Added zapped flag for TCP
42 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
43 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
45 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
46 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47 * Rick Sladkey : Relaxed UDP rules for matching packets.
48 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
49 * Pauline Middelink : identd support
50 * Alan Cox : Fixed connect() taking signals I think.
51 * Alan Cox : SO_LINGER supported
52 * Alan Cox : Error reporting fixes
53 * Anonymous : inet_create tidied up (sk->reuse setting)
54 * Alan Cox : inet sockets don't set sk->type!
55 * Alan Cox : Split socket option code
56 * Alan Cox : Callbacks
57 * Alan Cox : Nagle flag for Charles & Johannes stuff
58 * Alex : Removed restriction on inet fioctl
59 * Alan Cox : Splitting INET from NET core
60 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
61 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
62 * Alan Cox : Split IP from generic code
63 * Alan Cox : New kfree_skbmem()
64 * Alan Cox : Make SO_DEBUG superuser only.
65 * Alan Cox : Allow anyone to clear SO_DEBUG
66 * (compatibility fix)
67 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
68 * Alan Cox : Allocator for a socket is settable.
69 * Alan Cox : SO_ERROR includes soft errors.
70 * Alan Cox : Allow NULL arguments on some SO_ opts
71 * Alan Cox : Generic socket allocation to make hooks
72 * easier (suggested by Craig Metz).
73 * Michael Pall : SO_ERROR returns positive errno again
74 * Steve Whitehouse: Added default destructor to free
75 * protocol private data.
76 * Steve Whitehouse: Added various other default routines
77 * common to several socket families.
78 * Chris Evans : Call suser() check last on F_SETOWN
79 * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
80 * Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
81 * Andi Kleen : Fix write_space callback
82 * Chris Evans : Security fixes - signedness again
83 * Arnaldo C. Melo : cleanups, use skb_queue_purge
84 *
85 * To Fix:
86 *
87 *
88 * This program is free software; you can redistribute it and/or
89 * modify it under the terms of the GNU General Public License
90 * as published by the Free Software Foundation; either version
91 * 2 of the License, or (at your option) any later version.
92 */
93
4fc268d2 94#include <linux/capability.h>
1da177e4
LT
95#include <linux/errno.h>
96#include <linux/types.h>
97#include <linux/socket.h>
98#include <linux/in.h>
99#include <linux/kernel.h>
1da177e4
LT
100#include <linux/module.h>
101#include <linux/proc_fs.h>
102#include <linux/seq_file.h>
103#include <linux/sched.h>
104#include <linux/timer.h>
105#include <linux/string.h>
106#include <linux/sockios.h>
107#include <linux/net.h>
108#include <linux/mm.h>
109#include <linux/slab.h>
110#include <linux/interrupt.h>
111#include <linux/poll.h>
112#include <linux/tcp.h>
113#include <linux/init.h>
a1f8e7f7 114#include <linux/highmem.h>
1da177e4
LT
115
116#include <asm/uaccess.h>
117#include <asm/system.h>
118
119#include <linux/netdevice.h>
120#include <net/protocol.h>
121#include <linux/skbuff.h>
2e6599cb 122#include <net/request_sock.h>
1da177e4
LT
123#include <net/sock.h>
124#include <net/xfrm.h>
125#include <linux/ipsec.h>
126
127#include <linux/filter.h>
128
129#ifdef CONFIG_INET
130#include <net/tcp.h>
131#endif
132
da21f24d
IM
133/*
134 * Each address family might have different locking rules, so we have
135 * one slock key per address family:
136 */
a5b5bb9a
IM
137static struct lock_class_key af_family_keys[AF_MAX];
138static struct lock_class_key af_family_slock_keys[AF_MAX];
139
140#ifdef CONFIG_DEBUG_LOCK_ALLOC
141/*
142 * Make lock validator output more readable. (we pre-construct these
143 * strings build-time, so that runtime initialization of socket
144 * locks is fast):
145 */
146static const char *af_family_key_strings[AF_MAX+1] = {
147 "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
148 "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
149 "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
150 "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" ,
151 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
152 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
153 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
154 "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
155 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
156 "sk_lock-27" , "sk_lock-28" , "sk_lock-29" ,
17926a79
DH
157 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
158 "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
a5b5bb9a
IM
159};
160static const char *af_family_slock_key_strings[AF_MAX+1] = {
161 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
162 "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
163 "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
164 "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
165 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
166 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
167 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
168 "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" ,
169 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
170 "slock-27" , "slock-28" , "slock-29" ,
17926a79
DH
171 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
172 "slock-AF_RXRPC" , "slock-AF_MAX"
a5b5bb9a 173};
443aef0e
PZ
174static const char *af_family_clock_key_strings[AF_MAX+1] = {
175 "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
176 "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
177 "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
178 "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" ,
179 "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
180 "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
181 "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
182 "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" ,
183 "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
184 "clock-27" , "clock-28" , "clock-29" ,
e51f802b
DH
185 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
186 "clock-AF_RXRPC" , "clock-AF_MAX"
443aef0e 187};
a5b5bb9a 188#endif
da21f24d
IM
189
190/*
191 * sk_callback_lock locking rules are per-address-family,
192 * so split the lock classes by using a per-AF key:
193 */
194static struct lock_class_key af_callback_keys[AF_MAX];
195
1da177e4
LT
/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms. This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

/* Run time adjustable parameters (exposed via sysctl elsewhere);
 * sk_init() below may shrink/grow them based on available memory. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
1da177e4
LT
214
215static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
216{
217 struct timeval tv;
218
219 if (optlen < sizeof(tv))
220 return -EINVAL;
221 if (copy_from_user(&tv, optval, sizeof(tv)))
222 return -EFAULT;
ba78073e
VA
223 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
224 return -EDOM;
1da177e4 225
ba78073e 226 if (tv.tv_sec < 0) {
6f11df83
AM
227 static int warned __read_mostly;
228
ba78073e
VA
229 *timeo_p = 0;
230 if (warned < 10 && net_ratelimit())
231 warned++;
232 printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
233 "tries to set negative timeout\n",
40b77c94 234 current->comm, current->pid);
ba78073e
VA
235 return 0;
236 }
1da177e4
LT
237 *timeo_p = MAX_SCHEDULE_TIMEOUT;
238 if (tv.tv_sec == 0 && tv.tv_usec == 0)
239 return 0;
240 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
241 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
242 return 0;
243}
244
/*
 * Warn (at most 5 times, and only once per command name) that the calling
 * process is using the obsolete SO_BSDCOMPAT option.  @name identifies
 * the operation, e.g. "setsockopt" or "getsockopt".
 */
static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		printk(KERN_WARNING "process `%s' is using obsolete "
		       "%s SO_BSDCOMPAT\n", warncomm, name);
		warned++;
	}
}
256
/*
 * Clear SOCK_TIMESTAMP on @sk.  net_disable_timestamp() is only called
 * when the flag was actually set, so the global timestamp enable count
 * stays balanced with the corresponding sock_enable_timestamp() call.
 */
static void sock_disable_timestamp(struct sock *sk)
{
	if (sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_reset_flag(sk, SOCK_TIMESTAMP);
		net_disable_timestamp();
	}
}
264
265
f0088a50
DV
/*
 * Queue @skb on @sk's receive queue, charging its memory to the socket.
 *
 * Returns 0 on success, -ENOMEM when the receive buffer limit would be
 * exceeded, or the sk_filter() error when the socket filter drops the
 * packet.  On failure the skb is NOT freed here — the caller retains
 * ownership.  On success the socket's data-ready callback is invoked
 * unless the socket is marked SOCK_DEAD.
 */
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err = 0;
	int skb_len;

	/* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
	   number of warnings when compiling with -W --ANK
	 */
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf) {
		err = -ENOMEM;
		goto out;
	}

	err = sk_filter(sk, skb);
	if (err)
		goto out;

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* Cache the SKB length before we tack it onto the receive
	 * queue. Once it is added it no longer belongs to us and
	 * may be freed by other threads of control pulling packets
	 * from the queue.
	 */
	skb_len = skb->len;

	skb_queue_tail(&sk->sk_receive_queue, skb);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, skb_len);
out:
	return err;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
302
58a5a7b9 303int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
f0088a50
DV
304{
305 int rc = NET_RX_SUCCESS;
306
fda9ef5d 307 if (sk_filter(sk, skb))
f0088a50
DV
308 goto discard_and_relse;
309
310 skb->dev = NULL;
311
58a5a7b9
ACM
312 if (nested)
313 bh_lock_sock_nested(sk);
314 else
315 bh_lock_sock(sk);
a5b5bb9a
IM
316 if (!sock_owned_by_user(sk)) {
317 /*
318 * trylock + unlock semantics:
319 */
320 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
321
f0088a50 322 rc = sk->sk_backlog_rcv(sk, skb);
a5b5bb9a
IM
323
324 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
325 } else
f0088a50
DV
326 sk_add_backlog(sk, skb);
327 bh_unlock_sock(sk);
328out:
329 sock_put(sk);
330 return rc;
331discard_and_relse:
332 kfree_skb(skb);
333 goto out;
334}
335EXPORT_SYMBOL(sk_receive_skb);
336
337struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
338{
339 struct dst_entry *dst = sk->sk_dst_cache;
340
341 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
342 sk->sk_dst_cache = NULL;
343 dst_release(dst);
344 return NULL;
345 }
346
347 return dst;
348}
349EXPORT_SYMBOL(__sk_dst_check);
350
351struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
352{
353 struct dst_entry *dst = sk_dst_get(sk);
354
355 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
356 sk_dst_reset(sk);
357 dst_release(dst);
358 return NULL;
359 }
360
361 return dst;
362}
363EXPORT_SYMBOL(sk_dst_check);
364
4878809f
DM
/*
 * Implement SO_BINDTODEVICE: bind @sk to the interface named in the
 * user buffer @optval (up to @optlen bytes).  An empty name (or zero
 * length) unbinds the socket.  Requires CAP_NET_RAW.
 *
 * Returns 0 on success; -ENOPROTOOPT when CONFIG_NETDEVICES is off,
 * -EPERM, -EINVAL, -EFAULT, or -ENODEV on the respective failures.
 */
static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	char devname[IFNAMSIZ];
	int index;

	/* Sorry... */
	ret = -EPERM;
	if (!capable(CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_user(devname, optval, optlen))
		goto out;

	if (devname[0] == '\0') {
		index = 0;
	} else {
		/* Resolve the name to an ifindex; the device reference is
		 * dropped immediately — only the index is kept. */
		struct net_device *dev = dev_get_by_name(devname);

		ret = -ENODEV;
		if (!dev)
			goto out;

		index = dev->ifindex;
		dev_put(dev);
	}

	lock_sock(sk);
	sk->sk_bound_dev_if = index;
	sk_dst_reset(sk);	/* cached route may no longer be valid */
	release_sock(sk);

	ret = 0;

out:
#endif

	return ret;
}
419
1da177e4
LT
/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

/*
 * Generic SOL_SOCKET setsockopt handler.  Copies the option value from
 * userspace, takes the socket lock, and dispatches on @optname.
 * Returns 0 or a negative errno.
 */
int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	struct sock *sk=sock->sk;
	struct sk_filter *filter;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

#ifdef SO_DONTLINGER		/* Compatibility item... */
	if (optname == SO_DONTLINGER) {
		lock_sock(sk);
		sock_reset_flag(sk, SOCK_LINGER);
		release_sock(sk);
		return 0;
	}
#endif

	if (optname == SO_BINDTODEVICE)
		return sock_bindtodevice(sk, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val?1:0;

	lock_sock(sk);

	switch(optname) {
	case SO_DEBUG:
		if (val && !capable(CAP_NET_ADMIN)) {
			ret = -EACCES;
		}
		else if (valbool)
			sock_set_flag(sk, SOCK_DBG);
		else
			sock_reset_flag(sk, SOCK_DBG);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = valbool;
		break;
	case SO_TYPE:
	case SO_ERROR:
		/* Read-only options: settable only via getsockopt. */
		ret = -ENOPROTOOPT;
		break;
	case SO_DONTROUTE:
		if (valbool)
			sock_set_flag(sk, SOCK_LOCALROUTE);
		else
			sock_reset_flag(sk, SOCK_LOCALROUTE);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this BSD doesn't and if you think
		   about it this is right. Otherwise apps have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated in BSD as hints */

		if (val > sysctl_wmem_max)
			val = sysctl_wmem_max;
set_sndbuf:
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		/* Value is doubled to account for sk_buff overhead. */
		if ((val * 2) < SOCK_MIN_SNDBUF)
			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
		else
			sk->sk_sndbuf = val * 2;

		/*
		 *	Wake up sending tasks if we
		 *	upped the value.
		 */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		/* Like SO_SNDBUF but ignores sysctl_wmem_max; privileged. */
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't error on this BSD doesn't and if you think
		   about it this is right. Otherwise apps have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated in BSD as hints */

		if (val > sysctl_rmem_max)
			val = sysctl_rmem_max;
set_rcvbuf:
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * We double it on the way in to account for
		 * "struct sk_buff" etc. overhead. Applications
		 * assume that the SO_RCVBUF setting they make will
		 * allow that much actual data to be received on that
		 * socket.
		 *
		 * Applications are unaware that "struct sk_buff" and
		 * other overheads allocate from the receive buffer
		 * during socket buffer allocation.
		 *
		 * And after considering the possible alternatives,
		 * returning the value we actually used in getsockopt
		 * is the most desirable behavior.
		 */
		if ((val * 2) < SOCK_MIN_RCVBUF)
			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
		else
			sk->sk_rcvbuf = val * 2;
		break;

	case SO_RCVBUFFORCE:
		/* Like SO_RCVBUF but ignores sysctl_rmem_max; privileged. */
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_rcvbuf;

	case SO_KEEPALIVE:
#ifdef CONFIG_INET
		if (sk->sk_protocol == IPPROTO_TCP)
			tcp_set_keepalive(sk, valbool);
#endif
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check = valbool;
		break;

	case SO_PRIORITY:
		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_user(&ling,optval,sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
#if (BITS_PER_LONG == 32)
			/* Guard against jiffies overflow on 32-bit. */
			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		/* Obsolete no-op: only warns. */
		sock_warn_obsolete_bsdism("setsockopt");
		break;

	case SO_PASSCRED:
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_TIMESTAMP:
	case SO_TIMESTAMPNS:
		if (valbool) {
			/* SOCK_RCVTSTAMPNS distinguishes ns vs us precision. */
			if (optname == SO_TIMESTAMP)
				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
			else
				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk);
		} else {
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		}
		break;

	case SO_RCVLOWAT:
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;
		break;

	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;

	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;

	case SO_ATTACH_FILTER:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;

	case SO_DETACH_FILTER:
		rcu_read_lock_bh();
		filter = rcu_dereference(sk->sk_filter);
		if (filter) {
			rcu_assign_pointer(sk->sk_filter, NULL);
			sk_filter_release(sk, filter);
			rcu_read_unlock_bh();
			break;
		}
		rcu_read_unlock_bh();
		ret = -ENONET;
		break;

	case SO_PASSSEC:
		if (valbool)
			set_bit(SOCK_PASSSEC, &sock->flags);
		else
			clear_bit(SOCK_PASSSEC, &sock->flags);
		break;

		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}
681
682
/*
 * Generic SOL_SOCKET getsockopt handler.  Most options are marshalled
 * through the union `v` and copied out at the bottom (truncated to the
 * user-supplied length); options with special formats return directly
 * or jump to `lenout`.  Returns 0 or a negative errno.
 */
int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	unsigned int lv = sizeof(int);	/* size of the value being returned */
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch(optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = !!sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = sk->sk_sndbuf;
		break;

	case SO_RCVBUF:
		v.val = sk->sk_rcvbuf;
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_KEEPALIVE:
		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_ERROR:
		/* Pending hard error first, then soft error (both cleared). */
		v.val = -sock_error(sk);
		if (v.val==0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = !!sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check;
		break;

	case SO_PRIORITY:
		v.val = sk->sk_priority;
		break;

	case SO_LINGER:
		lv		= sizeof(v.ling);
		v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger	= sk->sk_lingertime / HZ;
		break;

	case SO_BSDCOMPAT:
		/* Obsolete no-op: only warns, returns whatever is in v. */
		sock_warn_obsolete_bsdism("getsockopt");
		break;

	case SO_TIMESTAMP:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
				!sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPNS:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_RCVTIMEO:
		lv=sizeof(struct timeval);
		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_SNDTIMEO:
		lv=sizeof(struct timeval);
		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_RCVLOWAT:
		v.val = sk->sk_rcvlowat;
		break;

	case SO_SNDLOWAT:
		/* Not settable; always reported as 1 (1003.1g 5.3). */
		v.val=1;
		break;

	case SO_PASSCRED:
		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
		break;

	case SO_PEERCRED:
		if (len > sizeof(sk->sk_peercred))
			len = sizeof(sk->sk_peercred);
		if (copy_to_user(optval, &sk->sk_peercred, len))
			return -EFAULT;
		goto lenout;

	case SO_PEERNAME:
	{
		char address[128];

		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
			return -ENOTCONN;
		if (lv < len)
			return -EINVAL;
		if (copy_to_user(optval, address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PASSSEC:
		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec_stream(sock, optval, optlen, len);

	default:
		return -ENOPROTOOPT;
	}

	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}
853
a5b5bb9a
IM
/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
	/* One lockdep class per address family, named via the
	 * pre-built string tables above. */
	sock_lock_init_class_and_name(sk,
			af_family_slock_key_strings[sk->sk_family],
			af_family_slock_keys + sk->sk_family,
			af_family_key_strings[sk->sk_family],
			af_family_keys + sk->sk_family);
}
867
1da177e4
LT
/**
 *	sk_alloc - All socket objects are allocated here
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@zero_it: if we should zero the newly allocated sock
 *
 *	Returns the new sock, or NULL on allocation/security/module
 *	failure.  On the failure paths the partially set up sock is
 *	freed back to its slab (or kfree'd).
 */
struct sock *sk_alloc(int family, gfp_t priority,
		      struct proto *prot, int zero_it)
{
	struct sock *sk = NULL;
	struct kmem_cache *slab = prot->slab;

	/* Use the protocol's dedicated slab cache when it has one. */
	if (slab != NULL)
		sk = kmem_cache_alloc(slab, priority);
	else
		sk = kmalloc(prot->obj_size, priority);

	if (sk) {
		if (zero_it) {
			memset(sk, 0, prot->obj_size);
			sk->sk_family = family;
			/*
			 * See comment in struct sock definition to understand
			 * why we need sk_prot_creator -acme
			 */
			sk->sk_prot = sk->sk_prot_creator = prot;
			sock_lock_init(sk);
		}

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free;
	}
	return sk;

out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}
913
/*
 * Final destruction of a sock: runs the destructor, releases the
 * attached filter, disables timestamping, frees the object back to its
 * creator's slab (or kfree), and drops the protocol module reference.
 * The owner pointer is saved up front because the sock memory is gone
 * before module_put().
 */
void sk_free(struct sock *sk)
{
	struct sk_filter *filter;
	struct module *owner = sk->sk_prot_creator->owner;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		sk_filter_release(sk, filter);
		rcu_assign_pointer(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk);

	/* Leak check: option memory should have been returned by now. */
	if (atomic_read(&sk->sk_omem_alloc))
		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

	security_sk_free(sk);
	if (sk->sk_prot_creator->slab != NULL)
		kmem_cache_free(sk->sk_prot_creator->slab, sk);
	else
		kfree(sk);
	module_put(owner);
}
941
dd0fc66f 942struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
87d11ceb
ACM
943{
944 struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
945
946 if (newsk != NULL) {
947 struct sk_filter *filter;
948
892c141e 949 sock_copy(newsk, sk);
87d11ceb
ACM
950
951 /* SANITY */
952 sk_node_init(&newsk->sk_node);
953 sock_lock_init(newsk);
954 bh_lock_sock(newsk);
fa438ccf 955 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
87d11ceb
ACM
956
957 atomic_set(&newsk->sk_rmem_alloc, 0);
958 atomic_set(&newsk->sk_wmem_alloc, 0);
959 atomic_set(&newsk->sk_omem_alloc, 0);
960 skb_queue_head_init(&newsk->sk_receive_queue);
961 skb_queue_head_init(&newsk->sk_write_queue);
97fc2f08
CL
962#ifdef CONFIG_NET_DMA
963 skb_queue_head_init(&newsk->sk_async_wait_queue);
964#endif
87d11ceb
ACM
965
966 rwlock_init(&newsk->sk_dst_lock);
967 rwlock_init(&newsk->sk_callback_lock);
443aef0e
PZ
968 lockdep_set_class_and_name(&newsk->sk_callback_lock,
969 af_callback_keys + newsk->sk_family,
970 af_family_clock_key_strings[newsk->sk_family]);
87d11ceb
ACM
971
972 newsk->sk_dst_cache = NULL;
973 newsk->sk_wmem_queued = 0;
974 newsk->sk_forward_alloc = 0;
975 newsk->sk_send_head = NULL;
87d11ceb
ACM
976 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
977
978 sock_reset_flag(newsk, SOCK_DONE);
979 skb_queue_head_init(&newsk->sk_error_queue);
980
981 filter = newsk->sk_filter;
982 if (filter != NULL)
983 sk_filter_charge(newsk, filter);
984
985 if (unlikely(xfrm_sk_clone_policy(newsk))) {
986 /* It is still raw copy of parent, so invalidate
987 * destructor and make plain sk_free() */
988 newsk->sk_destruct = NULL;
989 sk_free(newsk);
990 newsk = NULL;
991 goto out;
992 }
993
994 newsk->sk_err = 0;
995 newsk->sk_priority = 0;
996 atomic_set(&newsk->sk_refcnt, 2);
997
998 /*
999 * Increment the counter in the same struct proto as the master
1000 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1001 * is the same as sk->sk_prot->socks, as this field was copied
1002 * with memcpy).
1003 *
1004 * This _changes_ the previous behaviour, where
1005 * tcp_create_openreq_child always was incrementing the
1006 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
1007 * to be taken into account in all callers. -acme
1008 */
1009 sk_refcnt_debug_inc(newsk);
1010 newsk->sk_socket = NULL;
1011 newsk->sk_sleep = NULL;
1012
1013 if (newsk->sk_prot->sockets_allocated)
1014 atomic_inc(newsk->sk_prot->sockets_allocated);
1015 }
1016out:
1017 return newsk;
1018}
1019
1020EXPORT_SYMBOL_GPL(sk_clone);
1021
9958089a
AK
/*
 * Install @dst as the socket's route and derive the socket's offload
 * capabilities (sk_route_caps) from the output device's features.
 * GSO-capable devices get software GSO enabled; scatter-gather and
 * hardware checksum are only kept when the dst adds no extra header
 * (dst->header_len == 0).
 */
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	__sk_dst_set(sk, dst);
	sk->sk_route_caps = dst->dev->features;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	if (sk_can_gso(sk)) {
		if (dst->header_len)
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		else
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
	}
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
1036
1da177e4
LT
/*
 * Boot-time tuning of the socket buffer sysctl defaults based on the
 * amount of physical memory: small machines (<= 16MB with 4K pages) get
 * shrunken limits, large machines (>= 512MB) get enlarged maximums.
 */
void __init sk_init(void)
{
	if (num_physpages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (num_physpages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}
1049
1050/*
1051 * Simple resource managers for sockets.
1052 */
1053
1054
4ec93edb
YH
/*
 *	Write buffer destructor automatically called from kfree_skb.
 *	Uncharges the skb from the socket's write-memory accounting,
 *	wakes writers (unless the protocol manages its own write queue
 *	via SOCK_USE_WRITE_QUEUE), and drops the skb's sock reference.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* In case it might be waiting for more memory. */
	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
		sk->sk_write_space(sk);
	sock_put(sk);
}
1068
4ec93edb
YH
/*
 *	Read buffer destructor automatically called from kfree_skb.
 *	Uncharges the skb from the socket's receive-memory accounting.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}
1078
1079
/*
 * Return the uid of the inode backing sk's struct socket, or 0 if the
 * socket is orphaned.  sk_callback_lock guards sk->sk_socket.
 */
int sock_i_uid(struct sock *sk)
{
	int uid;

	read_lock(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
	read_unlock(&sk->sk_callback_lock);
	return uid;
}
1089
/*
 * Return the inode number backing sk's struct socket, or 0 if the
 * socket is orphaned.  sk_callback_lock guards sk->sk_socket.
 */
unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock(&sk->sk_callback_lock);
	return ino;
}
1099
1100/*
1101 * Allocate a skb from the socket's send buffer.
1102 */
86a76caf 1103struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
dd0fc66f 1104 gfp_t priority)
1da177e4
LT
1105{
1106 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1107 struct sk_buff * skb = alloc_skb(size, priority);
1108 if (skb) {
1109 skb_set_owner_w(skb, sk);
1110 return skb;
1111 }
1112 }
1113 return NULL;
1114}
1115
1116/*
1117 * Allocate a skb from the socket's receive buffer.
4ec93edb 1118 */
86a76caf 1119struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
dd0fc66f 1120 gfp_t priority)
1da177e4
LT
1121{
1122 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1123 struct sk_buff *skb = alloc_skb(size, priority);
1124 if (skb) {
1125 skb_set_owner_r(skb, sk);
1126 return skb;
1127 }
1128 }
1129 return NULL;
1130}
1131
/*
 * Allocate a memory block from the socket's option memory buffer.
 * Fails (returns NULL) if the request would exceed sysctl_optmem_max.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		/* Allocation failed: return the reserved accounting. */
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}
1151
1152/*
1153 * Free an option memory block.
1154 */
1155void sock_kfree_s(struct sock *sk, void *mem, int size)
1156{
1157 kfree(mem);
1158 atomic_sub(size, &sk->sk_omem_alloc);
1159}
1160
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think, these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock * sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		/* Re-check conditions after queueing on the waitqueue so a
		 * concurrent wakeup cannot be missed. */
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk->sk_sleep, &wait);
	/* Remaining timeout; 0 means the wait timed out. */
	return timeo;
}
1187
1188
1189/*
1190 * Generic send/receive buffer handlers
1191 */
1192
1193static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
1194 unsigned long header_len,
1195 unsigned long data_len,
1196 int noblock, int *errcode)
1197{
1198 struct sk_buff *skb;
7d877f3b 1199 gfp_t gfp_mask;
1da177e4
LT
1200 long timeo;
1201 int err;
1202
1203 gfp_mask = sk->sk_allocation;
1204 if (gfp_mask & __GFP_WAIT)
1205 gfp_mask |= __GFP_REPEAT;
1206
1207 timeo = sock_sndtimeo(sk, noblock);
1208 while (1) {
1209 err = sock_error(sk);
1210 if (err != 0)
1211 goto failure;
1212
1213 err = -EPIPE;
1214 if (sk->sk_shutdown & SEND_SHUTDOWN)
1215 goto failure;
1216
1217 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
db38c179 1218 skb = alloc_skb(header_len, gfp_mask);
1da177e4
LT
1219 if (skb) {
1220 int npages;
1221 int i;
1222
1223 /* No pages, we're done... */
1224 if (!data_len)
1225 break;
1226
1227 npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1228 skb->truesize += data_len;
1229 skb_shinfo(skb)->nr_frags = npages;
1230 for (i = 0; i < npages; i++) {
1231 struct page *page;
1232 skb_frag_t *frag;
1233
1234 page = alloc_pages(sk->sk_allocation, 0);
1235 if (!page) {
1236 err = -ENOBUFS;
1237 skb_shinfo(skb)->nr_frags = i;
1238 kfree_skb(skb);
1239 goto failure;
1240 }
1241
1242 frag = &skb_shinfo(skb)->frags[i];
1243 frag->page = page;
1244 frag->page_offset = 0;
1245 frag->size = (data_len >= PAGE_SIZE ?
1246 PAGE_SIZE :
1247 data_len);
1248 data_len -= PAGE_SIZE;
1249 }
1250
1251 /* Full success... */
1252 break;
1253 }
1254 err = -ENOBUFS;
1255 goto failure;
1256 }
1257 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1258 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1259 err = -EAGAIN;
1260 if (!timeo)
1261 goto failure;
1262 if (signal_pending(current))
1263 goto interrupted;
1264 timeo = sock_wait_for_wmem(sk, timeo);
1265 }
1266
1267 skb_set_owner_w(skb, sk);
1268 return skb;
1269
1270interrupted:
1271 err = sock_intr_errno(timeo);
1272failure:
1273 *errcode = err;
1274 return NULL;
1275}
1276
/*
 * Allocate a purely linear send skb of @size bytes; thin wrapper around
 * sock_alloc_send_pskb() with no page-fragment data.
 */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
1282
/*
 * Slow path of lock_sock(): sleep until the owner releases the socket.
 * Called with sk_lock.slock held; returns with it held again.
 */
static void __lock_sock(struct sock *sk)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		/* Drop the spinlock across the sleep. */
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}
1298
/*
 * Process the backlog queue accumulated while the socket was owned by
 * user context.  Called with bh_lock_sock held; the queue is taken
 * private before the lock is dropped so receivers can keep appending.
 */
static void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		/* Detach the whole queue, then drop the bh lock. */
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk->sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
		/* More packets may have arrived meanwhile; loop again. */
	} while ((skb = sk->sk_backlog.head) != NULL);
}
1327
1328/**
1329 * sk_wait_data - wait for data to arrive at sk_receive_queue
4dc3b16b
PP
1330 * @sk: sock to wait on
1331 * @timeo: for how long
1da177e4
LT
1332 *
1333 * Now socket state including sk->sk_err is changed only under lock,
1334 * hence we may omit checks after joining wait queue.
1335 * We check receive queue before schedule() only as optimization;
1336 * it is very likely that release_sock() added new data.
1337 */
1338int sk_wait_data(struct sock *sk, long *timeo)
1339{
1340 int rc;
1341 DEFINE_WAIT(wait);
1342
1343 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1344 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1345 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1346 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1347 finish_wait(sk->sk_sleep, &wait);
1348 return rc;
1349}
1350
1351EXPORT_SYMBOL(sk_wait_data);
1352
1353/*
1354 * Set of default routines for initialising struct proto_ops when
1355 * the protocol does not support a particular function. In certain
1356 * cases where it makes no sense for a protocol to have a "do nothing"
1357 * function, some default processing is provided.
1358 */
1359
1360int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1361{
1362 return -EOPNOTSUPP;
1363}
1364
4ec93edb 1365int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1da177e4
LT
1366 int len, int flags)
1367{
1368 return -EOPNOTSUPP;
1369}
1370
1371int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1372{
1373 return -EOPNOTSUPP;
1374}
1375
1376int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1377{
1378 return -EOPNOTSUPP;
1379}
1380
4ec93edb 1381int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1da177e4
LT
1382 int *len, int peer)
1383{
1384 return -EOPNOTSUPP;
1385}
1386
1387unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
1388{
1389 return 0;
1390}
1391
1392int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1393{
1394 return -EOPNOTSUPP;
1395}
1396
1397int sock_no_listen(struct socket *sock, int backlog)
1398{
1399 return -EOPNOTSUPP;
1400}
1401
1402int sock_no_shutdown(struct socket *sock, int how)
1403{
1404 return -EOPNOTSUPP;
1405}
1406
1407int sock_no_setsockopt(struct socket *sock, int level, int optname,
1408 char __user *optval, int optlen)
1409{
1410 return -EOPNOTSUPP;
1411}
1412
1413int sock_no_getsockopt(struct socket *sock, int level, int optname,
1414 char __user *optval, int __user *optlen)
1415{
1416 return -EOPNOTSUPP;
1417}
1418
1419int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1420 size_t len)
1421{
1422 return -EOPNOTSUPP;
1423}
1424
1425int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1426 size_t len, int flags)
1427{
1428 return -EOPNOTSUPP;
1429}
1430
1431int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1432{
1433 /* Mirror missing mmap method error code */
1434 return -ENODEV;
1435}
1436
1437ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1438{
1439 ssize_t res;
1440 struct msghdr msg = {.msg_flags = flags};
1441 struct kvec iov;
1442 char *kaddr = kmap(page);
1443 iov.iov_base = kaddr + offset;
1444 iov.iov_len = size;
1445 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1446 kunmap(page);
1447 return res;
1448}
1449
1450/*
1451 * Default Socket Callbacks
1452 */
1453
1454static void sock_def_wakeup(struct sock *sk)
1455{
1456 read_lock(&sk->sk_callback_lock);
1457 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1458 wake_up_interruptible_all(sk->sk_sleep);
1459 read_unlock(&sk->sk_callback_lock);
1460}
1461
/* Default sk_error_report: wake sleepers and raise POLL_ERR async I/O. */
static void sock_def_error_report(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk,0,POLL_ERR);
	read_unlock(&sk->sk_callback_lock);
}
1470
/* Default sk_data_ready: wake readers and raise POLL_IN async I/O. */
static void sock_def_readable(struct sock *sk, int len)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk,1,POLL_IN);
	read_unlock(&sk->sk_callback_lock);
}
1479
/* Default sk_write_space: wake writers once at least half the send
 * buffer is free again. */
static void sock_def_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, 2, POLL_OUT);
	}

	read_unlock(&sk->sk_callback_lock);
}
1498
/* Default sk_destruct: release protocol-private data, if any. */
static void sock_def_destruct(struct sock *sk)
{
	kfree(sk->sk_protinfo);
}
1503
/* Deliver SIGURG to the socket's owner and raise POLL_PRI async I/O. */
void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, 3, POLL_PRI);
}
1510
/* (Re)arm a socket timer; take a socket reference only when the timer
 * was not already pending (mod_timer returns 0 for inactive timers). */
void sk_reset_timer(struct sock *sk, struct timer_list* timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}

EXPORT_SYMBOL(sk_reset_timer);
1519
/* Cancel a pending socket timer, dropping the reference that
 * sk_reset_timer() took when it armed the timer. */
void sk_stop_timer(struct sock *sk, struct timer_list* timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}

EXPORT_SYMBOL(sk_stop_timer);
1527
/*
 * Initialise a freshly allocated struct sock with default queues,
 * buffer limits, callbacks and (optionally) a link to its struct
 * socket.  @sock may be NULL for kernel-internal sockets.
 */
void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
	skb_queue_head_init(&sk->sk_async_wait_queue);
#endif

	sk->sk_send_head = NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation = GFP_KERNEL;
	sk->sk_rcvbuf = sysctl_rmem_default;
	sk->sk_sndbuf = sysctl_wmem_default;
	sk->sk_state = TCP_CLOSE;
	sk->sk_socket = sock;

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type = sock->type;
		sk->sk_sleep = &sock->wait;
		sock->sk = sk;
	} else
		sk->sk_sleep = NULL;

	rwlock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);
	/* Per-family lockdep class so callback locks of different
	 * address families are tracked independently. */
	lockdep_set_class_and_name(&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);

	sk->sk_state_change = sock_def_wakeup;
	sk->sk_data_ready = sock_def_readable;
	sk->sk_write_space = sock_def_write_space;
	sk->sk_error_report = sock_def_error_report;
	sk->sk_destruct = sock_def_destruct;

	sk->sk_sndmsg_page = NULL;
	sk->sk_sndmsg_off = 0;

	sk->sk_peercred.pid = 0;
	sk->sk_peercred.uid = -1;
	sk->sk_peercred.gid = -1;
	sk->sk_write_pending = 0;
	sk->sk_rcvlowat = 1;
	sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;

	/* -1/-1 marks the timestamp as "never taken". */
	sk->sk_stamp = ktime_set(-1L, -1L);

	atomic_set(&sk->sk_refcnt, 1);
}
1583
fcc70d5f 1584void fastcall lock_sock_nested(struct sock *sk, int subclass)
1da177e4
LT
1585{
1586 might_sleep();
a5b5bb9a 1587 spin_lock_bh(&sk->sk_lock.slock);
1da177e4
LT
1588 if (sk->sk_lock.owner)
1589 __lock_sock(sk);
1590 sk->sk_lock.owner = (void *)1;
a5b5bb9a
IM
1591 spin_unlock(&sk->sk_lock.slock);
1592 /*
1593 * The sk_lock has mutex_lock() semantics here:
1594 */
fcc70d5f 1595 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
a5b5bb9a 1596 local_bh_enable();
1da177e4
LT
1597}
1598
fcc70d5f 1599EXPORT_SYMBOL(lock_sock_nested);
1da177e4
LT
1600
/*
 * Release the socket lock taken by lock_sock(): drain any backlog
 * packets queued while the socket was owned, clear ownership and wake
 * the next waiter.
 */
void fastcall release_sock(struct sock *sk)
{
	/*
	 * The sk_lock has mutex_unlock() semantics:
	 */
	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);
	sk->sk_lock.owner = NULL;
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
1617
/*
 * Copy the socket's last packet timestamp to userspace as a timeval
 * (SIOCGSTAMP).  Enables timestamping as a side effect if it was off.
 * Returns -ENOENT if no timestamp has been taken yet.
 */
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	struct timeval tv;
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk);
	tv = ktime_to_timeval(sk->sk_stamp);
	if (tv.tv_sec == -1)
		return -ENOENT;
	if (tv.tv_sec == 0) {
		/* Stamp was never set by a packet: take one now. */
		sk->sk_stamp = ktime_get_real();
		tv = ktime_to_timeval(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);
1633
/*
 * Same as sock_get_timestamp() but with nanosecond resolution,
 * copying a timespec to userspace (SIOCGSTAMPNS).
 */
int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
	struct timespec ts;
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk);
	ts = ktime_to_timespec(sk->sk_stamp);
	if (ts.tv_sec == -1)
		return -ENOENT;
	if (ts.tv_sec == 0) {
		/* Stamp was never set by a packet: take one now. */
		sk->sk_stamp = ktime_get_real();
		ts = ktime_to_timespec(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);
1649
/* Turn on packet timestamping for this socket, taking the global
 * net_enable_timestamp() reference exactly once per socket. */
void sock_enable_timestamp(struct sock *sk)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_set_flag(sk, SOCK_TIMESTAMP);
		net_enable_timestamp();
	}
}
EXPORT_SYMBOL(sock_enable_timestamp);
1da177e4
LT
1658
1659/*
1660 * Get a socket option on an socket.
1661 *
1662 * FIX: POSIX 1003.1g is very ambiguous here. It states that
1663 * asynchronous errors should be reported by getsockopt. We assume
1664 * this means if you specify SO_ERROR (otherwise whats the point of it).
1665 */
1666int sock_common_getsockopt(struct socket *sock, int level, int optname,
1667 char __user *optval, int __user *optlen)
1668{
1669 struct sock *sk = sock->sk;
1670
1671 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1672}
1673
1674EXPORT_SYMBOL(sock_common_getsockopt);
1675
#ifdef CONFIG_COMPAT
/* 32-bit compat getsockopt: prefer the protocol's compat handler when
 * one exists, otherwise fall back to the native one. */
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_getsockopt != NULL)
		return sk->sk_prot->compat_getsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif
1689
/* Common recvmsg: delegate to the protocol and propagate the source
 * address length back into the msghdr on success. */
int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}

EXPORT_SYMBOL(sock_common_recvmsg);
1705
1706/*
1707 * Set socket options on an inet socket.
1708 */
1709int sock_common_setsockopt(struct socket *sock, int level, int optname,
1710 char __user *optval, int optlen)
1711{
1712 struct sock *sk = sock->sk;
1713
1714 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1715}
1716
1717EXPORT_SYMBOL(sock_common_setsockopt);
1718
#ifdef CONFIG_COMPAT
/* 32-bit compat setsockopt: prefer the protocol's compat handler when
 * one exists, otherwise fall back to the native one. */
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_setsockopt != NULL)
		return sk->sk_prot->compat_setsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif
1732
/*
 * Common teardown for a socket being closed: run the protocol
 * destructor, unhash, orphan and drop the final reference.
 */
void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sock_common_release is called, processes have
	 * no access to socket. But net still has.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * In this point socket cannot receive new packets, but it is possible
	 * that some packets are in flight because some CPU runs receiver and
	 * did hash table lookup before we unhashed socket. They will achieve
	 * receive queue and will be purged by socket destructor.
	 *
	 * Also we still have packets pending on receive queue and probably,
	 * our own packets waiting in device queues. sock_destroy will drain
	 * receive queue, but transmitted packets will delay socket destruction
	 * until the last reference will be released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);
	sock_put(sk);
}

EXPORT_SYMBOL(sk_common_release);
1769
1770static DEFINE_RWLOCK(proto_list_lock);
1771static LIST_HEAD(proto_list);
1772
1773int proto_register(struct proto *prot, int alloc_slab)
1774{
8feaf0c0
ACM
1775 char *request_sock_slab_name = NULL;
1776 char *timewait_sock_slab_name;
1da177e4
LT
1777 int rc = -ENOBUFS;
1778
1da177e4
LT
1779 if (alloc_slab) {
1780 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
20c2df83 1781 SLAB_HWCACHE_ALIGN, NULL);
1da177e4
LT
1782
1783 if (prot->slab == NULL) {
1784 printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1785 prot->name);
2a278051 1786 goto out;
1da177e4 1787 }
2e6599cb
ACM
1788
1789 if (prot->rsk_prot != NULL) {
1790 static const char mask[] = "request_sock_%s";
1791
1792 request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1793 if (request_sock_slab_name == NULL)
1794 goto out_free_sock_slab;
1795
1796 sprintf(request_sock_slab_name, mask, prot->name);
1797 prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1798 prot->rsk_prot->obj_size, 0,
20c2df83 1799 SLAB_HWCACHE_ALIGN, NULL);
2e6599cb
ACM
1800
1801 if (prot->rsk_prot->slab == NULL) {
1802 printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1803 prot->name);
1804 goto out_free_request_sock_slab_name;
1805 }
1806 }
8feaf0c0 1807
6d6ee43e 1808 if (prot->twsk_prot != NULL) {
8feaf0c0
ACM
1809 static const char mask[] = "tw_sock_%s";
1810
1811 timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1812
1813 if (timewait_sock_slab_name == NULL)
1814 goto out_free_request_sock_slab;
1815
1816 sprintf(timewait_sock_slab_name, mask, prot->name);
6d6ee43e
ACM
1817 prot->twsk_prot->twsk_slab =
1818 kmem_cache_create(timewait_sock_slab_name,
1819 prot->twsk_prot->twsk_obj_size,
1820 0, SLAB_HWCACHE_ALIGN,
20c2df83 1821 NULL);
6d6ee43e 1822 if (prot->twsk_prot->twsk_slab == NULL)
8feaf0c0
ACM
1823 goto out_free_timewait_sock_slab_name;
1824 }
1da177e4
LT
1825 }
1826
2a278051 1827 write_lock(&proto_list_lock);
1da177e4 1828 list_add(&prot->node, &proto_list);
1da177e4 1829 write_unlock(&proto_list_lock);
2a278051
ACM
1830 rc = 0;
1831out:
1da177e4 1832 return rc;
8feaf0c0
ACM
1833out_free_timewait_sock_slab_name:
1834 kfree(timewait_sock_slab_name);
1835out_free_request_sock_slab:
1836 if (prot->rsk_prot && prot->rsk_prot->slab) {
1837 kmem_cache_destroy(prot->rsk_prot->slab);
1838 prot->rsk_prot->slab = NULL;
1839 }
2e6599cb
ACM
1840out_free_request_sock_slab_name:
1841 kfree(request_sock_slab_name);
1842out_free_sock_slab:
1843 kmem_cache_destroy(prot->slab);
1844 prot->slab = NULL;
1845 goto out;
1da177e4
LT
1846}
1847
1848EXPORT_SYMBOL(proto_register);
1849
/*
 * Unregister a protocol and destroy its slab caches.  The slab-name
 * strings allocated by proto_register() are recovered via
 * kmem_cache_name() and freed here.
 */
void proto_unregister(struct proto *prot)
{
	write_lock(&proto_list_lock);
	list_del(&prot->node);
	write_unlock(&proto_list_lock);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
		const char *name = kmem_cache_name(prot->rsk_prot->slab);

		kmem_cache_destroy(prot->rsk_prot->slab);
		kfree(name);
		prot->rsk_prot->slab = NULL;
	}

	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
		const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);

		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
		kfree(name);
		prot->twsk_prot->twsk_slab = NULL;
	}
}

EXPORT_SYMBOL(proto_unregister);
1879
#ifdef CONFIG_PROC_FS
/* seq_file iterator over proto_list; proto_list_lock is held from
 * ->start until ->stop. */
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&proto_list_lock);
	return seq_list_start_head(&proto_list, *pos);
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&proto_list_lock);
}
1896
1897static char proto_method_implemented(const void *method)
1898{
1899 return method == NULL ? 'n' : 'y';
1900}
1901
/* Emit one /proc/net/protocols row for @proto: counters, slab state,
 * owning module, then a y/n flag per optional method. */
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}
1934
/* Show one row: the list head sentinel prints the column header,
 * every other entry prints a protocol line. */
static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == &proto_list)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, list_entry(v, struct proto, node));
	return 0;
}

static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};
1959
static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &proto_seq_ops);
}

static const struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init proto_init(void)
{
	/* register /proc/net/protocols */
	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
}

subsys_initcall(proto_init);

#endif /* PROC_FS */
1982
1983EXPORT_SYMBOL(sk_alloc);
1984EXPORT_SYMBOL(sk_free);
1985EXPORT_SYMBOL(sk_send_sigurg);
1986EXPORT_SYMBOL(sock_alloc_send_skb);
1987EXPORT_SYMBOL(sock_init_data);
1988EXPORT_SYMBOL(sock_kfree_s);
1989EXPORT_SYMBOL(sock_kmalloc);
1990EXPORT_SYMBOL(sock_no_accept);
1991EXPORT_SYMBOL(sock_no_bind);
1992EXPORT_SYMBOL(sock_no_connect);
1993EXPORT_SYMBOL(sock_no_getname);
1994EXPORT_SYMBOL(sock_no_getsockopt);
1995EXPORT_SYMBOL(sock_no_ioctl);
1996EXPORT_SYMBOL(sock_no_listen);
1997EXPORT_SYMBOL(sock_no_mmap);
1998EXPORT_SYMBOL(sock_no_poll);
1999EXPORT_SYMBOL(sock_no_recvmsg);
2000EXPORT_SYMBOL(sock_no_sendmsg);
2001EXPORT_SYMBOL(sock_no_sendpage);
2002EXPORT_SYMBOL(sock_no_setsockopt);
2003EXPORT_SYMBOL(sock_no_shutdown);
2004EXPORT_SYMBOL(sock_no_socketpair);
2005EXPORT_SYMBOL(sock_rfree);
2006EXPORT_SYMBOL(sock_setsockopt);
2007EXPORT_SYMBOL(sock_wfree);
2008EXPORT_SYMBOL(sock_wmalloc);
2009EXPORT_SYMBOL(sock_i_uid);
2010EXPORT_SYMBOL(sock_i_ino);
1da177e4 2011EXPORT_SYMBOL(sysctl_optmem_max);
6baf1f41 2012#ifdef CONFIG_SYSCTL
1da177e4
LT
2013EXPORT_SYMBOL(sysctl_rmem_max);
2014EXPORT_SYMBOL(sysctl_wmem_max);
2015#endif