/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *	See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */


#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;


#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
						   __be32 addr);
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, struct tcphdr *th);
#else
static inline
struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	return NULL;
}
#endif

struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
	.lhash_lock  = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
	.lhash_users = ATOMIC_INIT(0),
	.lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
};

static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}

EXPORT_SYMBOL_GPL(tcp_twsk_unique);

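/*
 * Illustrative sketch, not part of the original file: the initial send
 * sequence chosen above when a TIME-WAIT socket is reused. Advancing
 * write_seq past tw_snd_nxt by 65535 + 2 keeps the new connection's
 * sequence space clear of the old one's receive window; 0 is reserved
 * to mean "pick a fresh clock-based ISN", so it is bumped to 1. The
 * helper name is hypothetical.
 */
static inline u32 tw_reuse_isn_sketch(u32 tw_snd_nxt)
{
	u32 isn = tw_snd_nxt + 65535 + 2;	/* skip the old window */

	return isn ? isn : 1;			/* 0 means "no ISN chosen" */
}
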
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct rtable *rt;
	__be32 daddr, nexthop;
	int tmp;
	int err;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt && inet->opt->srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet->opt->faddr;
	}

	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			       IPPROTO_TCP,
			       inet->sport, usin->sin_port, sk, 1);
	if (tmp < 0) {
		if (tmp == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return tmp;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet->opt || !inet->opt->srr)
		daddr = rt->rt_dst;

	if (!inet->saddr)
		inet->saddr = rt->rt_src;
	inet->rcv_saddr = inet->saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
		struct inet_peer *peer = rt_get_peer(rt);
		/*
		 * VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state
		 * TIME-WAIT, and initialize rx_opt.ts_recent from it,
		 * when trying new connection.
		 */
		if (peer != NULL &&
		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
			tp->rx_opt.ts_recent = peer->tcp_ts;
		}
	}

	inet->dport = usin->sin_port;
	inet->daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet->opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;

	tp->rx_opt.mss_clamp = 536;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	err = ip_route_newports(&rt, IPPROTO_TCP,
				inet->sport, inet->dport, sk);
	if (err)
		goto failure;

	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->u.dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
							   inet->daddr,
							   inet->sport,
							   usin->sin_port);

	inet->id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->dport = 0;
	return err;
}

/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always < 576 bytes, so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet-too-big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

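/*
 * Illustrative sketch, not part of the original file: how a new path
 * MTU maps to a TCP segment size. tcp_sync_mss() above does the real
 * work (it also accounts for IP and TCP options via icsk_ext_hdr_len);
 * this hypothetical helper only shows the basic arithmetic for an
 * option-less IPv4 connection.
 */
static inline int mtu_to_mss_sketch(int pmtu)
{
	/* strip the fixed 20-byte IPv4 and 20-byte TCP headers */
	return pmtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
}
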
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *skb, u32 info)
{
	struct iphdr *iph = (struct iphdr *)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct sock *sk;
	__u32 seq;
	int err;
	struct net *net = dev_net(skb->dev);

	if (skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			iph->saddr, th->source, inet_iif(skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, for example, if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters, even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(len, inet->saddr,
					  inet->daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
					 csum_partial((char *)th,
						      th->doff << 2,
						      skb->csum));
	}
}

int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
	skb->ip_summed = CHECKSUM_PARTIAL;
	return 0;
}

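/*
 * Illustrative sketch, not part of the original file: what the two
 * functions above arrange for. With CHECKSUM_PARTIAL, th->check holds
 * only the folded pseudo-header sum, and the device (or the software
 * fallback, cf. skb_checksum_help()) is expected to sum the bytes from
 * csum_start onward and store the folded result at csum_start +
 * csum_offset. This hypothetical helper shows that completion step in
 * software, assuming a linear skb.
 */
static inline void csum_partial_complete_sketch(struct sk_buff *skb)
{
	/* csum_start is measured from skb->head; convert to a data offset */
	int offset = skb->csum_start - skb_headroom(skb);
	__wsum csum = csum_partial(skb->data + offset, skb->len - offset, 0);

	*(__sum16 *)(skb->data + offset + skb->csum_offset) = csum_fold(csum);
}
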
/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So we build the reply based only on the parameters
 *		that arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb->rtable->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->daddr,
				    ip_hdr(skb)->saddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	net = dev_net(skb->dst->dev);
	ip_send_reply(net->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags)
{
	struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb->dst->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;

	ip_send_reply(net->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent,
			0,
			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
				struct dst_entry *dst)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int err = -1;
	struct sk_buff * skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req);

	if (skb) {
		struct tcphdr *th = tcp_hdr(skb);

		th->check = tcp_v4_check(skb->len,
					 ireq->loc_addr,
					 ireq->rmt_addr,
					 csum_partial((char *)th, skb->len,
						      skb->csum));

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	dst_release(dst);
	return err;
}

static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
{
	return __tcp_v4_send_synack(sk, req, NULL);
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

#ifdef CONFIG_SYN_COOKIES
static void syn_flood_warning(struct sk_buff *skb)
{
	static unsigned long warntime;

	if (time_after(jiffies, (warntime + HZ * 60))) {
		warntime = jiffies;
		printk(KERN_INFO
		       "possible SYN flooding on port %d. Sending cookies.\n",
		       ntohs(tcp_hdr(skb)->dest));
	}
}
#endif

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options *tcp_v4_save_options(struct sock *sk,
					      struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = optlength(opt);
		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(dopt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
static struct tcp_md5sig_key *
			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
		return NULL;
	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr)
			return &tp->md5sig_info->keys4[i].base;
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
}

EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
}

/* This can be called on a newly created socket, from other files */
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
		      u8 *newkey, u8 newkeylen)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp4_md5sig_key *keys;

	key = tcp_v4_md5_do_lookup(sk, addr);
	if (key) {
		/* Pre-existing entry - just update that one. */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		struct tcp_md5sig_info *md5sig;

		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
						  GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		}
		if (tcp_alloc_md5sig_pool() == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}
		md5sig = tp->md5sig_info;

		if (md5sig->alloced4 == md5sig->entries4) {
			keys = kmalloc((sizeof(*keys) *
					(md5sig->entries4 + 1)), GFP_ATOMIC);
			if (!keys) {
				kfree(newkey);
				tcp_free_md5sig_pool();
				return -ENOMEM;
			}

			if (md5sig->entries4)
				memcpy(keys, md5sig->keys4,
				       sizeof(*keys) * md5sig->entries4);

			/* Free old key list, and reference new one */
			kfree(md5sig->keys4);
			md5sig->keys4 = keys;
			md5sig->alloced4++;
		}
		md5sig->entries4++;
		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
	}
	return 0;
}

EXPORT_SYMBOL(tcp_v4_md5_do_add);

static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, u8 newkeylen)
{
	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
				 newkey, newkeylen);
}

int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr) {
			/* Free the key */
			kfree(tp->md5sig_info->keys4[i].base.key);
			tp->md5sig_info->entries4--;

			if (tp->md5sig_info->entries4 == 0) {
				kfree(tp->md5sig_info->keys4);
				tp->md5sig_info->keys4 = NULL;
				tp->md5sig_info->alloced4 = 0;
			} else if (tp->md5sig_info->entries4 != i) {
				/* Need to do some manipulation */
				memmove(&tp->md5sig_info->keys4[i],
					&tp->md5sig_info->keys4[i+1],
					(tp->md5sig_info->entries4 - i) *
					 sizeof(struct tcp4_md5sig_key));
			}
			tcp_free_md5sig_pool();
			return 0;
		}
	}
	return -ENOENT;
}

EXPORT_SYMBOL(tcp_v4_md5_do_del);

static void tcp_v4_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Free each key, then the set of keys,
	 * the crypto element, and then decrement our
	 * hold on the last resort crypto.
	 */
	if (tp->md5sig_info->entries4) {
		int i;
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].base.key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();
	}
	if (tp->md5sig_info->keys4) {
		kfree(tp->md5sig_info->keys4);
		tp->md5sig_info->keys4 = NULL;
		tp->md5sig_info->alloced4  = 0;
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 *newkey;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
		if (!tcp_sk(sk)->md5sig_info)
			return -ENOENT;
		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (!tcp_sk(sk)->md5sig_info) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);

		if (!p)
			return -EINVAL;

		tp->md5sig_info = p;
		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
	}

	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
	if (!newkey)
		return -ENOMEM;
	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
				 newkey, cmd.tcpm_keylen);
}

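/*
 * Hedged userspace sketch, not part of the original file: how a key
 * reaches tcp_v4_parse_md5_keys() above. An application sets the
 * TCP_MD5SIG socket option with a struct tcp_md5sig from <linux/tcp.h>;
 * a zero tcpm_keylen deletes the key for that peer. The address and
 * key shown are examples only:
 *
 *	struct tcp_md5sig md5;
 *	struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	memset(&md5, 0, sizeof(md5));
 *	sin->sin_family = AF_INET;
 *	sin->sin_addr.s_addr = inet_addr("192.0.2.1");	// example peer
 *	md5.tcpm_keylen = 6;
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	if (setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5)) < 0)
 *		perror("setsockopt(TCP_MD5SIG)");
 */
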
static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			struct sock *sk, struct request_sock *req,
			struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->saddr;
		daddr = inet_sk(sk)->daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	__u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash failed for "
			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
			       NIPQUAD(iph->saddr), ntohs(th->source),
			       NIPQUAD(iph->daddr), ntohs(th->dest),
			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
		}
		return 1;
	}
	return 0;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_send_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
};

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
};
#endif

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
};

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet_request_sock *ireq;
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies) {
			want_cookie = 1;
		} else
#endif
		goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = 536;
	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
		/* Some OSes (unknown ones, but I see them on web servers,
		 * which contain information interesting only for windows'
		 * users) do not send their stamp in SYN. It is the easy case.
		 * We simply do not advertise TS support.
		 */
		tmp_opt.saw_tstamp = 0;
		tmp_opt.tstamp_ok  = 0;
	}
	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;

	tcp_openreq_init(req, &tmp_opt, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(sk, skb);
	if (!want_cookie)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
		syn_flood_warning(skb);
		req->cookie_ts = tmp_opt.tstamp_ok;
#endif
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
	} else if (!isn) {
		struct inet_peer *peer = NULL;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, req)) != NULL &&
		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
		    peer->v4daddr == saddr) {
			if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
				       "request from " NIPQUAD_FMT "/%u\n",
				       NIPQUAD(saddr),
				       ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;

	if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}


/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(newsk, dst);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->daddr	      = ireq->rmt_addr;
	newinet->rcv_saddr    = ireq->loc_addr;
	newinet->saddr	      = ireq->loc_addr;
	newinet->opt	      = ireq->opt;
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newinet->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
	newinet->id = newtp->write_seq ^ jiffies;

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
					  newkey, key->keylen);
		newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
	}
#endif

	__inet_hash_nolisten(newsk);
	__inet_inherit_port(sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}

static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->len, iph->saddr,
				  iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}

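/*
 * Illustrative sketch, not part of the original file: the check made
 * above for CHECKSUM_COMPLETE. The device leaves the one's-complement
 * sum of the whole TCP segment in skb->csum; adding the pseudo-header
 * and folding must yield 0 for a valid packet, which is what
 * tcp_v4_check() boils down to. The helper name is hypothetical.
 */
static inline int tcp_csum_ok_sketch(__be32 saddr, __be32 daddr,
				     unsigned short len, __wsum csum)
{
	return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, csum) == 0;
}
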

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		TCP_CHECK_TIMER(sk);
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	TCP_CHECK_TIMER(sk);
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}

/*
 *	From tcp_input.c
 */

int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */
	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
		goto bad_packet;

	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when	 = 0;
	TCP_SKB_CB(skb)->flags	 = iph->tos;
	TCP_SKB_CB(skb)->sacked	 = 0;

	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = get_softnet_dma();
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

1da177e4
LT
1675/* VJ's idea. Save last timestamp seen from this destination
1676 * and hold it at least for normal timewait interval to use for duplicate
1677 * segment detection in subsequent connections, before they enter synchronized
1678 * state.
1679 */
1680
1681int tcp_v4_remember_stamp(struct sock *sk)
1682{
1683 struct inet_sock *inet = inet_sk(sk);
1684 struct tcp_sock *tp = tcp_sk(sk);
1685 struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1686 struct inet_peer *peer = NULL;
1687 int release_it = 0;
1688
1689 if (!rt || rt->rt_dst != inet->daddr) {
1690 peer = inet_getpeer(inet->daddr, 1);
1691 release_it = 1;
1692 } else {
1693 if (!rt->peer)
1694 rt_bind_peer(rt, 1);
1695 peer = rt->peer;
1696 }
1697
1698 if (peer) {
1699 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
9d729f72 1700 (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
1da177e4
LT
1701 peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1702 peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1703 peer->tcp_ts = tp->rx_opt.ts_recent;
1704 }
1705 if (release_it)
1706 inet_putpeer(peer);
1707 return 1;
1708 }
1709
1710 return 0;
1711}
1712
8feaf0c0 1713int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1da177e4 1714{
8feaf0c0 1715 struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
1da177e4
LT
1716
1717 if (peer) {
8feaf0c0
ACM
1718 const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1719
1720 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
9d729f72 1721 (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
8feaf0c0
ACM
1722 peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1723 peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1724 peer->tcp_ts = tcptw->tw_ts_recent;
1da177e4
LT
1725 }
1726 inet_putpeer(peer);
1727 return 1;
1728 }
1729
1730 return 0;
1731}
1732
8292a17a 1733struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
1734 .queue_xmit = ip_queue_xmit,
1735 .send_check = tcp_v4_send_check,
1736 .rebuild_header = inet_sk_rebuild_header,
1737 .conn_request = tcp_v4_conn_request,
1738 .syn_recv_sock = tcp_v4_syn_recv_sock,
1739 .remember_stamp = tcp_v4_remember_stamp,
1740 .net_header_len = sizeof(struct iphdr),
1741 .setsockopt = ip_setsockopt,
1742 .getsockopt = ip_getsockopt,
1743 .addr2sockaddr = inet_csk_addr2sockaddr,
1744 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 1745 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 1746#ifdef CONFIG_COMPAT
543d9cfe
ACM
1747 .compat_setsockopt = compat_ip_setsockopt,
1748 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 1749#endif
1da177e4
LT
1750};
1751
cfb6eeb4 1752#ifdef CONFIG_TCP_MD5SIG
b6332e6c 1753static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 1754 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 1755 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4
YH
1756 .md5_add = tcp_v4_md5_add_func,
1757 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 1758};
b6332e6c 1759#endif
cfb6eeb4 1760
/* NOTE: A lot of things are set to zero explicitly by the call to
 * sk_alloc(), so they need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Clean up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_v4_clear_md5_list(sk);
		kfree(tp->md5sig_info);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean up the prequeue; it must really be empty by now. */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	/*
	 * If a sendmsg cached page exists, toss it.
	 */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	atomic_dec(&tcp_sockets_allocated);
}

EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
{
	return hlist_empty(head) ? NULL :
	       list_entry(head->first, struct inet_timewait_sock, tw_node);
}

static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
{
	return tw->tw_node.next ?
		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
}

static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_node *node;
	struct sock *sk = cur;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	if (!sk) {
		st->bucket = 0;
		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
		goto get_sk;
	}

	++st->num;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		sk = sk_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_next(sk);
	}
get_sk:
	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid = sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state = TCP_SEQ_STATE_OPENREQ;
			st->sbucket = 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	if (++st->bucket < INET_LHTABLE_SIZE) {
		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}

static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	void *rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static inline int empty_bucket(struct tcp_iter_state *st)
{
	return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
		hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
}

static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
		struct sock *sk;
		struct hlist_node *node;
		struct inet_timewait_sock *tw;
		rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		read_lock_bh(lock);
		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket].twchain) {
			if (tw->tw_family != st->family ||
			    !net_eq(twsk_net(tw), net)) {
				continue;
			}
			rc = tw;
			goto out;
		}
		read_unlock_bh(lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}

static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* Look for the next non-empty bucket */
		while (++st->bucket < tcp_hashinfo.ehash_size &&
		       empty_bucket(st))
			;
		if (st->bucket >= tcp_hashinfo.ehash_size)
			return NULL;

		read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
	} else
		sk = sk_next(sk);

	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			goto found;
	}

	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	inet_listen_lock(&tcp_hashinfo);
	st->state = TCP_SEQ_STATE_LISTENING;
	rc = listening_get_idx(seq, &pos);

	if (!rc) {
		inet_listen_unlock(&tcp_hashinfo);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc = established_get_idx(seq, pos);
	}

	return rc;
}

static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	void *rc = NULL;
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			inet_listen_unlock(&tcp_hashinfo);
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			rc = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	return rc;
}

static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
		/* fall through */
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			inet_listen_unlock(&tcp_hashinfo);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}
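
/*
 * Aside (illustrative sketch, not part of this file): the start/next/stop
 * trio above implements the generic seq_file iterator contract.  When
 * userspace read()s the proc file, the seq_file core drives the iterator
 * roughly like this:
 *
 *	void *v = ops->start(seq, &pos);
 *	while (v) {
 *		ops->show(seq, v);	// sees SEQ_START_TOKEN first
 *		v = ops->next(seq, v, &pos);
 *	}
 *	ops->stop(seq, v);
 *
 * The walk can be suspended whenever the output buffer fills, so
 * tcp_seq_stop() must be able to drop whichever lock the iterator was
 * holding at that moment; that is what the st->state switch is for.
 */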

static int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			   sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family = afinfo->family;
	return 0;
}

int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	afinfo->seq_fops.open		= tcp_seq_open;
	afinfo->seq_fops.read		= seq_read;
	afinfo->seq_fops.llseek		= seq_lseek;
	afinfo->seq_fops.release	= seq_release_net;

	afinfo->seq_ops.start		= tcp_seq_start;
	afinfo->seq_ops.next		= tcp_seq_next;
	afinfo->seq_ops.stop		= tcp_seq_stop;

	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
			     &afinfo->seq_fops, afinfo);
	if (!p)
		rc = -ENOMEM;
	return rc;
}

void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	proc_net_remove(net, afinfo->name);
}

static void get_openreq4(struct sock *sk, struct request_sock *req,
			 struct seq_file *f, int i, int uid, int *len)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int ttd = req->expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		   " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
		i,
		ireq->loc_addr,
		ntohs(inet_sk(sk)->sport),
		ireq->rmt_addr,
		ntohs(ireq->rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_to_clock_t(ttd),
		req->retrans,
		uid,
		0,  /* non standard timer */
		0,  /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req,
		len);
}

static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
{
	int timer_active;
	unsigned long timer_expires;
	struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	__be32 dest = inet->daddr;
	__be32 src = inet->rcv_saddr;
	__u16 destp = ntohs(inet->dport);
	__u16 srcp = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sk->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
		i, src, srcp, dest, destp, sk->sk_state,
		tp->write_seq - tp->snd_una,
		sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
					     (tp->rcv_nxt - tp->copied_seq),
		timer_active,
		jiffies_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		sock_i_uid(sk),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		atomic_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh,
		len);
}

static void get_timewait4_sock(struct inet_timewait_sock *tw,
			       struct seq_file *f, int i, int *len)
{
	__be32 dest, src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		   " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw, len);
}

#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	int len;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, seq, st->num, &len);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, seq, st->num, &len);
		break;
	}
	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
out:
	return 0;
}
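
/*
 * Aside (illustrative sketch, not part of this file): the fixed-width
 * lines emitted above are exactly what userspace sees in /proc/net/tcp.
 * A minimal, hypothetical parser for the address and state columns
 * (error handling elided):
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char line[256];
 *		FILE *f = fopen("/proc/net/tcp", "r");
 *
 *		fgets(line, sizeof(line), f);	// skip the header line
 *		while (fgets(line, sizeof(line), f)) {
 *			unsigned int laddr, lport, raddr, rport, state;
 *
 *			// Addresses are a __be32 printed with %08X, i.e.
 *			// the big-endian address reinterpreted in host
 *			// byte order (byte-reversed on little-endian).
 *			if (sscanf(line, "%*d: %8X:%4X %8X:%4X %2X",
 *				   &laddr, &lport, &raddr, &rport,
 *				   &state) == 5)
 *				printf("%08X:%u -> %08X:%u st=%02X\n",
 *				       laddr, lport, raddr, rport, state);
 *		}
 *		fclose(f);
 *		return 0;
 *	}
 */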

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= {
		.owner		= THIS_MODULE,
	},
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

static int tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};
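
/*
 * Aside (illustrative sketch, not part of this file): tcp_prot is the
 * protocol half of the socket-call dispatch; an ordinary userspace TCP
 * socket ends up bound to it.  Hypothetical sketch of the path from
 * userspace:
 *
 *	#include <sys/socket.h>
 *
 *	// socket(2) with these arguments selects the inet stream
 *	// protosw entry whose protocol is tcp_prot, so a later
 *	// connect(2) on fd reaches tcp_v4_connect() via .connect,
 *	// and close(2) reaches tcp_close() via .close.
 *	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
 */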

static int __net_init tcp_sk_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcp_sk_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init = tcp_sk_init,
	.exit = tcp_sk_exit,
};

void __init tcp_v4_init(void)
{
	if (register_pernet_device(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}

EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
EXPORT_SYMBOL(sysctl_tcp_low_latency);