/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *	David S. Miller		:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *	David S. Miller		:	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *	Andi Kleen		:	Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *	Andi Kleen		:	Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *	Mike McLagan		:	Routing by source
 *	Juan Jose Ciarlante	:	ip_dynaddr bits
 *	Andi Kleen		:	various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */


#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);


#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

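/* Pick the ISN for a passively opened connection from the incoming
 * segment's own addresses and ports, so the reply ISN is a keyed
 * function of the connection 4-tuple.
 */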
static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's: only the timestamp cache is
	   held not per host, but per port pair, and the TW bucket is used
	   as the state holder.

	   If the TW bucket has already been destroyed we fall back to VJ's
	   scheme and use the initial timestamp retrieved from the peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
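		/* Step write_seq past the old connection's send window
		 * (65535) plus slack, presumably so stray segments from
		 * the TIME-WAIT incarnation cannot alias into the new
		 * sequence space.
		 */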
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk, true);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
		/*
		 * VJ's idea. We save the last timestamp seen from
		 * the destination in the peer table, when entering
		 * TIME-WAIT state, and initialize rx_opt.ts_recent
		 * from it when trying a new connection.
		 */
		if (peer) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
				tp->rx_opt.ts_recent = peer->tcp_ts;
			}
		}
	}

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always < 576 bytes, so they should go
	 * through unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the dst entry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet-too-big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember the soft error
	 * for the case this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment,
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

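		/* One backoff step was undone above; re-arm the
		 * retransmit timer with what remains of the shorter
		 * RTO, measured from when the head of the write queue
		 * was last sent.
		 */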
		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now. */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, f.e., if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows considering only PROTO_UNREACH and
	 * PORT_UNREACH as hard errors (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note that in the modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters, even these two messages finally
	 * lose their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

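/* Fill in the TCP checksum.  Under CHECKSUM_PARTIAL only the
 * pseudo-header sum is seeded here; csum_start/csum_offset tell the
 * device (or the software fallback) where to finish the sum and store
 * the result.  Otherwise the full checksum is computed immediately.
 */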
static void __tcp_v4_send_check(struct sk_buff *skb,
				__be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
	return 0;
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why do I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just a duplicate segment or a bug in the other
 *		side's TCP.  So we build the reply based only on the
 *		parameters that arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

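	/* Per RFC 793, an RST answering an ACK-bearing segment reuses
	 * that segment's ack_seq as its own sequence number; otherwise
	 * it sends seq 0 and acknowledges everything received so far.
	 */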
	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * The active side is lost. Try to find the listening
		 * socket through the source port, and then find the md5
		 * key through the listening socket.
		 * We do not lose security here:
		 * the incoming packet is checked against the md5 hash
		 * of the found key; no RST is generated if the md5 hash
		 * doesn't match.
		 */
		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
					     &tcp_hashinfo, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			return;
		rcu_read_lock();
		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto release_sk1;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					     &ip_hdr(skb)->saddr,
					     AF_INET) : NULL;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When the socket is gone, all binding information is lost.
	 * Routing might fail in this case.  Use iif for oif to
	 * make sure we can deliver it.
	 */
	arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

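/* Build a bare ACK in a stack-allocated header plus option words:
 * opt[] carries a NOP-padded TIMESTAMP option when ts is set, and an
 * MD5 signature option may follow it.
 */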
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      struct request_values *rvp)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, rvp);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	dst_release(dst);
	return err;
}

static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
			     struct request_values *rvp)
{
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return tcp_v4_send_synack(sk, NULL, req, rvp);
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/*
 * Return 1 if a syncookie should be sent
 */
int tcp_syn_flood_action(struct sock *sk,
			 const struct sk_buff *skb,
			 const char *proto)
{
	const char *msg = "Dropping request";
	int want_cookie = 0;
	struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = 1;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
						  struct sk_buff *skb)
{
	const struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options_rcu *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = sizeof(*dopt) + opt->optlen;

		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(&dopt->opt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

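/* Userspace installs keys with setsockopt(TCP_MD5SIG); roughly
 * (a sketch, error handling omitted):
 *
 *	struct tcp_md5sig md5 = {};
 *	((struct sockaddr_in *)&md5.tcpm_addr)->sin_family = AF_INET;
 *	md5.tcpm_keylen = keylen;
 *	memcpy(md5.tcpm_key, key, keylen);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * which ends up in tcp_v4_parse_md5_keys() below.
 */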
/* Find the Key structure for an address. */
struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *pos;
	unsigned int size = sizeof(struct in_addr);
	struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       sock_owned_by_user(sk));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

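	/* No per-socket MD5 state yet: allocate the info block and
	 * publish it with rcu_assign_pointer() so lockless readers
	 * only ever see a fully initialised list head.
	 */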
	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *pos, *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	if (!hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();
	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}

static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
			       &iph->saddr, ntohs(th->source),
			       &iph->daddr, ntohs(th->dest),
			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
		}
		return 1;
	}
	return 0;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
};
#endif

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	const u8 *hash_location;
	struct request_sock *req;
	struct inet_request_sock *ireq;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	int want_cookie = 0;

	/* Never answer SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations; they conserve resources and the peer is
	 * evidently a real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
		if (!want_cookie)
			goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss  = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);

	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u8 *c;
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
			goto drop_and_release;

		/* Secret recipe starts with IP addresses */
		*mess++ ^= (__force u32)daddr;
		*mess++ ^= (__force u32)saddr;

		/* plus variable length Initiator Cookie */
		c = (u8 *)mess;
		while (l-- > 0)
			*c++ ^= *hash_location++;

		want_cookie = 0;	/* not our kind of cookie */
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	} else {
		goto drop_and_release;
	}
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (want_cookie) {
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		struct inet_peer *peer = NULL;
		struct flowi4 fl4;

		/* VJ's idea. We save the last timestamp seen
		 * from the destination in the peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting a new connection request.
		 *
		 * If "isn" is not zero, this request hit an alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
		    fl4.daddr == saddr &&
		    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies the last quarter of the
			 * backlog is filled with destinations proven
			 * to be alive.
			 * It means that we continue to communicate
			 * with destinations already remembered at
			 * the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->snt_synack = tcp_time_stamp;

	if (tcp_v4_send_synack(sk, dst, req,
			       (struct request_values *)&tmp_ext) ||
	    want_cookie)
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);


/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->inet_daddr   = ireq->rmt_addr;
	newinet->inet_rcv_saddr = ireq->loc_addr;
	newinet->inet_saddr   = ireq->loc_addr;
	inet_opt	      = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	newinet->rcv_tos      = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
		goto put_and_exit;

	sk_setup_caps(newsk, dst);

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);
	if (tcp_rsk(req)->snt_synack)
		tcp_valid_rtt_meas(newsk,
		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
	newtp->total_retrans = req->retrans;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	tcp_clear_xmit_timers(newsk);
	tcp_cleanup_congestion_control(newsk);
	bh_unlock_sock(newsk);
	sock_put(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

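/* Validate the incoming checksum: a CHECKSUM_COMPLETE value from the
 * driver is checked against the pseudo-header here; short packets
 * (<= 76 bytes) are verified immediately, larger ones keep a partial
 * sum to be completed later during the copy.
 */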
static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->len, iph->saddr,
				  iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}


/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		sock_rps_save_rxhash(sk, skb);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

/*
 *	From tcp_input.c
 */

int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided the case of th->doff == 0 is eliminated.
	 * So, we defer the checks. */
	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
		goto bad_packet;

	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
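	/* Fill the control block: end_seq counts the SYN and FIN flags
	 * as one sequence unit each, on top of the payload length.
	 */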
1692 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1693 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1694 skb->len - th->doff * 4);
1695 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1696 TCP_SKB_CB(skb)->when = 0;
b82d1bb4 1697 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1da177e4
LT
1698 TCP_SKB_CB(skb)->sacked = 0;
1699
9a1f27c4 1700 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1da177e4
LT
1701 if (!sk)
1702 goto no_tcp_socket;
1703
bb134d5d
ED
1704process:
1705 if (sk->sk_state == TCP_TIME_WAIT)
1706 goto do_time_wait;
1707
6cce09f8
ED
1708 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1709 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
d218d111 1710 goto discard_and_relse;
6cce09f8 1711 }
d218d111 1712
1da177e4
LT
1713 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1714 goto discard_and_relse;
b59c2701 1715 nf_reset(skb);
1da177e4 1716
fda9ef5d 1717 if (sk_filter(sk, skb))
1da177e4
LT
1718 goto discard_and_relse;
1719
1720 skb->dev = NULL;
1721
c6366184 1722 bh_lock_sock_nested(sk);
1da177e4
LT
1723 ret = 0;
1724 if (!sock_owned_by_user(sk)) {
1a2449a8
CL
1725#ifdef CONFIG_NET_DMA
1726 struct tcp_sock *tp = tcp_sk(sk);
1727 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
f67b4599 1728 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1a2449a8 1729 if (tp->ucopy.dma_chan)
1da177e4 1730 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8
CL
1731 else
1732#endif
1733 {
1734 if (!tcp_prequeue(sk, skb))
ae8d7f88 1735 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8 1736 }
6cce09f8 1737 } else if (unlikely(sk_add_backlog(sk, skb))) {
6b03a53a 1738 bh_unlock_sock(sk);
6cce09f8 1739 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
6b03a53a
ZY
1740 goto discard_and_relse;
1741 }
1da177e4
LT
1742 bh_unlock_sock(sk);
1743
1744 sock_put(sk);
1745
1746 return ret;
1747
1748no_tcp_socket:
1749 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1750 goto discard_it;
1751
1752 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1753bad_packet:
63231bdd 1754 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1da177e4 1755 } else {
cfb6eeb4 1756 tcp_v4_send_reset(NULL, skb);
1da177e4
LT
1757 }
1758
1759discard_it:
1760 /* Discard frame. */
1761 kfree_skb(skb);
e905a9ed 1762 return 0;
1da177e4
LT
1763
1764discard_and_relse:
1765 sock_put(sk);
1766 goto discard_it;
1767
1768do_time_wait:
1769 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
9469c7b4 1770 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1771 goto discard_it;
1772 }
1773
1774 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
63231bdd 1775 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
9469c7b4 1776 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1777 goto discard_it;
1778 }
9469c7b4 1779 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1da177e4 1780 case TCP_TW_SYN: {
c346dca1 1781 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
c67499c0 1782 &tcp_hashinfo,
eddc9ec5 1783 iph->daddr, th->dest,
463c84b9 1784 inet_iif(skb));
1da177e4 1785 if (sk2) {
9469c7b4
YH
1786 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1787 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1788 sk = sk2;
1789 goto process;
1790 }
1791 /* Fall through to ACK */
1792 }
1793 case TCP_TW_ACK:
1794 tcp_v4_timewait_ack(sk, skb);
1795 break;
1796 case TCP_TW_RST:
1797 goto no_tcp_socket;
1798 case TCP_TW_SUCCESS:;
1799 }
1800 goto discard_it;
1801}
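
/*
 * Worked example, with assumed values, of the end_seq computation done in
 * tcp_v4_rcv() above: SYN and FIN each consume one unit of sequence space,
 * so a segment with seq = 1000, 100 bytes of payload and the FIN bit set
 * yields
 *
 *	end_seq = seq + syn + fin + (skb->len - th->doff * 4)
 *	        = 1000 +  0  +  1  + 100 = 1101
 *
 * and the next in-order segment is expected to start at sequence 1101.
 */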

struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
{
	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct inet_peer *peer;

	if (!rt ||
	    inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
		peer = inet_getpeer_v4(inet->inet_daddr, 1);
		*release_it = true;
	} else {
		if (!rt->peer)
			rt_bind_peer(rt, inet->inet_daddr, 1);
		peer = rt->peer;
		*release_it = false;
	}

	return peer;
}
EXPORT_SYMBOL(tcp_v4_get_peer);
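
/*
 * A hedged sketch of the calling convention for tcp_v4_get_peer() above:
 * *release_it reports whether the caller received its own reference (the
 * lookup path) or is borrowing the route's cached peer; only the former
 * must be dropped with inet_putpeer().  The caller below is hypothetical,
 * for illustration only.
 */
static inline void tcp_peer_usage_sketch(struct sock *sk)
{
	bool release_it;
	struct inet_peer *peer = tcp_v4_get_peer(sk, &release_it);

	if (peer) {
		/* ... read or update per-destination metrics here ... */
		if (release_it)
			inet_putpeer(peer);
	}
}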

void *tcp_v4_tw_get_peer(struct sock *sk)
{
	const struct inet_timewait_sock *tw = inet_twsk(sk);

	return inet_getpeer_v4(tw->tw_daddr, 1);
}
EXPORT_SYMBOL(tcp_v4_tw_get_peer);

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
	.twsk_getpeer	= tcp_v4_tw_get_peer,
};

const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.get_peer	   = tcp_v4_get_peer,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
	.bind_conflict	   = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};
EXPORT_SYMBOL(ipv4_specific);

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif

/* NOTE: A lot of things are set to zero explicitly by the call to
 * sk_alloc(), so they need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = TCP_INIT_CWND;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = TCP_MSS_DEFAULT;

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	/* TCP Cookie Transactions */
	if (sysctl_tcp_cookie_size > 0) {
		/* Default, cookies without s_data_payload. */
		tp->cookie_values =
			kzalloc(sizeof(*tp->cookie_values),
				sk->sk_allocation);
		if (tp->cookie_values != NULL)
			kref_init(&tp->cookie_values->kref);
	}
	/* Presumed zeroed, in order of appearance:
	 *	cookie_in_always, cookie_out_never,
	 *	s_data_constant, s_data_in, s_data_out
	 */
	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	local_bh_disable();
	sock_update_memcg(sk);
	sk_sockets_allocated_inc(sk);
	local_bh_enable();

	return 0;
}
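
/*
 * Rough arithmetic for the defaults above, assuming TCP_INIT_CWND is 10
 * segments and TCP_MSS_DEFAULT is 536 bytes: the socket may put about
 * 10 * 536 = 5360 bytes in flight before the first ACK arrives, and since
 * snd_ssthresh starts at TCP_INFINITE_SSTHRESH, slow start continues
 * until the congestion control module lowers it.
 */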

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Clean up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Clean up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		kfree_rcu(tp->md5sig_info, rcu);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Clean up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean the prequeue; it must really be empty by now */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	/*
	 * If a sendmsg cached page exists, toss it.
	 */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	/* TCP Cookie Transactions */
	if (tp->cookie_values != NULL) {
		kref_put(&tp->cookie_values->kref,
			 tcp_cookie_values_release);
		tp->cookie_values = NULL;
	}

	sk_sockets_allocated_dec(sk);
	sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
{
	return hlist_nulls_empty(head) ? NULL :
		list_entry(head->first, struct inet_timewait_sock, tw_node);
}

static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
{
	return !is_a_nulls(tw->tw_node.next) ?
		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
}

/*
 * Get the next listener socket, following cur.  If cur is NULL, get the
 * first socket starting from the bucket given in st->bucket; when
 * st->bucket is zero the very first socket in the hash table is returned.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_nulls_node *node;
	struct sock *sk = cur;
	struct inet_listen_hashbucket *ilb;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	if (!sk) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		sk = sk_nulls_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_nulls_next(sk);
	}
get_sk:
	sk_nulls_for_each_from(sk, node) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid = sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state = TCP_SEQ_STATE_OPENREQ;
			st->sbucket = 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	spin_unlock_bh(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}
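
/*
 * Worked example of the traversal above, with assumed contents: given
 * listeners A and B in one bucket, where A has a pending open request R,
 * a full walk yields
 *	A (LISTENING) -> R (OPENREQ) -> B (LISTENING)
 * before advancing to the next bucket.  st->num counts every object
 * returned so far, while st->offset counts objects within the current
 * bucket; the seek-resume logic further down relies on both.
 */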

static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	st->offset = 0;
	rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static inline int empty_bucket(struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
}

/*
 * Get the first established socket starting from the bucket given in
 * st->bucket.  If st->bucket is zero, the very first socket in the hash
 * is returned.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		struct inet_timewait_sock *tw;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket].twchain) {
			if (tw->tw_family != st->family ||
			    !net_eq(twsk_net(tw), net)) {
				continue;
			}
			rc = tw;
			goto out;
		}
		spin_unlock_bh(lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}

static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* Look for the next non-empty bucket */
		st->offset = 0;
		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
				empty_bucket(st))
			;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			return NULL;

		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
	} else
		sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			goto found;
	}

	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	st->state = TCP_SEQ_STATE_LISTENING;
	rc = listening_get_idx(seq, &pos);

	if (!rc) {
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc = established_get_idx(seq, pos);
	}

	return rc;
}

static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}
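
/*
 * Example of the resume path above, with assumed numbers: if a previous
 * read of /proc/net/tcp stopped at st->bucket = 3, st->offset = 2, then
 * tcp_seek_last_pos() re-walks bucket 3 and skips two entries before
 * handing out the next one.  The walk is best-effort rather than a
 * stable snapshot: entries added or removed in between can shift what
 * the reader sees, but the position is approximately preserved.
 */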

static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}

static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}

static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}
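
/*
 * Roughly how the seq_file core drives the three callbacks above for one
 * read() of /proc/net/tcp (pseudocode only, assuming the usual seq_file
 * contract):
 *
 *	p = tcp_seq_start(seq, &pos);	// may return SEQ_START_TOKEN
 *	while (p && output buffer has room) {
 *		show(seq, p);		// tcp4_seq_show(), further down
 *		p = tcp_seq_next(seq, p, &pos);
 *	}
 *	tcp_seq_stop(seq, p);		// drop whatever lock is still held
 *
 * This is why tcp_seq_stop() must switch on st->state: start/next can
 * leave a listening-bucket lock, syn_wait_lock or an ehash bucket lock
 * held across the show calls.
 */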

int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			   sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family = afinfo->family;
	s->last_pos = 0;
	return 0;
}
EXPORT_SYMBOL(tcp_seq_open);

int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	afinfo->seq_ops.start	= tcp_seq_start;
	afinfo->seq_ops.next	= tcp_seq_next;
	afinfo->seq_ops.stop	= tcp_seq_stop;

	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
			     afinfo->seq_fops, afinfo);
	if (!p)
		rc = -ENOMEM;
	return rc;
}
EXPORT_SYMBOL(tcp_proc_register);

void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	proc_net_remove(net, afinfo->name);
}
EXPORT_SYMBOL(tcp_proc_unregister);

static void get_openreq4(const struct sock *sk, const struct request_sock *req,
			 struct seq_file *f, int i, int uid, int *len)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int ttd = req->expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
		i,
		ireq->loc_addr,
		ntohs(inet_sk(sk)->inet_sport),
		ireq->rmt_addr,
		ntohs(ireq->rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_to_clock_t(ttd),
		req->retrans,
		uid,
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req,
		len);
}

static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
{
	int timer_active;
	unsigned long timer_expires;
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sk->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	if (sk->sk_state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/*
		 * Because we don't lock the socket, we might find a
		 * transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
		i, src, srcp, dest, destp, sk->sk_state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		sock_i_uid(sk),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		atomic_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
		len);
}

static void get_timewait4_sock(const struct inet_timewait_sock *tw,
			       struct seq_file *f, int i, int *len)
{
	__be32 dest, src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw, len);
}

#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	int len;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, seq, st->num, &len);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, seq, st->num, &len);
		break;
	}
	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
out:
	return 0;
}
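
/*
 * Illustrative /proc/net/tcp row as produced by the show helpers above
 * (values invented for the example):
 *
 *	0: 0100007F:0016 00000000:0000 0A 00000000:00000000 00:00000000 ...
 *
 * "0100007F:0016" is 127.0.0.1 port 22: the __be32 address is printed
 * with %08X, hence appears byte-swapped on a little-endian host, while
 * the port has already been converted with ntohs().  "0A" is sk_state
 * TCP_LISTEN (10).  The trailing seq_printf() pads every row to
 * TMPSZ - 1 columns so the table stays aligned.
 */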

static const struct file_operations tcp_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= &tcp_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	const struct iphdr *iph = skb_gro_network_header(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
				  skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			break;
		}

		/* fall through */
	case CHECKSUM_NONE:
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	return tcp_gro_receive(head, skb);
}

int tcp4_gro_complete(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
				  iph->saddr, iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	return tcp_gro_complete(skb);
}
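
/*
 * Worked example (assumed value) for tcp4_gro_complete() above: if the
 * pseudo-header sum over saddr/daddr/protocol/length folds to 0x1234,
 * then ~tcp_v4_check(..., 0) stores 0xEDCB in th->check.  That is the
 * usual seed for checksum offload: hardware or software GSO only has to
 * add each resulting segment's payload sum and fold the total.
 */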

struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
	.init_cgroup		= tcp_init_cgroup,
	.destroy_cgroup		= tcp_destroy_cgroup,
	.proto_cgroup		= tcp_proto_cgroup,
#endif
};
EXPORT_SYMBOL(tcp_prot);

static int __net_init tcp_sk_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcp_sk_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
}

static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};

void __init tcp_v4_init(void)
{
	inet_hashinfo_init(&tcp_hashinfo);
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}