tcp: add send_synack method to tcp_request_sock_ops
net/ipv4/tcp_ipv4.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);


#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

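/* Illustrative sketch, not part of the original file: the reuse test
 * above, reduced to a pure predicate. The names are hypothetical; the
 * one-second guard matches the get_seconds() comparison in
 * tcp_twsk_unique() and relies on PAWS to reject old duplicates.
 */
static inline bool tw_reuse_allowed_sketch(u32 ts_recent_stamp,
					   bool tw_reuse_enabled,
					   u32 now_seconds)
{
	/* Reuse only if the old timestamp is at least one second stale,
	 * so segments from the previous incarnation fail the PAWS check.
	 */
	return ts_recent_stamp && tw_reuse_enabled &&
	       now_seconds - ts_recent_stamp > 1;
}
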
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);

	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

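/* Illustrative sketch, not part of the original file: tcp_v4_connect()
 * is normally reached through the socket layer (inet_stream_connect),
 * e.g. from an in-kernel user via kernel_connect(). The helper name and
 * hard-coded address below are hypothetical.
 */
static int connect_example_sketch(struct socket *sock)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port   = htons(80),
		.sin_addr   = { .s_addr = htonl(INADDR_LOOPBACK) },
	};

	/* Ends up in tcp_v4_connect() for an AF_INET stream socket. */
	return kernel_connect(sock, (struct sockaddr *)&addr,
			      sizeof(addr), 0);
}
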
/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
static void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);
	u32 mtu = tcp_sk(sk)->mtu_info;

	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
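
/* Illustrative sketch, not part of the original file: when the socket
 * is owned by user context, tcp_v4_err() cannot call
 * tcp_v4_mtu_reduced() directly; it records the intent in tsq_flags and
 * tcp_release_cb() performs the update later. The pattern, isolated:
 */
static void defer_mtu_reduction_sketch(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!sock_owned_by_user(sk)) {
		tcp_v4_mtu_reduced(sk);	/* safe to act immediately */
	} else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
				     &tp->tsq_flags)) {
		sock_hold(sk);		/* dropped after the deferred call */
	}
}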

static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	__u32 seq, snd_una;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs sent out by Linux are always <576bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && fastopen->sk == NULL)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
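
/* Illustrative sketch, not part of the original file: the RTO-revert
 * arithmetic above (draft-zimmermann-tcp-lcd). After undoing one
 * backoff step, the timer is re-armed only for the part of the reverted
 * RTO that has not already elapsed since the head skb was sent.
 */
static u32 rto_remaining_sketch(u32 rto, u32 now, u32 sent_at)
{
	u32 elapsed = min(rto, now - sent_at);	/* clamp against underflow */

	return rto - elapsed;	/* 0 means: retransmit immediately */
}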

void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

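/* Illustrative sketch, not part of the original file: the non-offload
 * branch above, written out for a fully linear skb. csum_partial() sums
 * the TCP header and payload; tcp_v4_check() folds in the pseudo-header.
 */
static __sum16 tcp_sw_checksum_sketch(struct sk_buff *skb,
				      __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	/* Assumes skb->len bytes are linear starting at the TCP header. */
	return tcp_v4_check(skb->len, saddr, daddr,
			    csum_partial(th, skb->len, 0));
}
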
/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So that we build reply only basing on parameters
 *		arrived with segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we do not lose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
					     &tcp_hashinfo, ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			return;
		rcu_read_lock();
		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto release_sk1;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					     &ip_hdr(skb)->saddr,
					     AF_INET) : NULL;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk)
		arg.bound_dev_if = sk->sk_bound_dev_if;

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}
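
/* Illustrative sketch, not part of the original file: the RFC 793 reset
 * sequence selection used above. If the offending segment carried an
 * ACK, the RST reuses that value as its sequence number; otherwise it
 * ACKs everything the segment occupied (SYN and FIN count for one each).
 */
static void rst_seq_sketch(const struct tcphdr *th, unsigned int payload_len,
			   u32 *rst_seq, u32 *rst_ack, bool *rst_has_ack)
{
	if (th->ack) {
		*rst_seq = ntohl(th->ack_seq);
		*rst_has_ack = false;
	} else {
		*rst_seq = 0;
		*rst_ack = ntohl(th->seq) + th->syn + th->fin + payload_len;
		*rst_has_ack = true;
	}
}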

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
			tcp_time_stamp,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      u16 queue_mapping,
			      struct tcp_fastopen_cookie *foc)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, foc);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		skb_set_queue_mapping(skb, queue_mapping);
		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
					    ireq->ir_rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
		if (!tcp_rsk(req)->snt_synack && !err)
			tcp_rsk(req)->snt_synack = tcp_time_stamp;
	}

	return err;
}

static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
{
	const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
	int res = af_ops->send_synack(sk, NULL, NULL, req, 0, NULL);

	if (!res) {
		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
	}
	return res;
}
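
/* Note, not part of the original file: this indirection is what the
 * commit adds. The SYN-ACK retransmit path above calls the request's
 * af_specific->send_synack method instead of tcp_v4_send_synack()
 * directly, so the same code can serve other address families once
 * their ops tables (e.g. a hypothetical tcp_request_sock_ipv6_ops with
 * its own .send_synack) are wired up; see tcp_request_sock_ipv4_ops
 * below for the IPv4 table.
 */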

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/*
 * Return true if a syncookie should be sent
 */
bool tcp_syn_flood_action(struct sock *sk,
			  const struct sk_buff *skb,
			  const char *proto)
{
	const char *msg = "Dropping request";
	bool want_cookie = false;
	struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = true;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
{
	const struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options_rcu *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = sizeof(*dopt) + opt->optlen;

		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(&dopt->opt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       sock_owned_by_user(sk) ||
				       lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
	struct tcp_md5sig_key *key;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}
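
/* Illustrative sketch, not part of the original file: the userspace
 * call this function services, using the uapi struct tcp_md5sig from
 * <linux/tcp.h>. Guarded out since it is not kernel code; fd and the
 * peer address are hypothetical, error handling omitted.
 */
#if 0	/* userspace example */
	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
	struct sockaddr_in *peer = (struct sockaddr_in *)&md5.tcpm_addr;

	peer->sin_family = AF_INET;
	peer->sin_addr.s_addr = inet_addr("192.0.2.1");
	memcpy(md5.tcpm_key, "secret", 6);
	/* Passing a zero key length instead would delete the key. */
	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
#endif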

static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->ir_loc_addr;
		daddr = inet_rsk(req)->ir_rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
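
/* Note, not part of the original file: per RFC 2385 the digest covers,
 * in order, the TCP pseudo-header, the TCP header with its checksum
 * field zeroed (tcp_md5_hash_header() takes care of that), the payload,
 * and finally the key - exactly the four update steps above.
 */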

static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
				     : "");
		return true;
	}
	return false;
}

#endif

static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
			    struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);

	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(skb);
}

static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
					  const struct request_sock *req,
					  bool *strict)
{
	struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);

	if (strict) {
		if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
			*strict = true;
		else
			*strict = false;
	}

	return dst;
}

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
#endif
	.init_req	=	tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v4_init_sequence,
#endif
	.route_req	=	tcp_v4_route_req,
	.init_seq	=	tcp_v4_init_sequence,
	.send_synack	=	tcp_v4_send_synack,
};
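
/* Note, not part of the original file: tcp_v4_conn_request() below goes
 * through these af_ops pointers (init_req, route_req, init_seq,
 * send_synack, cookie_init_seq) rather than calling the IPv4 helpers
 * directly, which keeps the handshake logic address-family independent.
 */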

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	bool want_cookie = false, fastopen;
	struct flowi4 fl4;
	struct tcp_fastopen_cookie foc = { .len = -1 };
	const struct tcp_request_sock_ops *af_ops;
	int err;

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if ((sysctl_tcp_syncookies == 2 ||
	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
		if (!want_cookie)
			goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
		goto drop;
	}

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

	af_ops = tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss  = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb, sk);

	af_ops->init_req(req, sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, skb, sock_net(sk));

	if (want_cookie) {
		isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) {
			bool strict;

			dst = af_ops->route_req(sk, (struct flowi *)&fl4, req,
						&strict);
			if (dst && strict &&
			    !tcp_peer_is_proven(req, dst, true)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 !tcp_peer_is_proven(req, dst, false)) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = af_ops->init_seq(skb);
	}
	if (!dst) {
		dst = af_ops->route_req(sk, (struct flowi *)&fl4, req, NULL);
		if (!dst)
			goto drop_and_free;
	}

	tcp_rsk(req)->snt_isn = isn;
	tcp_openreq_init_rwin(req, sk, dst);
	fastopen = !want_cookie &&
		   tcp_try_fastopen(sk, skb, req, &foc, dst);
	err = af_ops->send_synack(sk, dst, NULL, req,
				  skb_get_queue_mapping(skb), &foc);
	if (!fastopen) {
		if (err || want_cookie)
			goto drop_and_free;

		tcp_rsk(req)->listener = NULL;
		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	}

	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);
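
/* Illustrative sketch, not part of the original file: the drop
 * heuristic near the top of tcp_v4_conn_request(), as a predicate.
 * A request is refused only when the accept backlog is full and the
 * SYN queue already holds more than one young (never retransmitted)
 * entry, i.e. the listener is genuinely not keeping up.
 */
static bool listen_overflow_sketch(struct sock *sk)
{
	return sk_acceptq_is_full(sk) &&
	       inet_csk_reqsk_queue_young(sk) > 1;
}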


/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->inet_daddr   = ireq->ir_rmt_addr;
	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
	newinet->inet_saddr   = ireq->ir_loc_addr;
	inet_opt	      = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	newinet->rcv_tos      = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev, false);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, 0) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}
		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

void tcp_v4_early_demux(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif);
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk->sk_state != TCP_TIME_WAIT) {
			struct dst_entry *dst = sk->sk_rx_dst;

			if (dst)
				dst = dst_check(dst, 0);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}
1678
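
/*
 * A sketch of how the hook above is wired up; the values mirror the
 * tcp_protocol entry in af_inet.c of this era (an assumption, since that
 * file is not part of this listing). ip_rcv_finish() invokes .early_demux
 * before the route lookup, so a hit here lets the packet reuse the
 * socket's cached rx dst instead of paying for a FIB lookup:
 */
#if 0	/* expository sketch, not compiled */
static const struct net_protocol tcp_protocol_sketch = {
	.early_demux	= tcp_v4_early_demux,
	.handler	= tcp_v4_rcv,
	.no_policy	= 1,
	.netns_ok	= 1,
};
#endif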
b2fb4f54
ED
1679/* The packet is added to the VJ-style prequeue for processing in process
1680 * context, if a reader task is waiting. Apparently, this exciting
1681 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1682 * failed somewhere. Latency? Burstiness? Well, at least now we will
1683 * see why it failed. 8)8) --ANK
1684 *
1685 */
1686bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1687{
1688 struct tcp_sock *tp = tcp_sk(sk);
1689
1690 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1691 return false;
1692
1693 if (skb->len <= tcp_hdrlen(skb) &&
1694 skb_queue_len(&tp->ucopy.prequeue) == 0)
1695 return false;
1696
58717686 1697 skb_dst_force(skb);
b2fb4f54
ED
1698 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1699 tp->ucopy.memory += skb->truesize;
1700 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1701 struct sk_buff *skb1;
1702
1703 BUG_ON(sock_owned_by_user(sk));
1704
1705 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1706 sk_backlog_rcv(sk, skb1);
1707 NET_INC_STATS_BH(sock_net(sk),
1708 LINUX_MIB_TCPPREQUEUEDROPPED);
1709 }
1710
1711 tp->ucopy.memory = 0;
1712 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1713 wake_up_interruptible_sync_poll(sk_sleep(sk),
1714 POLLIN | POLLRDNORM | POLLRDBAND);
1715 if (!inet_csk_ack_scheduled(sk))
1716 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1717 (3 * tcp_rto_min(sk)) / 4,
1718 TCP_RTO_MAX);
1719 }
1720 return true;
1721}
1722EXPORT_SYMBOL(tcp_prequeue);
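
/*
 * Consumer side, abridged: tcp_recvmsg() sets tp->ucopy.task and later
 * drains the prequeue in process context via tcp_prequeue_process()
 * (tcp.c). A minimal sketch of that drain loop, assuming the reader holds
 * the socket lock:
 */
#if 0	/* expository sketch, not compiled */
static void prequeue_drain_sketch(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;

	/* BH must stay disabled while feeding the regular receive path */
	local_bh_disable();
	while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
		sk_backlog_rcv(sk, skb);
	local_bh_enable();

	tp->ucopy.memory = 0;
}
#endif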
1723
1da177e4
LT
1724/*
1725 * From tcp_input.c
1726 */
1727
1728int tcp_v4_rcv(struct sk_buff *skb)
1729{
eddc9ec5 1730 const struct iphdr *iph;
cf533ea5 1731 const struct tcphdr *th;
1da177e4
LT
1732 struct sock *sk;
1733 int ret;
a86b1e30 1734 struct net *net = dev_net(skb->dev);
1da177e4
LT
1735
1736 if (skb->pkt_type != PACKET_HOST)
1737 goto discard_it;
1738
1739 /* Count it even if it's bad */
63231bdd 1740 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1da177e4
LT
1741
1742 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1743 goto discard_it;
1744
aa8223c7 1745 th = tcp_hdr(skb);
1da177e4
LT
1746
1747 if (th->doff < sizeof(struct tcphdr) / 4)
1748 goto bad_packet;
1749 if (!pskb_may_pull(skb, th->doff * 4))
1750 goto discard_it;
1751
1752 /* An explanation is required here, I think.
1753 * Packet length and doff are validated by header prediction,
caa20d9a 1754 * provided the case of th->doff == 0 is eliminated.
1da177e4 1755 * So we defer the checks. */
ed70fcfc
TH
1756
1757 if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
6a5dc9e5 1758 goto csum_error;
1da177e4 1759
aa8223c7 1760 th = tcp_hdr(skb);
eddc9ec5 1761 iph = ip_hdr(skb);
1da177e4
LT
1762 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1763 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1764 skb->len - th->doff * 4);
1765 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1766 TCP_SKB_CB(skb)->when = 0;
b82d1bb4 1767 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1da177e4
LT
1768 TCP_SKB_CB(skb)->sacked = 0;
1769
9a1f27c4 1770 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1da177e4
LT
1771 if (!sk)
1772 goto no_tcp_socket;
1773
bb134d5d
ED
1774process:
1775 if (sk->sk_state == TCP_TIME_WAIT)
1776 goto do_time_wait;
1777
6cce09f8
ED
1778 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1779 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
d218d111 1780 goto discard_and_relse;
6cce09f8 1781 }
d218d111 1782
1da177e4
LT
1783 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1784 goto discard_and_relse;
b59c2701 1785 nf_reset(skb);
1da177e4 1786
fda9ef5d 1787 if (sk_filter(sk, skb))
1da177e4
LT
1788 goto discard_and_relse;
1789
8b80cda5 1790 sk_mark_napi_id(sk, skb);
1da177e4
LT
1791 skb->dev = NULL;
1792
c6366184 1793 bh_lock_sock_nested(sk);
1da177e4
LT
1794 ret = 0;
1795 if (!sock_owned_by_user(sk)) {
1a2449a8
CL
1796#ifdef CONFIG_NET_DMA
1797 struct tcp_sock *tp = tcp_sk(sk);
1798 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
a2bd1140 1799 tp->ucopy.dma_chan = net_dma_find_channel();
1a2449a8 1800 if (tp->ucopy.dma_chan)
1da177e4 1801 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8
CL
1802 else
1803#endif
1804 {
1805 if (!tcp_prequeue(sk, skb))
ae8d7f88 1806 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8 1807 }
da882c1f
ED
1808 } else if (unlikely(sk_add_backlog(sk, skb,
1809 sk->sk_rcvbuf + sk->sk_sndbuf))) {
6b03a53a 1810 bh_unlock_sock(sk);
6cce09f8 1811 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
6b03a53a
ZY
1812 goto discard_and_relse;
1813 }
1da177e4
LT
1814 bh_unlock_sock(sk);
1815
1816 sock_put(sk);
1817
1818 return ret;
1819
1820no_tcp_socket:
1821 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1822 goto discard_it;
1823
1824 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
6a5dc9e5
ED
1825csum_error:
1826 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1da177e4 1827bad_packet:
63231bdd 1828 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1da177e4 1829 } else {
cfb6eeb4 1830 tcp_v4_send_reset(NULL, skb);
1da177e4
LT
1831 }
1832
1833discard_it:
1834 /* Discard frame. */
1835 kfree_skb(skb);
e905a9ed 1836 return 0;
1da177e4
LT
1837
1838discard_and_relse:
1839 sock_put(sk);
1840 goto discard_it;
1841
1842do_time_wait:
1843 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
9469c7b4 1844 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1845 goto discard_it;
1846 }
1847
6a5dc9e5 1848 if (skb->len < (th->doff << 2)) {
9469c7b4 1849 inet_twsk_put(inet_twsk(sk));
6a5dc9e5
ED
1850 goto bad_packet;
1851 }
1852 if (tcp_checksum_complete(skb)) {
1853 inet_twsk_put(inet_twsk(sk));
1854 goto csum_error;
1da177e4 1855 }
9469c7b4 1856 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1da177e4 1857 case TCP_TW_SYN: {
c346dca1 1858 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
c67499c0 1859 &tcp_hashinfo,
da5e3630 1860 iph->saddr, th->source,
eddc9ec5 1861 iph->daddr, th->dest,
463c84b9 1862 inet_iif(skb));
1da177e4 1863 if (sk2) {
9469c7b4
YH
1864 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1865 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1866 sk = sk2;
1867 goto process;
1868 }
1869 /* Fall through to ACK */
1870 }
1871 case TCP_TW_ACK:
1872 tcp_v4_timewait_ack(sk, skb);
1873 break;
1874 case TCP_TW_RST:
1875 goto no_tcp_socket;
1876 case TCP_TW_SUCCESS:;
1877 }
1878 goto discard_it;
1879}
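
/*
 * The min_ttl drop in the function above implements RFC 5082 (GTSM); the
 * threshold comes from the IP_MINTTL socket option. Userspace sketch,
 * assuming fd is an IPv4 TCP socket talking to a directly attached peer:
 */
#if 0	/* expository sketch, not compiled */
	int min_ttl = 255;	/* only accept packets that were never forwarded */

	if (setsockopt(fd, IPPROTO_IP, IP_MINTTL, &min_ttl, sizeof(min_ttl)) < 0)
		perror("setsockopt(IP_MINTTL)");
#endif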
1880
ccb7c410
DM
1881static struct timewait_sock_ops tcp_timewait_sock_ops = {
1882 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1883 .twsk_unique = tcp_twsk_unique,
1884 .twsk_destructor= tcp_twsk_destructor,
ccb7c410 1885};
1da177e4 1886
63d02d15 1887void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
5d299f3d
ED
1888{
1889 struct dst_entry *dst = skb_dst(skb);
1890
1891 dst_hold(dst);
1892 sk->sk_rx_dst = dst;
1893 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1894}
63d02d15 1895EXPORT_SYMBOL(inet_sk_rx_dst_set);
5d299f3d 1896
3b401a81 1897const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
1898 .queue_xmit = ip_queue_xmit,
1899 .send_check = tcp_v4_send_check,
1900 .rebuild_header = inet_sk_rebuild_header,
5d299f3d 1901 .sk_rx_dst_set = inet_sk_rx_dst_set,
543d9cfe
ACM
1902 .conn_request = tcp_v4_conn_request,
1903 .syn_recv_sock = tcp_v4_syn_recv_sock,
543d9cfe
ACM
1904 .net_header_len = sizeof(struct iphdr),
1905 .setsockopt = ip_setsockopt,
1906 .getsockopt = ip_getsockopt,
1907 .addr2sockaddr = inet_csk_addr2sockaddr,
1908 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 1909 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 1910#ifdef CONFIG_COMPAT
543d9cfe
ACM
1911 .compat_setsockopt = compat_ip_setsockopt,
1912 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 1913#endif
1da177e4 1914};
4bc2f18b 1915EXPORT_SYMBOL(ipv4_specific);
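
/*
 * None of the handlers above are called directly by the af-independent
 * TCP code; it indirects through icsk->icsk_af_ops, which
 * tcp_v4_init_sock() (below) points at this table. For example, the
 * transmit path in tcp_output.c ends roughly with:
 */
#if 0	/* expository sketch, not compiled */
	err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
	/* == ip_queue_xmit() for an IPv4 socket */
#endif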
1da177e4 1916
cfb6eeb4 1917#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 1918static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 1919 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 1920 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 1921 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 1922};
b6332e6c 1923#endif
cfb6eeb4 1924
1da177e4
LT
1925/* NOTE: A lot of things are set to zero explicitly by the call to
1926 * sk_alloc(), so they need not be done here.
1927 */
1928static int tcp_v4_init_sock(struct sock *sk)
1929{
6687e988 1930 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4 1931
900f65d3 1932 tcp_init_sock(sk);
1da177e4 1933
8292a17a 1934 icsk->icsk_af_ops = &ipv4_specific;
900f65d3 1935
cfb6eeb4 1936#ifdef CONFIG_TCP_MD5SIG
ac807fa8 1937 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
cfb6eeb4 1938#endif
1da177e4 1939
1da177e4
LT
1940 return 0;
1941}
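
/*
 * This initializer runs via tcp_prot.init (see the proto table below)
 * whenever userspace creates an IPv4 stream socket; sketch of the
 * triggering call, nothing more:
 */
#if 0	/* expository sketch, not compiled */
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
#endif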
1942
7d06b2e0 1943void tcp_v4_destroy_sock(struct sock *sk)
1da177e4
LT
1944{
1945 struct tcp_sock *tp = tcp_sk(sk);
1946
1947 tcp_clear_xmit_timers(sk);
1948
6687e988 1949 tcp_cleanup_congestion_control(sk);
317a76f9 1950
1da177e4 1951	/* Clean up the write buffer. */
fe067e8a 1952 tcp_write_queue_purge(sk);
1da177e4
LT
1953
1954 /* Cleans up our, hopefully empty, out_of_order_queue. */
e905a9ed 1955 __skb_queue_purge(&tp->out_of_order_queue);
1da177e4 1956
cfb6eeb4
YH
1957#ifdef CONFIG_TCP_MD5SIG
1958 /* Clean up the MD5 key list, if any */
1959 if (tp->md5sig_info) {
a915da9b 1960 tcp_clear_md5_list(sk);
a8afca03 1961 kfree_rcu(tp->md5sig_info, rcu);
cfb6eeb4
YH
1962 tp->md5sig_info = NULL;
1963 }
1964#endif
1965
1a2449a8
CL
1966#ifdef CONFIG_NET_DMA
1967 /* Cleans up our sk_async_wait_queue */
e905a9ed 1968 __skb_queue_purge(&sk->sk_async_wait_queue);
1a2449a8
CL
1969#endif
1970
1da177e4
LT
 1971	/* Clean the prequeue; it really must be empty. */
1972 __skb_queue_purge(&tp->ucopy.prequeue);
1973
1974 /* Clean up a referenced TCP bind bucket. */
463c84b9 1975 if (inet_csk(sk)->icsk_bind_hash)
ab1e0a13 1976 inet_put_port(sk);
1da177e4 1977
168a8f58 1978 BUG_ON(tp->fastopen_rsk != NULL);
435cf559 1979
cf60af03
YC
 1980	/* If the socket was aborted during the connect operation */
1981 tcp_free_fastopen_req(tp);
1982
180d8cd9 1983 sk_sockets_allocated_dec(sk);
d1a4c0b3 1984 sock_release_memcg(sk);
1da177e4 1985}
1da177e4
LT
1986EXPORT_SYMBOL(tcp_v4_destroy_sock);
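
/*
 * The md5sig_info torn down above is populated through the TCP_MD5SIG
 * socket option. Userspace sketch, assuming fd is a TCP socket and peer
 * holds the remote address (struct sockaddr_in) the key should match:
 */
#if 0	/* expository sketch, not compiled */
	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };

	memcpy(&md5.tcpm_addr, &peer, sizeof(peer));
	memcpy(md5.tcpm_key, "secret", 6);
	if (setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5)) < 0)
		perror("setsockopt(TCP_MD5SIG)");
#endif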
1987
1988#ifdef CONFIG_PROC_FS
1989/* Proc filesystem TCP sock list dumping. */
1990
a8b690f9
TH
1991/*
1992 * Get the next listener socket after cur. If cur is NULL, get the first
1993 * socket starting from the bucket given in st->bucket; when st->bucket is
1994 * zero, the very first socket in the hash table is returned.
1995 */
1da177e4
LT
1996static void *listening_get_next(struct seq_file *seq, void *cur)
1997{
463c84b9 1998 struct inet_connection_sock *icsk;
c25eb3bf 1999 struct hlist_nulls_node *node;
1da177e4 2000 struct sock *sk = cur;
5caea4ea 2001 struct inet_listen_hashbucket *ilb;
5799de0b 2002 struct tcp_iter_state *st = seq->private;
a4146b1b 2003 struct net *net = seq_file_net(seq);
1da177e4
LT
2004
2005 if (!sk) {
a8b690f9 2006 ilb = &tcp_hashinfo.listening_hash[st->bucket];
5caea4ea 2007 spin_lock_bh(&ilb->lock);
c25eb3bf 2008 sk = sk_nulls_head(&ilb->head);
a8b690f9 2009 st->offset = 0;
1da177e4
LT
2010 goto get_sk;
2011 }
5caea4ea 2012 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1da177e4 2013 ++st->num;
a8b690f9 2014 ++st->offset;
1da177e4
LT
2015
2016 if (st->state == TCP_SEQ_STATE_OPENREQ) {
60236fdd 2017 struct request_sock *req = cur;
1da177e4 2018
72a3effa 2019 icsk = inet_csk(st->syn_wait_sk);
1da177e4
LT
2020 req = req->dl_next;
2021 while (1) {
2022 while (req) {
bdccc4ca 2023 if (req->rsk_ops->family == st->family) {
1da177e4
LT
2024 cur = req;
2025 goto out;
2026 }
2027 req = req->dl_next;
2028 }
72a3effa 2029 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1da177e4
LT
2030 break;
2031get_req:
463c84b9 2032 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1da177e4 2033 }
1bde5ac4 2034 sk = sk_nulls_next(st->syn_wait_sk);
1da177e4 2035 st->state = TCP_SEQ_STATE_LISTENING;
463c84b9 2036 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2037 } else {
e905a9ed 2038 icsk = inet_csk(sk);
463c84b9
ACM
2039 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2040 if (reqsk_queue_len(&icsk->icsk_accept_queue))
1da177e4 2041 goto start_req;
463c84b9 2042 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1bde5ac4 2043 sk = sk_nulls_next(sk);
1da177e4
LT
2044 }
2045get_sk:
c25eb3bf 2046 sk_nulls_for_each_from(sk, node) {
8475ef9f
PE
2047 if (!net_eq(sock_net(sk), net))
2048 continue;
2049 if (sk->sk_family == st->family) {
1da177e4
LT
2050 cur = sk;
2051 goto out;
2052 }
e905a9ed 2053 icsk = inet_csk(sk);
463c84b9
ACM
2054 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2055 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1da177e4
LT
2056start_req:
2057 st->uid = sock_i_uid(sk);
2058 st->syn_wait_sk = sk;
2059 st->state = TCP_SEQ_STATE_OPENREQ;
2060 st->sbucket = 0;
2061 goto get_req;
2062 }
463c84b9 2063 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2064 }
5caea4ea 2065 spin_unlock_bh(&ilb->lock);
a8b690f9 2066 st->offset = 0;
0f7ff927 2067 if (++st->bucket < INET_LHTABLE_SIZE) {
5caea4ea
ED
2068 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2069 spin_lock_bh(&ilb->lock);
c25eb3bf 2070 sk = sk_nulls_head(&ilb->head);
1da177e4
LT
2071 goto get_sk;
2072 }
2073 cur = NULL;
2074out:
2075 return cur;
2076}
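
/*
 * Walk order in the function above: within each listening-hash bucket the
 * listener sockets themselves are emitted, and any listener with pending
 * request_socks is descended into via its syn_table (the
 * TCP_SEQ_STATE_OPENREQ state) before the walk advances to the next
 * bucket.
 */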
2077
2078static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2079{
a8b690f9
TH
2080 struct tcp_iter_state *st = seq->private;
2081 void *rc;
2082
2083 st->bucket = 0;
2084 st->offset = 0;
2085 rc = listening_get_next(seq, NULL);
1da177e4
LT
2086
2087 while (rc && *pos) {
2088 rc = listening_get_next(seq, rc);
2089 --*pos;
2090 }
2091 return rc;
2092}
2093
05dbc7b5 2094static inline bool empty_bucket(const struct tcp_iter_state *st)
6eac5604 2095{
05dbc7b5 2096 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
6eac5604
AK
2097}
2098
a8b690f9
TH
2099/*
2100 * Get the first established socket, starting from the bucket given in st->bucket.
2101 * If st->bucket is zero, the very first socket in the hash is returned.
2102 */
1da177e4
LT
2103static void *established_get_first(struct seq_file *seq)
2104{
5799de0b 2105 struct tcp_iter_state *st = seq->private;
a4146b1b 2106 struct net *net = seq_file_net(seq);
1da177e4
LT
2107 void *rc = NULL;
2108
a8b690f9
TH
2109 st->offset = 0;
2110 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1da177e4 2111 struct sock *sk;
3ab5aee7 2112 struct hlist_nulls_node *node;
9db66bdc 2113 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1da177e4 2114
6eac5604
AK
2115 /* Lockless fast path for the common case of empty buckets */
2116 if (empty_bucket(st))
2117 continue;
2118
9db66bdc 2119 spin_lock_bh(lock);
3ab5aee7 2120 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
f40c8174 2121 if (sk->sk_family != st->family ||
878628fb 2122 !net_eq(sock_net(sk), net)) {
1da177e4
LT
2123 continue;
2124 }
2125 rc = sk;
2126 goto out;
2127 }
9db66bdc 2128 spin_unlock_bh(lock);
1da177e4
LT
2129 }
2130out:
2131 return rc;
2132}
2133
2134static void *established_get_next(struct seq_file *seq, void *cur)
2135{
2136 struct sock *sk = cur;
3ab5aee7 2137 struct hlist_nulls_node *node;
5799de0b 2138 struct tcp_iter_state *st = seq->private;
a4146b1b 2139 struct net *net = seq_file_net(seq);
1da177e4
LT
2140
2141 ++st->num;
a8b690f9 2142 ++st->offset;
1da177e4 2143
05dbc7b5 2144 sk = sk_nulls_next(sk);
1da177e4 2145
3ab5aee7 2146 sk_nulls_for_each_from(sk, node) {
878628fb 2147 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
05dbc7b5 2148 return sk;
1da177e4
LT
2149 }
2150
05dbc7b5
ED
2151 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2152 ++st->bucket;
2153 return established_get_first(seq);
1da177e4
LT
2154}
2155
2156static void *established_get_idx(struct seq_file *seq, loff_t pos)
2157{
a8b690f9
TH
2158 struct tcp_iter_state *st = seq->private;
2159 void *rc;
2160
2161 st->bucket = 0;
2162 rc = established_get_first(seq);
1da177e4
LT
2163
2164 while (rc && pos) {
2165 rc = established_get_next(seq, rc);
2166 --pos;
7174259e 2167 }
1da177e4
LT
2168 return rc;
2169}
2170
2171static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2172{
2173 void *rc;
5799de0b 2174 struct tcp_iter_state *st = seq->private;
1da177e4 2175
1da177e4
LT
2176 st->state = TCP_SEQ_STATE_LISTENING;
2177 rc = listening_get_idx(seq, &pos);
2178
2179 if (!rc) {
1da177e4
LT
2180 st->state = TCP_SEQ_STATE_ESTABLISHED;
2181 rc = established_get_idx(seq, pos);
2182 }
2183
2184 return rc;
2185}
2186
a8b690f9
TH
2187static void *tcp_seek_last_pos(struct seq_file *seq)
2188{
2189 struct tcp_iter_state *st = seq->private;
2190 int offset = st->offset;
2191 int orig_num = st->num;
2192 void *rc = NULL;
2193
2194 switch (st->state) {
2195 case TCP_SEQ_STATE_OPENREQ:
2196 case TCP_SEQ_STATE_LISTENING:
2197 if (st->bucket >= INET_LHTABLE_SIZE)
2198 break;
2199 st->state = TCP_SEQ_STATE_LISTENING;
2200 rc = listening_get_next(seq, NULL);
2201 while (offset-- && rc)
2202 rc = listening_get_next(seq, rc);
2203 if (rc)
2204 break;
2205 st->bucket = 0;
05dbc7b5 2206 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2207 /* Fallthrough */
2208 case TCP_SEQ_STATE_ESTABLISHED:
a8b690f9
TH
2209 if (st->bucket > tcp_hashinfo.ehash_mask)
2210 break;
2211 rc = established_get_first(seq);
2212 while (offset-- && rc)
2213 rc = established_get_next(seq, rc);
2214 }
2215
2216 st->num = orig_num;
2217
2218 return rc;
2219}
2220
1da177e4
LT
2221static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2222{
5799de0b 2223 struct tcp_iter_state *st = seq->private;
a8b690f9
TH
2224 void *rc;
2225
2226 if (*pos && *pos == st->last_pos) {
2227 rc = tcp_seek_last_pos(seq);
2228 if (rc)
2229 goto out;
2230 }
2231
1da177e4
LT
2232 st->state = TCP_SEQ_STATE_LISTENING;
2233 st->num = 0;
a8b690f9
TH
2234 st->bucket = 0;
2235 st->offset = 0;
2236 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2237
2238out:
2239 st->last_pos = *pos;
2240 return rc;
1da177e4
LT
2241}
2242
2243static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2244{
a8b690f9 2245 struct tcp_iter_state *st = seq->private;
1da177e4 2246 void *rc = NULL;
1da177e4
LT
2247
2248 if (v == SEQ_START_TOKEN) {
2249 rc = tcp_get_idx(seq, 0);
2250 goto out;
2251 }
1da177e4
LT
2252
2253 switch (st->state) {
2254 case TCP_SEQ_STATE_OPENREQ:
2255 case TCP_SEQ_STATE_LISTENING:
2256 rc = listening_get_next(seq, v);
2257 if (!rc) {
1da177e4 2258 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2259 st->bucket = 0;
2260 st->offset = 0;
1da177e4
LT
2261 rc = established_get_first(seq);
2262 }
2263 break;
2264 case TCP_SEQ_STATE_ESTABLISHED:
1da177e4
LT
2265 rc = established_get_next(seq, v);
2266 break;
2267 }
2268out:
2269 ++*pos;
a8b690f9 2270 st->last_pos = *pos;
1da177e4
LT
2271 return rc;
2272}
2273
2274static void tcp_seq_stop(struct seq_file *seq, void *v)
2275{
5799de0b 2276 struct tcp_iter_state *st = seq->private;
1da177e4
LT
2277
2278 switch (st->state) {
2279 case TCP_SEQ_STATE_OPENREQ:
2280 if (v) {
463c84b9
ACM
2281 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2282 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4
LT
2283 }
2284 case TCP_SEQ_STATE_LISTENING:
2285 if (v != SEQ_START_TOKEN)
5caea4ea 2286 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
1da177e4 2287 break;
1da177e4
LT
2288 case TCP_SEQ_STATE_ESTABLISHED:
2289 if (v)
9db66bdc 2290 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2291 break;
2292 }
2293}
2294
73cb88ec 2295int tcp_seq_open(struct inode *inode, struct file *file)
1da177e4 2296{
d9dda78b 2297 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
1da177e4 2298 struct tcp_iter_state *s;
52d6f3f1 2299 int err;
1da177e4 2300
52d6f3f1
DL
2301 err = seq_open_net(inode, file, &afinfo->seq_ops,
2302 sizeof(struct tcp_iter_state));
2303 if (err < 0)
2304 return err;
f40c8174 2305
52d6f3f1 2306 s = ((struct seq_file *)file->private_data)->private;
1da177e4 2307 s->family = afinfo->family;
a8b690f9 2308 s->last_pos = 0;
f40c8174
DL
2309 return 0;
2310}
73cb88ec 2311EXPORT_SYMBOL(tcp_seq_open);
f40c8174 2312
6f8b13bc 2313int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4
LT
2314{
2315 int rc = 0;
2316 struct proc_dir_entry *p;
2317
9427c4b3
DL
2318 afinfo->seq_ops.start = tcp_seq_start;
2319 afinfo->seq_ops.next = tcp_seq_next;
2320 afinfo->seq_ops.stop = tcp_seq_stop;
2321
84841c3c 2322 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
73cb88ec 2323 afinfo->seq_fops, afinfo);
84841c3c 2324 if (!p)
1da177e4
LT
2325 rc = -ENOMEM;
2326 return rc;
2327}
4bc2f18b 2328EXPORT_SYMBOL(tcp_proc_register);
1da177e4 2329
6f8b13bc 2330void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4 2331{
ece31ffd 2332 remove_proc_entry(afinfo->name, net->proc_net);
1da177e4 2333}
4bc2f18b 2334EXPORT_SYMBOL(tcp_proc_unregister);
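
/*
 * Usage sketch with a hypothetical "tcpx" instance (the real IPv4 one,
 * tcp4_seq_afinfo, appears further below): a per-netns /proc/net entry is
 * created by handing tcp_proc_register() an afinfo whose .show is filled
 * in, typically from pernet init:
 */
#if 0	/* expository sketch, not compiled */
static struct tcp_seq_afinfo tcpx_seq_afinfo = {
	.name		= "tcpx",
	.family		= AF_INET,
	.seq_fops	= &tcp_afinfo_seq_fops,
	.seq_ops	= {
		.show	= tcp4_seq_show,
	},
};

static int __net_init tcpx_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcpx_seq_afinfo);
}
#endif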
1da177e4 2335
cf533ea5 2336static void get_openreq4(const struct sock *sk, const struct request_sock *req,
652586df 2337 struct seq_file *f, int i, kuid_t uid)
1da177e4 2338{
2e6599cb 2339 const struct inet_request_sock *ireq = inet_rsk(req);
a399a805 2340 long delta = req->expires - jiffies;
1da177e4 2341
5e659e4c 2342 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
652586df 2343 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
1da177e4 2344 i,
634fb979 2345 ireq->ir_loc_addr,
c720c7e8 2346 ntohs(inet_sk(sk)->inet_sport),
634fb979
ED
2347 ireq->ir_rmt_addr,
2348 ntohs(ireq->ir_rmt_port),
1da177e4
LT
2349 TCP_SYN_RECV,
2350 0, 0, /* could print option size, but that is af dependent. */
2351 1, /* timers active (only the expire timer) */
a399a805 2352 jiffies_delta_to_clock_t(delta),
e6c022a4 2353 req->num_timeout,
a7cb5a49 2354 from_kuid_munged(seq_user_ns(f), uid),
1da177e4
LT
2355 0, /* non standard timer */
2356 0, /* open_requests have no inode */
2357 atomic_read(&sk->sk_refcnt),
652586df 2358 req);
1da177e4
LT
2359}
2360
652586df 2361static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
1da177e4
LT
2362{
2363 int timer_active;
2364 unsigned long timer_expires;
cf533ea5 2365 const struct tcp_sock *tp = tcp_sk(sk);
cf4c6bf8 2366 const struct inet_connection_sock *icsk = inet_csk(sk);
cf533ea5 2367 const struct inet_sock *inet = inet_sk(sk);
168a8f58 2368 struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
c720c7e8
ED
2369 __be32 dest = inet->inet_daddr;
2370 __be32 src = inet->inet_rcv_saddr;
2371 __u16 destp = ntohs(inet->inet_dport);
2372 __u16 srcp = ntohs(inet->inet_sport);
49d09007 2373 int rx_queue;
1da177e4 2374
6ba8a3b1
ND
2375 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2376 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2377 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1da177e4 2378 timer_active = 1;
463c84b9
ACM
2379 timer_expires = icsk->icsk_timeout;
2380 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2381 timer_active = 4;
463c84b9 2382 timer_expires = icsk->icsk_timeout;
cf4c6bf8 2383 } else if (timer_pending(&sk->sk_timer)) {
1da177e4 2384 timer_active = 2;
cf4c6bf8 2385 timer_expires = sk->sk_timer.expires;
1da177e4
LT
2386 } else {
2387 timer_active = 0;
2388 timer_expires = jiffies;
2389 }
2390
49d09007
ED
2391 if (sk->sk_state == TCP_LISTEN)
2392 rx_queue = sk->sk_ack_backlog;
2393 else
2394 /*
 2395	 * Because we don't lock the socket, we might find a transient negative value.
2396 */
2397 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2398
5e659e4c 2399 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
652586df 2400 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
cf4c6bf8 2401 i, src, srcp, dest, destp, sk->sk_state,
47da8ee6 2402 tp->write_seq - tp->snd_una,
49d09007 2403 rx_queue,
1da177e4 2404 timer_active,
a399a805 2405 jiffies_delta_to_clock_t(timer_expires - jiffies),
463c84b9 2406 icsk->icsk_retransmits,
a7cb5a49 2407 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
6687e988 2408 icsk->icsk_probes_out,
cf4c6bf8
IJ
2409 sock_i_ino(sk),
2410 atomic_read(&sk->sk_refcnt), sk,
7be87351
SH
2411 jiffies_to_clock_t(icsk->icsk_rto),
2412 jiffies_to_clock_t(icsk->icsk_ack.ato),
463c84b9 2413 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1da177e4 2414 tp->snd_cwnd,
168a8f58
JC
2415 sk->sk_state == TCP_LISTEN ?
2416 (fastopenq ? fastopenq->max_qlen : 0) :
652586df 2417 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
1da177e4
LT
2418}
2419
cf533ea5 2420static void get_timewait4_sock(const struct inet_timewait_sock *tw,
652586df 2421 struct seq_file *f, int i)
1da177e4 2422{
23f33c2d 2423 __be32 dest, src;
1da177e4 2424 __u16 destp, srcp;
e2a1d3e4 2425 s32 delta = tw->tw_ttd - inet_tw_time_stamp();
1da177e4
LT
2426
2427 dest = tw->tw_daddr;
2428 src = tw->tw_rcv_saddr;
2429 destp = ntohs(tw->tw_dport);
2430 srcp = ntohs(tw->tw_sport);
2431
5e659e4c 2432 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
652586df 2433 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
1da177e4 2434 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
a399a805 2435 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
652586df 2436 atomic_read(&tw->tw_refcnt), tw);
1da177e4
LT
2437}
2438
2439#define TMPSZ 150
2440
2441static int tcp4_seq_show(struct seq_file *seq, void *v)
2442{
5799de0b 2443 struct tcp_iter_state *st;
05dbc7b5 2444 struct sock *sk = v;
1da177e4 2445
652586df 2446 seq_setwidth(seq, TMPSZ - 1);
1da177e4 2447 if (v == SEQ_START_TOKEN) {
652586df 2448 seq_puts(seq, " sl local_address rem_address st tx_queue "
1da177e4
LT
2449 "rx_queue tr tm->when retrnsmt uid timeout "
2450 "inode");
2451 goto out;
2452 }
2453 st = seq->private;
2454
2455 switch (st->state) {
2456 case TCP_SEQ_STATE_LISTENING:
2457 case TCP_SEQ_STATE_ESTABLISHED:
05dbc7b5 2458 if (sk->sk_state == TCP_TIME_WAIT)
652586df 2459 get_timewait4_sock(v, seq, st->num);
05dbc7b5 2460 else
652586df 2461 get_tcp4_sock(v, seq, st->num);
1da177e4
LT
2462 break;
2463 case TCP_SEQ_STATE_OPENREQ:
652586df 2464 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid);
1da177e4
LT
2465 break;
2466 }
1da177e4 2467out:
652586df 2468 seq_pad(seq, '\n');
1da177e4
LT
2469 return 0;
2470}
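
/*
 * Decoding note for the output above: addresses are raw __be32/__be16
 * values printed with %08X/%04X, so on a little-endian host 127.0.0.1:22
 * renders as "0100007F:0016", and the "st" column is the numeric sk_state
 * (0A == TCP_LISTEN). A hypothetical listener line would begin:
 *
 *    0: 0100007F:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000 ...
 */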
2471
73cb88ec
AV
2472static const struct file_operations tcp_afinfo_seq_fops = {
2473 .owner = THIS_MODULE,
2474 .open = tcp_seq_open,
2475 .read = seq_read,
2476 .llseek = seq_lseek,
2477 .release = seq_release_net
2478};
2479
1da177e4 2480static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1da177e4
LT
2481 .name = "tcp",
2482 .family = AF_INET,
73cb88ec 2483 .seq_fops = &tcp_afinfo_seq_fops,
9427c4b3
DL
2484 .seq_ops = {
2485 .show = tcp4_seq_show,
2486 },
1da177e4
LT
2487};
2488
2c8c1e72 2489static int __net_init tcp4_proc_init_net(struct net *net)
757764f6
PE
2490{
2491 return tcp_proc_register(net, &tcp4_seq_afinfo);
2492}
2493
2c8c1e72 2494static void __net_exit tcp4_proc_exit_net(struct net *net)
757764f6
PE
2495{
2496 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2497}
2498
2499static struct pernet_operations tcp4_net_ops = {
2500 .init = tcp4_proc_init_net,
2501 .exit = tcp4_proc_exit_net,
2502};
2503
1da177e4
LT
2504int __init tcp4_proc_init(void)
2505{
757764f6 2506 return register_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2507}
2508
2509void tcp4_proc_exit(void)
2510{
757764f6 2511 unregister_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2512}
2513#endif /* CONFIG_PROC_FS */
2514
2515struct proto tcp_prot = {
2516 .name = "TCP",
2517 .owner = THIS_MODULE,
2518 .close = tcp_close,
2519 .connect = tcp_v4_connect,
2520 .disconnect = tcp_disconnect,
463c84b9 2521 .accept = inet_csk_accept,
1da177e4
LT
2522 .ioctl = tcp_ioctl,
2523 .init = tcp_v4_init_sock,
2524 .destroy = tcp_v4_destroy_sock,
2525 .shutdown = tcp_shutdown,
2526 .setsockopt = tcp_setsockopt,
2527 .getsockopt = tcp_getsockopt,
1da177e4 2528 .recvmsg = tcp_recvmsg,
7ba42910
CG
2529 .sendmsg = tcp_sendmsg,
2530 .sendpage = tcp_sendpage,
1da177e4 2531 .backlog_rcv = tcp_v4_do_rcv,
46d3ceab 2532 .release_cb = tcp_release_cb,
563d34d0 2533 .mtu_reduced = tcp_v4_mtu_reduced,
ab1e0a13
ACM
2534 .hash = inet_hash,
2535 .unhash = inet_unhash,
2536 .get_port = inet_csk_get_port,
1da177e4 2537 .enter_memory_pressure = tcp_enter_memory_pressure,
c9bee3b7 2538 .stream_memory_free = tcp_stream_memory_free,
1da177e4 2539 .sockets_allocated = &tcp_sockets_allocated,
0a5578cf 2540 .orphan_count = &tcp_orphan_count,
1da177e4
LT
2541 .memory_allocated = &tcp_memory_allocated,
2542 .memory_pressure = &tcp_memory_pressure,
a4fe34bf 2543 .sysctl_mem = sysctl_tcp_mem,
1da177e4
LT
2544 .sysctl_wmem = sysctl_tcp_wmem,
2545 .sysctl_rmem = sysctl_tcp_rmem,
2546 .max_header = MAX_TCP_HEADER,
2547 .obj_size = sizeof(struct tcp_sock),
3ab5aee7 2548 .slab_flags = SLAB_DESTROY_BY_RCU,
6d6ee43e 2549 .twsk_prot = &tcp_timewait_sock_ops,
60236fdd 2550 .rsk_prot = &tcp_request_sock_ops,
39d8cda7 2551 .h.hashinfo = &tcp_hashinfo,
7ba42910 2552 .no_autobind = true,
543d9cfe
ACM
2553#ifdef CONFIG_COMPAT
2554 .compat_setsockopt = compat_tcp_setsockopt,
2555 .compat_getsockopt = compat_tcp_getsockopt,
2556#endif
c255a458 2557#ifdef CONFIG_MEMCG_KMEM
d1a4c0b3
GC
2558 .init_cgroup = tcp_init_cgroup,
2559 .destroy_cgroup = tcp_destroy_cgroup,
2560 .proto_cgroup = tcp_proto_cgroup,
2561#endif
1da177e4 2562};
4bc2f18b 2563EXPORT_SYMBOL(tcp_prot);
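
/*
 * Registration sketch: tcp_prot is plugged into the socket layer from
 * af_inet.c (assumed unchanged from mainline of this era), first creating
 * its slab caches, then exposing it as SOCK_STREAM/IPPROTO_TCP:
 */
#if 0	/* expository sketch, not compiled */
	static struct inet_protosw tcp_protosw = {
		.type		= SOCK_STREAM,
		.protocol	= IPPROTO_TCP,
		.prot		= &tcp_prot,
		.ops		= &inet_stream_ops,
		.flags		= INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK,
	};

	if (proto_register(&tcp_prot, 1))	/* 1 => also create the slabs */
		panic("...");
	inet_register_protosw(&tcp_protosw);
#endif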
1da177e4 2564
046ee902
DL
2565static int __net_init tcp_sk_init(struct net *net)
2566{
5d134f1c 2567 net->ipv4.sysctl_tcp_ecn = 2;
be9f4a44 2568 return 0;
046ee902
DL
2569}
2570
2571static void __net_exit tcp_sk_exit(struct net *net)
2572{
b099ce26
EB
2573}
2574
2575static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2576{
2577 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
046ee902
DL
2578}
2579
2580static struct pernet_operations __net_initdata tcp_sk_ops = {
b099ce26
EB
2581 .init = tcp_sk_init,
2582 .exit = tcp_sk_exit,
2583 .exit_batch = tcp_sk_exit_batch,
046ee902
DL
2584};
2585
9b0f976f 2586void __init tcp_v4_init(void)
1da177e4 2587{
5caea4ea 2588 inet_hashinfo_init(&tcp_hashinfo);
6a1b3054 2589 if (register_pernet_subsys(&tcp_sk_ops))
1da177e4 2590 panic("Failed to create the TCP control socket.\n");
1da177e4 2591}