tcp: add queue_add_hash to tcp_request_sock_ops
net/ipv4/tcp_ipv4.c
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

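/* Pick the initial sequence number for an incoming connection from a keyed
 * hash of the packet's 4-tuple, making ISNs hard to predict (in the spirit
 * of RFC 6528 ISN generation).
 */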
static __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

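/* Called when a connect() attempt collides with a TIME-WAIT socket.
 * Returns 1 if the remembered timewait state shows the old incarnation
 * can safely be replaced by the new connection, 0 otherwise.
 */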
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables
	 * and complete initialization afterwards.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);

	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC 1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
static void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);
	u32 mtu = tcp_sk(sk)->mtu_info;

	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

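/* Hand an ICMP redirect to the routing layer so the socket's cached
 * route can be updated.
 */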
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	__u32 seq, snd_una;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC 1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC 1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs sent out by Linux are always <576 bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && fastopen->sk == NULL)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

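/* Fill in the TCP checksum of an outgoing segment: set up for hardware
 * offload when ip_summed == CHECKSUM_PARTIAL, otherwise compute it here.
 */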
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So we build the reply based only on the parameters that
 *		arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket. We do not lose security here:
		 * the incoming packet is checked against the md5 hash of the
		 * found key; no RST is generated if the md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
					     &tcp_hashinfo, ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			return;
		rcu_read_lock();
		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto release_sk1;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					     &ip_hdr(skb)->saddr,
					     AF_INET) : NULL;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk)
		arg.bound_dev_if = sk->sk_bound_dev_if;

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}

/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
   outside socket context, is certainly ugly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

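/* Answer a segment arriving for a TIME-WAIT socket with a bare ACK built
 * from the remembered sequence numbers, timestamps and TOS.
 */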
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
			tcp_time_stamp,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      u16 queue_mapping,
			      struct tcp_fastopen_cookie *foc)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, foc);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		skb_set_queue_mapping(skb, queue_mapping);
		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
					    ireq->ir_rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
		if (!tcp_rsk(req)->snt_synack && !err)
			tcp_rsk(req)->snt_synack = tcp_time_stamp;
	}

	return err;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/*
 * Return true if a syncookie should be sent
 */
bool tcp_syn_flood_action(struct sock *sk,
			  const struct sk_buff *skb,
			  const char *proto)
{
	const char *msg = "Dropping request";
	bool want_cookie = false;
	struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = true;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
{
	const struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options_rcu *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = sizeof(*dopt) + opt->optlen;

		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(&dopt->opt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       sock_owned_by_user(sk) ||
				       lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
	struct tcp_md5sig_key *key;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

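/* setsockopt(TCP_MD5SIG) handler: validate the user-supplied tcp_md5sig
 * structure and add or delete the per-peer key accordingly.
 */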
static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}

static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->ir_loc_addr;
		daddr = inet_rsk(req)->ir_rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
					     : "");
		return true;
	}
	return false;
}

#endif

1229
16bea70a
OP
1230static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
1231 struct sk_buff *skb)
1232{
1233 struct inet_request_sock *ireq = inet_rsk(req);
1234
1235 ireq->ir_loc_addr = ip_hdr(skb)->daddr;
1236 ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
1237 ireq->no_srccheck = inet_sk(sk)->transparent;
1238 ireq->opt = tcp_v4_save_options(skb);
1239}
1240
static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
					  const struct request_sock *req,
					  bool *strict)
{
	struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);

	if (strict) {
		if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
			*strict = true;
		else
			*strict = false;
	}

	return dst;
}

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

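/* The af-specific (IPv4) request_sock operations. Per the commit subject,
 * .queue_hash_add is the hook this change adds, letting
 * tcp_v4_conn_request() hash a request into the SYN queue through an
 * indirection instead of calling inet_csk_reqsk_queue_hash_add() directly.
 */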
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.mss_clamp	=	TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
#endif
	.init_req	=	tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v4_init_sequence,
#endif
	.route_req	=	tcp_v4_route_req,
	.init_seq	=	tcp_v4_init_sequence,
	.send_synack	=	tcp_v4_send_synack,
	.queue_hash_add =	inet_csk_reqsk_queue_hash_add,
};

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	bool want_cookie = false, fastopen;
	struct flowi4 fl4;
	struct tcp_fastopen_cookie foc = { .len = -1 };
	const struct tcp_request_sock_ops *af_ops;
	int err;

	/* Never answer SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if ((sysctl_tcp_syncookies == 2 ||
	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
		if (!want_cookie)
			goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
		goto drop;
	}

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

	af_ops = tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = af_ops->mss_clamp;
	tmp_opt.user_mss  = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb, sk);

	af_ops->init_req(req, sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, skb, sock_net(sk));

	if (want_cookie) {
		isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) {
			bool strict;

			dst = af_ops->route_req(sk, (struct flowi *)&fl4, req,
						&strict);
			if (dst && strict &&
			    !tcp_peer_is_proven(req, dst, true)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 !tcp_peer_is_proven(req, dst, false)) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = af_ops->init_seq(skb);
	}
	if (!dst) {
		dst = af_ops->route_req(sk, (struct flowi *)&fl4, req, NULL);
		if (!dst)
			goto drop_and_free;
	}

	tcp_rsk(req)->snt_isn = isn;
	tcp_openreq_init_rwin(req, sk, dst);
	fastopen = !want_cookie &&
		   tcp_try_fastopen(sk, skb, req, &foc, dst);
	err = af_ops->send_synack(sk, dst, NULL, req,
				  skb_get_queue_mapping(skb), &foc);
	if (!fastopen) {
		if (err || want_cookie)
			goto drop_and_free;

		tcp_rsk(req)->listener = NULL;
		af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	}

	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);

/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->inet_daddr   = ireq->ir_rmt_addr;
	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
	newinet->inet_saddr   = ireq->ir_loc_addr;
	inet_opt	      = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	newinet->rcv_tos      = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

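/* A segment arrived on a listening socket: look for a matching half-open
 * request first, then for an already established child socket, and as a
 * last resort try to validate it as a syncookie ACK.
 */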
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev, false);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, 0) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}
		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

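/* Early demux: look up the established socket for an incoming skb before
 * routing, so the socket's cached input route can be reused.
 */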
void tcp_v4_early_demux(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif);
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk->sk_state != TCP_TIME_WAIT) {
			struct dst_entry *dst = sk->sk_rx_dst;

			if (dst)
				dst = dst_check(dst, 0);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

b2fb4f54
ED
1669/* Packet is added to VJ-style prequeue for processing in process
1670 * context, if a reader task is waiting. Apparently, this exciting
1671 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1672 * failed somewhere. Latency? Burstiness? Well, at least now we will
1673 * see why it failed. 8)8) --ANK
1674 *
1675 */
1676bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1677{
1678 struct tcp_sock *tp = tcp_sk(sk);
1679
1680 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1681 return false;
1682
1683 if (skb->len <= tcp_hdrlen(skb) &&
1684 skb_queue_len(&tp->ucopy.prequeue) == 0)
1685 return false;
1686
58717686 1687 skb_dst_force(skb);
b2fb4f54
ED
1688 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1689 tp->ucopy.memory += skb->truesize;
1690 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1691 struct sk_buff *skb1;
1692
1693 BUG_ON(sock_owned_by_user(sk));
1694
1695 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1696 sk_backlog_rcv(sk, skb1);
1697 NET_INC_STATS_BH(sock_net(sk),
1698 LINUX_MIB_TCPPREQUEUEDROPPED);
1699 }
1700
1701 tp->ucopy.memory = 0;
1702 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1703 wake_up_interruptible_sync_poll(sk_sleep(sk),
1704 POLLIN | POLLRDNORM | POLLRDBAND);
1705 if (!inet_csk_ack_scheduled(sk))
1706 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1707 (3 * tcp_rto_min(sk)) / 4,
1708 TCP_RTO_MAX);
1709 }
1710 return true;
1711}
1712EXPORT_SYMBOL(tcp_prequeue);
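
/* Consumer-side sketch (illustrative): the prequeue is drained in process
 * context by tcp_recvmsg() via tcp_prequeue_process() in tcp.c, roughly:
 *
 *	while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
 *		sk_backlog_rcv(sk, skb);
 *	tp->ucopy.memory = 0;
 *
 * so segments queued here are charged to tp->ucopy.memory until the
 * reader wakes up and processes them.
 */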
1713
1da177e4
LT
1714/*
1715 * From tcp_input.c
1716 */
1717
1718int tcp_v4_rcv(struct sk_buff *skb)
1719{
eddc9ec5 1720 const struct iphdr *iph;
cf533ea5 1721 const struct tcphdr *th;
1da177e4
LT
1722 struct sock *sk;
1723 int ret;
a86b1e30 1724 struct net *net = dev_net(skb->dev);
1da177e4
LT
1725
1726 if (skb->pkt_type != PACKET_HOST)
1727 goto discard_it;
1728
1729 /* Count it even if it's bad */
63231bdd 1730 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1da177e4
LT
1731
1732 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1733 goto discard_it;
1734
aa8223c7 1735 th = tcp_hdr(skb);
1da177e4
LT
1736
1737 if (th->doff < sizeof(struct tcphdr) / 4)
1738 goto bad_packet;
1739 if (!pskb_may_pull(skb, th->doff * 4))
1740 goto discard_it;
1741
1742 /* An explanation is required here, I think.
1743 * Packet length and doff are validated by header prediction,
caa20d9a 1744 * provided the case of th->doff == 0 is eliminated.
1da177e4 1745 * So we defer the checks. */
ed70fcfc
TH
1746
1747 if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
6a5dc9e5 1748 goto csum_error;
1da177e4 1749
aa8223c7 1750 th = tcp_hdr(skb);
eddc9ec5 1751 iph = ip_hdr(skb);
1da177e4
LT
1752 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1753 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1754 skb->len - th->doff * 4);
1755 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1756 TCP_SKB_CB(skb)->when = 0;
b82d1bb4 1757 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1da177e4
LT
1758 TCP_SKB_CB(skb)->sacked = 0;
1759
9a1f27c4 1760 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1da177e4
LT
1761 if (!sk)
1762 goto no_tcp_socket;
1763
bb134d5d
ED
1764process:
1765 if (sk->sk_state == TCP_TIME_WAIT)
1766 goto do_time_wait;
1767
6cce09f8
ED
1768 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1769 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
d218d111 1770 goto discard_and_relse;
6cce09f8 1771 }
d218d111 1772
1da177e4
LT
1773 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1774 goto discard_and_relse;
b59c2701 1775 nf_reset(skb);
1da177e4 1776
fda9ef5d 1777 if (sk_filter(sk, skb))
1da177e4
LT
1778 goto discard_and_relse;
1779
8b80cda5 1780 sk_mark_napi_id(sk, skb);
1da177e4
LT
1781 skb->dev = NULL;
1782
c6366184 1783 bh_lock_sock_nested(sk);
1da177e4
LT
1784 ret = 0;
1785 if (!sock_owned_by_user(sk)) {
1a2449a8
CL
1786#ifdef CONFIG_NET_DMA
1787 struct tcp_sock *tp = tcp_sk(sk);
1788 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
a2bd1140 1789 tp->ucopy.dma_chan = net_dma_find_channel();
1a2449a8 1790 if (tp->ucopy.dma_chan)
1da177e4 1791 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8
CL
1792 else
1793#endif
1794 {
1795 if (!tcp_prequeue(sk, skb))
ae8d7f88 1796 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8 1797 }
da882c1f
ED
1798 } else if (unlikely(sk_add_backlog(sk, skb,
1799 sk->sk_rcvbuf + sk->sk_sndbuf))) {
6b03a53a 1800 bh_unlock_sock(sk);
6cce09f8 1801 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
6b03a53a
ZY
1802 goto discard_and_relse;
1803 }
1da177e4
LT
1804 bh_unlock_sock(sk);
1805
1806 sock_put(sk);
1807
1808 return ret;
1809
1810no_tcp_socket:
1811 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1812 goto discard_it;
1813
1814 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
6a5dc9e5
ED
1815csum_error:
1816 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1da177e4 1817bad_packet:
63231bdd 1818 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1da177e4 1819 } else {
cfb6eeb4 1820 tcp_v4_send_reset(NULL, skb);
1da177e4
LT
1821 }
1822
1823discard_it:
1824 /* Discard frame. */
1825 kfree_skb(skb);
e905a9ed 1826 return 0;
1da177e4
LT
1827
1828discard_and_relse:
1829 sock_put(sk);
1830 goto discard_it;
1831
1832do_time_wait:
1833 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
9469c7b4 1834 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1835 goto discard_it;
1836 }
1837
6a5dc9e5 1838 if (skb->len < (th->doff << 2)) {
9469c7b4 1839 inet_twsk_put(inet_twsk(sk));
6a5dc9e5
ED
1840 goto bad_packet;
1841 }
1842 if (tcp_checksum_complete(skb)) {
1843 inet_twsk_put(inet_twsk(sk));
1844 goto csum_error;
1da177e4 1845 }
9469c7b4 1846 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1da177e4 1847 case TCP_TW_SYN: {
c346dca1 1848 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
c67499c0 1849 &tcp_hashinfo,
da5e3630 1850 iph->saddr, th->source,
eddc9ec5 1851 iph->daddr, th->dest,
463c84b9 1852 inet_iif(skb));
1da177e4 1853 if (sk2) {
9469c7b4
YH
1854 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1855 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1856 sk = sk2;
1857 goto process;
1858 }
1859 /* Fall through to ACK */
1860 }
1861 case TCP_TW_ACK:
1862 tcp_v4_timewait_ack(sk, skb);
1863 break;
1864 case TCP_TW_RST:
1865 goto no_tcp_socket;
1866 case TCP_TW_SUCCESS:;
1867 }
1868 goto discard_it;
1869}
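
/* Note on TCP_SKB_CB() above (illustrative): it simply overlays
 * struct tcp_skb_cb on the skb's 48-byte control block, i.e.
 *
 *	#define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
 *
 * which is why tcp_v4_rcv() can stash seq/end_seq/when there before the
 * segment is handed to the state machine.
 */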
1870
ccb7c410
DM
1871static struct timewait_sock_ops tcp_timewait_sock_ops = {
1872 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1873 .twsk_unique = tcp_twsk_unique,
1874 .twsk_destructor= tcp_twsk_destructor,
ccb7c410 1875};
1da177e4 1876
63d02d15 1877void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
5d299f3d
ED
1878{
1879 struct dst_entry *dst = skb_dst(skb);
1880
1881 dst_hold(dst);
1882 sk->sk_rx_dst = dst;
1883 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1884}
63d02d15 1885EXPORT_SYMBOL(inet_sk_rx_dst_set);
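
/* Usage sketch (illustrative): this helper is reached through the af_ops
 * vector, e.g. from tcp_finish_connect() and tcp_v4_syn_recv_sock(),
 * roughly as:
 *
 *	icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
 *
 * caching the input route so the tcp_v4_do_rcv() fast path above can
 * skip a route lookup.
 */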
5d299f3d 1886
3b401a81 1887const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
1888 .queue_xmit = ip_queue_xmit,
1889 .send_check = tcp_v4_send_check,
1890 .rebuild_header = inet_sk_rebuild_header,
5d299f3d 1891 .sk_rx_dst_set = inet_sk_rx_dst_set,
543d9cfe
ACM
1892 .conn_request = tcp_v4_conn_request,
1893 .syn_recv_sock = tcp_v4_syn_recv_sock,
543d9cfe
ACM
1894 .net_header_len = sizeof(struct iphdr),
1895 .setsockopt = ip_setsockopt,
1896 .getsockopt = ip_getsockopt,
1897 .addr2sockaddr = inet_csk_addr2sockaddr,
1898 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 1899 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 1900#ifdef CONFIG_COMPAT
543d9cfe
ACM
1901 .compat_setsockopt = compat_ip_setsockopt,
1902 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 1903#endif
1da177e4 1904};
4bc2f18b 1905EXPORT_SYMBOL(ipv4_specific);
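
/* Dispatch sketch (illustrative): the connection-level code stays
 * AF-independent by calling through this vector; e.g. the state machine
 * in tcp_input.c handles an incoming SYN with
 *
 *	if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
 *		return 1;
 *
 * which resolves to tcp_v4_conn_request() for IPv4 sockets.
 */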
1da177e4 1906
cfb6eeb4 1907#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 1908static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 1909 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 1910 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 1911 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 1912};
b6332e6c 1913#endif
cfb6eeb4 1914
1da177e4
LT
1915/* NOTE: a lot of things are set to zero explicitly by the call to
1916 * sk_alloc(), so they need not be done here.
1917 */
1918static int tcp_v4_init_sock(struct sock *sk)
1919{
6687e988 1920 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4 1921
900f65d3 1922 tcp_init_sock(sk);
1da177e4 1923
8292a17a 1924 icsk->icsk_af_ops = &ipv4_specific;
900f65d3 1925
cfb6eeb4 1926#ifdef CONFIG_TCP_MD5SIG
ac807fa8 1927 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
cfb6eeb4 1928#endif
1da177e4 1929
1da177e4
LT
1930 return 0;
1931}
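
/* The MD5 vector installed above is used the same way (illustrative
 * note): AF-independent code such as tcp_output.c looks up keys via
 *
 *	md5 = tp->af_specific->md5_lookup(sk, sk);
 *
 * which maps to tcp_v4_md5_lookup() for IPv4 sockets.
 */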
1932
7d06b2e0 1933void tcp_v4_destroy_sock(struct sock *sk)
1da177e4
LT
1934{
1935 struct tcp_sock *tp = tcp_sk(sk);
1936
1937 tcp_clear_xmit_timers(sk);
1938
6687e988 1939 tcp_cleanup_congestion_control(sk);
317a76f9 1940
1da177e4 1941	/* Clean up the write buffer. */
fe067e8a 1942 tcp_write_queue_purge(sk);
1da177e4
LT
1943
1944 /* Cleans up our, hopefully empty, out_of_order_queue. */
e905a9ed 1945 __skb_queue_purge(&tp->out_of_order_queue);
1da177e4 1946
cfb6eeb4
YH
1947#ifdef CONFIG_TCP_MD5SIG
1948 /* Clean up the MD5 key list, if any */
1949 if (tp->md5sig_info) {
a915da9b 1950 tcp_clear_md5_list(sk);
a8afca03 1951 kfree_rcu(tp->md5sig_info, rcu);
cfb6eeb4
YH
1952 tp->md5sig_info = NULL;
1953 }
1954#endif
1955
1a2449a8
CL
1956#ifdef CONFIG_NET_DMA
1957 /* Cleans up our sk_async_wait_queue */
e905a9ed 1958 __skb_queue_purge(&sk->sk_async_wait_queue);
1a2449a8
CL
1959#endif
1960
1da177e4
LT
1961	/* Clean up the prequeue; it really should be empty by now. */
1962 __skb_queue_purge(&tp->ucopy.prequeue);
1963
1964 /* Clean up a referenced TCP bind bucket. */
463c84b9 1965 if (inet_csk(sk)->icsk_bind_hash)
ab1e0a13 1966 inet_put_port(sk);
1da177e4 1967
168a8f58 1968 BUG_ON(tp->fastopen_rsk != NULL);
435cf559 1969
cf60af03
YC
1970	/* If the socket was aborted during the connect operation */
1971 tcp_free_fastopen_req(tp);
1972
180d8cd9 1973 sk_sockets_allocated_dec(sk);
d1a4c0b3 1974 sock_release_memcg(sk);
1da177e4 1975}
1da177e4
LT
1976EXPORT_SYMBOL(tcp_v4_destroy_sock);
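
/* Teardown sketch (illustrative): tcp_v4_destroy_sock() is not called
 * directly; it is reached as sk->sk_prot->destroy(sk) from
 * inet_csk_destroy_sock() once the socket has been orphaned and
 * unhashed (see net/ipv4/inet_connection_sock.c).
 */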
1977
1978#ifdef CONFIG_PROC_FS
1979/* Proc filesystem TCP sock list dumping. */
1980
a8b690f9
TH
1981/*
1982 * Get the next listener socket after cur. If cur is NULL, get the first
1983 * socket, starting from the bucket given in st->bucket; when st->bucket is
1984 * zero, the very first socket in the hash table is returned.
1985 */
1da177e4
LT
1986static void *listening_get_next(struct seq_file *seq, void *cur)
1987{
463c84b9 1988 struct inet_connection_sock *icsk;
c25eb3bf 1989 struct hlist_nulls_node *node;
1da177e4 1990 struct sock *sk = cur;
5caea4ea 1991 struct inet_listen_hashbucket *ilb;
5799de0b 1992 struct tcp_iter_state *st = seq->private;
a4146b1b 1993 struct net *net = seq_file_net(seq);
1da177e4
LT
1994
1995 if (!sk) {
a8b690f9 1996 ilb = &tcp_hashinfo.listening_hash[st->bucket];
5caea4ea 1997 spin_lock_bh(&ilb->lock);
c25eb3bf 1998 sk = sk_nulls_head(&ilb->head);
a8b690f9 1999 st->offset = 0;
1da177e4
LT
2000 goto get_sk;
2001 }
5caea4ea 2002 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1da177e4 2003 ++st->num;
a8b690f9 2004 ++st->offset;
1da177e4
LT
2005
2006 if (st->state == TCP_SEQ_STATE_OPENREQ) {
60236fdd 2007 struct request_sock *req = cur;
1da177e4 2008
72a3effa 2009 icsk = inet_csk(st->syn_wait_sk);
1da177e4
LT
2010 req = req->dl_next;
2011 while (1) {
2012 while (req) {
bdccc4ca 2013 if (req->rsk_ops->family == st->family) {
1da177e4
LT
2014 cur = req;
2015 goto out;
2016 }
2017 req = req->dl_next;
2018 }
72a3effa 2019 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1da177e4
LT
2020 break;
2021get_req:
463c84b9 2022 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1da177e4 2023 }
1bde5ac4 2024 sk = sk_nulls_next(st->syn_wait_sk);
1da177e4 2025 st->state = TCP_SEQ_STATE_LISTENING;
463c84b9 2026 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2027 } else {
e905a9ed 2028 icsk = inet_csk(sk);
463c84b9
ACM
2029 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2030 if (reqsk_queue_len(&icsk->icsk_accept_queue))
1da177e4 2031 goto start_req;
463c84b9 2032 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1bde5ac4 2033 sk = sk_nulls_next(sk);
1da177e4
LT
2034 }
2035get_sk:
c25eb3bf 2036 sk_nulls_for_each_from(sk, node) {
8475ef9f
PE
2037 if (!net_eq(sock_net(sk), net))
2038 continue;
2039 if (sk->sk_family == st->family) {
1da177e4
LT
2040 cur = sk;
2041 goto out;
2042 }
e905a9ed 2043 icsk = inet_csk(sk);
463c84b9
ACM
2044 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2045 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1da177e4
LT
2046start_req:
2047 st->uid = sock_i_uid(sk);
2048 st->syn_wait_sk = sk;
2049 st->state = TCP_SEQ_STATE_OPENREQ;
2050 st->sbucket = 0;
2051 goto get_req;
2052 }
463c84b9 2053 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2054 }
5caea4ea 2055 spin_unlock_bh(&ilb->lock);
a8b690f9 2056 st->offset = 0;
0f7ff927 2057 if (++st->bucket < INET_LHTABLE_SIZE) {
5caea4ea
ED
2058 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2059 spin_lock_bh(&ilb->lock);
c25eb3bf 2060 sk = sk_nulls_head(&ilb->head);
1da177e4
LT
2061 goto get_sk;
2062 }
2063 cur = NULL;
2064out:
2065 return cur;
2066}
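
/* Iteration sketch (illustrative): the walk visits, per listening
 * bucket, each listener followed by that listener's SYN table, roughly:
 *
 *	for each bucket b in listening_hash:
 *		for each sk in b:
 *			emit sk;
 *			emit each req in sk's syn_table
 *			    (state TCP_SEQ_STATE_OPENREQ);
 *
 * st->bucket, st->sbucket and st->offset record the position so a later
 * seq_file read can resume without rescanning from the top.
 */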
2067
2068static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2069{
a8b690f9
TH
2070 struct tcp_iter_state *st = seq->private;
2071 void *rc;
2072
2073 st->bucket = 0;
2074 st->offset = 0;
2075 rc = listening_get_next(seq, NULL);
1da177e4
LT
2076
2077 while (rc && *pos) {
2078 rc = listening_get_next(seq, rc);
2079 --*pos;
2080 }
2081 return rc;
2082}
2083
05dbc7b5 2084static inline bool empty_bucket(const struct tcp_iter_state *st)
6eac5604 2085{
05dbc7b5 2086 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
6eac5604
AK
2087}
2088
a8b690f9
TH
2089/*
2090 * Get the first established socket, starting from the bucket given in
2091 * st->bucket; when st->bucket is zero, the very first socket in the hash is returned.
2092 */
1da177e4
LT
2093static void *established_get_first(struct seq_file *seq)
2094{
5799de0b 2095 struct tcp_iter_state *st = seq->private;
a4146b1b 2096 struct net *net = seq_file_net(seq);
1da177e4
LT
2097 void *rc = NULL;
2098
a8b690f9
TH
2099 st->offset = 0;
2100 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1da177e4 2101 struct sock *sk;
3ab5aee7 2102 struct hlist_nulls_node *node;
9db66bdc 2103 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1da177e4 2104
6eac5604
AK
2105 /* Lockless fast path for the common case of empty buckets */
2106 if (empty_bucket(st))
2107 continue;
2108
9db66bdc 2109 spin_lock_bh(lock);
3ab5aee7 2110 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
f40c8174 2111 if (sk->sk_family != st->family ||
878628fb 2112 !net_eq(sock_net(sk), net)) {
1da177e4
LT
2113 continue;
2114 }
2115 rc = sk;
2116 goto out;
2117 }
9db66bdc 2118 spin_unlock_bh(lock);
1da177e4
LT
2119 }
2120out:
2121 return rc;
2122}
2123
2124static void *established_get_next(struct seq_file *seq, void *cur)
2125{
2126 struct sock *sk = cur;
3ab5aee7 2127 struct hlist_nulls_node *node;
5799de0b 2128 struct tcp_iter_state *st = seq->private;
a4146b1b 2129 struct net *net = seq_file_net(seq);
1da177e4
LT
2130
2131 ++st->num;
a8b690f9 2132 ++st->offset;
1da177e4 2133
05dbc7b5 2134 sk = sk_nulls_next(sk);
1da177e4 2135
3ab5aee7 2136 sk_nulls_for_each_from(sk, node) {
878628fb 2137 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
05dbc7b5 2138 return sk;
1da177e4
LT
2139 }
2140
05dbc7b5
ED
2141 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2142 ++st->bucket;
2143 return established_get_first(seq);
1da177e4
LT
2144}
2145
2146static void *established_get_idx(struct seq_file *seq, loff_t pos)
2147{
a8b690f9
TH
2148 struct tcp_iter_state *st = seq->private;
2149 void *rc;
2150
2151 st->bucket = 0;
2152 rc = established_get_first(seq);
1da177e4
LT
2153
2154 while (rc && pos) {
2155 rc = established_get_next(seq, rc);
2156 --pos;
7174259e 2157 }
1da177e4
LT
2158 return rc;
2159}
2160
2161static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2162{
2163 void *rc;
5799de0b 2164 struct tcp_iter_state *st = seq->private;
1da177e4 2165
1da177e4
LT
2166 st->state = TCP_SEQ_STATE_LISTENING;
2167 rc = listening_get_idx(seq, &pos);
2168
2169 if (!rc) {
1da177e4
LT
2170 st->state = TCP_SEQ_STATE_ESTABLISHED;
2171 rc = established_get_idx(seq, pos);
2172 }
2173
2174 return rc;
2175}
2176
a8b690f9
TH
2177static void *tcp_seek_last_pos(struct seq_file *seq)
2178{
2179 struct tcp_iter_state *st = seq->private;
2180 int offset = st->offset;
2181 int orig_num = st->num;
2182 void *rc = NULL;
2183
2184 switch (st->state) {
2185 case TCP_SEQ_STATE_OPENREQ:
2186 case TCP_SEQ_STATE_LISTENING:
2187 if (st->bucket >= INET_LHTABLE_SIZE)
2188 break;
2189 st->state = TCP_SEQ_STATE_LISTENING;
2190 rc = listening_get_next(seq, NULL);
2191 while (offset-- && rc)
2192 rc = listening_get_next(seq, rc);
2193 if (rc)
2194 break;
2195 st->bucket = 0;
05dbc7b5 2196 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2197 /* Fallthrough */
2198 case TCP_SEQ_STATE_ESTABLISHED:
a8b690f9
TH
2199 if (st->bucket > tcp_hashinfo.ehash_mask)
2200 break;
2201 rc = established_get_first(seq);
2202 while (offset-- && rc)
2203 rc = established_get_next(seq, rc);
2204 }
2205
2206 st->num = orig_num;
2207
2208 return rc;
2209}
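
/* Why this exists (illustrative note): seq_file restarts the ->start()
 * callback for every read() chunk.  Without the cached st->last_pos,
 * st->bucket and st->offset, dumping a machine with many sockets through
 * a small userspace buffer would rescan the hash tables from the top on
 * each chunk, making the dump quadratic.
 */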
2210
1da177e4
LT
2211static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2212{
5799de0b 2213 struct tcp_iter_state *st = seq->private;
a8b690f9
TH
2214 void *rc;
2215
2216 if (*pos && *pos == st->last_pos) {
2217 rc = tcp_seek_last_pos(seq);
2218 if (rc)
2219 goto out;
2220 }
2221
1da177e4
LT
2222 st->state = TCP_SEQ_STATE_LISTENING;
2223 st->num = 0;
a8b690f9
TH
2224 st->bucket = 0;
2225 st->offset = 0;
2226 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2227
2228out:
2229 st->last_pos = *pos;
2230 return rc;
1da177e4
LT
2231}
2232
2233static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2234{
a8b690f9 2235 struct tcp_iter_state *st = seq->private;
1da177e4 2236 void *rc = NULL;
1da177e4
LT
2237
2238 if (v == SEQ_START_TOKEN) {
2239 rc = tcp_get_idx(seq, 0);
2240 goto out;
2241 }
1da177e4
LT
2242
2243 switch (st->state) {
2244 case TCP_SEQ_STATE_OPENREQ:
2245 case TCP_SEQ_STATE_LISTENING:
2246 rc = listening_get_next(seq, v);
2247 if (!rc) {
1da177e4 2248 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2249 st->bucket = 0;
2250 st->offset = 0;
1da177e4
LT
2251 rc = established_get_first(seq);
2252 }
2253 break;
2254 case TCP_SEQ_STATE_ESTABLISHED:
1da177e4
LT
2255 rc = established_get_next(seq, v);
2256 break;
2257 }
2258out:
2259 ++*pos;
a8b690f9 2260 st->last_pos = *pos;
1da177e4
LT
2261 return rc;
2262}
2263
2264static void tcp_seq_stop(struct seq_file *seq, void *v)
2265{
5799de0b 2266 struct tcp_iter_state *st = seq->private;
1da177e4
LT
2267
2268 switch (st->state) {
2269 case TCP_SEQ_STATE_OPENREQ:
2270 if (v) {
463c84b9
ACM
2271 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2272 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4
LT
2273 }
2274 case TCP_SEQ_STATE_LISTENING:
2275 if (v != SEQ_START_TOKEN)
5caea4ea 2276 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
1da177e4 2277 break;
1da177e4
LT
2278 case TCP_SEQ_STATE_ESTABLISHED:
2279 if (v)
9db66bdc 2280 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2281 break;
2282 }
2283}
2284
73cb88ec 2285int tcp_seq_open(struct inode *inode, struct file *file)
1da177e4 2286{
d9dda78b 2287 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
1da177e4 2288 struct tcp_iter_state *s;
52d6f3f1 2289 int err;
1da177e4 2290
52d6f3f1
DL
2291 err = seq_open_net(inode, file, &afinfo->seq_ops,
2292 sizeof(struct tcp_iter_state));
2293 if (err < 0)
2294 return err;
f40c8174 2295
52d6f3f1 2296 s = ((struct seq_file *)file->private_data)->private;
1da177e4 2297 s->family = afinfo->family;
a8b690f9 2298 s->last_pos = 0;
f40c8174
DL
2299 return 0;
2300}
73cb88ec 2301EXPORT_SYMBOL(tcp_seq_open);
f40c8174 2302
6f8b13bc 2303int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4
LT
2304{
2305 int rc = 0;
2306 struct proc_dir_entry *p;
2307
9427c4b3
DL
2308 afinfo->seq_ops.start = tcp_seq_start;
2309 afinfo->seq_ops.next = tcp_seq_next;
2310 afinfo->seq_ops.stop = tcp_seq_stop;
2311
84841c3c 2312 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
73cb88ec 2313 afinfo->seq_fops, afinfo);
84841c3c 2314 if (!p)
1da177e4
LT
2315 rc = -ENOMEM;
2316 return rc;
2317}
4bc2f18b 2318EXPORT_SYMBOL(tcp_proc_register);
1da177e4 2319
6f8b13bc 2320void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4 2321{
ece31ffd 2322 remove_proc_entry(afinfo->name, net->proc_net);
1da177e4 2323}
4bc2f18b 2324EXPORT_SYMBOL(tcp_proc_unregister);
1da177e4 2325
cf533ea5 2326static void get_openreq4(const struct sock *sk, const struct request_sock *req,
652586df 2327 struct seq_file *f, int i, kuid_t uid)
1da177e4 2328{
2e6599cb 2329 const struct inet_request_sock *ireq = inet_rsk(req);
a399a805 2330 long delta = req->expires - jiffies;
1da177e4 2331
5e659e4c 2332 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
652586df 2333 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
1da177e4 2334 i,
634fb979 2335 ireq->ir_loc_addr,
c720c7e8 2336 ntohs(inet_sk(sk)->inet_sport),
634fb979
ED
2337 ireq->ir_rmt_addr,
2338 ntohs(ireq->ir_rmt_port),
1da177e4
LT
2339 TCP_SYN_RECV,
2340 0, 0, /* could print option size, but that is af dependent. */
2341 1, /* timers active (only the expire timer) */
a399a805 2342 jiffies_delta_to_clock_t(delta),
e6c022a4 2343 req->num_timeout,
a7cb5a49 2344 from_kuid_munged(seq_user_ns(f), uid),
1da177e4
LT
2345		0, /* non-standard timer */
2346 0, /* open_requests have no inode */
2347 atomic_read(&sk->sk_refcnt),
652586df 2348 req);
1da177e4
LT
2349}
2350
652586df 2351static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
1da177e4
LT
2352{
2353 int timer_active;
2354 unsigned long timer_expires;
cf533ea5 2355 const struct tcp_sock *tp = tcp_sk(sk);
cf4c6bf8 2356 const struct inet_connection_sock *icsk = inet_csk(sk);
cf533ea5 2357 const struct inet_sock *inet = inet_sk(sk);
168a8f58 2358 struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
c720c7e8
ED
2359 __be32 dest = inet->inet_daddr;
2360 __be32 src = inet->inet_rcv_saddr;
2361 __u16 destp = ntohs(inet->inet_dport);
2362 __u16 srcp = ntohs(inet->inet_sport);
49d09007 2363 int rx_queue;
1da177e4 2364
6ba8a3b1
ND
2365 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2366 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2367 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1da177e4 2368 timer_active = 1;
463c84b9
ACM
2369 timer_expires = icsk->icsk_timeout;
2370 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2371 timer_active = 4;
463c84b9 2372 timer_expires = icsk->icsk_timeout;
cf4c6bf8 2373 } else if (timer_pending(&sk->sk_timer)) {
1da177e4 2374 timer_active = 2;
cf4c6bf8 2375 timer_expires = sk->sk_timer.expires;
1da177e4
LT
2376 } else {
2377 timer_active = 0;
2378 timer_expires = jiffies;
2379 }
2380
49d09007
ED
2381 if (sk->sk_state == TCP_LISTEN)
2382 rx_queue = sk->sk_ack_backlog;
2383 else
2384 /*
2385		 * Because we don't lock the socket, we might find a transient negative value.
2386 */
2387 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2388
5e659e4c 2389 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
652586df 2390 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
cf4c6bf8 2391 i, src, srcp, dest, destp, sk->sk_state,
47da8ee6 2392 tp->write_seq - tp->snd_una,
49d09007 2393 rx_queue,
1da177e4 2394 timer_active,
a399a805 2395 jiffies_delta_to_clock_t(timer_expires - jiffies),
463c84b9 2396 icsk->icsk_retransmits,
a7cb5a49 2397 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
6687e988 2398 icsk->icsk_probes_out,
cf4c6bf8
IJ
2399 sock_i_ino(sk),
2400 atomic_read(&sk->sk_refcnt), sk,
7be87351
SH
2401 jiffies_to_clock_t(icsk->icsk_rto),
2402 jiffies_to_clock_t(icsk->icsk_ack.ato),
463c84b9 2403 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1da177e4 2404 tp->snd_cwnd,
168a8f58
JC
2405 sk->sk_state == TCP_LISTEN ?
2406 (fastopenq ? fastopenq->max_qlen : 0) :
652586df 2407 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
1da177e4
LT
2408}
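
/* Output sketch: together with the header printed by tcp4_seq_show()
 * below, a listening socket on 127.0.0.1:631 would be rendered roughly
 * as follows (field values fabricated for illustration):
 *
 *  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
 *   0: 0100007F:0277 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 12345 1 ffff880036c21c00 100 0 0 10 -1
 */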
2409
cf533ea5 2410static void get_timewait4_sock(const struct inet_timewait_sock *tw,
652586df 2411 struct seq_file *f, int i)
1da177e4 2412{
23f33c2d 2413 __be32 dest, src;
1da177e4 2414 __u16 destp, srcp;
e2a1d3e4 2415 s32 delta = tw->tw_ttd - inet_tw_time_stamp();
1da177e4
LT
2416
2417 dest = tw->tw_daddr;
2418 src = tw->tw_rcv_saddr;
2419 destp = ntohs(tw->tw_dport);
2420 srcp = ntohs(tw->tw_sport);
2421
5e659e4c 2422 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
652586df 2423 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
1da177e4 2424 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
a399a805 2425 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
652586df 2426 atomic_read(&tw->tw_refcnt), tw);
1da177e4
LT
2427}
2428
2429#define TMPSZ 150
2430
2431static int tcp4_seq_show(struct seq_file *seq, void *v)
2432{
5799de0b 2433 struct tcp_iter_state *st;
05dbc7b5 2434 struct sock *sk = v;
1da177e4 2435
652586df 2436 seq_setwidth(seq, TMPSZ - 1);
1da177e4 2437 if (v == SEQ_START_TOKEN) {
652586df 2438 seq_puts(seq, " sl local_address rem_address st tx_queue "
1da177e4
LT
2439 "rx_queue tr tm->when retrnsmt uid timeout "
2440 "inode");
2441 goto out;
2442 }
2443 st = seq->private;
2444
2445 switch (st->state) {
2446 case TCP_SEQ_STATE_LISTENING:
2447 case TCP_SEQ_STATE_ESTABLISHED:
05dbc7b5 2448 if (sk->sk_state == TCP_TIME_WAIT)
652586df 2449 get_timewait4_sock(v, seq, st->num);
05dbc7b5 2450 else
652586df 2451 get_tcp4_sock(v, seq, st->num);
1da177e4
LT
2452 break;
2453 case TCP_SEQ_STATE_OPENREQ:
652586df 2454 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid);
1da177e4
LT
2455 break;
2456 }
1da177e4 2457out:
652586df 2458 seq_pad(seq, '\n');
1da177e4
LT
2459 return 0;
2460}
2461
73cb88ec
AV
2462static const struct file_operations tcp_afinfo_seq_fops = {
2463 .owner = THIS_MODULE,
2464 .open = tcp_seq_open,
2465 .read = seq_read,
2466 .llseek = seq_lseek,
2467 .release = seq_release_net
2468};
2469
1da177e4 2470static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1da177e4
LT
2471 .name = "tcp",
2472 .family = AF_INET,
73cb88ec 2473 .seq_fops = &tcp_afinfo_seq_fops,
9427c4b3
DL
2474 .seq_ops = {
2475 .show = tcp4_seq_show,
2476 },
1da177e4
LT
2477};
2478
2c8c1e72 2479static int __net_init tcp4_proc_init_net(struct net *net)
757764f6
PE
2480{
2481 return tcp_proc_register(net, &tcp4_seq_afinfo);
2482}
2483
2c8c1e72 2484static void __net_exit tcp4_proc_exit_net(struct net *net)
757764f6
PE
2485{
2486 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2487}
2488
2489static struct pernet_operations tcp4_net_ops = {
2490 .init = tcp4_proc_init_net,
2491 .exit = tcp4_proc_exit_net,
2492};
2493
1da177e4
LT
2494int __init tcp4_proc_init(void)
2495{
757764f6 2496 return register_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2497}
2498
2499void tcp4_proc_exit(void)
2500{
757764f6 2501 unregister_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2502}
2503#endif /* CONFIG_PROC_FS */
2504
2505struct proto tcp_prot = {
2506 .name = "TCP",
2507 .owner = THIS_MODULE,
2508 .close = tcp_close,
2509 .connect = tcp_v4_connect,
2510 .disconnect = tcp_disconnect,
463c84b9 2511 .accept = inet_csk_accept,
1da177e4
LT
2512 .ioctl = tcp_ioctl,
2513 .init = tcp_v4_init_sock,
2514 .destroy = tcp_v4_destroy_sock,
2515 .shutdown = tcp_shutdown,
2516 .setsockopt = tcp_setsockopt,
2517 .getsockopt = tcp_getsockopt,
1da177e4 2518 .recvmsg = tcp_recvmsg,
7ba42910
CG
2519 .sendmsg = tcp_sendmsg,
2520 .sendpage = tcp_sendpage,
1da177e4 2521 .backlog_rcv = tcp_v4_do_rcv,
46d3ceab 2522 .release_cb = tcp_release_cb,
563d34d0 2523 .mtu_reduced = tcp_v4_mtu_reduced,
ab1e0a13
ACM
2524 .hash = inet_hash,
2525 .unhash = inet_unhash,
2526 .get_port = inet_csk_get_port,
1da177e4 2527 .enter_memory_pressure = tcp_enter_memory_pressure,
c9bee3b7 2528 .stream_memory_free = tcp_stream_memory_free,
1da177e4 2529 .sockets_allocated = &tcp_sockets_allocated,
0a5578cf 2530 .orphan_count = &tcp_orphan_count,
1da177e4
LT
2531 .memory_allocated = &tcp_memory_allocated,
2532 .memory_pressure = &tcp_memory_pressure,
a4fe34bf 2533 .sysctl_mem = sysctl_tcp_mem,
1da177e4
LT
2534 .sysctl_wmem = sysctl_tcp_wmem,
2535 .sysctl_rmem = sysctl_tcp_rmem,
2536 .max_header = MAX_TCP_HEADER,
2537 .obj_size = sizeof(struct tcp_sock),
3ab5aee7 2538 .slab_flags = SLAB_DESTROY_BY_RCU,
6d6ee43e 2539 .twsk_prot = &tcp_timewait_sock_ops,
60236fdd 2540 .rsk_prot = &tcp_request_sock_ops,
39d8cda7 2541 .h.hashinfo = &tcp_hashinfo,
7ba42910 2542 .no_autobind = true,
543d9cfe
ACM
2543#ifdef CONFIG_COMPAT
2544 .compat_setsockopt = compat_tcp_setsockopt,
2545 .compat_getsockopt = compat_tcp_getsockopt,
2546#endif
c255a458 2547#ifdef CONFIG_MEMCG_KMEM
d1a4c0b3
GC
2548 .init_cgroup = tcp_init_cgroup,
2549 .destroy_cgroup = tcp_destroy_cgroup,
2550 .proto_cgroup = tcp_proto_cgroup,
2551#endif
1da177e4 2552};
4bc2f18b 2553EXPORT_SYMBOL(tcp_prot);
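
/* Registration sketch (illustrative): tcp_prot is plugged into the
 * socket layer from af_inet.c, roughly:
 *
 *	static struct inet_protosw inetsw_array[] = {
 *		{
 *			.type     = SOCK_STREAM,
 *			.protocol = IPPROTO_TCP,
 *			.prot     = &tcp_prot,
 *			.ops      = &inet_stream_ops,
 *		},
 *		...
 *	};
 *
 * so socket(AF_INET, SOCK_STREAM, 0) resolves its method table to the
 * entries above.
 */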
1da177e4 2554
046ee902
DL
2555static int __net_init tcp_sk_init(struct net *net)
2556{
5d134f1c 2557 net->ipv4.sysctl_tcp_ecn = 2;
be9f4a44 2558 return 0;
046ee902
DL
2559}
2560
2561static void __net_exit tcp_sk_exit(struct net *net)
2562{
b099ce26
EB
2563}
2564
2565static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2566{
2567 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
046ee902
DL
2568}
2569
2570static struct pernet_operations __net_initdata tcp_sk_ops = {
b099ce26
EB
2571 .init = tcp_sk_init,
2572 .exit = tcp_sk_exit,
2573 .exit_batch = tcp_sk_exit_batch,
046ee902
DL
2574};
2575
9b0f976f 2576void __init tcp_v4_init(void)
1da177e4 2577{
5caea4ea 2578 inet_hashinfo_init(&tcp_hashinfo);
6a1b3054 2579 if (register_pernet_subsys(&tcp_sk_ops))
1da177e4 2580 panic("Failed to create the TCP control socket.\n");
1da177e4 2581}