netfilter: conntrack: avoid using ->error callback if possible
[linux-2.6-block.git] / net / netfilter / nf_conntrack_proto_tcp.c
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 #include <linux/types.h>
12 #include <linux/timer.h>
13 #include <linux/module.h>
14 #include <linux/in.h>
15 #include <linux/tcp.h>
16 #include <linux/spinlock.h>
17 #include <linux/skbuff.h>
18 #include <linux/ipv6.h>
19 #include <net/ip6_checksum.h>
20 #include <asm/unaligned.h>
21
22 #include <net/tcp.h>
23
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/netfilter_ipv6.h>
27 #include <net/netfilter/nf_conntrack.h>
28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 #include <net/netfilter/nf_conntrack_ecache.h>
30 #include <net/netfilter/nf_conntrack_seqadj.h>
31 #include <net/netfilter/nf_conntrack_synproxy.h>
32 #include <net/netfilter/nf_conntrack_timeout.h>
33 #include <net/netfilter/nf_log.h>
34 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
35 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
36
37 /* "Be conservative in what you do,
38     be liberal in what you accept from others."
39     If it's non-zero, we mark only out of window RST segments as INVALID. */
40 static int nf_ct_tcp_be_liberal __read_mostly = 0;
41
42 /* If it is set to zero, we disable picking up already established
43    connections. */
44 static int nf_ct_tcp_loose __read_mostly = 1;
45
46 /* Max number of the retransmitted packets without receiving an (acceptable)
47    ACK from the destination. If this number is reached, a shorter timer
48    will be started. */
49 static int nf_ct_tcp_max_retrans __read_mostly = 3;
50
51   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
52      closely.  They're more complex. --RR */
53
54 static const char *const tcp_conntrack_names[] = {
55         "NONE",
56         "SYN_SENT",
57         "SYN_RECV",
58         "ESTABLISHED",
59         "FIN_WAIT",
60         "CLOSE_WAIT",
61         "LAST_ACK",
62         "TIME_WAIT",
63         "CLOSE",
64         "SYN_SENT2",
65 };
66
67 #define SECS * HZ
68 #define MINS * 60 SECS
69 #define HOURS * 60 MINS
70 #define DAYS * 24 HOURS
71
72 static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = {
73         [TCP_CONNTRACK_SYN_SENT]        = 2 MINS,
74         [TCP_CONNTRACK_SYN_RECV]        = 60 SECS,
75         [TCP_CONNTRACK_ESTABLISHED]     = 5 DAYS,
76         [TCP_CONNTRACK_FIN_WAIT]        = 2 MINS,
77         [TCP_CONNTRACK_CLOSE_WAIT]      = 60 SECS,
78         [TCP_CONNTRACK_LAST_ACK]        = 30 SECS,
79         [TCP_CONNTRACK_TIME_WAIT]       = 2 MINS,
80         [TCP_CONNTRACK_CLOSE]           = 10 SECS,
81         [TCP_CONNTRACK_SYN_SENT2]       = 2 MINS,
82 /* RFC1122 says the R2 limit should be at least 100 seconds.
83    Linux uses 15 packets as limit, which corresponds
84    to ~13-30min depending on RTO. */
85         [TCP_CONNTRACK_RETRANS]         = 5 MINS,
86         [TCP_CONNTRACK_UNACK]           = 5 MINS,
87 };
88
/*
 * Three-letter shorthands for the conntrack states, used to keep the
 * state transition table readable.
 */
#define sNO	TCP_CONNTRACK_NONE
#define sSS	TCP_CONNTRACK_SYN_SENT
#define sSR	TCP_CONNTRACK_SYN_RECV
#define sES	TCP_CONNTRACK_ESTABLISHED
#define sFW	TCP_CONNTRACK_FIN_WAIT
#define sCW	TCP_CONNTRACK_CLOSE_WAIT
#define sLA	TCP_CONNTRACK_LAST_ACK
#define sTW	TCP_CONNTRACK_TIME_WAIT
#define sCL	TCP_CONNTRACK_CLOSE
#define sS2	TCP_CONNTRACK_SYN_SENT2
/* Table markers for an invalid and for an ignored packet, respectively. */
#define sIV	TCP_CONNTRACK_MAX
#define sIG	TCP_CONNTRACK_IGNORE
101
/*
 * Which of the RST/SYN/FIN/ACK flags are set in a segment.  The value
 * selects the row of the state transition table.
 */
enum tcp_bit_set {
	TCP_SYN_SET = 0,	/* SYN without ACK */
	TCP_SYNACK_SET,		/* SYN together with ACK */
	TCP_FIN_SET,		/* FIN */
	TCP_ACK_SET,		/* ACK only */
	TCP_RST_SET,		/* RST */
	TCP_NONE_SET,		/* none of the above */
};
111
112 /*
113  * The TCP state transition table needs a few words...
114  *
115  * We are the man in the middle. All the packets go through us
116  * but might get lost in transit to the destination.
117  * It is assumed that the destinations can't receive segments
118  * we haven't seen.
119  *
120  * The checked segment is in window, but our windows are *not*
121  * equivalent with the ones of the sender/receiver. We always
122  * try to guess the state of the current sender.
123  *
124  * The meanings of the states are:
125  *
126  * NONE:        initial state
127  * SYN_SENT:    SYN-only packet seen
128  * SYN_SENT2:   SYN-only packet seen from reply dir, simultaneous open
129  * SYN_RECV:    SYN-ACK packet seen
130  * ESTABLISHED: ACK packet seen
131  * FIN_WAIT:    FIN packet seen
132  * CLOSE_WAIT:  ACK seen (after FIN)
133  * LAST_ACK:    FIN seen (after FIN)
134  * TIME_WAIT:   last ACK seen
135  * CLOSE:       closed connection (RST)
136  *
137  * Packets marked as IGNORED (sIG):
138  *      if they may be either invalid or valid
139  *      and the receiver may send back a connection
140  *      closing RST or a SYN/ACK.
141  *
142  * Packets marked as INVALID (sIV):
143  *      if we regard them as truly invalid packets
144  */
145 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
146         {
147 /* ORIGINAL */
148 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
149 /*syn*/    { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
150 /*
151  *      sNO -> sSS      Initialize a new connection
152  *      sSS -> sSS      Retransmitted SYN
153  *      sS2 -> sS2      Late retransmitted SYN
154  *      sSR -> sIG
155  *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
156  *                      are errors. Receiver will reply with RST
157  *                      and close the connection.
158  *                      Or we are not in sync and hold a dead connection.
159  *      sFW -> sIG
160  *      sCW -> sIG
161  *      sLA -> sIG
162  *      sTW -> sSS      Reopened connection (RFC 1122).
163  *      sCL -> sSS
164  */
165 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
166 /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
167 /*
168  *      sNO -> sIV      Too late and no reason to do anything
169  *      sSS -> sIV      Client can't send SYN and then SYN/ACK
170  *      sS2 -> sSR      SYN/ACK sent to SYN2 in simultaneous open
171  *      sSR -> sSR      Late retransmitted SYN/ACK in simultaneous open
172  *      sES -> sIV      Invalid SYN/ACK packets sent by the client
173  *      sFW -> sIV
174  *      sCW -> sIV
175  *      sLA -> sIV
176  *      sTW -> sIV
177  *      sCL -> sIV
178  */
179 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
180 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
181 /*
182  *      sNO -> sIV      Too late and no reason to do anything...
183  *      sSS -> sIV      Client migth not send FIN in this state:
184  *                      we enforce waiting for a SYN/ACK reply first.
185  *      sS2 -> sIV
186  *      sSR -> sFW      Close started.
187  *      sES -> sFW
188  *      sFW -> sLA      FIN seen in both directions, waiting for
189  *                      the last ACK.
190  *                      Migth be a retransmitted FIN as well...
191  *      sCW -> sLA
192  *      sLA -> sLA      Retransmitted FIN. Remain in the same state.
193  *      sTW -> sTW
194  *      sCL -> sCL
195  */
196 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
197 /*ack*/    { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
198 /*
199  *      sNO -> sES      Assumed.
200  *      sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
201  *      sS2 -> sIV
202  *      sSR -> sES      Established state is reached.
203  *      sES -> sES      :-)
204  *      sFW -> sCW      Normal close request answered by ACK.
205  *      sCW -> sCW
206  *      sLA -> sTW      Last ACK detected (RFC5961 challenged)
207  *      sTW -> sTW      Retransmitted last ACK. Remain in the same state.
208  *      sCL -> sCL
209  */
210 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
211 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
212 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
213         },
214         {
215 /* REPLY */
216 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
217 /*syn*/    { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
218 /*
219  *      sNO -> sIV      Never reached.
220  *      sSS -> sS2      Simultaneous open
221  *      sS2 -> sS2      Retransmitted simultaneous SYN
222  *      sSR -> sIV      Invalid SYN packets sent by the server
223  *      sES -> sIV
224  *      sFW -> sIV
225  *      sCW -> sIV
226  *      sLA -> sIV
227  *      sTW -> sSS      Reopened connection, but server may have switched role
228  *      sCL -> sIV
229  */
230 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
231 /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
232 /*
233  *      sSS -> sSR      Standard open.
234  *      sS2 -> sSR      Simultaneous open
235  *      sSR -> sIG      Retransmitted SYN/ACK, ignore it.
236  *      sES -> sIG      Late retransmitted SYN/ACK?
237  *      sFW -> sIG      Might be SYN/ACK answering ignored SYN
238  *      sCW -> sIG
239  *      sLA -> sIG
240  *      sTW -> sIG
241  *      sCL -> sIG
242  */
243 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
244 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
245 /*
246  *      sSS -> sIV      Server might not send FIN in this state.
247  *      sS2 -> sIV
248  *      sSR -> sFW      Close started.
249  *      sES -> sFW
250  *      sFW -> sLA      FIN seen in both directions.
251  *      sCW -> sLA
252  *      sLA -> sLA      Retransmitted FIN.
253  *      sTW -> sTW
254  *      sCL -> sCL
255  */
256 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
257 /*ack*/    { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
258 /*
259  *      sSS -> sIG      Might be a half-open connection.
260  *      sS2 -> sIG
261  *      sSR -> sSR      Might answer late resent SYN.
262  *      sES -> sES      :-)
263  *      sFW -> sCW      Normal close request answered by ACK.
264  *      sCW -> sCW
265  *      sLA -> sTW      Last ACK detected (RFC5961 challenged)
266  *      sTW -> sTW      Retransmitted last ACK.
267  *      sCL -> sCL
268  */
269 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
270 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
271 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
272         }
273 };
274
275 static inline struct nf_tcp_net *tcp_pernet(struct net *net)
276 {
277         return &net->ct.nf_ct_proto.tcp;
278 }
279
280 #ifdef CONFIG_NF_CONNTRACK_PROCFS
281 /* Print out the private part of the conntrack. */
282 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
283 {
284         if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
285                 return;
286
287         seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
288 }
289 #endif
290
291 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
292 {
293         if (tcph->rst) return TCP_RST_SET;
294         else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
295         else if (tcph->fin) return TCP_FIN_SET;
296         else if (tcph->ack) return TCP_ACK_SET;
297         else return TCP_NONE_SET;
298 }
299
300 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
301    in IP Filter' by Guido van Rooij.
302
303    http://www.sane.nl/events/sane2000/papers.html
304    http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
305
306    The boundaries and the conditions are changed according to RFC793:
307    the packet must intersect the window (i.e. segments may be
308    after the right or before the left edge) and thus receivers may ACK
309    segments after the right edge of the window.
310
311         td_maxend = max(sack + max(win,1)) seen in reply packets
312         td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
313         td_maxwin += seq + len - sender.td_maxend
314                         if seq + len > sender.td_maxend
315         td_end    = max(seq + len) seen in sent packets
316
317    I.   Upper bound for valid data:     seq <= sender.td_maxend
318    II.  Lower bound for valid data:     seq + len >= sender.td_end - receiver.td_maxwin
319    III. Upper bound for valid (s)ack:   sack <= receiver.td_end
320    IV.  Lower bound for valid (s)ack:   sack >= receiver.td_end - MAXACKWINDOW
321
322    where sack is the highest right edge of sack block found in the packet
323    or ack in the case of packet without SACK option.
324
325    The upper bound limit for a valid (s)ack is not ignored -
326    we don't have to deal with fragments.
327 */
328
329 static inline __u32 segment_seq_plus_len(__u32 seq,
330                                          size_t len,
331                                          unsigned int dataoff,
332                                          const struct tcphdr *tcph)
333 {
334         /* XXX Should I use payload length field in IP/IPv6 header ?
335          * - YK */
336         return (seq + len - dataoff - tcph->doff*4
337                 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
338 }
339
/*
 * Width of the window used for the lower bound of a valid (s)ack: the
 * sender's td_maxwin, but never less than MAXACKWINCONST.
 * Fixme: what about big packets?
 */
#define MAXACKWINCONST			66000
#define MAXACKWINDOW(sender)						\
	((sender)->td_maxwin <= MAXACKWINCONST ? MAXACKWINCONST		\
					       : (sender)->td_maxwin)
345
346 /*
347  * Simplified tcp_parse_options routine from tcp_input.c
348  */
349 static void tcp_options(const struct sk_buff *skb,
350                         unsigned int dataoff,
351                         const struct tcphdr *tcph,
352                         struct ip_ct_tcp_state *state)
353 {
354         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
355         const unsigned char *ptr;
356         int length = (tcph->doff*4) - sizeof(struct tcphdr);
357
358         if (!length)
359                 return;
360
361         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
362                                  length, buff);
363         BUG_ON(ptr == NULL);
364
365         state->td_scale =
366         state->flags = 0;
367
368         while (length > 0) {
369                 int opcode=*ptr++;
370                 int opsize;
371
372                 switch (opcode) {
373                 case TCPOPT_EOL:
374                         return;
375                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
376                         length--;
377                         continue;
378                 default:
379                         if (length < 2)
380                                 return;
381                         opsize=*ptr++;
382                         if (opsize < 2) /* "silly options" */
383                                 return;
384                         if (opsize > length)
385                                 return; /* don't parse partial options */
386
387                         if (opcode == TCPOPT_SACK_PERM
388                             && opsize == TCPOLEN_SACK_PERM)
389                                 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
390                         else if (opcode == TCPOPT_WINDOW
391                                  && opsize == TCPOLEN_WINDOW) {
392                                 state->td_scale = *(u_int8_t *)ptr;
393
394                                 if (state->td_scale > TCP_MAX_WSCALE)
395                                         state->td_scale = TCP_MAX_WSCALE;
396
397                                 state->flags |=
398                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
399                         }
400                         ptr += opsize - 2;
401                         length -= opsize;
402                 }
403         }
404 }
405
406 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
407                      const struct tcphdr *tcph, __u32 *sack)
408 {
409         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
410         const unsigned char *ptr;
411         int length = (tcph->doff*4) - sizeof(struct tcphdr);
412         __u32 tmp;
413
414         if (!length)
415                 return;
416
417         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
418                                  length, buff);
419         BUG_ON(ptr == NULL);
420
421         /* Fast path for timestamp-only option */
422         if (length == TCPOLEN_TSTAMP_ALIGNED
423             && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
424                                        | (TCPOPT_NOP << 16)
425                                        | (TCPOPT_TIMESTAMP << 8)
426                                        | TCPOLEN_TIMESTAMP))
427                 return;
428
429         while (length > 0) {
430                 int opcode = *ptr++;
431                 int opsize, i;
432
433                 switch (opcode) {
434                 case TCPOPT_EOL:
435                         return;
436                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
437                         length--;
438                         continue;
439                 default:
440                         if (length < 2)
441                                 return;
442                         opsize = *ptr++;
443                         if (opsize < 2) /* "silly options" */
444                                 return;
445                         if (opsize > length)
446                                 return; /* don't parse partial options */
447
448                         if (opcode == TCPOPT_SACK
449                             && opsize >= (TCPOLEN_SACK_BASE
450                                           + TCPOLEN_SACK_PERBLOCK)
451                             && !((opsize - TCPOLEN_SACK_BASE)
452                                  % TCPOLEN_SACK_PERBLOCK)) {
453                                 for (i = 0;
454                                      i < (opsize - TCPOLEN_SACK_BASE);
455                                      i += TCPOLEN_SACK_PERBLOCK) {
456                                         tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
457
458                                         if (after(tmp, *sack))
459                                                 *sack = tmp;
460                                 }
461                                 return;
462                         }
463                         ptr += opsize - 2;
464                         length -= opsize;
465                 }
466         }
467 }
468
469 static bool tcp_in_window(const struct nf_conn *ct,
470                           struct ip_ct_tcp *state,
471                           enum ip_conntrack_dir dir,
472                           unsigned int index,
473                           const struct sk_buff *skb,
474                           unsigned int dataoff,
475                           const struct tcphdr *tcph)
476 {
477         struct net *net = nf_ct_net(ct);
478         struct nf_tcp_net *tn = tcp_pernet(net);
479         struct ip_ct_tcp_state *sender = &state->seen[dir];
480         struct ip_ct_tcp_state *receiver = &state->seen[!dir];
481         const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
482         __u32 seq, ack, sack, end, win, swin;
483         s32 receiver_offset;
484         bool res, in_recv_win;
485
486         /*
487          * Get the required data from the packet.
488          */
489         seq = ntohl(tcph->seq);
490         ack = sack = ntohl(tcph->ack_seq);
491         win = ntohs(tcph->window);
492         end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
493
494         if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
495                 tcp_sack(skb, dataoff, tcph, &sack);
496
497         /* Take into account NAT sequence number mangling */
498         receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
499         ack -= receiver_offset;
500         sack -= receiver_offset;
501
502         pr_debug("tcp_in_window: START\n");
503         pr_debug("tcp_in_window: ");
504         nf_ct_dump_tuple(tuple);
505         pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
506                  seq, ack, receiver_offset, sack, receiver_offset, win, end);
507         pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
508                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
509                  sender->td_end, sender->td_maxend, sender->td_maxwin,
510                  sender->td_scale,
511                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
512                  receiver->td_scale);
513
514         if (sender->td_maxwin == 0) {
515                 /*
516                  * Initialize sender data.
517                  */
518                 if (tcph->syn) {
519                         /*
520                          * SYN-ACK in reply to a SYN
521                          * or SYN from reply direction in simultaneous open.
522                          */
523                         sender->td_end =
524                         sender->td_maxend = end;
525                         sender->td_maxwin = (win == 0 ? 1 : win);
526
527                         tcp_options(skb, dataoff, tcph, sender);
528                         /*
529                          * RFC 1323:
530                          * Both sides must send the Window Scale option
531                          * to enable window scaling in either direction.
532                          */
533                         if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
534                               && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
535                                 sender->td_scale =
536                                 receiver->td_scale = 0;
537                         if (!tcph->ack)
538                                 /* Simultaneous open */
539                                 return true;
540                 } else {
541                         /*
542                          * We are in the middle of a connection,
543                          * its history is lost for us.
544                          * Let's try to use the data from the packet.
545                          */
546                         sender->td_end = end;
547                         swin = win << sender->td_scale;
548                         sender->td_maxwin = (swin == 0 ? 1 : swin);
549                         sender->td_maxend = end + sender->td_maxwin;
550                         /*
551                          * We haven't seen traffic in the other direction yet
552                          * but we have to tweak window tracking to pass III
553                          * and IV until that happens.
554                          */
555                         if (receiver->td_maxwin == 0)
556                                 receiver->td_end = receiver->td_maxend = sack;
557                 }
558         } else if (((state->state == TCP_CONNTRACK_SYN_SENT
559                      && dir == IP_CT_DIR_ORIGINAL)
560                    || (state->state == TCP_CONNTRACK_SYN_RECV
561                      && dir == IP_CT_DIR_REPLY))
562                    && after(end, sender->td_end)) {
563                 /*
564                  * RFC 793: "if a TCP is reinitialized ... then it need
565                  * not wait at all; it must only be sure to use sequence
566                  * numbers larger than those recently used."
567                  */
568                 sender->td_end =
569                 sender->td_maxend = end;
570                 sender->td_maxwin = (win == 0 ? 1 : win);
571
572                 tcp_options(skb, dataoff, tcph, sender);
573         }
574
575         if (!(tcph->ack)) {
576                 /*
577                  * If there is no ACK, just pretend it was set and OK.
578                  */
579                 ack = sack = receiver->td_end;
580         } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
581                     (TCP_FLAG_ACK|TCP_FLAG_RST))
582                    && (ack == 0)) {
583                 /*
584                  * Broken TCP stacks, that set ACK in RST packets as well
585                  * with zero ack value.
586                  */
587                 ack = sack = receiver->td_end;
588         }
589
590         if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
591                 /*
592                  * RST sent answering SYN.
593                  */
594                 seq = end = sender->td_end;
595
596         pr_debug("tcp_in_window: ");
597         nf_ct_dump_tuple(tuple);
598         pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
599                  seq, ack, receiver_offset, sack, receiver_offset, win, end);
600         pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
601                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
602                  sender->td_end, sender->td_maxend, sender->td_maxwin,
603                  sender->td_scale,
604                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
605                  receiver->td_scale);
606
607         /* Is the ending sequence in the receive window (if available)? */
608         in_recv_win = !receiver->td_maxwin ||
609                       after(end, sender->td_end - receiver->td_maxwin - 1);
610
611         pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
612                  before(seq, sender->td_maxend + 1),
613                  (in_recv_win ? 1 : 0),
614                  before(sack, receiver->td_end + 1),
615                  after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
616
617         if (before(seq, sender->td_maxend + 1) &&
618             in_recv_win &&
619             before(sack, receiver->td_end + 1) &&
620             after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
621                 /*
622                  * Take into account window scaling (RFC 1323).
623                  */
624                 if (!tcph->syn)
625                         win <<= sender->td_scale;
626
627                 /*
628                  * Update sender data.
629                  */
630                 swin = win + (sack - ack);
631                 if (sender->td_maxwin < swin)
632                         sender->td_maxwin = swin;
633                 if (after(end, sender->td_end)) {
634                         sender->td_end = end;
635                         sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
636                 }
637                 if (tcph->ack) {
638                         if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
639                                 sender->td_maxack = ack;
640                                 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
641                         } else if (after(ack, sender->td_maxack))
642                                 sender->td_maxack = ack;
643                 }
644
645                 /*
646                  * Update receiver data.
647                  */
648                 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
649                         receiver->td_maxwin += end - sender->td_maxend;
650                 if (after(sack + win, receiver->td_maxend - 1)) {
651                         receiver->td_maxend = sack + win;
652                         if (win == 0)
653                                 receiver->td_maxend++;
654                 }
655                 if (ack == receiver->td_end)
656                         receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
657
658                 /*
659                  * Check retransmissions.
660                  */
661                 if (index == TCP_ACK_SET) {
662                         if (state->last_dir == dir
663                             && state->last_seq == seq
664                             && state->last_ack == ack
665                             && state->last_end == end
666                             && state->last_win == win)
667                                 state->retrans++;
668                         else {
669                                 state->last_dir = dir;
670                                 state->last_seq = seq;
671                                 state->last_ack = ack;
672                                 state->last_end = end;
673                                 state->last_win = win;
674                                 state->retrans = 0;
675                         }
676                 }
677                 res = true;
678         } else {
679                 res = false;
680                 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
681                     tn->tcp_be_liberal)
682                         res = true;
683                 if (!res) {
684                         nf_ct_l4proto_log_invalid(skb, ct,
685                         "%s",
686                         before(seq, sender->td_maxend + 1) ?
687                         in_recv_win ?
688                         before(sack, receiver->td_end + 1) ?
689                         after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
690                         : "ACK is under the lower bound (possible overly delayed ACK)"
691                         : "ACK is over the upper bound (ACKed data not seen yet)"
692                         : "SEQ is under the lower bound (already ACKed data retransmitted)"
693                         : "SEQ is over the upper bound (over the window of the receiver)");
694                 }
695         }
696
697         pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
698                  "receiver end=%u maxend=%u maxwin=%u\n",
699                  res, sender->td_end, sender->td_maxend, sender->td_maxwin,
700                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
701
702         return res;
703 }
704
705 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */
706 static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
707                                  TCPHDR_URG) + 1] =
708 {
709         [TCPHDR_SYN]                            = 1,
710         [TCPHDR_SYN|TCPHDR_URG]                 = 1,
711         [TCPHDR_SYN|TCPHDR_ACK]                 = 1,
712         [TCPHDR_RST]                            = 1,
713         [TCPHDR_RST|TCPHDR_ACK]                 = 1,
714         [TCPHDR_FIN|TCPHDR_ACK]                 = 1,
715         [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]      = 1,
716         [TCPHDR_ACK]                            = 1,
717         [TCPHDR_ACK|TCPHDR_URG]                 = 1,
718 };
719
720 static void tcp_error_log(const struct sk_buff *skb,
721                           const struct nf_hook_state *state,
722                           const char *msg)
723 {
724         nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_TCP, "%s", msg);
725 }
726
727 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
728 static bool tcp_error(const struct tcphdr *th,
729                       struct sk_buff *skb,
730                       unsigned int dataoff,
731                       const struct nf_hook_state *state)
732 {
733         unsigned int tcplen = skb->len - dataoff;
734         u8 tcpflags;
735
736         /* Not whole TCP header or malformed packet */
737         if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
738                 tcp_error_log(skb, state, "truncated packet");
739                 return true;
740         }
741
742         /* Checksum invalid? Ignore.
743          * We skip checking packets on the outgoing path
744          * because the checksum is assumed to be correct.
745          */
746         /* FIXME: Source route IP option packets --RR */
747         if (state->net->ct.sysctl_checksum &&
748             state->hook == NF_INET_PRE_ROUTING &&
749             nf_checksum(skb, state->hook, dataoff, IPPROTO_TCP, state->pf)) {
750                 tcp_error_log(skb, state, "bad checksum");
751                 return true;
752         }
753
754         /* Check TCP flags. */
755         tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
756         if (!tcp_valid_flags[tcpflags]) {
757                 tcp_error_log(skb, state, "invalid tcp flag combination");
758                 return true;
759         }
760
761         return false;
762 }
763
764 static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
765                              unsigned int dataoff,
766                              const struct tcphdr *th)
767 {
768         enum tcp_conntrack new_state;
769         struct net *net = nf_ct_net(ct);
770         const struct nf_tcp_net *tn = tcp_pernet(net);
771         const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
772         const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
773
774         /* Don't need lock here: this conntrack not in circulation yet */
775         new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
776
777         /* Invalid: delete conntrack */
778         if (new_state >= TCP_CONNTRACK_MAX) {
779                 pr_debug("nf_ct_tcp: invalid new deleting.\n");
780                 return false;
781         }
782
783         if (new_state == TCP_CONNTRACK_SYN_SENT) {
784                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
785                 /* SYN packet */
786                 ct->proto.tcp.seen[0].td_end =
787                         segment_seq_plus_len(ntohl(th->seq), skb->len,
788                                              dataoff, th);
789                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
790                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
791                         ct->proto.tcp.seen[0].td_maxwin = 1;
792                 ct->proto.tcp.seen[0].td_maxend =
793                         ct->proto.tcp.seen[0].td_end;
794
795                 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
796         } else if (tn->tcp_loose == 0) {
797                 /* Don't try to pick up connections. */
798                 return false;
799         } else {
800                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
801                 /*
802                  * We are in the middle of a connection,
803                  * its history is lost for us.
804                  * Let's try to use the data from the packet.
805                  */
806                 ct->proto.tcp.seen[0].td_end =
807                         segment_seq_plus_len(ntohl(th->seq), skb->len,
808                                              dataoff, th);
809                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
810                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
811                         ct->proto.tcp.seen[0].td_maxwin = 1;
812                 ct->proto.tcp.seen[0].td_maxend =
813                         ct->proto.tcp.seen[0].td_end +
814                         ct->proto.tcp.seen[0].td_maxwin;
815
816                 /* We assume SACK and liberal window checking to handle
817                  * window scaling */
818                 ct->proto.tcp.seen[0].flags =
819                 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
820                                               IP_CT_TCP_FLAG_BE_LIBERAL;
821         }
822
823         /* tcp_packet will set them */
824         ct->proto.tcp.last_index = TCP_NONE_SET;
825
826         pr_debug("%s: sender end=%u maxend=%u maxwin=%u scale=%i "
827                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
828                  __func__,
829                  sender->td_end, sender->td_maxend, sender->td_maxwin,
830                  sender->td_scale,
831                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
832                  receiver->td_scale);
833         return true;
834 }
835
836 /* Returns verdict for packet, or -1 for invalid.
     *
     * Validates the TCP header with tcp_error(), lets tcp_new() initialise the
     * tracking state of conntracks that are not confirmed yet and then, under
     * ct->lock, looks up the next state in tcp_conntracks[dir][index][old_state],
     * handles the special-case transitions, runs the window check and picks the
     * timeout used to refresh the conntrack.
     *
     * Invalid packets are reported as -NF_ACCEPT.  The reopen path in the
     * SYN_SENT case returns -NF_REPEAT when nf_ct_kill() succeeds and NF_DROP
     * otherwise.  Every other path returns NF_ACCEPT.
     */
837 static int tcp_packet(struct nf_conn *ct,
838                       struct sk_buff *skb,
839                       unsigned int dataoff,
840                       enum ip_conntrack_info ctinfo,
841                       const struct nf_hook_state *state)
842 {
843         struct net *net = nf_ct_net(ct);
844         struct nf_tcp_net *tn = tcp_pernet(net);
845         struct nf_conntrack_tuple *tuple;
846         enum tcp_conntrack new_state, old_state;
847         unsigned int index, *timeouts;
848         enum ip_conntrack_dir dir;
849         const struct tcphdr *th;
850         struct tcphdr _tcph;
851         unsigned long timeout;
852
853         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
854         if (th == NULL)
855                 return -NF_ACCEPT;
856
857         if (tcp_error(th, skb, dataoff, state))
858                 return -NF_ACCEPT;
859
            /* Unconfirmed conntrack: seed the TCP state from this packet. If
             * tcp_new() refuses the packet it is treated as invalid.
             */
860         if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th))
861                 return -NF_ACCEPT;
862
            /* The state lookup and all ct->proto.tcp updates below run under
             * ct->lock; every return path has to drop it first.
             */
863         spin_lock_bh(&ct->lock);
864         old_state = ct->proto.tcp.state;
865         dir = CTINFO2DIR(ctinfo);
866         index = get_conntrack_index(th);
867         new_state = tcp_conntracks[dir][index][old_state];
868         tuple = &ct->tuplehash[dir].tuple;
869
870         switch (new_state) {
871         case TCP_CONNTRACK_SYN_SENT:
872                 if (old_state < TCP_CONNTRACK_TIME_WAIT)
873                         break;
874                 /* RFC 1122: "When a connection is closed actively,
875                  * it MUST linger in TIME-WAIT state for a time 2xMSL
876                  * (Maximum Segment Lifetime). However, it MAY accept
877                  * a new SYN from the remote TCP to reopen the connection
878                  * directly from TIME-WAIT state, if..."
879                  * We ignore the conditions because we are in the
880                  * TIME-WAIT state anyway.
881                  *
882                  * Handle aborted connections: we and the server
883                  * think there is an existing connection but the client
884                  * aborts it and starts a new one.
885                  */
886                 if (((ct->proto.tcp.seen[dir].flags
887                       | ct->proto.tcp.seen[!dir].flags)
888                      & IP_CT_TCP_FLAG_CLOSE_INIT)
889                     || (ct->proto.tcp.last_dir == dir
890                         && ct->proto.tcp.last_index == TCP_RST_SET)) {
891                         /* Attempt to reopen a closed/aborted connection.
892                          * Delete this connection and look up again. */
893                         spin_unlock_bh(&ct->lock);
894
895                         /* Only repeat if we can actually remove the timer.
896                          * Destruction may already be in progress in process
897                          * context and we must give it a chance to terminate.
898                          */
899                         if (nf_ct_kill(ct))
900                                 return -NF_REPEAT;
901                         return NF_DROP;
902                 }
903                 /* Fall through */
904         case TCP_CONNTRACK_IGNORE:
905                 /* Ignored packets:
906                  *
907                  * Our connection entry may be out of sync, so ignore
908                  * packets which may signal the real connection between
909                  * the client and the server.
910                  *
911                  * a) SYN in ORIGINAL
912                  * b) SYN/ACK in REPLY
913                  * c) ACK in reply direction after initial SYN in original.
914                  *
915                  * If the ignored packet is invalid, the receiver will send
916                  * a RST we'll catch below.
917                  */
918                 if (index == TCP_SYNACK_SET
919                     && ct->proto.tcp.last_index == TCP_SYN_SET
920                     && ct->proto.tcp.last_dir != dir
921                     && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
922                         /* b) This SYN/ACK acknowledges a SYN that we earlier
923                          * ignored as invalid. This means that the client and
924                          * the server are both in sync, while the firewall is
925                          * not. We get in sync from the previously annotated
926                          * values.
927                          */
928                         old_state = TCP_CONNTRACK_SYN_SENT;
929                         new_state = TCP_CONNTRACK_SYN_RECV;
930                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
931                                 ct->proto.tcp.last_end;
932                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
933                                 ct->proto.tcp.last_end;
934                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
935                                 ct->proto.tcp.last_win == 0 ?
936                                         1 : ct->proto.tcp.last_win;
937                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
938                                 ct->proto.tcp.last_wscale;
939                         ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
940                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
941                                 ct->proto.tcp.last_flags;
942                         memset(&ct->proto.tcp.seen[dir], 0,
943                                sizeof(struct ip_ct_tcp_state));
944                         break;
945                 }
                    /* Annotate the ignored packet (index, direction, sequence
                     * range, window) in the last_* fields, which the SYN/ACK
                     * check above and the RST check in the CLOSE case read.
                     */
946                 ct->proto.tcp.last_index = index;
947                 ct->proto.tcp.last_dir = dir;
948                 ct->proto.tcp.last_seq = ntohl(th->seq);
949                 ct->proto.tcp.last_end =
950                     segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
951                 ct->proto.tcp.last_win = ntohs(th->window);
952
953                 /* a) This is a SYN in ORIGINAL. The client and the server
954                  * may be in sync but we are not. In that case, we annotate
955                  * the TCP options and let the packet go through. If it is a
956                  * valid SYN packet, the server will reply with a SYN/ACK, and
957                  * then we'll get in sync. Otherwise, the server potentially
958                  * responds with a challenge ACK if implementing RFC5961.
959                  */
960                 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
961                         struct ip_ct_tcp_state seen = {};
962
963                         ct->proto.tcp.last_flags =
964                         ct->proto.tcp.last_wscale = 0;
965                         tcp_options(skb, dataoff, th, &seen);
966                         if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
967                                 ct->proto.tcp.last_flags |=
968                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
969                                 ct->proto.tcp.last_wscale = seen.td_scale;
970                         }
971                         if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
972                                 ct->proto.tcp.last_flags |=
973                                         IP_CT_TCP_FLAG_SACK_PERM;
974                         }
975                         /* Mark the potential for RFC5961 challenge ACK,
976                          * this poses a special problem for LAST_ACK state
977                          * as ACK is interpreted as ACKing last FIN.
978                          */
979                         if (old_state == TCP_CONNTRACK_LAST_ACK)
980                                 ct->proto.tcp.last_flags |=
981                                         IP_CT_EXP_CHALLENGE_ACK;
982                 }
983                 spin_unlock_bh(&ct->lock);
984                 nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
985                                           "state %s ", tcp_conntrack_names[old_state]);
986                 return NF_ACCEPT;
987         case TCP_CONNTRACK_MAX:
988                 /* Special case for SYN proxy: when the SYN to the server or
989                  * the SYN/ACK from the server is lost, the client may transmit
990                  * a keep-alive packet while in SYN_SENT state. This needs to
991                  * be associated with the original conntrack entry in order to
992                  * generate a new SYN with the correct sequence number.
993                  */
994                 if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
995                     index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
996                     ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
997                     ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
998                         pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
999                         spin_unlock_bh(&ct->lock);
1000                         return NF_ACCEPT;
1001                 }
1002
1003                 /* Invalid packet */
1004                 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
1005                          dir, get_conntrack_index(th), old_state);
1006                 spin_unlock_bh(&ct->lock);
1007                 nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
1008                 return -NF_ACCEPT;
1009         case TCP_CONNTRACK_TIME_WAIT:
1010                 /* RFC5961 compliance causes the stack to send "challenge-ACK"
1011                  * e.g. in response to spurious SYNs.  Conntrack MUST
1012                  * not believe this ACK is acking last FIN.
1013                  */
1014                 if (old_state == TCP_CONNTRACK_LAST_ACK &&
1015                     index == TCP_ACK_SET &&
1016                     ct->proto.tcp.last_dir != dir &&
1017                     ct->proto.tcp.last_index == TCP_SYN_SET &&
1018                     (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
1019                         /* Detected RFC5961 challenge ACK */
1020                         ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
1021                         spin_unlock_bh(&ct->lock);
1022                         nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
1023                         return NF_ACCEPT; /* Don't change state */
1024                 }
1025                 break;
1026         case TCP_CONNTRACK_SYN_SENT2:
1027                 /* tcp_conntracks table is not smart enough to handle
1028                  * simultaneous open.
1029                  */
1030                 ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN;
1031                 break;
1032         case TCP_CONNTRACK_SYN_RECV:
                     /* An ACK in the reply direction after a simultaneous open
                      * was flagged (SYN_SENT2 case) moves straight to
                      * ESTABLISHED.
                      */
1033                 if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET &&
1034                     ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN)
1035                         new_state = TCP_CONNTRACK_ESTABLISHED;
1036                 break;
1037         case TCP_CONNTRACK_CLOSE:
                     /* A RST whose sequence number lies before the highest ACK
                      * recorded for the other direction is rejected.
                      */
1038                 if (index == TCP_RST_SET
1039                     && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
1040                     && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
1041                         /* Invalid RST  */
1042                         spin_unlock_bh(&ct->lock);
1043                         nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
1044                         return -NF_ACCEPT;
1045                 }
1046                 if (index == TCP_RST_SET
1047                     && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
1048                          && ct->proto.tcp.last_index == TCP_SYN_SET)
1049                         || (!test_bit(IPS_ASSURED_BIT, &ct->status)
1050                             && ct->proto.tcp.last_index == TCP_ACK_SET))
1051                     && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
1052                         /* RST sent to invalid SYN or ACK we had let through
1053                          * at a) and c) above:
1054                          *
1055                          * a) SYN was in window then
1056                          * c) we hold a half-open connection.
1057                          *
1058                          * Delete our connection entry.
1059                          * We skip window checking, because packet might ACK
1060                          * segments we ignored. */
1061                         goto in_window;
1062                 }
1063                 /* Just fall through */
1064         default:
1065                 /* Keep compilers happy. */
1066                 break;
1067         }
1068
             /* Out-of-window packets are invalid; nothing is logged here. */
1069         if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1070                            skb, dataoff, th)) {
1071                 spin_unlock_bh(&ct->lock);
1072                 return -NF_ACCEPT;
1073         }
1074      in_window:
1075         /* From now on we have got in-window packets */
1076         ct->proto.tcp.last_index = index;
1077         ct->proto.tcp.last_dir = dir;
1078
1079         pr_debug("tcp_conntracks: ");
1080         nf_ct_dump_tuple(tuple);
1081         pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1082                  (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1083                  (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1084                  old_state, new_state);
1085
             /* Commit the new state; the direction that first moves the
              * connection into FIN_WAIT is flagged with CLOSE_INIT, which the
              * SYN_SENT case above checks when deciding whether to reopen.
              */
1086         ct->proto.tcp.state = new_state;
1087         if (old_state != new_state
1088             && new_state == TCP_CONNTRACK_FIN_WAIT)
1089                 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1090
             /* Use the table nf_ct_timeout_lookup() returns for this conntrack,
              * or the tn->timeouts defaults when it returns none.
              */
1091         timeouts = nf_ct_timeout_lookup(ct);
1092         if (!timeouts)
1093                 timeouts = tn->timeouts;
1094
             /* Pick the timeout: the RETRANS or UNACK value replaces the
              * per-state one, but only when it is shorter, if the retransmit
              * counter reached tcp_max_retrans, either direction has
              * unacknowledged data, or last_win is zero.
              */
1095         if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1096             timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1097                 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1098         else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1099                  IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1100                  timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1101                 timeout = timeouts[TCP_CONNTRACK_UNACK];
1102         else if (ct->proto.tcp.last_win == 0 &&
1103                  timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1104                 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1105         else
1106                 timeout = timeouts[new_state];
1107         spin_unlock_bh(&ct->lock);
1108
             /* Events are cached only after ct->lock has been released. */
1109         if (new_state != old_state)
1110                 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1111
1112         if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1113                 /* If only reply is a RST, we can consider ourselves not to
1114                    have an established connection: this is a fairly common
1115                    problem case, so we can delete the conntrack
1116                    immediately.  --RR */
1117                 if (th->rst) {
1118                         nf_ct_kill_acct(ct, ctinfo, skb);
1119                         return NF_ACCEPT;
1120                 }
1121                 /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1122                  * pickup with loose=1. Avoid large ESTABLISHED timeout.
1123                  */
1124                 if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1125                     timeout > timeouts[TCP_CONNTRACK_UNACK])
1126                         timeout = timeouts[TCP_CONNTRACK_UNACK];
1127         } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1128                    && (old_state == TCP_CONNTRACK_SYN_RECV
1129                        || old_state == TCP_CONNTRACK_ESTABLISHED)
1130                    && new_state == TCP_CONNTRACK_ESTABLISHED) {
1131                 /* Set ASSURED if we see a valid ack in ESTABLISHED
1132                    after SYN_RECV or a valid answer for a picked up
1133                    connection. */
1134                 set_bit(IPS_ASSURED_BIT, &ct->status);
1135                 nf_conntrack_event_cache(IPCT_ASSURED, ct);
1136         }
1137         nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1138
1139         return NF_ACCEPT;
1140 }
1141
1142 static bool tcp_can_early_drop(const struct nf_conn *ct)
1143 {
1144         switch (ct->proto.tcp.state) {
1145         case TCP_CONNTRACK_FIN_WAIT:
1146         case TCP_CONNTRACK_LAST_ACK:
1147         case TCP_CONNTRACK_TIME_WAIT:
1148         case TCP_CONNTRACK_CLOSE:
1149         case TCP_CONNTRACK_CLOSE_WAIT:
1150                 return true;
1151         default:
1152                 break;
1153         }
1154
1155         return false;
1156 }
1157
1158 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1159
1160 #include <linux/netfilter/nfnetlink.h>
1161 #include <linux/netfilter/nfnetlink_conntrack.h>
1162
1163 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1164                          struct nf_conn *ct)
1165 {
1166         struct nlattr *nest_parms;
1167         struct nf_ct_tcp_flags tmp = {};
1168
1169         spin_lock_bh(&ct->lock);
1170         nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1171         if (!nest_parms)
1172                 goto nla_put_failure;
1173
1174         if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1175             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1176                        ct->proto.tcp.seen[0].td_scale) ||
1177             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1178                        ct->proto.tcp.seen[1].td_scale))
1179                 goto nla_put_failure;
1180
1181         tmp.flags = ct->proto.tcp.seen[0].flags;
1182         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1183                     sizeof(struct nf_ct_tcp_flags), &tmp))
1184                 goto nla_put_failure;
1185
1186         tmp.flags = ct->proto.tcp.seen[1].flags;
1187         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1188                     sizeof(struct nf_ct_tcp_flags), &tmp))
1189                 goto nla_put_failure;
1190         spin_unlock_bh(&ct->lock);
1191
1192         nla_nest_end(skb, nest_parms);
1193
1194         return 0;
1195
1196 nla_put_failure:
1197         spin_unlock_bh(&ct->lock);
1198         return -1;
1199 }
1200
1201 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1202         [CTA_PROTOINFO_TCP_STATE]           = { .type = NLA_U8 },
1203         [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1204         [CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1205         [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1206         [CTA_PROTOINFO_TCP_FLAGS_REPLY]     = { .len =  sizeof(struct nf_ct_tcp_flags) },
1207 };
1208
/* Space needed for the attributes tcp_to_nlattr() nests inside
 * CTA_PROTOINFO_TCP: three u8 attributes (state plus one window scale per
 * direction) and one struct nf_ct_tcp_flags per direction.
 *
 * The flag attributes are sized from the structure itself; the previous
 * sizeof(sizeof(...)) measured a size_t instead, and the state attribute
 * was not counted at all.
 */
#define TCP_NLATTR_SIZE ( \
        NLA_ALIGN(NLA_HDRLEN + 1) + \
        NLA_ALIGN(NLA_HDRLEN + 1) + \
        NLA_ALIGN(NLA_HDRLEN + 1) + \
        NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)) + \
        NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)))
1214
1215 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1216 {
1217         struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1218         struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1219         int err;
1220
1221         /* updates could not contain anything about the private
1222          * protocol info, in that case skip the parsing */
1223         if (!pattr)
1224                 return 0;
1225
1226         err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr,
1227                                tcp_nla_policy, NULL);
1228         if (err < 0)
1229                 return err;
1230
1231         if (tb[CTA_PROTOINFO_TCP_STATE] &&
1232             nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1233                 return -EINVAL;
1234
1235         spin_lock_bh(&ct->lock);
1236         if (tb[CTA_PROTOINFO_TCP_STATE])
1237                 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1238
1239         if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1240                 struct nf_ct_tcp_flags *attr =
1241                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1242                 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1243                 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1244         }
1245
1246         if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1247                 struct nf_ct_tcp_flags *attr =
1248                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1249                 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1250                 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1251         }
1252
1253         if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1254             tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1255             ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1256             ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1257                 ct->proto.tcp.seen[0].td_scale =
1258                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1259                 ct->proto.tcp.seen[1].td_scale =
1260                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1261         }
1262         spin_unlock_bh(&ct->lock);
1263
1264         return 0;
1265 }
1266
1267 static unsigned int tcp_nlattr_tuple_size(void)
1268 {
1269         static unsigned int size __read_mostly;
1270
1271         if (!size)
1272                 size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1273
1274         return size;
1275 }
1276 #endif
1277
1278 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1279
1280 #include <linux/netfilter/nfnetlink.h>
1281 #include <linux/netfilter/nfnetlink_cttimeout.h>
1282
1283 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1284                                      struct net *net, void *data)
1285 {
1286         struct nf_tcp_net *tn = tcp_pernet(net);
1287         unsigned int *timeouts = data;
1288         int i;
1289
1290         if (!timeouts)
1291                 timeouts = tn->timeouts;
1292         /* set default TCP timeouts. */
1293         for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1294                 timeouts[i] = tn->timeouts[i];
1295
1296         if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1297                 timeouts[TCP_CONNTRACK_SYN_SENT] =
1298                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1299         }
1300
1301         if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1302                 timeouts[TCP_CONNTRACK_SYN_RECV] =
1303                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1304         }
1305         if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1306                 timeouts[TCP_CONNTRACK_ESTABLISHED] =
1307                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1308         }
1309         if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1310                 timeouts[TCP_CONNTRACK_FIN_WAIT] =
1311                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1312         }
1313         if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1314                 timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1315                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1316         }
1317         if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1318                 timeouts[TCP_CONNTRACK_LAST_ACK] =
1319                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1320         }
1321         if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1322                 timeouts[TCP_CONNTRACK_TIME_WAIT] =
1323                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1324         }
1325         if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1326                 timeouts[TCP_CONNTRACK_CLOSE] =
1327                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1328         }
1329         if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1330                 timeouts[TCP_CONNTRACK_SYN_SENT2] =
1331                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1332         }
1333         if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1334                 timeouts[TCP_CONNTRACK_RETRANS] =
1335                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1336         }
1337         if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1338                 timeouts[TCP_CONNTRACK_UNACK] =
1339                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1340         }
1341
1342         timeouts[CTA_TIMEOUT_TCP_UNSPEC] = timeouts[CTA_TIMEOUT_TCP_SYN_SENT];
1343         return 0;
1344 }
1345
1346 static int
1347 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1348 {
1349         const unsigned int *timeouts = data;
1350
1351         if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1352                         htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1353             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1354                          htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1355             nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1356                          htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1357             nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1358                          htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1359             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1360                          htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1361             nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1362                          htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1363             nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1364                          htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1365             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1366                          htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1367             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1368                          htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1369             nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1370                          htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1371             nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1372                          htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1373                 goto nla_put_failure;
1374         return 0;
1375
1376 nla_put_failure:
1377         return -ENOSPC;
1378 }
1379
1380 static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1381         [CTA_TIMEOUT_TCP_SYN_SENT]      = { .type = NLA_U32 },
1382         [CTA_TIMEOUT_TCP_SYN_RECV]      = { .type = NLA_U32 },
1383         [CTA_TIMEOUT_TCP_ESTABLISHED]   = { .type = NLA_U32 },
1384         [CTA_TIMEOUT_TCP_FIN_WAIT]      = { .type = NLA_U32 },
1385         [CTA_TIMEOUT_TCP_CLOSE_WAIT]    = { .type = NLA_U32 },
1386         [CTA_TIMEOUT_TCP_LAST_ACK]      = { .type = NLA_U32 },
1387         [CTA_TIMEOUT_TCP_TIME_WAIT]     = { .type = NLA_U32 },
1388         [CTA_TIMEOUT_TCP_CLOSE]         = { .type = NLA_U32 },
1389         [CTA_TIMEOUT_TCP_SYN_SENT2]     = { .type = NLA_U32 },
1390         [CTA_TIMEOUT_TCP_RETRANS]       = { .type = NLA_U32 },
1391         [CTA_TIMEOUT_TCP_UNACK]         = { .type = NLA_U32 },
1392 };
1393 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
1394
1395 #ifdef CONFIG_SYSCTL
1396 static struct ctl_table tcp_sysctl_table[] = {
1397         {
1398                 .procname       = "nf_conntrack_tcp_timeout_syn_sent",
1399                 .maxlen         = sizeof(unsigned int),
1400                 .mode           = 0644,
1401                 .proc_handler   = proc_dointvec_jiffies,
1402         },
1403         {
1404                 .procname       = "nf_conntrack_tcp_timeout_syn_recv",
1405                 .maxlen         = sizeof(unsigned int),
1406                 .mode           = 0644,
1407                 .proc_handler   = proc_dointvec_jiffies,
1408         },
1409         {
1410                 .procname       = "nf_conntrack_tcp_timeout_established",
1411                 .maxlen         = sizeof(unsigned int),
1412                 .mode           = 0644,
1413                 .proc_handler   = proc_dointvec_jiffies,
1414         },
1415         {
1416                 .procname       = "nf_conntrack_tcp_timeout_fin_wait",
1417                 .maxlen         = sizeof(unsigned int),
1418                 .mode           = 0644,
1419                 .proc_handler   = proc_dointvec_jiffies,
1420         },
1421         {
1422                 .procname       = "nf_conntrack_tcp_timeout_close_wait",
1423                 .maxlen         = sizeof(unsigned int),
1424                 .mode           = 0644,
1425                 .proc_handler   = proc_dointvec_jiffies,
1426         },
1427         {
1428                 .procname       = "nf_conntrack_tcp_timeout_last_ack",
1429                 .maxlen         = sizeof(unsigned int),
1430                 .mode           = 0644,
1431                 .proc_handler   = proc_dointvec_jiffies,
1432         },
1433         {
1434                 .procname       = "nf_conntrack_tcp_timeout_time_wait",
1435                 .maxlen         = sizeof(unsigned int),
1436                 .mode           = 0644,
1437                 .proc_handler   = proc_dointvec_jiffies,
1438         },
1439         {
1440                 .procname       = "nf_conntrack_tcp_timeout_close",
1441                 .maxlen         = sizeof(unsigned int),
1442                 .mode           = 0644,
1443                 .proc_handler   = proc_dointvec_jiffies,
1444         },
1445         {
1446                 .procname       = "nf_conntrack_tcp_timeout_max_retrans",
1447                 .maxlen         = sizeof(unsigned int),
1448                 .mode           = 0644,
1449                 .proc_handler   = proc_dointvec_jiffies,
1450         },
1451         {
1452                 .procname       = "nf_conntrack_tcp_timeout_unacknowledged",
1453                 .maxlen         = sizeof(unsigned int),
1454                 .mode           = 0644,
1455                 .proc_handler   = proc_dointvec_jiffies,
1456         },
1457         {
1458                 .procname       = "nf_conntrack_tcp_loose",
1459                 .maxlen         = sizeof(unsigned int),
1460                 .mode           = 0644,
1461                 .proc_handler   = proc_dointvec,
1462         },
1463         {
1464                 .procname       = "nf_conntrack_tcp_be_liberal",
1465                 .maxlen         = sizeof(unsigned int),
1466                 .mode           = 0644,
1467                 .proc_handler   = proc_dointvec,
1468         },
1469         {
1470                 .procname       = "nf_conntrack_tcp_max_retrans",
1471                 .maxlen         = sizeof(unsigned int),
1472                 .mode           = 0644,
1473                 .proc_handler   = proc_dointvec,
1474         },
1475         { }
1476 };
1477 #endif /* CONFIG_SYSCTL */
1478
/* Give @pn its own copy of tcp_sysctl_table, wired to the values in @tn.
 *
 * Does nothing when @pn already has a table, or when CONFIG_SYSCTL is
 * not set.  Otherwise the template is duplicated with kmemdup() and the
 * .data pointer of each slot is pointed at the matching member of @tn.
 *
 * Returns 0 on success (including the nothing-to-do cases) and -ENOMEM
 * if the duplication fails.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *table = pn->ctl_table;

	/* A table is already attached: keep it. */
	if (table)
		return 0;

	table = kmemdup(tcp_sysctl_table, sizeof(tcp_sysctl_table),
			GFP_KERNEL);
	pn->ctl_table = table;
	if (!table)
		return -ENOMEM;

	/* Slot numbers follow the entry order of tcp_sysctl_table. */
	table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK];
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1508
1509 static int tcp_init_net(struct net *net, u_int16_t proto)
1510 {
1511         struct nf_tcp_net *tn = tcp_pernet(net);
1512         struct nf_proto_net *pn = &tn->pn;
1513
1514         if (!pn->users) {
1515                 int i;
1516
1517                 for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1518                         tn->timeouts[i] = tcp_timeouts[i];
1519
1520                 /* timeouts[0] is unused, make it same as SYN_SENT so
1521                  * ->timeouts[0] contains 'new' timeout, like udp or icmp.
1522                  */
1523                 tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
1524                 tn->tcp_loose = nf_ct_tcp_loose;
1525                 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1526                 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1527         }
1528
1529         return tcp_kmemdup_sysctl_table(pn, tn);
1530 }
1531
1532 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1533 {
1534         return &net->ct.nf_ct_proto.tcp.pn;
1535 }
1536
1537 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
1538 {
1539         .l3proto                = PF_INET,
1540         .l4proto                = IPPROTO_TCP,
1541 #ifdef CONFIG_NF_CONNTRACK_PROCFS
1542         .print_conntrack        = tcp_print_conntrack,
1543 #endif
1544         .packet                 = tcp_packet,
1545         .can_early_drop         = tcp_can_early_drop,
1546 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1547         .to_nlattr              = tcp_to_nlattr,
1548         .from_nlattr            = nlattr_to_tcp,
1549         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1550         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1551         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1552         .nlattr_size            = TCP_NLATTR_SIZE,
1553         .nla_policy             = nf_ct_port_nla_policy,
1554 #endif
1555 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1556         .ctnl_timeout           = {
1557                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1558                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1559                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1560                 .obj_size       = sizeof(unsigned int) *
1561                                         TCP_CONNTRACK_TIMEOUT_MAX,
1562                 .nla_policy     = tcp_timeout_nla_policy,
1563         },
1564 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
1565         .init_net               = tcp_init_net,
1566         .get_net_proto          = tcp_get_net_proto,
1567 };
1568 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1569
1570 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
1571 {
1572         .l3proto                = PF_INET6,
1573         .l4proto                = IPPROTO_TCP,
1574 #ifdef CONFIG_NF_CONNTRACK_PROCFS
1575         .print_conntrack        = tcp_print_conntrack,
1576 #endif
1577         .packet                 = tcp_packet,
1578         .can_early_drop         = tcp_can_early_drop,
1579 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1580         .nlattr_size            = TCP_NLATTR_SIZE,
1581         .to_nlattr              = tcp_to_nlattr,
1582         .from_nlattr            = nlattr_to_tcp,
1583         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1584         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1585         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1586         .nla_policy             = nf_ct_port_nla_policy,
1587 #endif
1588 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1589         .ctnl_timeout           = {
1590                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1591                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1592                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1593                 .obj_size       = sizeof(unsigned int) *
1594                                         TCP_CONNTRACK_TIMEOUT_MAX,
1595                 .nla_policy     = tcp_timeout_nla_policy,
1596         },
1597 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
1598         .init_net               = tcp_init_net,
1599         .get_net_proto          = tcp_get_net_proto,
1600 };
1601 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);