Merge tag 'md/4.6-rc6-fix' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
[linux-2.6-block.git] / net / netfilter / nf_conntrack_proto_tcp.c
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 #include <linux/types.h>
12 #include <linux/timer.h>
13 #include <linux/module.h>
14 #include <linux/in.h>
15 #include <linux/tcp.h>
16 #include <linux/spinlock.h>
17 #include <linux/skbuff.h>
18 #include <linux/ipv6.h>
19 #include <net/ip6_checksum.h>
20 #include <asm/unaligned.h>
21
22 #include <net/tcp.h>
23
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/netfilter_ipv6.h>
27 #include <net/netfilter/nf_conntrack.h>
28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 #include <net/netfilter/nf_conntrack_ecache.h>
30 #include <net/netfilter/nf_conntrack_seqadj.h>
31 #include <net/netfilter/nf_conntrack_synproxy.h>
32 #include <net/netfilter/nf_log.h>
33 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
34 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
35
/*
 * Module-wide TCP tracking tunables.
 * NOTE(review): tcp_in_window() reads the per-netns copy (tn->tcp_be_liberal);
 * presumably these statics seed the per-netns values - confirm against the
 * init code, which is outside this view.
 */

/* "Be conservative in what you do,
    be liberal in what you accept from others."
    If it's non-zero, we mark only out of window RST segments as INVALID. */
static int nf_ct_tcp_be_liberal __read_mostly = 0;

/* If it is set to zero, we disable picking up already established
   connections. */
static int nf_ct_tcp_loose __read_mostly = 1;

/* Max number of the retransmitted packets without receiving an (acceptable)
   ACK from the destination. If this number is reached, a shorter timer
   will be started. */
static int nf_ct_tcp_max_retrans __read_mostly = 3;
49
50   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
51      closely.  They're more complex. --RR */
52
/*
 * Printable name of each conntrack TCP state.  tcp_print_conntrack()
 * indexes this array with ct->proto.tcp.state, so the entries must stay in
 * the same order as the state columns of the tcp_conntracks transition
 * table (sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2).
 */
static const char *const tcp_conntrack_names[] = {
        "NONE",
        "SYN_SENT",
        "SYN_RECV",
        "ESTABLISHED",
        "FIN_WAIT",
        "CLOSE_WAIT",
        "LAST_ACK",
        "TIME_WAIT",
        "CLOSE",
        "SYN_SENT2",
};
65
/*
 * Postfix time-unit helpers: each expands to a trailing multiplication, so
 * "2 MINS" becomes "2 * 60 * HZ".  They are only meaningful when written
 * directly after a number, as in the table below.
 */
#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
#define DAYS * 24 HOURS

/*
 * Default timeout for each TCP conntrack state, expressed in HZ ticks via
 * the helpers above.  Slots not listed here are zero-initialised.
 */
static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
        [TCP_CONNTRACK_SYN_SENT]        = 2 MINS,
        [TCP_CONNTRACK_SYN_RECV]        = 60 SECS,
        [TCP_CONNTRACK_ESTABLISHED]     = 5 DAYS,
        [TCP_CONNTRACK_FIN_WAIT]        = 2 MINS,
        [TCP_CONNTRACK_CLOSE_WAIT]      = 60 SECS,
        [TCP_CONNTRACK_LAST_ACK]        = 30 SECS,
        [TCP_CONNTRACK_TIME_WAIT]       = 2 MINS,
        [TCP_CONNTRACK_CLOSE]           = 10 SECS,
        [TCP_CONNTRACK_SYN_SENT2]       = 2 MINS,
/* RFC1122 says the R2 limit should be at least 100 seconds.
   Linux uses 15 packets as limit, which corresponds
   to ~13-30min depending on RTO. */
        [TCP_CONNTRACK_RETRANS]         = 5 MINS,
        [TCP_CONNTRACK_UNACK]           = 5 MINS,
};
87
/*
 * Three-letter aliases for the conntrack states, used to keep the rows of
 * the tcp_conntracks transition table narrow enough to read as a grid.
 * sIV ("invalid") maps to TCP_CONNTRACK_MAX and sIG ("ignore") maps to
 * TCP_CONNTRACK_IGNORE; neither appears as a column of the table.
 */
#define sNO TCP_CONNTRACK_NONE
#define sSS TCP_CONNTRACK_SYN_SENT
#define sSR TCP_CONNTRACK_SYN_RECV
#define sES TCP_CONNTRACK_ESTABLISHED
#define sFW TCP_CONNTRACK_FIN_WAIT
#define sCW TCP_CONNTRACK_CLOSE_WAIT
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
#define sS2 TCP_CONNTRACK_SYN_SENT2
#define sIV TCP_CONNTRACK_MAX
#define sIG TCP_CONNTRACK_IGNORE
100
/*
 * What TCP flags are set from RST/SYN/FIN/ACK.
 *
 * get_conntrack_index() classifies a TCP header into one of these values.
 * The enumerator order matches the row order (syn, synack, fin, ack, rst,
 * none) of each direction block in the tcp_conntracks table, so the two
 * must be kept in sync.
 */
enum tcp_bit_set {
        TCP_SYN_SET,
        TCP_SYNACK_SET,
        TCP_FIN_SET,
        TCP_ACK_SET,
        TCP_RST_SET,
        TCP_NONE_SET,
};
110
/*
 * The TCP state transition table needs a few words...
 *
 * We are the man in the middle. All the packets go through us
 * but might get lost in transit to the destination.
 * It is assumed that the destinations can't receive segments
 * we haven't seen.
 *
 * The checked segment is in window, but our windows are *not*
 * equivalent with the ones of the sender/receiver. We always
 * try to guess the state of the current sender.
 *
 * The meaning of the states are:
 *
 * NONE:        initial state
 * SYN_SENT:    SYN-only packet seen
 * SYN_SENT2:   SYN-only packet seen from reply dir, simultaneous open
 * SYN_RECV:    SYN-ACK packet seen
 * ESTABLISHED: ACK packet seen
 * FIN_WAIT:    FIN packet seen
 * CLOSE_WAIT:  ACK seen (after FIN)
 * LAST_ACK:    FIN seen (after FIN)
 * TIME_WAIT:   last ACK seen
 * CLOSE:       closed connection (RST)
 *
 * Packets marked as IGNORED (sIG):
 *      if they may be either invalid or valid
 *      and the receiver may send back a connection
 *      closing RST or a SYN/ACK.
 *
 * Packets marked as INVALID (sIV):
 *      if we regard them as truly invalid packets
 *
 * Layout: the first dimension has two blocks (ORIGINAL, then REPLY), the
 * second has one row per enum tcp_bit_set value (syn, synack, fin, ack,
 * rst, none) and the third has one column per current state.  Each cell
 * holds the resulting state, or sIV / sIG.
 * NOTE(review): the lookup itself is outside this view; presumably it is
 * tcp_conntracks[dir][get_conntrack_index(th)][old_state] - confirm.
 */
static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
        {
/* ORIGINAL */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*syn*/    { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
/*
 *      sNO -> sSS      Initialize a new connection
 *      sSS -> sSS      Retransmitted SYN
 *      sS2 -> sS2      Late retransmitted SYN
 *      sSR -> sIG
 *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
 *                      are errors. Receiver will reply with RST
 *                      and close the connection.
 *                      Or we are not in sync and hold a dead connection.
 *      sFW -> sIG
 *      sCW -> sIG
 *      sLA -> sIG
 *      sTW -> sSS      Reopened connection (RFC 1122).
 *      sCL -> sSS
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
/*
 *      sNO -> sIV      Too late and no reason to do anything
 *      sSS -> sIV      Client can't send SYN and then SYN/ACK
 *      sS2 -> sSR      SYN/ACK sent to SYN2 in simultaneous open
 *      sSR -> sSR      Late retransmitted SYN/ACK in simultaneous open
 *      sES -> sIV      Invalid SYN/ACK packets sent by the client
 *      sFW -> sIV
 *      sCW -> sIV
 *      sLA -> sIV
 *      sTW -> sIV
 *      sCL -> sIV
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *      sNO -> sIV      Too late and no reason to do anything...
 *      sSS -> sIV      Client might not send FIN in this state:
 *                      we enforce waiting for a SYN/ACK reply first.
 *      sS2 -> sIV
 *      sSR -> sFW      Close started.
 *      sES -> sFW
 *      sFW -> sLA      FIN seen in both directions, waiting for
 *                      the last ACK.
 *                      Might be a retransmitted FIN as well...
 *      sCW -> sLA
 *      sLA -> sLA      Retransmitted FIN. Remain in the same state.
 *      sTW -> sTW
 *      sCL -> sCL
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*ack*/    { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
 *      sNO -> sES      Assumed.
 *      sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
 *      sS2 -> sIV
 *      sSR -> sES      Established state is reached.
 *      sES -> sES      :-)
 *      sFW -> sCW      Normal close request answered by ACK.
 *      sCW -> sCW
 *      sLA -> sTW      Last ACK detected (RFC5961 challenged)
 *      sTW -> sTW      Retransmitted last ACK. Remain in the same state.
 *      sCL -> sCL
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
        },
        {
/* REPLY */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*syn*/    { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
/*
 *      sNO -> sIV      Never reached.
 *      sSS -> sS2      Simultaneous open
 *      sS2 -> sS2      Retransmitted simultaneous SYN
 *      sSR -> sIV      Invalid SYN packets sent by the server
 *      sES -> sIV
 *      sFW -> sIV
 *      sCW -> sIV
 *      sLA -> sIV
 *      sTW -> sSS      Reopened connection, but server may have switched role
 *      sCL -> sIV
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
/*
 *      sSS -> sSR      Standard open.
 *      sS2 -> sSR      Simultaneous open
 *      sSR -> sIG      Retransmitted SYN/ACK, ignore it.
 *      sES -> sIG      Late retransmitted SYN/ACK?
 *      sFW -> sIG      Might be SYN/ACK answering ignored SYN
 *      sCW -> sIG
 *      sLA -> sIG
 *      sTW -> sIG
 *      sCL -> sIG
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *      sSS -> sIV      Server might not send FIN in this state.
 *      sS2 -> sIV
 *      sSR -> sFW      Close started.
 *      sES -> sFW
 *      sFW -> sLA      FIN seen in both directions.
 *      sCW -> sLA
 *      sLA -> sLA      Retransmitted FIN.
 *      sTW -> sTW
 *      sCL -> sCL
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*ack*/    { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
/*
 *      sSS -> sIG      Might be a half-open connection.
 *      sS2 -> sIG
 *      sSR -> sSR      Might answer late resent SYN.
 *      sES -> sES      :-)
 *      sFW -> sCW      Normal close request answered by ACK.
 *      sCW -> sCW
 *      sLA -> sTW      Last ACK detected (RFC5961 challenged)
 *      sTW -> sTW      Retransmitted last ACK.
 *      sCL -> sCL
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
        }
};
273
274 static inline struct nf_tcp_net *tcp_pernet(struct net *net)
275 {
276         return &net->ct.nf_ct_proto.tcp;
277 }
278
279 static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
280                              struct net *net, struct nf_conntrack_tuple *tuple)
281 {
282         const struct tcphdr *hp;
283         struct tcphdr _hdr;
284
285         /* Actually only need first 8 bytes. */
286         hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
287         if (hp == NULL)
288                 return false;
289
290         tuple->src.u.tcp.port = hp->source;
291         tuple->dst.u.tcp.port = hp->dest;
292
293         return true;
294 }
295
/*
 * Build the port part of the reverse-direction tuple: @tuple receives
 * @orig's destination port as its source port and @orig's source port as
 * its destination port.  Always returns true.
 */
static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
                             const struct nf_conntrack_tuple *orig)
{
        tuple->src.u.tcp.port = orig->dst.u.tcp.port;
        tuple->dst.u.tcp.port = orig->src.u.tcp.port;
        return true;
}
303
304 /* Print out the per-protocol part of the tuple. */
305 static void tcp_print_tuple(struct seq_file *s,
306                             const struct nf_conntrack_tuple *tuple)
307 {
308         seq_printf(s, "sport=%hu dport=%hu ",
309                    ntohs(tuple->src.u.tcp.port),
310                    ntohs(tuple->dst.u.tcp.port));
311 }
312
313 /* Print out the private part of the conntrack. */
314 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
315 {
316         enum tcp_conntrack state;
317
318         spin_lock_bh(&ct->lock);
319         state = ct->proto.tcp.state;
320         spin_unlock_bh(&ct->lock);
321
322         seq_printf(s, "%s ", tcp_conntrack_names[state]);
323 }
324
325 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
326 {
327         if (tcph->rst) return TCP_RST_SET;
328         else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
329         else if (tcph->fin) return TCP_FIN_SET;
330         else if (tcph->ack) return TCP_ACK_SET;
331         else return TCP_NONE_SET;
332 }
333
334 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
335    in IP Filter' by Guido van Rooij.
336
337    http://www.sane.nl/events/sane2000/papers.html
338    http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
339
340    The boundaries and the conditions are changed according to RFC793:
341    the packet must intersect the window (i.e. segments may be
342    after the right or before the left edge) and thus receivers may ACK
343    segments after the right edge of the window.
344
345         td_maxend = max(sack + max(win,1)) seen in reply packets
346         td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
347         td_maxwin += seq + len - sender.td_maxend
348                         if seq + len > sender.td_maxend
349         td_end    = max(seq + len) seen in sent packets
350
351    I.   Upper bound for valid data:     seq <= sender.td_maxend
352    II.  Lower bound for valid data:     seq + len >= sender.td_end - receiver.td_maxwin
353    III. Upper bound for valid (s)ack:   sack <= receiver.td_end
354    IV.  Lower bound for valid (s)ack:   sack >= receiver.td_end - MAXACKWINDOW
355
356    where sack is the highest right edge of sack block found in the packet
357    or ack in the case of packet without SACK option.
358
   The upper bound limit for a valid (s)ack is not ignored -
   we don't have to deal with fragments.
361 */
362
363 static inline __u32 segment_seq_plus_len(__u32 seq,
364                                          size_t len,
365                                          unsigned int dataoff,
366                                          const struct tcphdr *tcph)
367 {
368         /* XXX Should I use payload length field in IP/IPv6 header ?
369          * - YK */
370         return (seq + len - dataoff - tcph->doff*4
371                 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
372 }
373
/* Fixme: what about big packets? */
#define MAXACKWINCONST                  66000
/*
 * Width of the window accepted below receiver.td_end for a valid (s)ack:
 * the sender's td_maxwin, but never less than MAXACKWINCONST.
 * Note that @sender is evaluated more than once.
 */
#define MAXACKWINDOW(sender)                                            \
        ((sender)->td_maxwin <= MAXACKWINCONST ? MAXACKWINCONST         \
                                               : (sender)->td_maxwin)
379
380 /*
381  * Simplified tcp_parse_options routine from tcp_input.c
382  */
383 static void tcp_options(const struct sk_buff *skb,
384                         unsigned int dataoff,
385                         const struct tcphdr *tcph,
386                         struct ip_ct_tcp_state *state)
387 {
388         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
389         const unsigned char *ptr;
390         int length = (tcph->doff*4) - sizeof(struct tcphdr);
391
392         if (!length)
393                 return;
394
395         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
396                                  length, buff);
397         BUG_ON(ptr == NULL);
398
399         state->td_scale =
400         state->flags = 0;
401
402         while (length > 0) {
403                 int opcode=*ptr++;
404                 int opsize;
405
406                 switch (opcode) {
407                 case TCPOPT_EOL:
408                         return;
409                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
410                         length--;
411                         continue;
412                 default:
413                         if (length < 2)
414                                 return;
415                         opsize=*ptr++;
416                         if (opsize < 2) /* "silly options" */
417                                 return;
418                         if (opsize > length)
419                                 return; /* don't parse partial options */
420
421                         if (opcode == TCPOPT_SACK_PERM
422                             && opsize == TCPOLEN_SACK_PERM)
423                                 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
424                         else if (opcode == TCPOPT_WINDOW
425                                  && opsize == TCPOLEN_WINDOW) {
426                                 state->td_scale = *(u_int8_t *)ptr;
427
428                                 if (state->td_scale > 14) {
429                                         /* See RFC1323 */
430                                         state->td_scale = 14;
431                                 }
432                                 state->flags |=
433                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
434                         }
435                         ptr += opsize - 2;
436                         length -= opsize;
437                 }
438         }
439 }
440
441 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
442                      const struct tcphdr *tcph, __u32 *sack)
443 {
444         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
445         const unsigned char *ptr;
446         int length = (tcph->doff*4) - sizeof(struct tcphdr);
447         __u32 tmp;
448
449         if (!length)
450                 return;
451
452         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
453                                  length, buff);
454         BUG_ON(ptr == NULL);
455
456         /* Fast path for timestamp-only option */
457         if (length == TCPOLEN_TSTAMP_ALIGNED
458             && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
459                                        | (TCPOPT_NOP << 16)
460                                        | (TCPOPT_TIMESTAMP << 8)
461                                        | TCPOLEN_TIMESTAMP))
462                 return;
463
464         while (length > 0) {
465                 int opcode = *ptr++;
466                 int opsize, i;
467
468                 switch (opcode) {
469                 case TCPOPT_EOL:
470                         return;
471                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
472                         length--;
473                         continue;
474                 default:
475                         if (length < 2)
476                                 return;
477                         opsize = *ptr++;
478                         if (opsize < 2) /* "silly options" */
479                                 return;
480                         if (opsize > length)
481                                 return; /* don't parse partial options */
482
483                         if (opcode == TCPOPT_SACK
484                             && opsize >= (TCPOLEN_SACK_BASE
485                                           + TCPOLEN_SACK_PERBLOCK)
486                             && !((opsize - TCPOLEN_SACK_BASE)
487                                  % TCPOLEN_SACK_PERBLOCK)) {
488                                 for (i = 0;
489                                      i < (opsize - TCPOLEN_SACK_BASE);
490                                      i += TCPOLEN_SACK_PERBLOCK) {
491                                         tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
492
493                                         if (after(tmp, *sack))
494                                                 *sack = tmp;
495                                 }
496                                 return;
497                         }
498                         ptr += opsize - 2;
499                         length -= opsize;
500                 }
501         }
502 }
503
/*
 * Decide whether a segment fits the tracked TCP windows and, if so, update
 * the per-direction tracking data.
 *
 * @ct:      conntrack entry; supplies the netns, the tuple for debug output
 *           and the NAT sequence offset
 * @state:   TCP tracking state; seen[dir] is treated as the sender and
 *           seen[!dir] as the receiver
 * @dir:     direction the packet travels in
 * @index:   flag class of the packet; only TCP_ACK_SET segments take part
 *           in retransmission counting
 * @skb:     the packet
 * @dataoff: offset of the TCP header in @skb
 * @tcph:    TCP header
 * @pf:      protocol family, passed through to nf_log_packet()
 *
 * Returns true when the segment satisfies bounds I-IV described in the
 * comment preceding segment_seq_plus_len(), when the sender has
 * IP_CT_TCP_FLAG_BE_LIBERAL set or tn->tcp_be_liberal is non-zero, or for
 * a SYN without ACK seen while the sender data is being initialised
 * (simultaneous open).  Returns false otherwise, optionally logging which
 * bound failed.
 */
static bool tcp_in_window(const struct nf_conn *ct,
                          struct ip_ct_tcp *state,
                          enum ip_conntrack_dir dir,
                          unsigned int index,
                          const struct sk_buff *skb,
                          unsigned int dataoff,
                          const struct tcphdr *tcph,
                          u_int8_t pf)
{
        struct net *net = nf_ct_net(ct);
        struct nf_tcp_net *tn = tcp_pernet(net);
        struct ip_ct_tcp_state *sender = &state->seen[dir];
        struct ip_ct_tcp_state *receiver = &state->seen[!dir];
        const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
        __u32 seq, ack, sack, end, win, swin;
        s32 receiver_offset;
        bool res, in_recv_win;

        /*
         * Get the required data from the packet.
         */
        seq = ntohl(tcph->seq);
        ack = sack = ntohl(tcph->ack_seq);
        win = ntohs(tcph->window);
        end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);

        /* SACK blocks are only parsed if the receiver flagged SACK-permitted. */
        if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
                tcp_sack(skb, dataoff, tcph, &sack);

        /* Take into account NAT sequence number mangling */
        receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
        ack -= receiver_offset;
        sack -= receiver_offset;

        pr_debug("tcp_in_window: START\n");
        pr_debug("tcp_in_window: ");
        nf_ct_dump_tuple(tuple);
        pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
                 seq, ack, receiver_offset, sack, receiver_offset, win, end);
        pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
                 sender->td_end, sender->td_maxend, sender->td_maxwin,
                 sender->td_scale,
                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
                 receiver->td_scale);

        /* td_maxwin == 0 marks a direction for which nothing is recorded yet. */
        if (sender->td_maxwin == 0) {
                /*
                 * Initialize sender data.
                 */
                if (tcph->syn) {
                        /*
                         * SYN-ACK in reply to a SYN
                         * or SYN from reply direction in simultaneous open.
                         */
                        sender->td_end =
                        sender->td_maxend = end;
                        sender->td_maxwin = (win == 0 ? 1 : win);

                        tcp_options(skb, dataoff, tcph, sender);
                        /*
                         * RFC 1323:
                         * Both sides must send the Window Scale option
                         * to enable window scaling in either direction.
                         */
                        if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
                              && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
                                sender->td_scale =
                                receiver->td_scale = 0;
                        if (!tcph->ack)
                                /* Simultaneous open */
                                return true;
                } else {
                        /*
                         * We are in the middle of a connection,
                         * its history is lost for us.
                         * Let's try to use the data from the packet.
                         */
                        sender->td_end = end;
                        swin = win << sender->td_scale;
                        sender->td_maxwin = (swin == 0 ? 1 : swin);
                        sender->td_maxend = end + sender->td_maxwin;
                        /*
                         * We haven't seen traffic in the other direction yet
                         * but we have to tweak window tracking to pass III
                         * and IV until that happens.
                         */
                        if (receiver->td_maxwin == 0)
                                receiver->td_end = receiver->td_maxend = sack;
                }
        } else if (((state->state == TCP_CONNTRACK_SYN_SENT
                     && dir == IP_CT_DIR_ORIGINAL)
                   || (state->state == TCP_CONNTRACK_SYN_RECV
                     && dir == IP_CT_DIR_REPLY))
                   && after(end, sender->td_end)) {
                /*
                 * RFC 793: "if a TCP is reinitialized ... then it need
                 * not wait at all; it must only be sure to use sequence
                 * numbers larger than those recently used."
                 */
                sender->td_end =
                sender->td_maxend = end;
                sender->td_maxwin = (win == 0 ? 1 : win);

                tcp_options(skb, dataoff, tcph, sender);
        }

        if (!(tcph->ack)) {
                /*
                 * If there is no ACK, just pretend it was set and OK.
                 */
                ack = sack = receiver->td_end;
        } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
                    (TCP_FLAG_ACK|TCP_FLAG_RST))
                   && (ack == 0)) {
                /*
                 * Broken TCP stacks, that set ACK in RST packets as well
                 * with zero ack value.
                 */
                ack = sack = receiver->td_end;
        }

        if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
                /*
                 * RST sent answering SYN.
                 */
                seq = end = sender->td_end;

        pr_debug("tcp_in_window: ");
        nf_ct_dump_tuple(tuple);
        pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
                 seq, ack, receiver_offset, sack, receiver_offset, win, end);
        pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
                 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
                 sender->td_end, sender->td_maxend, sender->td_maxwin,
                 sender->td_scale,
                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
                 receiver->td_scale);

        /* Is the ending sequence in the receive window (if available)? */
        in_recv_win = !receiver->td_maxwin ||
                      after(end, sender->td_end - receiver->td_maxwin - 1);

        pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
                 before(seq, sender->td_maxend + 1),
                 (in_recv_win ? 1 : 0),
                 before(sack, receiver->td_end + 1),
                 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));

        /* Bounds I, II, III and IV, in that order. */
        if (before(seq, sender->td_maxend + 1) &&
            in_recv_win &&
            before(sack, receiver->td_end + 1) &&
            after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
                /*
                 * Take into account window scaling (RFC 1323).
                 */
                if (!tcph->syn)
                        win <<= sender->td_scale;

                /*
                 * Update sender data.
                 */
                swin = win + (sack - ack);
                if (sender->td_maxwin < swin)
                        sender->td_maxwin = swin;
                if (after(end, sender->td_end)) {
                        sender->td_end = end;
                        sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
                }
                if (tcph->ack) {
                        if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
                                sender->td_maxack = ack;
                                sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
                        } else if (after(ack, sender->td_maxack))
                                sender->td_maxack = ack;
                }

                /*
                 * Update receiver data.
                 */
                if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
                        receiver->td_maxwin += end - sender->td_maxend;
                if (after(sack + win, receiver->td_maxend - 1)) {
                        receiver->td_maxend = sack + win;
                        if (win == 0)
                                receiver->td_maxend++;
                }
                if (ack == receiver->td_end)
                        receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;

                /*
                 * Check retransmissions.
                 * A segment classified as TCP_ACK_SET that repeats the
                 * previous one (same dir/seq/ack/end/win) bumps the
                 * counter; anything else resets it and becomes the new
                 * reference.
                 */
                if (index == TCP_ACK_SET) {
                        if (state->last_dir == dir
                            && state->last_seq == seq
                            && state->last_ack == ack
                            && state->last_end == end
                            && state->last_win == win)
                                state->retrans++;
                        else {
                                state->last_dir = dir;
                                state->last_seq = seq;
                                state->last_ack = ack;
                                state->last_end = end;
                                state->last_win = win;
                                state->retrans = 0;
                        }
                }
                res = true;
        } else {
                res = false;
                /* Liberal mode accepts out-of-window segments anyway. */
                if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
                    tn->tcp_be_liberal)
                        res = true;
                /*
                 * The nested conditional below picks the message for the
                 * first of bounds I..IV that failed; "BUG" would mean all
                 * four hold, which cannot happen in this branch.
                 */
                if (!res && LOG_INVALID(net, IPPROTO_TCP))
                        nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
                        "nf_ct_tcp: %s ",
                        before(seq, sender->td_maxend + 1) ?
                        in_recv_win ?
                        before(sack, receiver->td_end + 1) ?
                        after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
                        : "ACK is under the lower bound (possible overly delayed ACK)"
                        : "ACK is over the upper bound (ACKed data not seen yet)"
                        : "SEQ is under the lower bound (already ACKed data retransmitted)"
                        : "SEQ is over the upper bound (over the window of the receiver)");
        }

        pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
                 "receiver end=%u maxend=%u maxwin=%u\n",
                 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
                 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);

        return res;
}
739
/*
 * table of valid flag combinations - PUSH, ECE and CWR are always valid
 *
 * The array is sized to hold every combination of the FIN, SYN, RST, ACK
 * and URG bits.  Combinations listed below are marked 1 (valid); all other
 * slots are implicitly 0.
 * NOTE(review): the lookup is outside this view; presumably the index is
 * the header flag byte with PUSH/ECE/CWR masked off - confirm in the caller.
 */
static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
                                 TCPHDR_URG) + 1] =
{
        [TCPHDR_SYN]                            = 1,
        [TCPHDR_SYN|TCPHDR_URG]                 = 1,
        [TCPHDR_SYN|TCPHDR_ACK]                 = 1,
        [TCPHDR_RST]                            = 1,
        [TCPHDR_RST|TCPHDR_ACK]                 = 1,
        [TCPHDR_FIN|TCPHDR_ACK]                 = 1,
        [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]      = 1,
        [TCPHDR_ACK]                            = 1,
        [TCPHDR_ACK|TCPHDR_URG]                 = 1,
};
754
755 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
756 static int tcp_error(struct net *net, struct nf_conn *tmpl,
757                      struct sk_buff *skb,
758                      unsigned int dataoff,
759                      enum ip_conntrack_info *ctinfo,
760                      u_int8_t pf,
761                      unsigned int hooknum)
762 {
763         const struct tcphdr *th;
764         struct tcphdr _tcph;
765         unsigned int tcplen = skb->len - dataoff;
766         u_int8_t tcpflags;
767
768         /* Smaller that minimal TCP header? */
769         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
770         if (th == NULL) {
771                 if (LOG_INVALID(net, IPPROTO_TCP))
772                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
773                                 "nf_ct_tcp: short packet ");
774                 return -NF_ACCEPT;
775         }
776
777         /* Not whole TCP header or malformed packet */
778         if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
779                 if (LOG_INVALID(net, IPPROTO_TCP))
780                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
781                                 "nf_ct_tcp: truncated/malformed packet ");
782                 return -NF_ACCEPT;
783         }
784
785         /* Checksum invalid? Ignore.
786          * We skip checking packets on the outgoing path
787          * because the checksum is assumed to be correct.
788          */
789         /* FIXME: Source route IP option packets --RR */
790         if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
791             nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
792                 if (LOG_INVALID(net, IPPROTO_TCP))
793                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
794                                   "nf_ct_tcp: bad TCP checksum ");
795                 return -NF_ACCEPT;
796         }
797
798         /* Check TCP flags. */
799         tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
800         if (!tcp_valid_flags[tcpflags]) {
801                 if (LOG_INVALID(net, IPPROTO_TCP))
802                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
803                                   "nf_ct_tcp: invalid TCP flag combination ");
804                 return -NF_ACCEPT;
805         }
806
807         return NF_ACCEPT;
808 }
809
810 static unsigned int *tcp_get_timeouts(struct net *net)
811 {
812         return tcp_pernet(net)->timeouts;
813 }
814
815 /* Returns verdict for packet, or -1 for invalid. */
816 static int tcp_packet(struct nf_conn *ct,
817                       const struct sk_buff *skb,
818                       unsigned int dataoff,
819                       enum ip_conntrack_info ctinfo,
820                       u_int8_t pf,
821                       unsigned int hooknum,
822                       unsigned int *timeouts)
823 {
824         struct net *net = nf_ct_net(ct);
825         struct nf_tcp_net *tn = tcp_pernet(net);
826         struct nf_conntrack_tuple *tuple;
827         enum tcp_conntrack new_state, old_state;
828         enum ip_conntrack_dir dir;
829         const struct tcphdr *th;
830         struct tcphdr _tcph;
831         unsigned long timeout;
832         unsigned int index;
833
834         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
835         BUG_ON(th == NULL);
836
837         spin_lock_bh(&ct->lock);
838         old_state = ct->proto.tcp.state;
839         dir = CTINFO2DIR(ctinfo);
840         index = get_conntrack_index(th);
841         new_state = tcp_conntracks[dir][index][old_state];
842         tuple = &ct->tuplehash[dir].tuple;
843
844         switch (new_state) {
845         case TCP_CONNTRACK_SYN_SENT:
846                 if (old_state < TCP_CONNTRACK_TIME_WAIT)
847                         break;
848                 /* RFC 1122: "When a connection is closed actively,
849                  * it MUST linger in TIME-WAIT state for a time 2xMSL
850                  * (Maximum Segment Lifetime). However, it MAY accept
851                  * a new SYN from the remote TCP to reopen the connection
852                  * directly from TIME-WAIT state, if..."
853                  * We ignore the conditions because we are in the
854                  * TIME-WAIT state anyway.
855                  *
856                  * Handle aborted connections: we and the server
857                  * think there is an existing connection but the client
858                  * aborts it and starts a new one.
859                  */
860                 if (((ct->proto.tcp.seen[dir].flags
861                       | ct->proto.tcp.seen[!dir].flags)
862                      & IP_CT_TCP_FLAG_CLOSE_INIT)
863                     || (ct->proto.tcp.last_dir == dir
864                         && ct->proto.tcp.last_index == TCP_RST_SET)) {
865                         /* Attempt to reopen a closed/aborted connection.
866                          * Delete this connection and look up again. */
867                         spin_unlock_bh(&ct->lock);
868
869                         /* Only repeat if we can actually remove the timer.
870                          * Destruction may already be in progress in process
871                          * context and we must give it a chance to terminate.
872                          */
873                         if (nf_ct_kill(ct))
874                                 return -NF_REPEAT;
875                         return NF_DROP;
876                 }
877                 /* Fall through */
878         case TCP_CONNTRACK_IGNORE:
879                 /* Ignored packets:
880                  *
881                  * Our connection entry may be out of sync, so ignore
882                  * packets which may signal the real connection between
883                  * the client and the server.
884                  *
885                  * a) SYN in ORIGINAL
886                  * b) SYN/ACK in REPLY
887                  * c) ACK in reply direction after initial SYN in original.
888                  *
889                  * If the ignored packet is invalid, the receiver will send
890                  * a RST we'll catch below.
891                  */
892                 if (index == TCP_SYNACK_SET
893                     && ct->proto.tcp.last_index == TCP_SYN_SET
894                     && ct->proto.tcp.last_dir != dir
895                     && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
896                         /* b) This SYN/ACK acknowledges a SYN that we earlier
897                          * ignored as invalid. This means that the client and
898                          * the server are both in sync, while the firewall is
899                          * not. We get in sync from the previously annotated
900                          * values.
901                          */
902                         old_state = TCP_CONNTRACK_SYN_SENT;
903                         new_state = TCP_CONNTRACK_SYN_RECV;
904                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
905                                 ct->proto.tcp.last_end;
906                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
907                                 ct->proto.tcp.last_end;
908                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
909                                 ct->proto.tcp.last_win == 0 ?
910                                         1 : ct->proto.tcp.last_win;
911                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
912                                 ct->proto.tcp.last_wscale;
913                         ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
914                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
915                                 ct->proto.tcp.last_flags;
916                         memset(&ct->proto.tcp.seen[dir], 0,
917                                sizeof(struct ip_ct_tcp_state));
918                         break;
919                 }
920                 ct->proto.tcp.last_index = index;
921                 ct->proto.tcp.last_dir = dir;
922                 ct->proto.tcp.last_seq = ntohl(th->seq);
923                 ct->proto.tcp.last_end =
924                     segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
925                 ct->proto.tcp.last_win = ntohs(th->window);
926
927                 /* a) This is a SYN in ORIGINAL. The client and the server
928                  * may be in sync but we are not. In that case, we annotate
929                  * the TCP options and let the packet go through. If it is a
930                  * valid SYN packet, the server will reply with a SYN/ACK, and
931                  * then we'll get in sync. Otherwise, the server potentially
932                  * responds with a challenge ACK if implementing RFC5961.
933                  */
934                 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
935                         struct ip_ct_tcp_state seen = {};
936
937                         ct->proto.tcp.last_flags =
938                         ct->proto.tcp.last_wscale = 0;
939                         tcp_options(skb, dataoff, th, &seen);
940                         if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
941                                 ct->proto.tcp.last_flags |=
942                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
943                                 ct->proto.tcp.last_wscale = seen.td_scale;
944                         }
945                         if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
946                                 ct->proto.tcp.last_flags |=
947                                         IP_CT_TCP_FLAG_SACK_PERM;
948                         }
949                         /* Mark the potential for RFC5961 challenge ACK,
950                          * this pose a special problem for LAST_ACK state
951                          * as ACK is intrepretated as ACKing last FIN.
952                          */
953                         if (old_state == TCP_CONNTRACK_LAST_ACK)
954                                 ct->proto.tcp.last_flags |=
955                                         IP_CT_EXP_CHALLENGE_ACK;
956                 }
957                 spin_unlock_bh(&ct->lock);
958                 if (LOG_INVALID(net, IPPROTO_TCP))
959                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
960                                   "nf_ct_tcp: invalid packet ignored in "
961                                   "state %s ", tcp_conntrack_names[old_state]);
962                 return NF_ACCEPT;
963         case TCP_CONNTRACK_MAX:
964                 /* Special case for SYN proxy: when the SYN to the server or
965                  * the SYN/ACK from the server is lost, the client may transmit
966                  * a keep-alive packet while in SYN_SENT state. This needs to
967                  * be associated with the original conntrack entry in order to
968                  * generate a new SYN with the correct sequence number.
969                  */
970                 if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
971                     index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
972                     ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
973                     ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
974                         pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
975                         spin_unlock_bh(&ct->lock);
976                         return NF_ACCEPT;
977                 }
978
979                 /* Invalid packet */
980                 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
981                          dir, get_conntrack_index(th), old_state);
982                 spin_unlock_bh(&ct->lock);
983                 if (LOG_INVALID(net, IPPROTO_TCP))
984                         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
985                                   "nf_ct_tcp: invalid state ");
986                 return -NF_ACCEPT;
987         case TCP_CONNTRACK_TIME_WAIT:
988                 /* RFC5961 compliance cause stack to send "challenge-ACK"
989                  * e.g. in response to spurious SYNs.  Conntrack MUST
990                  * not believe this ACK is acking last FIN.
991                  */
992                 if (old_state == TCP_CONNTRACK_LAST_ACK &&
993                     index == TCP_ACK_SET &&
994                     ct->proto.tcp.last_dir != dir &&
995                     ct->proto.tcp.last_index == TCP_SYN_SET &&
996                     (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
997                         /* Detected RFC5961 challenge ACK */
998                         ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
999                         spin_unlock_bh(&ct->lock);
1000                         if (LOG_INVALID(net, IPPROTO_TCP))
1001                                 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
1002                                       "nf_ct_tcp: challenge-ACK ignored ");
1003                         return NF_ACCEPT; /* Don't change state */
1004                 }
1005                 break;
1006         case TCP_CONNTRACK_CLOSE:
1007                 if (index == TCP_RST_SET
1008                     && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
1009                     && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
1010                         /* Invalid RST  */
1011                         spin_unlock_bh(&ct->lock);
1012                         if (LOG_INVALID(net, IPPROTO_TCP))
1013                                 nf_log_packet(net, pf, 0, skb, NULL, NULL,
1014                                               NULL, "nf_ct_tcp: invalid RST ");
1015                         return -NF_ACCEPT;
1016                 }
1017                 if (index == TCP_RST_SET
1018                     && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
1019                          && ct->proto.tcp.last_index == TCP_SYN_SET)
1020                         || (!test_bit(IPS_ASSURED_BIT, &ct->status)
1021                             && ct->proto.tcp.last_index == TCP_ACK_SET))
1022                     && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
1023                         /* RST sent to invalid SYN or ACK we had let through
1024                          * at a) and c) above:
1025                          *
1026                          * a) SYN was in window then
1027                          * c) we hold a half-open connection.
1028                          *
1029                          * Delete our connection entry.
1030                          * We skip window checking, because packet might ACK
1031                          * segments we ignored. */
1032                         goto in_window;
1033                 }
1034                 /* Just fall through */
1035         default:
1036                 /* Keep compilers happy. */
1037                 break;
1038         }
1039
1040         if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1041                            skb, dataoff, th, pf)) {
1042                 spin_unlock_bh(&ct->lock);
1043                 return -NF_ACCEPT;
1044         }
1045      in_window:
1046         /* From now on we have got in-window packets */
1047         ct->proto.tcp.last_index = index;
1048         ct->proto.tcp.last_dir = dir;
1049
1050         pr_debug("tcp_conntracks: ");
1051         nf_ct_dump_tuple(tuple);
1052         pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1053                  (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1054                  (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1055                  old_state, new_state);
1056
1057         ct->proto.tcp.state = new_state;
1058         if (old_state != new_state
1059             && new_state == TCP_CONNTRACK_FIN_WAIT)
1060                 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1061
1062         if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1063             timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1064                 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1065         else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1066                  IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1067                  timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1068                 timeout = timeouts[TCP_CONNTRACK_UNACK];
1069         else
1070                 timeout = timeouts[new_state];
1071         spin_unlock_bh(&ct->lock);
1072
1073         if (new_state != old_state)
1074                 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1075
1076         if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1077                 /* If only reply is a RST, we can consider ourselves not to
1078                    have an established connection: this is a fairly common
1079                    problem case, so we can delete the conntrack
1080                    immediately.  --RR */
1081                 if (th->rst) {
1082                         nf_ct_kill_acct(ct, ctinfo, skb);
1083                         return NF_ACCEPT;
1084                 }
1085                 /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1086                  * pickup with loose=1. Avoid large ESTABLISHED timeout.
1087                  */
1088                 if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1089                     timeout > timeouts[TCP_CONNTRACK_UNACK])
1090                         timeout = timeouts[TCP_CONNTRACK_UNACK];
1091         } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1092                    && (old_state == TCP_CONNTRACK_SYN_RECV
1093                        || old_state == TCP_CONNTRACK_ESTABLISHED)
1094                    && new_state == TCP_CONNTRACK_ESTABLISHED) {
1095                 /* Set ASSURED if we see see valid ack in ESTABLISHED
1096                    after SYN_RECV or a valid answer for a picked up
1097                    connection. */
1098                 set_bit(IPS_ASSURED_BIT, &ct->status);
1099                 nf_conntrack_event_cache(IPCT_ASSURED, ct);
1100         }
1101         nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1102
1103         return NF_ACCEPT;
1104 }
1105
1106 /* Called when a new connection for this protocol found. */
1107 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1108                     unsigned int dataoff, unsigned int *timeouts)
1109 {
1110         enum tcp_conntrack new_state;
1111         const struct tcphdr *th;
1112         struct tcphdr _tcph;
1113         struct net *net = nf_ct_net(ct);
1114         struct nf_tcp_net *tn = tcp_pernet(net);
1115         const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1116         const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1117
1118         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1119         BUG_ON(th == NULL);
1120
1121         /* Don't need lock here: this conntrack not in circulation yet */
1122         new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1123
1124         /* Invalid: delete conntrack */
1125         if (new_state >= TCP_CONNTRACK_MAX) {
1126                 pr_debug("nf_ct_tcp: invalid new deleting.\n");
1127                 return false;
1128         }
1129
1130         if (new_state == TCP_CONNTRACK_SYN_SENT) {
1131                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1132                 /* SYN packet */
1133                 ct->proto.tcp.seen[0].td_end =
1134                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1135                                              dataoff, th);
1136                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1137                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1138                         ct->proto.tcp.seen[0].td_maxwin = 1;
1139                 ct->proto.tcp.seen[0].td_maxend =
1140                         ct->proto.tcp.seen[0].td_end;
1141
1142                 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1143         } else if (tn->tcp_loose == 0) {
1144                 /* Don't try to pick up connections. */
1145                 return false;
1146         } else {
1147                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1148                 /*
1149                  * We are in the middle of a connection,
1150                  * its history is lost for us.
1151                  * Let's try to use the data from the packet.
1152                  */
1153                 ct->proto.tcp.seen[0].td_end =
1154                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1155                                              dataoff, th);
1156                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1157                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1158                         ct->proto.tcp.seen[0].td_maxwin = 1;
1159                 ct->proto.tcp.seen[0].td_maxend =
1160                         ct->proto.tcp.seen[0].td_end +
1161                         ct->proto.tcp.seen[0].td_maxwin;
1162
1163                 /* We assume SACK and liberal window checking to handle
1164                  * window scaling */
1165                 ct->proto.tcp.seen[0].flags =
1166                 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1167                                               IP_CT_TCP_FLAG_BE_LIBERAL;
1168         }
1169
1170         /* tcp_packet will set them */
1171         ct->proto.tcp.last_index = TCP_NONE_SET;
1172
1173         pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1174                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1175                  sender->td_end, sender->td_maxend, sender->td_maxwin,
1176                  sender->td_scale,
1177                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1178                  receiver->td_scale);
1179         return true;
1180 }
1181
1182 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1183
1184 #include <linux/netfilter/nfnetlink.h>
1185 #include <linux/netfilter/nfnetlink_conntrack.h>
1186
1187 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1188                          struct nf_conn *ct)
1189 {
1190         struct nlattr *nest_parms;
1191         struct nf_ct_tcp_flags tmp = {};
1192
1193         spin_lock_bh(&ct->lock);
1194         nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1195         if (!nest_parms)
1196                 goto nla_put_failure;
1197
1198         if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1199             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1200                        ct->proto.tcp.seen[0].td_scale) ||
1201             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1202                        ct->proto.tcp.seen[1].td_scale))
1203                 goto nla_put_failure;
1204
1205         tmp.flags = ct->proto.tcp.seen[0].flags;
1206         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1207                     sizeof(struct nf_ct_tcp_flags), &tmp))
1208                 goto nla_put_failure;
1209
1210         tmp.flags = ct->proto.tcp.seen[1].flags;
1211         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1212                     sizeof(struct nf_ct_tcp_flags), &tmp))
1213                 goto nla_put_failure;
1214         spin_unlock_bh(&ct->lock);
1215
1216         nla_nest_end(skb, nest_parms);
1217
1218         return 0;
1219
1220 nla_put_failure:
1221         spin_unlock_bh(&ct->lock);
1222         return -1;
1223 }
1224
1225 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1226         [CTA_PROTOINFO_TCP_STATE]           = { .type = NLA_U8 },
1227         [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1228         [CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1229         [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1230         [CTA_PROTOINFO_TCP_FLAGS_REPLY]     = { .len =  sizeof(struct nf_ct_tcp_flags) },
1231 };
1232
1233 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1234 {
1235         struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1236         struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1237         int err;
1238
1239         /* updates could not contain anything about the private
1240          * protocol info, in that case skip the parsing */
1241         if (!pattr)
1242                 return 0;
1243
1244         err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1245         if (err < 0)
1246                 return err;
1247
1248         if (tb[CTA_PROTOINFO_TCP_STATE] &&
1249             nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1250                 return -EINVAL;
1251
1252         spin_lock_bh(&ct->lock);
1253         if (tb[CTA_PROTOINFO_TCP_STATE])
1254                 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1255
1256         if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1257                 struct nf_ct_tcp_flags *attr =
1258                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1259                 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1260                 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1261         }
1262
1263         if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1264                 struct nf_ct_tcp_flags *attr =
1265                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1266                 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1267                 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1268         }
1269
1270         if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1271             tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1272             ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1273             ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1274                 ct->proto.tcp.seen[0].td_scale =
1275                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1276                 ct->proto.tcp.seen[1].td_scale =
1277                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1278         }
1279         spin_unlock_bh(&ct->lock);
1280
1281         return 0;
1282 }
1283
1284 static int tcp_nlattr_size(void)
1285 {
1286         return nla_total_size(0)           /* CTA_PROTOINFO_TCP */
1287                 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1288 }
1289
1290 static int tcp_nlattr_tuple_size(void)
1291 {
1292         return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1293 }
1294 #endif
1295
1296 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1297
1298 #include <linux/netfilter/nfnetlink.h>
1299 #include <linux/netfilter/nfnetlink_cttimeout.h>
1300
1301 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1302                                      struct net *net, void *data)
1303 {
1304         unsigned int *timeouts = data;
1305         struct nf_tcp_net *tn = tcp_pernet(net);
1306         int i;
1307
1308         /* set default TCP timeouts. */
1309         for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1310                 timeouts[i] = tn->timeouts[i];
1311
1312         if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1313                 timeouts[TCP_CONNTRACK_SYN_SENT] =
1314                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1315         }
1316         if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1317                 timeouts[TCP_CONNTRACK_SYN_RECV] =
1318                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1319         }
1320         if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1321                 timeouts[TCP_CONNTRACK_ESTABLISHED] =
1322                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1323         }
1324         if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1325                 timeouts[TCP_CONNTRACK_FIN_WAIT] =
1326                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1327         }
1328         if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1329                 timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1330                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1331         }
1332         if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1333                 timeouts[TCP_CONNTRACK_LAST_ACK] =
1334                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1335         }
1336         if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1337                 timeouts[TCP_CONNTRACK_TIME_WAIT] =
1338                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1339         }
1340         if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1341                 timeouts[TCP_CONNTRACK_CLOSE] =
1342                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1343         }
1344         if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1345                 timeouts[TCP_CONNTRACK_SYN_SENT2] =
1346                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1347         }
1348         if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1349                 timeouts[TCP_CONNTRACK_RETRANS] =
1350                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1351         }
1352         if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1353                 timeouts[TCP_CONNTRACK_UNACK] =
1354                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1355         }
1356         return 0;
1357 }
1358
1359 static int
1360 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1361 {
1362         const unsigned int *timeouts = data;
1363
1364         if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1365                         htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1366             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1367                          htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1368             nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1369                          htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1370             nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1371                          htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1372             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1373                          htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1374             nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1375                          htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1376             nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1377                          htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1378             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1379                          htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1380             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1381                          htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1382             nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1383                          htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1384             nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1385                          htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1386                 goto nla_put_failure;
1387         return 0;
1388
1389 nla_put_failure:
1390         return -ENOSPC;
1391 }
1392
1393 static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1394         [CTA_TIMEOUT_TCP_SYN_SENT]      = { .type = NLA_U32 },
1395         [CTA_TIMEOUT_TCP_SYN_RECV]      = { .type = NLA_U32 },
1396         [CTA_TIMEOUT_TCP_ESTABLISHED]   = { .type = NLA_U32 },
1397         [CTA_TIMEOUT_TCP_FIN_WAIT]      = { .type = NLA_U32 },
1398         [CTA_TIMEOUT_TCP_CLOSE_WAIT]    = { .type = NLA_U32 },
1399         [CTA_TIMEOUT_TCP_LAST_ACK]      = { .type = NLA_U32 },
1400         [CTA_TIMEOUT_TCP_TIME_WAIT]     = { .type = NLA_U32 },
1401         [CTA_TIMEOUT_TCP_CLOSE]         = { .type = NLA_U32 },
1402         [CTA_TIMEOUT_TCP_SYN_SENT2]     = { .type = NLA_U32 },
1403         [CTA_TIMEOUT_TCP_RETRANS]       = { .type = NLA_U32 },
1404         [CTA_TIMEOUT_TCP_UNACK]         = { .type = NLA_U32 },
1405 };
1406 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1407
1408 #ifdef CONFIG_SYSCTL
1409 static struct ctl_table tcp_sysctl_table[] = {
1410         {
1411                 .procname       = "nf_conntrack_tcp_timeout_syn_sent",
1412                 .maxlen         = sizeof(unsigned int),
1413                 .mode           = 0644,
1414                 .proc_handler   = proc_dointvec_jiffies,
1415         },
1416         {
1417                 .procname       = "nf_conntrack_tcp_timeout_syn_recv",
1418                 .maxlen         = sizeof(unsigned int),
1419                 .mode           = 0644,
1420                 .proc_handler   = proc_dointvec_jiffies,
1421         },
1422         {
1423                 .procname       = "nf_conntrack_tcp_timeout_established",
1424                 .maxlen         = sizeof(unsigned int),
1425                 .mode           = 0644,
1426                 .proc_handler   = proc_dointvec_jiffies,
1427         },
1428         {
1429                 .procname       = "nf_conntrack_tcp_timeout_fin_wait",
1430                 .maxlen         = sizeof(unsigned int),
1431                 .mode           = 0644,
1432                 .proc_handler   = proc_dointvec_jiffies,
1433         },
1434         {
1435                 .procname       = "nf_conntrack_tcp_timeout_close_wait",
1436                 .maxlen         = sizeof(unsigned int),
1437                 .mode           = 0644,
1438                 .proc_handler   = proc_dointvec_jiffies,
1439         },
1440         {
1441                 .procname       = "nf_conntrack_tcp_timeout_last_ack",
1442                 .maxlen         = sizeof(unsigned int),
1443                 .mode           = 0644,
1444                 .proc_handler   = proc_dointvec_jiffies,
1445         },
1446         {
1447                 .procname       = "nf_conntrack_tcp_timeout_time_wait",
1448                 .maxlen         = sizeof(unsigned int),
1449                 .mode           = 0644,
1450                 .proc_handler   = proc_dointvec_jiffies,
1451         },
1452         {
1453                 .procname       = "nf_conntrack_tcp_timeout_close",
1454                 .maxlen         = sizeof(unsigned int),
1455                 .mode           = 0644,
1456                 .proc_handler   = proc_dointvec_jiffies,
1457         },
1458         {
1459                 .procname       = "nf_conntrack_tcp_timeout_max_retrans",
1460                 .maxlen         = sizeof(unsigned int),
1461                 .mode           = 0644,
1462                 .proc_handler   = proc_dointvec_jiffies,
1463         },
1464         {
1465                 .procname       = "nf_conntrack_tcp_timeout_unacknowledged",
1466                 .maxlen         = sizeof(unsigned int),
1467                 .mode           = 0644,
1468                 .proc_handler   = proc_dointvec_jiffies,
1469         },
1470         {
1471                 .procname       = "nf_conntrack_tcp_loose",
1472                 .maxlen         = sizeof(unsigned int),
1473                 .mode           = 0644,
1474                 .proc_handler   = proc_dointvec,
1475         },
1476         {
1477                 .procname       = "nf_conntrack_tcp_be_liberal",
1478                 .maxlen         = sizeof(unsigned int),
1479                 .mode           = 0644,
1480                 .proc_handler   = proc_dointvec,
1481         },
1482         {
1483                 .procname       = "nf_conntrack_tcp_max_retrans",
1484                 .maxlen         = sizeof(unsigned int),
1485                 .mode           = 0644,
1486                 .proc_handler   = proc_dointvec,
1487         },
1488         { }
1489 };
1490
1491 #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1492 static struct ctl_table tcp_compat_sysctl_table[] = {
1493         {
1494                 .procname       = "ip_conntrack_tcp_timeout_syn_sent",
1495                 .maxlen         = sizeof(unsigned int),
1496                 .mode           = 0644,
1497                 .proc_handler   = proc_dointvec_jiffies,
1498         },
1499         {
1500                 .procname       = "ip_conntrack_tcp_timeout_syn_sent2",
1501                 .maxlen         = sizeof(unsigned int),
1502                 .mode           = 0644,
1503                 .proc_handler   = proc_dointvec_jiffies,
1504         },
1505         {
1506                 .procname       = "ip_conntrack_tcp_timeout_syn_recv",
1507                 .maxlen         = sizeof(unsigned int),
1508                 .mode           = 0644,
1509                 .proc_handler   = proc_dointvec_jiffies,
1510         },
1511         {
1512                 .procname       = "ip_conntrack_tcp_timeout_established",
1513                 .maxlen         = sizeof(unsigned int),
1514                 .mode           = 0644,
1515                 .proc_handler   = proc_dointvec_jiffies,
1516         },
1517         {
1518                 .procname       = "ip_conntrack_tcp_timeout_fin_wait",
1519                 .maxlen         = sizeof(unsigned int),
1520                 .mode           = 0644,
1521                 .proc_handler   = proc_dointvec_jiffies,
1522         },
1523         {
1524                 .procname       = "ip_conntrack_tcp_timeout_close_wait",
1525                 .maxlen         = sizeof(unsigned int),
1526                 .mode           = 0644,
1527                 .proc_handler   = proc_dointvec_jiffies,
1528         },
1529         {
1530                 .procname       = "ip_conntrack_tcp_timeout_last_ack",
1531                 .maxlen         = sizeof(unsigned int),
1532                 .mode           = 0644,
1533                 .proc_handler   = proc_dointvec_jiffies,
1534         },
1535         {
1536                 .procname       = "ip_conntrack_tcp_timeout_time_wait",
1537                 .maxlen         = sizeof(unsigned int),
1538                 .mode           = 0644,
1539                 .proc_handler   = proc_dointvec_jiffies,
1540         },
1541         {
1542                 .procname       = "ip_conntrack_tcp_timeout_close",
1543                 .maxlen         = sizeof(unsigned int),
1544                 .mode           = 0644,
1545                 .proc_handler   = proc_dointvec_jiffies,
1546         },
1547         {
1548                 .procname       = "ip_conntrack_tcp_timeout_max_retrans",
1549                 .maxlen         = sizeof(unsigned int),
1550                 .mode           = 0644,
1551                 .proc_handler   = proc_dointvec_jiffies,
1552         },
1553         {
1554                 .procname       = "ip_conntrack_tcp_loose",
1555                 .maxlen         = sizeof(unsigned int),
1556                 .mode           = 0644,
1557                 .proc_handler   = proc_dointvec,
1558         },
1559         {
1560                 .procname       = "ip_conntrack_tcp_be_liberal",
1561                 .maxlen         = sizeof(unsigned int),
1562                 .mode           = 0644,
1563                 .proc_handler   = proc_dointvec,
1564         },
1565         {
1566                 .procname       = "ip_conntrack_tcp_max_retrans",
1567                 .maxlen         = sizeof(unsigned int),
1568                 .mode           = 0644,
1569                 .proc_handler   = proc_dointvec,
1570         },
1571         { }
1572 };
1573 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1574 #endif /* CONFIG_SYSCTL */
1575
/*
 * Give @pn a private copy of tcp_sysctl_table whose entries point at the
 * values stored in @tn.
 *
 * Nothing is done when @pn already has a table, or when CONFIG_SYSCTL is
 * not set.
 *
 * Returns 0 on success, -ENOMEM if the copy cannot be allocated.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *table;

	if (pn->ctl_table)
		return 0;

	table = kmemdup(tcp_sysctl_table, sizeof(tcp_sysctl_table),
			GFP_KERNEL);
	pn->ctl_table = table;
	if (!table)
		return -ENOMEM;

	/* Positions mirror the entry order of tcp_sysctl_table. */
	table[0].data  = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	table[1].data  = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	table[2].data  = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[3].data  = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[4].data  = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[5].data  = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	table[6].data  = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[7].data  = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	table[8].data  = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	table[9].data  = &tn->timeouts[TCP_CONNTRACK_UNACK];
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1605
/*
 * Give @pn a private copy of tcp_compat_sysctl_table whose entries point
 * at the values stored in @tn.
 *
 * Compiles to a plain "return 0" unless both CONFIG_SYSCTL and
 * CONFIG_NF_CONNTRACK_PROC_COMPAT are set.
 *
 * Returns 0 on success, -ENOMEM if the copy cannot be allocated.
 */
static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
					   struct nf_tcp_net *tn)
{
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
	struct ctl_table *table;

	table = kmemdup(tcp_compat_sysctl_table,
			sizeof(tcp_compat_sysctl_table), GFP_KERNEL);
	pn->ctl_compat_table = table;
	if (!table)
		return -ENOMEM;

	/* Positions mirror the entry order of tcp_compat_sysctl_table. */
	table[0].data  = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	table[1].data  = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2];
	table[2].data  = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	table[3].data  = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[4].data  = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[5].data  = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[6].data  = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	table[7].data  = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[8].data  = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	table[9].data  = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1634
1635 static int tcp_init_net(struct net *net, u_int16_t proto)
1636 {
1637         int ret;
1638         struct nf_tcp_net *tn = tcp_pernet(net);
1639         struct nf_proto_net *pn = &tn->pn;
1640
1641         if (!pn->users) {
1642                 int i;
1643
1644                 for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1645                         tn->timeouts[i] = tcp_timeouts[i];
1646
1647                 tn->tcp_loose = nf_ct_tcp_loose;
1648                 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1649                 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1650         }
1651
1652         if (proto == AF_INET) {
1653                 ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
1654                 if (ret < 0)
1655                         return ret;
1656
1657                 ret = tcp_kmemdup_sysctl_table(pn, tn);
1658                 if (ret < 0)
1659                         nf_ct_kfree_compat_sysctl_table(pn);
1660         } else
1661                 ret = tcp_kmemdup_sysctl_table(pn, tn);
1662
1663         return ret;
1664 }
1665
1666 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1667 {
1668         return &net->ct.nf_ct_proto.tcp.pn;
1669 }
1670
1671 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1672 {
1673         .l3proto                = PF_INET,
1674         .l4proto                = IPPROTO_TCP,
1675         .name                   = "tcp",
1676         .pkt_to_tuple           = tcp_pkt_to_tuple,
1677         .invert_tuple           = tcp_invert_tuple,
1678         .print_tuple            = tcp_print_tuple,
1679         .print_conntrack        = tcp_print_conntrack,
1680         .packet                 = tcp_packet,
1681         .get_timeouts           = tcp_get_timeouts,
1682         .new                    = tcp_new,
1683         .error                  = tcp_error,
1684 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1685         .to_nlattr              = tcp_to_nlattr,
1686         .nlattr_size            = tcp_nlattr_size,
1687         .from_nlattr            = nlattr_to_tcp,
1688         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1689         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1690         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1691         .nla_policy             = nf_ct_port_nla_policy,
1692 #endif
1693 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1694         .ctnl_timeout           = {
1695                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1696                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1697                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1698                 .obj_size       = sizeof(unsigned int) *
1699                                         TCP_CONNTRACK_TIMEOUT_MAX,
1700                 .nla_policy     = tcp_timeout_nla_policy,
1701         },
1702 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1703         .init_net               = tcp_init_net,
1704         .get_net_proto          = tcp_get_net_proto,
1705 };
1706 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1707
1708 struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1709 {
1710         .l3proto                = PF_INET6,
1711         .l4proto                = IPPROTO_TCP,
1712         .name                   = "tcp",
1713         .pkt_to_tuple           = tcp_pkt_to_tuple,
1714         .invert_tuple           = tcp_invert_tuple,
1715         .print_tuple            = tcp_print_tuple,
1716         .print_conntrack        = tcp_print_conntrack,
1717         .packet                 = tcp_packet,
1718         .get_timeouts           = tcp_get_timeouts,
1719         .new                    = tcp_new,
1720         .error                  = tcp_error,
1721 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1722         .to_nlattr              = tcp_to_nlattr,
1723         .nlattr_size            = tcp_nlattr_size,
1724         .from_nlattr            = nlattr_to_tcp,
1725         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1726         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1727         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1728         .nla_policy             = nf_ct_port_nla_policy,
1729 #endif
1730 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1731         .ctnl_timeout           = {
1732                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1733                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1734                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1735                 .obj_size       = sizeof(unsigned int) *
1736                                         TCP_CONNTRACK_TIMEOUT_MAX,
1737                 .nla_policy     = tcp_timeout_nla_policy,
1738         },
1739 #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1740         .init_net               = tcp_init_net,
1741         .get_net_proto          = tcp_get_net_proto,
1742 };
1743 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);