/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   connection tracking module. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
 *	- new API and handling of conntrack/nat helpers
 *	- now capable of multiple expectations for one master
 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
 *	- add usage/reference counts to ip_conntrack_expect
 *	- export ip_conntrack[_expect]_{find_get,put} functions
 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
 *	- generalize L3 protocol dependent part.
 * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
 *	- add support for various sizes of conntrack structures.
 * 26 Jan 2006: Harald Welte <laforge@netfilter.org>
 *	- restructure nf_conn (introduce nf_conn_help)
 *	- redesign 'features' as they were originally intended
 * 26 Feb 2006: Pablo Neira Ayuso <pablo@eurodev.net>
 *	- add support for L3 protocol module load on demand.
 *
 * Derived from net/ipv4/netfilter/ip_conntrack_core.c
 */
#include <linux/config.h>
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/socket.h>
/* This rwlock protects the main hash table, protocol/helper/expected
   registrations, conntrack timers */
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_protocol.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>

#define NF_CONNTRACK_VERSION	"0.5.0"

#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
DEFINE_RWLOCK(nf_conntrack_lock);

/* nf_conntrack_standalone needs this */
atomic_t nf_conntrack_count = ATOMIC_INIT(0);

void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
LIST_HEAD(nf_conntrack_expect_list);
struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX];
static LIST_HEAD(helpers);
unsigned int nf_conntrack_htable_size = 0;

struct list_head *nf_conntrack_hash;
static kmem_cache_t *nf_conntrack_expect_cachep;
struct nf_conn nf_conntrack_untracked;
unsigned int nf_ct_log_invalid;
static LIST_HEAD(unconfirmed);
static int nf_conntrack_vmalloc;

static unsigned int nf_conntrack_next_id;
static unsigned int nf_conntrack_expect_next_id;

#ifdef CONFIG_NF_CONNTRACK_EVENTS
ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
ATOMIC_NOTIFIER_HEAD(nf_conntrack_expect_chain);

DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
/* deliver cached events and clear cache entry - must be called with locally
 * disabled softirqs */
static inline void
__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
{
	DEBUGP("ecache: delivering events for %p\n", ecache->ct);
	if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
	    && ecache->events)
		atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events,
					   ecache->ct);

	ecache->events = 0;
	nf_ct_put(ecache->ct);
	ecache->ct = NULL;
}

/* Deliver all cached events for a particular conntrack. This is called
 * by code prior to async packet handling for freeing the skb */
void nf_ct_deliver_cached_events(const struct nf_conn *ct)
{
	struct nf_conntrack_ecache *ecache;

	local_bh_disable();
	ecache = &__get_cpu_var(nf_conntrack_ecache);
	if (ecache->ct == ct)
		__nf_ct_deliver_cached_events(ecache);
	local_bh_enable();
}

/* Deliver cached events for old pending events, if current conntrack != old */
void __nf_ct_event_cache_init(struct nf_conn *ct)
{
	struct nf_conntrack_ecache *ecache;

	/* take care of delivering potentially old events */
	ecache = &__get_cpu_var(nf_conntrack_ecache);
	BUG_ON(ecache->ct == ct);
	if (ecache->ct)
		__nf_ct_deliver_cached_events(ecache);
	/* initialize for this conntrack/packet */
	ecache->ct = ct;
	nf_conntrack_get(&ct->ct_general);
}

/* flush the event cache - touches other CPU's data and must not be called
 * while packets are still passing through the code */
static void nf_ct_event_cache_flush(void)
{
	struct nf_conntrack_ecache *ecache;
	int cpu;

	for_each_possible_cpu(cpu) {
		ecache = &per_cpu(nf_conntrack_ecache, cpu);
		if (ecache->ct)
			nf_ct_put(ecache->ct);
	}
}
#else
static inline void nf_ct_event_cache_flush(void) {}
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
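
/*
 * Illustrative sketch (not part of the original file): protocol and helper
 * code typically records state changes with nf_conntrack_event_cache()
 * while a packet is being processed, and the per-cpu cache above delivers
 * them to the notifier chain in one batch once the packet is done.
 * Assuming a conntrack `ct` already attached to `skb`:
 *
 *	nf_conntrack_event_cache(IPCT_PROTOINFO, skb);	// cached, no notify yet
 *	...
 *	nf_ct_deliver_cached_events(ct);	// flush to nf_conntrack_chain
 */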
DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);

/*
 * This scheme offers various sizes of "struct nf_conn" depending on
 * features (helper, nat, ...)
 */

#define NF_CT_FEATURES_NAMELEN	256
static struct {
	/* name of slab cache. printed in /proc/slabinfo */
	char *name;

	/* size of slab cache */
	size_t size;

	/* slab cache pointer */
	kmem_cache_t *cachep;

	/* allocated slab cache + modules which use this slab cache */
	int use;

	/* Initialization */
	int (*init_conntrack)(struct nf_conn *, u_int32_t);

} nf_ct_cache[NF_CT_F_NUM];
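
/*
 * Illustrative sketch (not part of the original file): the `features`
 * bitmask doubles as the index into nf_ct_cache[], so each feature
 * combination gets its own slab with just enough extra room appended
 * after struct nf_conn.  A helper-enabled conntrack, for instance, comes
 * from a cache registered roughly like this (mirroring the real call in
 * nf_conntrack_helper_register() below):
 *
 *	nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
 *				    sizeof(struct nf_conn)
 *				    + sizeof(struct nf_conn_help)
 *				    + __alignof__(struct nf_conn_help));
 *
 * while plain connections keep using the smaller NF_CT_F_BASIC cache.
 */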
/* protect members of nf_ct_cache except "use" */
DEFINE_RWLOCK(nf_ct_cache_lock);

/* This avoids calling kmem_cache_create() with same name simultaneously */
static DEFINE_MUTEX(nf_ct_cache_mutex);

extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
struct nf_conntrack_protocol *
__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol)
{
	if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
		return &nf_conntrack_generic_protocol;

	return nf_ct_protos[l3proto][protocol];
}

/* this is guaranteed to always return a valid protocol helper, since
 * it falls back to generic_protocol */
struct nf_conntrack_protocol *
nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol)
{
	struct nf_conntrack_protocol *p;

	preempt_disable();
	p = __nf_ct_proto_find(l3proto, protocol);
	if (!try_module_get(p->me))
		p = &nf_conntrack_generic_protocol;
	preempt_enable();

	return p;
}

void nf_ct_proto_put(struct nf_conntrack_protocol *p)
{
	module_put(p->me);
}

struct nf_conntrack_l3proto *
nf_ct_l3proto_find_get(u_int16_t l3proto)
{
	struct nf_conntrack_l3proto *p;

	preempt_disable();
	p = __nf_ct_l3proto_find(l3proto);
	if (!try_module_get(p->me))
		p = &nf_conntrack_generic_l3proto;
	preempt_enable();

	return p;
}

void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
{
	module_put(p->me);
}

int
nf_ct_l3proto_try_module_get(unsigned short l3proto)
{
	int ret;
	struct nf_conntrack_l3proto *p;

retry:	p = nf_ct_l3proto_find_get(l3proto);
	if (p == &nf_conntrack_generic_l3proto) {
		ret = request_module("nf_conntrack-%d", l3proto);
		if (!ret)
			goto retry;

		return -EPROTOTYPE;
	}

	return 0;
}

void nf_ct_l3proto_module_put(unsigned short l3proto)
{
	struct nf_conntrack_l3proto *p;

	preempt_disable();
	p = __nf_ct_l3proto_find(l3proto);
	preempt_enable();

	module_put(p->me);
}

static int nf_conntrack_hash_rnd_initted;
static unsigned int nf_conntrack_hash_rnd;

static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
				  unsigned int size, unsigned int rnd)
{
	unsigned int a, b;

	a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all),
		  ((tuple->src.l3num) << 16) | tuple->dst.protonum);
	b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all),
		  (tuple->src.u.all << 16) | tuple->dst.u.all);

	return jhash_2words(a, b, rnd) % size;
}

static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
{
	return __hash_conntrack(tuple, nf_conntrack_htable_size,
				nf_conntrack_hash_rnd);
}
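
/*
 * Illustrative note (not part of the original file): both directions of a
 * connection hash independently.  For an IPv4 TCP packet
 * 192.168.0.1:1025 -> 10.0.0.1:80, the ORIGINAL tuple and the REPLY tuple
 * (10.0.0.1:80 -> 192.168.0.1:1025) each run through __hash_conntrack()
 * above, which is why one conntrack occupies two hash chains via
 * ct->tuplehash[IP_CT_DIR_ORIGINAL] and ct->tuplehash[IP_CT_DIR_REPLY].
 */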
int nf_conntrack_register_cache(u_int32_t features, const char *name,
				size_t size)
{
	int ret = 0;
	char *cache_name;
	kmem_cache_t *cachep;

	DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n",
	       features, name, size);

	if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) {
		DEBUGP("nf_conntrack_register_cache: invalid features.: 0x%x\n",
		       features);
		return -EINVAL;
	}

	mutex_lock(&nf_ct_cache_mutex);

	write_lock_bh(&nf_ct_cache_lock);
	/* e.g: multiple helpers are loaded */
	if (nf_ct_cache[features].use > 0) {
		DEBUGP("nf_conntrack_register_cache: already registered.\n");
		if ((!strncmp(nf_ct_cache[features].name, name,
			      NF_CT_FEATURES_NAMELEN))
		    && nf_ct_cache[features].size == size) {
			DEBUGP("nf_conntrack_register_cache: reusing.\n");
			nf_ct_cache[features].use++;
			ret = 0;
		} else
			ret = -EBUSY;

		write_unlock_bh(&nf_ct_cache_lock);
		mutex_unlock(&nf_ct_cache_mutex);
		return ret;
	}
	write_unlock_bh(&nf_ct_cache_lock);

	/*
	 * The memory space for name of slab cache must be alive until
	 * cache is destroyed.
	 */
	cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC);
	if (cache_name == NULL) {
		DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n");
		ret = -ENOMEM;
		goto out_up_mutex;
	}

	if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN)
						>= NF_CT_FEATURES_NAMELEN) {
		printk("nf_conntrack_register_cache: name too long\n");
		ret = -EINVAL;
		goto out_free_name;
	}

	cachep = kmem_cache_create(cache_name, size, 0, 0,
				   NULL, NULL);
	if (!cachep) {
		printk("nf_conntrack_register_cache: Can't create slab cache "
		       "for the features = 0x%x\n", features);
		ret = -ENOMEM;
		goto out_free_name;
	}

	write_lock_bh(&nf_ct_cache_lock);
	nf_ct_cache[features].use = 1;
	nf_ct_cache[features].size = size;
	nf_ct_cache[features].cachep = cachep;
	nf_ct_cache[features].name = cache_name;
	write_unlock_bh(&nf_ct_cache_lock);

	goto out_up_mutex;

out_free_name:
	kfree(cache_name);
out_up_mutex:
	mutex_unlock(&nf_ct_cache_mutex);
	return ret;
}
/* FIXME: At present, only nf_conntrack_cleanup() may call this function. */
void nf_conntrack_unregister_cache(u_int32_t features)
{
	kmem_cache_t *cachep;
	char *name;

	/*
	 * This assures that kmem_cache_create() isn't called before destroying
	 * slab cache.
	 */
	DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features);
	mutex_lock(&nf_ct_cache_mutex);

	write_lock_bh(&nf_ct_cache_lock);
	if (--nf_ct_cache[features].use > 0) {
		write_unlock_bh(&nf_ct_cache_lock);
		mutex_unlock(&nf_ct_cache_mutex);
		return;
	}
	cachep = nf_ct_cache[features].cachep;
	name = nf_ct_cache[features].name;
	nf_ct_cache[features].cachep = NULL;
	nf_ct_cache[features].name = NULL;
	nf_ct_cache[features].size = 0;
	write_unlock_bh(&nf_ct_cache_lock);

	synchronize_net();

	kmem_cache_destroy(cachep);
	kfree(name);

	mutex_unlock(&nf_ct_cache_mutex);
}

int
nf_ct_get_tuple(const struct sk_buff *skb,
		unsigned int nhoff,
		unsigned int dataoff,
		u_int16_t l3num,
		u_int8_t protonum,
		struct nf_conntrack_tuple *tuple,
		const struct nf_conntrack_l3proto *l3proto,
		const struct nf_conntrack_protocol *protocol)
{
	NF_CT_TUPLE_U_BLANK(tuple);

	tuple->src.l3num = l3num;
	if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
		return 0;

	tuple->dst.protonum = protonum;
	tuple->dst.dir = IP_CT_DIR_ORIGINAL;

	return protocol->pkt_to_tuple(skb, dataoff, tuple);
}

int
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
		   const struct nf_conntrack_tuple *orig,
		   const struct nf_conntrack_l3proto *l3proto,
		   const struct nf_conntrack_protocol *protocol)
{
	NF_CT_TUPLE_U_BLANK(inverse);

	inverse->src.l3num = orig->src.l3num;
	if (l3proto->invert_tuple(inverse, orig) == 0)
		return 0;

	inverse->dst.dir = !orig->dst.dir;

	inverse->dst.protonum = orig->dst.protonum;
	return protocol->invert_tuple(inverse, orig);
}
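
/*
 * Illustrative note (not part of the original file): for an IPv4 TCP
 * tuple 192.168.0.1:1025 -> 10.0.0.1:80, nf_ct_invert_tuple() fills in
 * 10.0.0.1:80 -> 192.168.0.1:1025 with dst.dir flipped; the l3proto and
 * protocol callbacks swap the address and port halves respectively.
 */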
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);

	NF_CT_ASSERT(master_help);
	ASSERT_WRITE_LOCK(&nf_conntrack_lock);
	NF_CT_ASSERT(!timer_pending(&exp->timeout));

	list_del(&exp->list);
	NF_CT_STAT_INC(expect_delete);
	master_help->expecting--;
	nf_conntrack_expect_put(exp);
}

static void expectation_timed_out(unsigned long ul_expect)
{
	struct nf_conntrack_expect *exp = (void *)ul_expect;

	write_lock_bh(&nf_conntrack_lock);
	nf_ct_unlink_expect(exp);
	write_unlock_bh(&nf_conntrack_lock);
	nf_conntrack_expect_put(exp);
}

struct nf_conntrack_expect *
__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
			atomic_inc(&i->use);
			return i;
		}
	}
	return NULL;
}

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	read_lock_bh(&nf_conntrack_lock);
	i = __nf_conntrack_expect_find(tuple);
	read_unlock_bh(&nf_conntrack_lock);

	return i;
}

/* If an expectation for this connection is found, it gets deleted from
 * the global list, then returned. */
static struct nf_conntrack_expect *
find_expectation(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
	/* If master is not in hash table yet (ie. packet hasn't left
	   this machine yet), how can other end know about expected?
	   Hence these are not the droids you are looking for (if
	   master ct never got confirmed, we'd hold a reference to it
	   and weird things would happen to future packets). */
		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
		    && nf_ct_is_confirmed(i->master)) {
			if (i->flags & NF_CT_EXPECT_PERMANENT) {
				atomic_inc(&i->use);
				return i;
			} else if (del_timer(&i->timeout)) {
				nf_ct_unlink_expect(i);
				return i;
			}
		}
	}
	return NULL;
}
/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
	struct nf_conntrack_expect *i, *tmp;
	struct nf_conn_help *help = nfct_help(ct);

	/* Optimization: most connections never expect any others. */
	if (!help || help->expecting == 0)
		return;

	list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
		if (i->master == ct && del_timer(&i->timeout)) {
			nf_ct_unlink_expect(i);
			nf_conntrack_expect_put(i);
		}
	}
}

static void
clean_from_lists(struct nf_conn *ct)
{
	unsigned int ho, hr;

	DEBUGP("clean_from_lists(%p)\n", ct);
	ASSERT_WRITE_LOCK(&nf_conntrack_lock);

	ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
	LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
	LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);

	/* Destroy all pending expectations */
	nf_ct_remove_expectations(ct);
}
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct nf_conn *ct = (struct nf_conn *)nfct;
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_protocol *proto;

	DEBUGP("destroy_conntrack(%p)\n", ct);
	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
	NF_CT_ASSERT(!timer_pending(&ct->timeout));

	nf_conntrack_event(IPCT_DESTROY, ct);
	set_bit(IPS_DYING_BIT, &ct->status);

	/* To make sure we don't get any weird locking issues here:
	 * destroy_conntrack() MUST NOT be called with a write lock
	 * to nf_conntrack_lock!!! -HW */
	l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
	if (l3proto && l3proto->destroy)
		l3proto->destroy(ct);

	proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
	if (proto && proto->destroy)
		proto->destroy(ct);

	if (nf_conntrack_destroyed)
		nf_conntrack_destroyed(ct);

	write_lock_bh(&nf_conntrack_lock);
	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before connection is in the list, so we need to clean here,
	 * too. */
	nf_ct_remove_expectations(ct);

	/* We overload first tuple to link into unconfirmed list. */
	if (!nf_ct_is_confirmed(ct)) {
		BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
	}

	NF_CT_STAT_INC(delete);
	write_unlock_bh(&nf_conntrack_lock);

	if (ct->master)
		nf_ct_put(ct->master);

	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
	nf_conntrack_free(ct);
}
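
/*
 * Illustrative note (not part of the original file): conntrack lifetime
 * is reference counted through ct->ct_general.use, and destroy_conntrack()
 * above only runs once the last reference is dropped:
 *
 *	nf_conntrack_get(&ct->ct_general);	// e.g. skb->nfct takes a ref
 *	...
 *	nf_ct_put(ct);			// last put invokes destroy_conntrack()
 */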
static void death_by_timeout(unsigned long ul_conntrack)
{
	struct nf_conn *ct = (void *)ul_conntrack;

	write_lock_bh(&nf_conntrack_lock);
	/* Inside lock so preempt is disabled on module removal path.
	 * Otherwise we can get spurious warnings. */
	NF_CT_STAT_INC(delete_list);
	clean_from_lists(ct);
	write_unlock_bh(&nf_conntrack_lock);
	nf_ct_put(ct);
}

static inline int
conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
		    const struct nf_conntrack_tuple *tuple,
		    const struct nf_conn *ignored_conntrack)
{
	ASSERT_READ_LOCK(&nf_conntrack_lock);
	return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack
		&& nf_ct_tuple_equal(tuple, &i->tuple);
}

struct nf_conntrack_tuple_hash *
__nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
		    const struct nf_conn *ignored_conntrack)
{
	struct nf_conntrack_tuple_hash *h;
	unsigned int hash = hash_conntrack(tuple);

	ASSERT_READ_LOCK(&nf_conntrack_lock);
	list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
		if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
			NF_CT_STAT_INC(found);
			return h;
		}
		NF_CT_STAT_INC(searched);
	}

	return NULL;
}

/* Find a connection corresponding to a tuple. */
struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
		      const struct nf_conn *ignored_conntrack)
{
	struct nf_conntrack_tuple_hash *h;

	read_lock_bh(&nf_conntrack_lock);
	h = __nf_conntrack_find(tuple, ignored_conntrack);
	if (h)
		atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
	read_unlock_bh(&nf_conntrack_lock);

	return h;
}
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
				       unsigned int hash,
				       unsigned int repl_hash)
{
	ct->id = ++nf_conntrack_next_id;
	list_prepend(&nf_conntrack_hash[hash],
		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
	list_prepend(&nf_conntrack_hash[repl_hash],
		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
}

void nf_conntrack_hash_insert(struct nf_conn *ct)
{
	unsigned int hash, repl_hash;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	write_lock_bh(&nf_conntrack_lock);
	__nf_conntrack_hash_insert(ct, hash, repl_hash);
	write_unlock_bh(&nf_conntrack_lock);
}
/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff **pskb)
{
	unsigned int hash, repl_hash;
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;

	ct = nf_ct_get(*pskb, &ctinfo);

	/* ipt_REJECT uses nf_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction.  Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	/* We're not in hash table, and we refuse to set up related
	   connections for unconfirmed conns.  But packet copies and
	   REJECT will give spurious warnings here. */
	/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means no one else could have
	   confirmed us. */
	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
	DEBUGP("Confirming conntrack %p\n", ct);

	write_lock_bh(&nf_conntrack_lock);

	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	if (!LIST_FIND(&nf_conntrack_hash[hash],
		       conntrack_tuple_cmp,
		       struct nf_conntrack_tuple_hash *,
		       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
	    && !LIST_FIND(&nf_conntrack_hash[repl_hash],
			  conntrack_tuple_cmp,
			  struct nf_conntrack_tuple_hash *,
			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
		struct nf_conn_help *help;

		/* Remove from unconfirmed list */
		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);

		__nf_conntrack_hash_insert(ct, hash, repl_hash);
		/* Timer relative to confirmation time, not original
		   setting time, otherwise we'd get timer wrap in
		   weird delay cases. */
		ct->timeout.expires += jiffies;
		add_timer(&ct->timeout);
		atomic_inc(&ct->ct_general.use);
		set_bit(IPS_CONFIRMED_BIT, &ct->status);
		NF_CT_STAT_INC(insert);
		write_unlock_bh(&nf_conntrack_lock);
		help = nfct_help(ct);
		if (help && help->helper)
			nf_conntrack_event_cache(IPCT_HELPER, *pskb);
#ifdef CONFIG_NF_NAT_NEEDED
		if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
		    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
			nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
#endif
		nf_conntrack_event_cache(master_ct(ct) ?
					 IPCT_RELATED : IPCT_NEW, *pskb);
		return NF_ACCEPT;
	}

	NF_CT_STAT_INC(insert_failed);
	write_unlock_bh(&nf_conntrack_lock);

	return NF_DROP;
}
/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
			 const struct nf_conn *ignored_conntrack)
{
	struct nf_conntrack_tuple_hash *h;

	read_lock_bh(&nf_conntrack_lock);
	h = __nf_conntrack_find(tuple, ignored_conntrack);
	read_unlock_bh(&nf_conntrack_lock);

	return h != NULL;
}

/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static inline int unreplied(const struct nf_conntrack_tuple_hash *i)
{
	return !(test_bit(IPS_ASSURED_BIT,
			  &nf_ct_tuplehash_to_ctrack(i)->status));
}

static int early_drop(struct list_head *chain)
{
	/* Traverse backwards: gives us oldest, which is roughly LRU */
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct = NULL;
	int dropped = 0;

	read_lock_bh(&nf_conntrack_lock);
	h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *);
	if (h) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		atomic_inc(&ct->ct_general.use);
	}
	read_unlock_bh(&nf_conntrack_lock);

	if (!ct)
		return dropped;

	if (del_timer(&ct->timeout)) {
		death_by_timeout((unsigned long)ct);
		dropped = 1;
		NF_CT_STAT_INC(early_drop);
	}
	nf_ct_put(ct);
	return dropped;
}
static inline int helper_cmp(const struct nf_conntrack_helper *i,
			     const struct nf_conntrack_tuple *rtuple)
{
	return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
}

static struct nf_conntrack_helper *
__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
{
	return LIST_FIND(&helpers, helper_cmp,
			 struct nf_conntrack_helper *,
			 tuple);
}

struct nf_conntrack_helper *
nf_ct_helper_find_get(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_helper *helper;

	/* need nf_conntrack_lock to assure that helper exists until
	 * try_module_get() is called */
	read_lock_bh(&nf_conntrack_lock);

	helper = __nf_ct_helper_find(tuple);
	if (helper) {
		/* need to increase module usage count to assure helper will
		 * not go away while the caller is e.g. busy putting a
		 * conntrack in the hash that uses the helper */
		if (!try_module_get(helper->me))
			helper = NULL;
	}

	read_unlock_bh(&nf_conntrack_lock);

	return helper;
}

void nf_ct_helper_put(struct nf_conntrack_helper *helper)
{
	module_put(helper->me);
}
static struct nf_conn *
__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
		     const struct nf_conntrack_tuple *repl,
		     const struct nf_conntrack_l3proto *l3proto)
{
	struct nf_conn *conntrack = NULL;
	u_int32_t features = 0;
	struct nf_conntrack_helper *helper;

	if (unlikely(!nf_conntrack_hash_rnd_initted)) {
		get_random_bytes(&nf_conntrack_hash_rnd, 4);
		nf_conntrack_hash_rnd_initted = 1;
	}

	if (nf_conntrack_max
	    && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) {
		unsigned int hash = hash_conntrack(orig);
		/* Try dropping from this hash chain. */
		if (!early_drop(&nf_conntrack_hash[hash])) {
			if (net_ratelimit())
				printk(KERN_WARNING
				       "nf_conntrack: table full, dropping"
				       " packet.\n");
			return ERR_PTR(-ENOMEM);
		}
	}

	/* find features needed by this conntrack. */
	features = l3proto->get_features(orig);

	/* FIXME: protect helper list per RCU */
	read_lock_bh(&nf_conntrack_lock);
	helper = __nf_ct_helper_find(repl);
	if (helper)
		features |= NF_CT_F_HELP;
	read_unlock_bh(&nf_conntrack_lock);

	DEBUGP("nf_conntrack_alloc: features=0x%x\n", features);

	read_lock_bh(&nf_ct_cache_lock);

	if (unlikely(!nf_ct_cache[features].use)) {
		DEBUGP("nf_conntrack_alloc: not supported features = 0x%x\n",
		       features);
		goto out;
	}

	conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC);
	if (conntrack == NULL) {
		DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n");
		goto out;
	}

	memset(conntrack, 0, nf_ct_cache[features].size);
	conntrack->features = features;
	if (helper) {
		struct nf_conn_help *help = nfct_help(conntrack);
		NF_CT_ASSERT(help);
		help->helper = helper;
	}

	atomic_set(&conntrack->ct_general.use, 1);
	conntrack->ct_general.destroy = destroy_conntrack;
	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
	/* Don't set timer yet: wait for confirmation */
	init_timer(&conntrack->timeout);
	conntrack->timeout.data = (unsigned long)conntrack;
	conntrack->timeout.function = death_by_timeout;

	atomic_inc(&nf_conntrack_count);
out:
	read_unlock_bh(&nf_ct_cache_lock);
	return conntrack;
}

struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
				   const struct nf_conntrack_tuple *repl)
{
	struct nf_conntrack_l3proto *l3proto;

	l3proto = __nf_ct_l3proto_find(orig->src.l3num);
	return __nf_conntrack_alloc(orig, repl, l3proto);
}

void nf_conntrack_free(struct nf_conn *conntrack)
{
	u_int32_t features = conntrack->features;

	NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM);
	DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features,
	       conntrack);
	kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
	atomic_dec(&nf_conntrack_count);
}
/* Allocate a new conntrack: we return -ENOMEM if classification
   failed due to stress.  Otherwise it really is unclassifiable. */
static struct nf_conntrack_tuple_hash *
init_conntrack(const struct nf_conntrack_tuple *tuple,
	       struct nf_conntrack_l3proto *l3proto,
	       struct nf_conntrack_protocol *protocol,
	       struct sk_buff *skb,
	       unsigned int dataoff)
{
	struct nf_conn *conntrack;
	struct nf_conntrack_tuple repl_tuple;
	struct nf_conntrack_expect *exp;

	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) {
		DEBUGP("Can't invert tuple.\n");
		return NULL;
	}

	conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto);
	if (conntrack == NULL || IS_ERR(conntrack)) {
		DEBUGP("Can't allocate conntrack.\n");
		return (struct nf_conntrack_tuple_hash *)conntrack;
	}

	if (!protocol->new(conntrack, skb, dataoff)) {
		nf_conntrack_free(conntrack);
		DEBUGP("init conntrack: can't track with proto module\n");
		return NULL;
	}

	write_lock_bh(&nf_conntrack_lock);
	exp = find_expectation(tuple);

	if (exp) {
		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
		       conntrack, exp);
		/* Welcome, Mr. Bond.  We've been expecting you... */
		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
		conntrack->master = exp->master;
#ifdef CONFIG_NF_CONNTRACK_MARK
		conntrack->mark = exp->master->mark;
#endif
		nf_conntrack_get(&conntrack->master->ct_general);
		NF_CT_STAT_INC(expect_new);
	} else
		NF_CT_STAT_INC(new);

	/* Overload tuple linked list to put us in unconfirmed list. */
	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);

	write_unlock_bh(&nf_conntrack_lock);

	if (exp) {
		if (exp->expectfn)
			exp->expectfn(conntrack, exp);
		nf_conntrack_expect_put(exp);
	}

	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct nf_conn *
resolve_normal_ct(struct sk_buff *skb,
		  unsigned int dataoff,
		  u_int16_t l3num,
		  u_int8_t protonum,
		  struct nf_conntrack_l3proto *l3proto,
		  struct nf_conntrack_protocol *proto,
		  int *set_reply,
		  enum ip_conntrack_info *ctinfo)
{
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
			     dataoff, l3num, protonum, &tuple, l3proto,
			     proto)) {
		DEBUGP("resolve_normal_ct: Can't get tuple\n");
		return NULL;
	}

	/* look for tuple match */
	h = nf_conntrack_find_get(&tuple, NULL);
	if (!h) {
		h = init_conntrack(&tuple, l3proto, proto, skb, dataoff);
		if (!h)
			return NULL;
		if (IS_ERR(h))
			return (void *)h;
	}
	ct = nf_ct_tuplehash_to_ctrack(h);

	/* It exists; we have (non-exclusive) reference. */
	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
		/* Please set reply bit if this packet OK */
		*set_reply = 1;
	} else {
		/* Once we've had two way comms, always ESTABLISHED. */
		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
			DEBUGP("nf_conntrack_in: normal packet for %p\n", ct);
			*ctinfo = IP_CT_ESTABLISHED;
		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
			DEBUGP("nf_conntrack_in: related packet for %p\n", ct);
			*ctinfo = IP_CT_RELATED;
		} else {
			DEBUGP("nf_conntrack_in: new packet for %p\n", ct);
			*ctinfo = IP_CT_NEW;
		}
		*set_reply = 0;
	}
	skb->nfct = &ct->ct_general;
	skb->nfctinfo = *ctinfo;
	return ct;
}
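
/*
 * Illustrative summary (not part of the original file) of the ctinfo
 * values chosen above:
 *
 *	reply direction		-> IP_CT_ESTABLISHED + IP_CT_IS_REPLY
 *	IPS_SEEN_REPLY set	-> IP_CT_ESTABLISHED
 *	IPS_EXPECTED set	-> IP_CT_RELATED
 *	otherwise		-> IP_CT_NEW
 */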
unsigned int
nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_protocol *proto;
	unsigned int dataoff;
	u_int8_t protonum;
	int set_reply = 0;
	int ret;

	/* Previously seen (loopback or untracked)?  Ignore. */
	if ((*pskb)->nfct) {
		NF_CT_STAT_INC(ignore);
		return NF_ACCEPT;
	}

	l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
	if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
		DEBUGP("not prepared to track yet or error occurred\n");
		return -ret;
	}

	proto = __nf_ct_proto_find((u_int16_t)pf, protonum);

	/* It may be a special packet, error, unclean...
	 * inverse of the return code tells the netfilter
	 * core what to do with the packet. */
	if (proto->error != NULL &&
	    (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
		NF_CT_STAT_INC(error);
		NF_CT_STAT_INC(invalid);
		return -ret;
	}

	ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto,
			       &set_reply, &ctinfo);
	if (!ct) {
		/* Not valid part of a connection */
		NF_CT_STAT_INC(invalid);
		return NF_ACCEPT;
	}

	if (IS_ERR(ct)) {
		/* Too stressed to deal. */
		NF_CT_STAT_INC(drop);
		return NF_DROP;
	}

	NF_CT_ASSERT((*pskb)->nfct);

	ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
	if (ret < 0) {
		/* Invalid: inverse of the return code tells
		 * the netfilter core what to do */
		DEBUGP("nf_conntrack_in: Can't track with proto module\n");
		nf_conntrack_put((*pskb)->nfct);
		(*pskb)->nfct = NULL;
		NF_CT_STAT_INC(invalid);
		return -ret;
	}

	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		nf_conntrack_event_cache(IPCT_STATUS, *pskb);

	return ret;
}

int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
			 const struct nf_conntrack_tuple *orig)
{
	return nf_ct_invert_tuple(inverse, orig,
				  __nf_ct_l3proto_find(orig->src.l3num),
				  __nf_ct_proto_find(orig->src.l3num,
						     orig->dst.protonum));
}
/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
			       const struct nf_conntrack_expect *b)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct nf_conntrack_tuple intersect_mask;
	int count;

	intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
	intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
	intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
	intersect_mask.dst.protonum = a->mask.dst.protonum
					& b->mask.dst.protonum;

	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
		intersect_mask.src.u3.all[count] =
			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
	}

	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
		intersect_mask.dst.u3.all[count] =
			a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
	}

	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
				 const struct nf_conntrack_expect *b)
{
	return a->master == b->master
		&& nf_ct_tuple_equal(&a->tuple, &b->tuple)
		&& nf_ct_tuple_equal(&a->mask, &b->mask);
}
/* Generally a bad idea to call this: could have matched already. */
void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
{
	struct nf_conntrack_expect *i;

	write_lock_bh(&nf_conntrack_lock);
	/* choose the oldest expectation to evict */
	list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
			nf_ct_unlink_expect(i);
			write_unlock_bh(&nf_conntrack_lock);
			nf_conntrack_expect_put(i);
			return;
		}
	}
	write_unlock_bh(&nf_conntrack_lock);
}

/* We don't increase the master conntrack refcount for non-fulfilled
 * conntracks. During the conntrack destruction, the expectations are
 * always killed before the conntrack itself */
struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
{
	struct nf_conntrack_expect *new;

	new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
	if (!new) {
		DEBUGP("expect_related: OOM allocating expect\n");
		return NULL;
	}
	new->master = me;
	atomic_set(&new->use, 1);
	return new;
}

void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		kmem_cache_free(nf_conntrack_expect_cachep, exp);
}
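
/*
 * Illustrative note (not part of the original file): expectation lifetime
 * under this scheme, assuming the normal helper flow:
 *
 *	nf_conntrack_expect_alloc()		use = 1  (caller's ref)
 *	nf_conntrack_expect_insert()		use = 3  (+list, +timer)
 *	nf_conntrack_expect_put() by caller	use = 2
 *	nf_ct_unlink_expect() + timer put	use = 0  -> freed
 */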
static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);

	atomic_inc(&exp->use);
	master_help->expecting++;
	list_add(&exp->list, &nf_conntrack_expect_list);

	init_timer(&exp->timeout);
	exp->timeout.data = (unsigned long)exp;
	exp->timeout.function = expectation_timed_out;
	exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
	add_timer(&exp->timeout);

	exp->id = ++nf_conntrack_expect_next_id;
	atomic_inc(&exp->use);
	NF_CT_STAT_INC(expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master)
{
	struct nf_conntrack_expect *i;

	list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
		if (i->master == master) {
			if (del_timer(&i->timeout)) {
				nf_ct_unlink_expect(i);
				nf_conntrack_expect_put(i);
			}
			break;
		}
	}
}

static inline int refresh_timer(struct nf_conntrack_expect *i)
{
	struct nf_conn_help *master_help = nfct_help(i->master);

	if (!del_timer(&i->timeout))
		return 0;

	i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
	add_timer(&i->timeout);
	return 1;
}

int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
{
	struct nf_conntrack_expect *i;
	struct nf_conn *master = expect->master;
	struct nf_conn_help *master_help = nfct_help(master);
	int ret;

	NF_CT_ASSERT(master_help);

	DEBUGP("nf_conntrack_expect_related %p\n", expect);
	DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple);
	DEBUGP("mask:  "); NF_CT_DUMP_TUPLE(&expect->mask);

	write_lock_bh(&nf_conntrack_lock);
	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
		if (expect_matches(i, expect)) {
			/* Refresh timer: if it's dying, ignore.. */
			if (refresh_timer(i)) {
				ret = 0;
				goto out;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}

	/* Will be over limit? */
	if (master_help->helper->max_expected &&
	    master_help->expecting >= master_help->helper->max_expected)
		evict_oldest_expect(master);

	nf_conntrack_expect_insert(expect);
	nf_conntrack_expect_event(IPEXP_NEW, expect);
	ret = 0;
out:
	write_unlock_bh(&nf_conntrack_lock);
	return ret;
}
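
/*
 * Illustrative sketch (not part of the original file): this is roughly how
 * a helper such as the FTP one registers an expectation for a data
 * connection after parsing a PORT/PASV command.  The tuple pins the peer
 * addresses and data port; the mask wildcards the source port:
 *
 *	struct nf_conntrack_expect *exp = nf_conntrack_expect_alloc(ct);
 *	if (exp != NULL) {
 *		// ... fill in exp->tuple, exp->mask, exp->expectfn ...
 *		nf_conntrack_expect_related(exp);	// 0 or -EBUSY on clash
 *		nf_conntrack_expect_put(exp);		// drop the alloc ref
 *	}
 */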
int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
{
	int ret;

	BUG_ON(me->timeout == 0);

	ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
					  sizeof(struct nf_conn)
					  + sizeof(struct nf_conn_help)
					  + __alignof__(struct nf_conn_help));
	if (ret < 0) {
		printk(KERN_ERR "nf_conntrack_helper_register: Unable to create slab cache for conntracks\n");
		return ret;
	}
	write_lock_bh(&nf_conntrack_lock);
	list_prepend(&helpers, me);
	write_unlock_bh(&nf_conntrack_lock);

	return 0;
}

struct nf_conntrack_helper *
__nf_conntrack_helper_find_byname(const char *name)
{
	struct nf_conntrack_helper *h;

	list_for_each_entry(h, &helpers, list) {
		if (!strcmp(h->name, name))
			return h;
	}

	return NULL;
}

static inline int unhelp(struct nf_conntrack_tuple_hash *i,
			 const struct nf_conntrack_helper *me)
{
	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
	struct nf_conn_help *help = nfct_help(ct);

	if (help && help->helper == me) {
		nf_conntrack_event(IPCT_HELPER, ct);
		help->helper = NULL;
	}
	return 0;
}

void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
{
	unsigned int i;
	struct nf_conntrack_expect *exp, *tmp;

	/* Need write lock here, to delete helper. */
	write_lock_bh(&nf_conntrack_lock);
	LIST_DELETE(&helpers, me);

	/* Get rid of expectations */
	list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
		struct nf_conn_help *help = nfct_help(exp->master);
		if (help->helper == me && del_timer(&exp->timeout)) {
			nf_ct_unlink_expect(exp);
			nf_conntrack_expect_put(exp);
		}
	}

	/* Get rid of expecteds, set helpers to NULL. */
	LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me);
	for (i = 0; i < nf_conntrack_htable_size; i++)
		LIST_FIND_W(&nf_conntrack_hash[i], unhelp,
			    struct nf_conntrack_tuple_hash *, me);
	write_unlock_bh(&nf_conntrack_lock);

	/* Someone could still be looking at the helper in a bh. */
	synchronize_net();
}
/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
void __nf_ct_refresh_acct(struct nf_conn *ct,
			  enum ip_conntrack_info ctinfo,
			  const struct sk_buff *skb,
			  unsigned long extra_jiffies,
			  int do_acct)
{
	int event = 0;

	NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
	NF_CT_ASSERT(skb);

	write_lock_bh(&nf_conntrack_lock);

	/* If not in hash table, timer will not be active yet */
	if (!nf_ct_is_confirmed(ct)) {
		ct->timeout.expires = extra_jiffies;
		event = IPCT_REFRESH;
	} else {
		/* Need del_timer for race avoidance (may already be dying). */
		if (del_timer(&ct->timeout)) {
			ct->timeout.expires = jiffies + extra_jiffies;
			add_timer(&ct->timeout);
			event = IPCT_REFRESH;
		}
	}

#ifdef CONFIG_NF_CT_ACCT
	if (do_acct) {
		ct->counters[CTINFO2DIR(ctinfo)].packets++;
		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
			skb->len - (unsigned int)(skb->nh.raw - skb->data);
		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
			event |= IPCT_COUNTER_FILLING;
	}
#endif

	write_unlock_bh(&nf_conntrack_lock);

	/* must be unlocked when calling event cache */
	if (event)
		nf_conntrack_event_cache(event, skb);
}
#if defined(CONFIG_NF_CT_NETLINK) || \
    defined(CONFIG_NF_CT_NETLINK_MODULE)

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/mutex.h>

/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
 * in nf_conntrack_core, since we don't want the protocols to autoload
 * or depend on ctnetlink */
int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
			       const struct nf_conntrack_tuple *tuple)
{
	NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
		&tuple->src.u.tcp.port);
	NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
		&tuple->dst.u.tcp.port);
	return 0;

nfattr_failure:
	return -1;
}

static const size_t cta_min_proto[CTA_PROTO_MAX] = {
	[CTA_PROTO_SRC_PORT-1]	= sizeof(u_int16_t),
	[CTA_PROTO_DST_PORT-1]	= sizeof(u_int16_t)
};

int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[],
			       struct nf_conntrack_tuple *t)
{
	if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
		return -EINVAL;

	if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
		return -EINVAL;

	t->src.u.tcp.port =
		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
	t->dst.u.tcp.port =
		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);

	return 0;
}
#endif
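
/*
 * Illustrative note (not part of the original file): the two functions
 * above form a round trip.  A TCP tuple with ports 1025 -> 80 is emitted
 * as two 16-bit netlink attributes (CTA_PROTO_SRC_PORT, CTA_PROTO_DST_PORT)
 * and parsed back into t->src.u.tcp.port / t->dst.u.tcp.port; since the
 * port-based protocols share the same union layout, the identical code
 * serves udp and sctp as well.
 */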
/* Used by ipt_REJECT and ip6t_REJECT. */
void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;

	/* This ICMP is in reverse direction to the packet which caused it */
	ct = nf_ct_get(skb, &ctinfo);
	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
	else
		ctinfo = IP_CT_RELATED;

	/* Attach to new skbuff, and increment count */
	nskb->nfct = &ct->ct_general;
	nskb->nfctinfo = ctinfo;
	nf_conntrack_get(nskb->nfct);
}

static inline int
do_iter(const struct nf_conntrack_tuple_hash *i,
	int (*iter)(struct nf_conn *i, void *data),
	void *data)
{
	return iter(nf_ct_tuplehash_to_ctrack(i), data);
}

/* Bring out ya dead! */
static struct nf_conntrack_tuple_hash *
get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
		void *data, unsigned int *bucket)
{
	struct nf_conntrack_tuple_hash *h = NULL;

	write_lock_bh(&nf_conntrack_lock);
	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
		h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter,
				struct nf_conntrack_tuple_hash *, iter, data);
		if (h)
			break;
	}
	if (!h)
		h = LIST_FIND_W(&unconfirmed, do_iter,
				struct nf_conntrack_tuple_hash *, iter, data);
	if (h)
		atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
	write_unlock_bh(&nf_conntrack_lock);

	return h;
}

void
nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
{
	struct nf_conntrack_tuple_hash *h;
	unsigned int bucket = 0;

	while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
		/* Time to push up daisies... */
		if (del_timer(&ct->timeout))
			death_by_timeout((unsigned long)ct);
		/* ... else the timer will get him soon. */

		nf_ct_put(ct);
	}
}
static int kill_all(struct nf_conn *i, void *data)
{
	return 1;
}

static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
{
	if (vmalloced)
		vfree(hash);
	else
		free_pages((unsigned long)hash,
			   get_order(sizeof(struct list_head) * size));
}

void nf_conntrack_flush(void)
{
	nf_ct_iterate_cleanup(kill_all, NULL);
}

/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void nf_conntrack_cleanup(void)
{
	int i;

	ip_ct_attach = NULL;

	/* This makes sure all current packets have passed through
	   netfilter framework.  Roll on, two-stage module
	   delete... */
	synchronize_net();

	nf_ct_event_cache_flush();
 i_see_dead_people:
	nf_conntrack_flush();
	if (atomic_read(&nf_conntrack_count) != 0) {
		schedule();
		goto i_see_dead_people;
	}
	/* wait until all references to nf_conntrack_untracked are dropped */
	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
		schedule();

	for (i = 0; i < NF_CT_F_NUM; i++) {
		if (nf_ct_cache[i].use == 0)
			continue;

		NF_CT_ASSERT(nf_ct_cache[i].use == 1);
		nf_ct_cache[i].use = 1;
		nf_conntrack_unregister_cache(i);
	}
	kmem_cache_destroy(nf_conntrack_expect_cachep);
	free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
			    nf_conntrack_htable_size);

	/* free l3proto protocol tables */
	for (i = 0; i < PF_MAX; i++)
		if (nf_ct_protos[i]) {
			kfree(nf_ct_protos[i]);
			nf_ct_protos[i] = NULL;
		}
}
static struct list_head *alloc_hashtable(int size, int *vmalloced)
{
	struct list_head *hash;
	unsigned int i;

	*vmalloced = 0;
	hash = (void*)__get_free_pages(GFP_KERNEL,
				       get_order(sizeof(struct list_head)
						 * size));
	if (!hash) {
		*vmalloced = 1;
		printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
		hash = vmalloc(sizeof(struct list_head) * size);
	}

	if (hash)
		for (i = 0; i < size; i++)
			INIT_LIST_HEAD(&hash[i]);

	return hash;
}

int set_hashsize(const char *val, struct kernel_param *kp)
{
	int i, bucket, hashsize, vmalloced;
	int old_vmalloced, old_size;
	int rnd;
	struct list_head *hash, *old_hash;
	struct nf_conntrack_tuple_hash *h;

	/* On boot, we can set this without any fancy locking. */
	if (!nf_conntrack_htable_size)
		return param_set_uint(val, kp);

	hashsize = simple_strtol(val, NULL, 0);
	if (!hashsize)
		return -EINVAL;

	hash = alloc_hashtable(hashsize, &vmalloced);
	if (!hash)
		return -ENOMEM;

	/* We have to rehash for the new table anyway, so we also can
	 * use a new random seed */
	get_random_bytes(&rnd, 4);

	write_lock_bh(&nf_conntrack_lock);
	for (i = 0; i < nf_conntrack_htable_size; i++) {
		while (!list_empty(&nf_conntrack_hash[i])) {
			h = list_entry(nf_conntrack_hash[i].next,
				       struct nf_conntrack_tuple_hash, list);
			list_del(&h->list);
			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
			list_add_tail(&h->list, &hash[bucket]);
		}
	}
	old_size = nf_conntrack_htable_size;
	old_vmalloced = nf_conntrack_vmalloc;
	old_hash = nf_conntrack_hash;

	nf_conntrack_htable_size = hashsize;
	nf_conntrack_vmalloc = vmalloced;
	nf_conntrack_hash = hash;
	nf_conntrack_hash_rnd = rnd;
	write_unlock_bh(&nf_conntrack_lock);

	free_conntrack_hash(old_hash, old_vmalloced, old_size);
	return 0;
}

module_param_call(hashsize, set_hashsize, param_get_uint,
		  &nf_conntrack_htable_size, 0600);
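
/*
 * Illustrative note (not part of the original file): thanks to the
 * module_param_call() above, the hash table can be resized at runtime
 * from userspace, e.g.:
 *
 *	echo 16384 > /sys/module/nf_conntrack/parameters/hashsize
 *
 * set_hashsize() then allocates the new table, rehashes every entry under
 * nf_conntrack_lock with a fresh random seed, and frees the old table.
 */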
int __init nf_conntrack_init(void)
{
	unsigned int i;
	int ret;

	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
	 * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
	if (!nf_conntrack_htable_size) {
		nf_conntrack_htable_size
			= (((num_physpages << PAGE_SHIFT) / 16384)
			   / sizeof(struct list_head));
		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
			nf_conntrack_htable_size = 8192;
		if (nf_conntrack_htable_size < 16)
			nf_conntrack_htable_size = 16;
	}
	nf_conntrack_max = 8 * nf_conntrack_htable_size;
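
	/*
	 * Illustrative arithmetic (not part of the original file): on i386
	 * with 32MB of RAM, num_physpages << PAGE_SHIFT is 33554432 bytes;
	 * divided by 16384 that gives 2048, and with an 8-byte
	 * struct list_head that is 256 buckets, matching the comment above.
	 * nf_conntrack_max then defaults to 8 * 256 = 2048 tracked
	 * connections.
	 */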
	printk("nf_conntrack version %s (%u buckets, %d max)\n",
	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
	       nf_conntrack_max);

	nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size,
					    &nf_conntrack_vmalloc);
	if (!nf_conntrack_hash) {
		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
		goto err_out;
	}

	ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic",
					  sizeof(struct nf_conn));
	if (ret < 0) {
		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
		goto err_free_hash;
	}

	nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect",
					sizeof(struct nf_conntrack_expect),
					0, 0, NULL, NULL);
	if (!nf_conntrack_expect_cachep) {
		printk(KERN_ERR "Unable to create nf_expect slab cache\n");
		goto err_free_conntrack_slab;
	}

	/* Don't NEED lock here, but good form anyway. */
	write_lock_bh(&nf_conntrack_lock);
	for (i = 0; i < PF_MAX; i++)
		nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto;
	write_unlock_bh(&nf_conntrack_lock);

	/* For use by REJECT target */
	ip_ct_attach = __nf_conntrack_attach;

	/* Set up fake conntrack:
	    - to never be deleted, not in any hashes */
	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
	/*  - and make it look like a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);

	return ret;

err_free_conntrack_slab:
	nf_conntrack_unregister_cache(NF_CT_F_BASIC);
err_free_hash:
	free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
			    nf_conntrack_htable_size);
err_out:
	return -ENOMEM;
}