2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
9 * Development of this code funded by Astaro AG (http://www.astaro.com/)
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/netlink.h>
16 #include <linux/netfilter.h>
17 #include <linux/netfilter/nf_tables.h>
18 #include <net/netfilter/nf_tables.h>
19 #include <net/netfilter/nf_conntrack.h>
20 #include <net/netfilter/nf_conntrack_acct.h>
21 #include <net/netfilter/nf_conntrack_tuple.h>
22 #include <net/netfilter/nf_conntrack_helper.h>
23 #include <net/netfilter/nf_conntrack_ecache.h>
24 #include <net/netfilter/nf_conntrack_labels.h>
/*
 * Members of the expression's private data, accessed elsewhere in this file
 * through "struct nft_ct *priv".  Every member is an 8-bit bitfield.
 */
/* conntrack key this expression operates on */
27 enum nft_ct_keys key:8;
/* tuple direction; the init functions preset it to IP_CT_DIR_MAX */
28 enum ip_conntrack_dir dir:8;
/* destination register, written by nft_ct_get_eval() */
30 enum nft_registers dreg:8;
/* source register, read by the "set" evaluation functions */
31 enum nft_registers sreg:8;
35 #ifdef CONFIG_NF_CONNTRACK_ZONES
/* Per-CPU conntrack template pointer, read by nft_ct_set_zone_eval(). */
36 static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
/*
 * Number of users of the per-CPU templates: incremented in
 * nft_ct_set_init() and decremented in __nft_ct_set_destroy(), which
 * releases the templates once the count reaches zero.
 */
37 static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
/*
 * Read a conntrack accounting counter from the counter array @c.
 *
 * When @d names a concrete direction (d < IP_CT_DIR_MAX) the result is the
 * byte counter if the key k equals NFT_CT_BYTES and the packet counter
 * otherwise.  For any other @d the function calls itself once for
 * IP_CT_DIR_ORIGINAL and once for IP_CT_DIR_REPLY and returns the sum.
 */
40 static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
42 enum ip_conntrack_dir d)
/* single direction: pick bytes or packets depending on the key */
44 if (d < IP_CT_DIR_MAX)
45 return k == NFT_CT_BYTES ? atomic64_read(&c[d].bytes) :
46 atomic64_read(&c[d].packets);
/* no specific direction: add up both directions */
48 return nft_ct_get_eval_counter(c, k, IP_CT_DIR_ORIGINAL) +
49 nft_ct_get_eval_counter(c, k, IP_CT_DIR_REPLY);
/*
 * Evaluate a "ct get" expression: look up the conntrack entry attached to
 * the packet's skb and store the value selected by priv->key into the
 * destination register priv->dreg.
 *
 * Values wider than one 32-bit register word (labels, 64-bit counters,
 * addresses, helper name) are copied with memcpy()/strncpy() starting at
 * the destination register.  The function also contains a path that sets
 * the verdict to NFT_BREAK.
 */
52 static void nft_ct_get_eval(const struct nft_expr *expr,
53 struct nft_regs *regs,
54 const struct nft_pktinfo *pkt)
56 const struct nft_ct *priv = nft_expr_priv(expr);
57 u32 *dest = ®s->data[priv->dreg];
58 enum ip_conntrack_info ctinfo;
59 const struct nf_conn *ct;
60 const struct nf_conn_help *help;
61 const struct nf_conntrack_tuple *tuple;
62 const struct nf_conntrack_helper *helper;
/* fetch the conntrack entry and its ctinfo from the skb */
65 ct = nf_ct_get(pkt->skb, &ctinfo);
/*
 * Connection state: one of the INVALID bit, the UNTRACKED bit (when
 * nf_ct_is_untracked() says so) or the bit derived from ctinfo.
 */
70 state = NF_CT_STATE_INVALID_BIT;
71 else if (nf_ct_is_untracked(ct))
72 state = NF_CT_STATE_UNTRACKED_BIT;
74 state = NF_CT_STATE_BIT(ctinfo);
/* direction of this packet, derived from ctinfo */
85 case NFT_CT_DIRECTION:
86 *dest = CTINFO2DIR(ctinfo);
91 #ifdef CONFIG_NF_CONNTRACK_MARK
96 #ifdef CONFIG_NF_CONNTRACK_SECMARK
/* remaining lifetime, converted from jiffies to milliseconds */
101 case NFT_CT_EXPIRATION:
102 *dest = jiffies_to_msecs(nf_ct_expires(ct));
/*
 * Helper name: taken from the helper of the master connection and
 * copied with strncpy() bounded by NF_CT_HELPER_NAME_LEN.
 */
105 if (ct->master == NULL)
107 help = nfct_help(ct->master);
110 helper = rcu_dereference(help->helper);
113 strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
115 #ifdef CONFIG_NF_CONNTRACK_LABELS
/* labels: copy NF_CT_LABELS_MAX_SIZE bytes of label bits, or zero-fill */
116 case NFT_CT_LABELS: {
117 struct nf_conn_labels *labels = nf_ct_labels_find(ct);
120 memcpy(dest, labels->bits, NF_CT_LABELS_MAX_SIZE);
122 memset(dest, 0, NF_CT_LABELS_MAX_SIZE);
/* byte/packet counters: 64-bit value copied into the register area */
126 case NFT_CT_BYTES: /* fallthrough */
128 const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
132 count = nft_ct_get_eval_counter(acct->counter,
133 priv->key, priv->dir);
134 memcpy(dest, &count, sizeof(count));
/* average packet size: byte count divided by packet count */
137 case NFT_CT_AVGPKT: {
138 const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
139 u64 avgcnt = 0, bcnt = 0, pcnt = 0;
142 pcnt = nft_ct_get_eval_counter(acct->counter,
143 NFT_CT_PKTS, priv->dir);
144 bcnt = nft_ct_get_eval_counter(acct->counter,
145 NFT_CT_BYTES, priv->dir);
147 avgcnt = div64_u64(bcnt, pcnt);
150 memcpy(dest, &avgcnt, sizeof(avgcnt));
153 case NFT_CT_L3PROTOCOL:
154 *dest = nf_ct_l3num(ct);
156 case NFT_CT_PROTOCOL:
157 *dest = nf_ct_protonum(ct);
159 #ifdef CONFIG_NF_CONNTRACK_ZONES
/* zone id, per direction when a concrete direction was configured */
161 const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
163 if (priv->dir < IP_CT_DIR_MAX)
164 *dest = nf_ct_zone_id(zone, priv->dir);
/* the keys below read from the tuple of the configured direction */
175 tuple = &ct->tuplehash[priv->dir].tuple;
/* addresses: 4 bytes for IPv4 connections, 16 bytes otherwise */
178 memcpy(dest, tuple->src.u3.all,
179 nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
182 memcpy(dest, tuple->dst.u3.all,
183 nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
/* layer 4 source/destination value, stored as a 16-bit quantity */
185 case NFT_CT_PROTO_SRC:
186 *dest = (__force __u16)tuple->src.u.all;
188 case NFT_CT_PROTO_DST:
189 *dest = (__force __u16)tuple->dst.u.all;
/* stop evaluating this rule */
196 regs->verdict.code = NFT_BREAK;
199 #ifdef CONFIG_NF_CONNTRACK_ZONES
/*
 * Evaluate a "ct zone set" expression: read a 16-bit value from the source
 * register, build a zone description from it and priv->dir, and attach a
 * conntrack template carrying that zone to the skb with state IP_CT_NEW.
 *
 * The per-CPU template is reused when its use count is exactly 1;
 * otherwise a new template is allocated with GFP_ATOMIC.  The function
 * also contains a path that sets the verdict to NF_DROP (presumably taken
 * when that allocation fails -- the guarding condition is not shown).
 */
200 static void nft_ct_set_zone_eval(const struct nft_expr *expr,
201 struct nft_regs *regs,
202 const struct nft_pktinfo *pkt)
204 struct nf_conntrack_zone zone = { .dir = NF_CT_DEFAULT_ZONE_DIR };
205 const struct nft_ct *priv = nft_expr_priv(expr);
206 struct sk_buff *skb = pkt->skb;
207 enum ip_conntrack_info ctinfo;
208 u16 value = regs->data[priv->sreg];
211 ct = nf_ct_get(skb, &ctinfo);
212 if (ct) /* already tracked */
/* map the configured tuple direction to the zone direction flag */
218 case IP_CT_DIR_ORIGINAL:
219 zone.dir = NF_CT_ZONE_DIR_ORIG;
221 case IP_CT_DIR_REPLY:
222 zone.dir = NF_CT_ZONE_DIR_REPL;
/* try the template belonging to the current CPU first */
228 ct = this_cpu_read(nft_ct_pcpu_template);
230 if (likely(atomic_read(&ct->ct_general.use) == 1)) {
231 nf_ct_zone_add(ct, &zone);
233 /* previous skb got queued to userspace */
234 ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
236 regs->verdict.code = NF_DROP;
/* take a reference for the skb and attach the template */
241 atomic_inc(&ct->ct_general.use);
242 nf_ct_set(skb, ct, IP_CT_NEW);
/*
 * Evaluate a generic "ct set" expression: look up the conntrack entry of
 * the packet and update it from the source register priv->sreg.
 *
 * Visible handlers: the conntrack mark (under CONFIG_NF_CONNTRACK_MARK),
 * compared against the register value with an IPCT_MARK event cached on
 * that path, and the conntrack labels (under CONFIG_NF_CONNTRACK_LABELS),
 * replaced via nf_connlabels_replace().
 */
246 static void nft_ct_set_eval(const struct nft_expr *expr,
247 struct nft_regs *regs,
248 const struct nft_pktinfo *pkt)
250 const struct nft_ct *priv = nft_expr_priv(expr);
251 struct sk_buff *skb = pkt->skb;
252 #ifdef CONFIG_NF_CONNTRACK_MARK
253 u32 value = regs->data[priv->sreg];
255 enum ip_conntrack_info ctinfo;
258 ct = nf_ct_get(skb, &ctinfo);
263 #ifdef CONFIG_NF_CONNTRACK_MARK
/* act only when the stored mark differs from the requested value */
265 if (ct->mark != value) {
267 nf_conntrack_event_cache(IPCT_MARK, ct);
271 #ifdef CONFIG_NF_CONNTRACK_LABELS
/*
 * The same register area is passed as both pointer arguments; the length
 * argument is expressed in 32-bit words.
 */
273 nf_connlabels_replace(ct,
274 ®s->data[priv->sreg],
275 ®s->data[priv->sreg],
276 NF_CT_LABELS_MAX_SIZE / sizeof(u32));
/*
 * Netlink attribute policy for the "ct" expression: the registers and the
 * key are 32-bit attributes, the direction is an 8-bit attribute.
 */
284 static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
285 [NFTA_CT_DREG] = { .type = NLA_U32 },
286 [NFTA_CT_KEY] = { .type = NLA_U32 },
287 [NFTA_CT_DIRECTION] = { .type = NLA_U8 },
288 [NFTA_CT_SREG] = { .type = NLA_U32 },
/*
 * Take conntrack netns references for @family in @net.
 *
 * NFPROTO_INET is expanded into two nf_ct_netns_get() calls, one for
 * NFPROTO_IPV4 and one for NFPROTO_IPV6; any other family is passed
 * through unchanged.  The function also contains a path that puts the
 * IPv4 reference again (presumably the rollback used when the IPv6 get
 * fails -- the guarding condition is not shown).
 */
291 static int nft_ct_netns_get(struct net *net, uint8_t family)
295 if (family == NFPROTO_INET) {
296 err = nf_ct_netns_get(net, NFPROTO_IPV4);
299 err = nf_ct_netns_get(net, NFPROTO_IPV6);
303 err = nf_ct_netns_get(net, family);
310 nf_ct_netns_put(net, NFPROTO_IPV4);
/*
 * Counterpart of nft_ct_netns_get(): for NFPROTO_INET the IPv4 and IPv6
 * references are both dropped, for any other family nf_ct_netns_put() is
 * called with that family.
 */
315 static void nft_ct_netns_put(struct net *net, uint8_t family)
317 if (family == NFPROTO_INET) {
318 nf_ct_netns_put(net, NFPROTO_IPV4);
319 nf_ct_netns_put(net, NFPROTO_IPV6);
321 nf_ct_netns_put(net, family);
324 #ifdef CONFIG_NF_CONNTRACK_ZONES
/*
 * Walk every possible CPU, fetch its conntrack template pointer and reset
 * the per-CPU slot to NULL.
 */
325 static void nft_ct_tmpl_put_pcpu(void)
330 for_each_possible_cpu(cpu) {
331 ct = per_cpu(nft_ct_pcpu_template, cpu);
/* clear the slot so later calls do not see a stale pointer */
335 per_cpu(nft_ct_pcpu_template, cpu) = NULL;
/*
 * Allocate one conntrack template per possible CPU.
 *
 * The function first tests nft_ct_pcpu_template_refcnt (a non-zero count
 * means templates already have users).  Each template is allocated with
 * GFP_KERNEL against init_net and zone id 0, its use count is set to 1 and
 * it is stored in the per-CPU slot.  nft_ct_tmpl_put_pcpu() is called from
 * inside the loop to undo earlier allocations (presumably when an
 * allocation fails -- the guarding condition is not shown).
 */
339 static bool nft_ct_tmpl_alloc_pcpu(void)
341 struct nf_conntrack_zone zone = { .id = 0 };
345 if (nft_ct_pcpu_template_refcnt)
348 for_each_possible_cpu(cpu) {
349 tmp = nf_ct_tmpl_alloc(&init_net, &zone, GFP_KERNEL);
351 nft_ct_tmpl_put_pcpu();
355 atomic_set(&tmp->ct_general.use, 1);
356 per_cpu(nft_ct_pcpu_template, cpu) = tmp;
/*
 * Initialise a "ct get" expression from its netlink attributes.
 *
 * Steps visible in this function:
 *  - read the key from NFTA_CT_KEY (big-endian on the wire) and preset the
 *    direction to IP_CT_DIR_MAX;
 *  - per key, check whether NFTA_CT_DIRECTION is present or absent and
 *    determine the length of the value (labels use NF_CT_LABELS_MAX_SIZE,
 *    the helper name uses NF_CT_HELPER_NAME_LEN, address and port keys use
 *    the size of the corresponding tuple member); an address family other
 *    than the handled ones yields -EAFNOSUPPORT;
 *  - if a direction attribute was given, read it and accept
 *    IP_CT_DIR_ORIGINAL or IP_CT_DIR_REPLY;
 *  - parse NFTA_CT_DREG and validate it as a store target of that length;
 *  - take the conntrack netns references through nft_ct_netns_get();
 *  - for NFT_CT_BYTES, NFT_CT_PKTS and NFT_CT_AVGPKT, switch conntrack
 *    accounting on for the namespace.
 */
363 static int nft_ct_get_init(const struct nft_ctx *ctx,
364 const struct nft_expr *expr,
365 const struct nlattr * const tb[])
367 struct nft_ct *priv = nft_expr_priv(expr);
371 priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
372 priv->dir = IP_CT_DIR_MAX;
374 case NFT_CT_DIRECTION:
375 if (tb[NFTA_CT_DIRECTION] != NULL)
381 #ifdef CONFIG_NF_CONNTRACK_MARK
384 #ifdef CONFIG_NF_CONNTRACK_SECMARK
387 case NFT_CT_EXPIRATION:
388 if (tb[NFTA_CT_DIRECTION] != NULL)
392 #ifdef CONFIG_NF_CONNTRACK_LABELS
394 if (tb[NFTA_CT_DIRECTION] != NULL)
396 len = NF_CT_LABELS_MAX_SIZE;
400 if (tb[NFTA_CT_DIRECTION] != NULL)
402 len = NF_CT_HELPER_NAME_LEN;
405 case NFT_CT_L3PROTOCOL:
406 case NFT_CT_PROTOCOL:
407 /* For compatibility, do not report error if NFTA_CT_DIRECTION
408 * attribute is specified.
414 if (tb[NFTA_CT_DIRECTION] == NULL)
417 switch (ctx->afi->family) {
419 len = FIELD_SIZEOF(struct nf_conntrack_tuple,
424 len = FIELD_SIZEOF(struct nf_conntrack_tuple,
428 return -EAFNOSUPPORT;
431 case NFT_CT_PROTO_SRC:
432 case NFT_CT_PROTO_DST:
433 if (tb[NFTA_CT_DIRECTION] == NULL)
435 len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all);
442 #ifdef CONFIG_NF_CONNTRACK_ZONES
451 if (tb[NFTA_CT_DIRECTION] != NULL) {
452 priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
454 case IP_CT_DIR_ORIGINAL:
455 case IP_CT_DIR_REPLY:
462 priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]);
463 err = nft_validate_register_store(ctx, priv->dreg, NULL,
464 NFT_DATA_VALUE, len);
468 err = nft_ct_netns_get(ctx->net, ctx->afi->family);
472 if (priv->key == NFT_CT_BYTES ||
473 priv->key == NFT_CT_PKTS ||
474 priv->key == NFT_CT_AVGPKT)
475 nf_ct_set_acct(ctx->net, true);
/*
 * Release the key-specific resources of a "ct set" expression.
 *
 * With labels support this drops the connlabels reference of the
 * namespace; with zones support it decrements the per-CPU template user
 * count and frees the templates when the count reaches zero.  Called from
 * nft_ct_set_destroy() and from nft_ct_set_init().
 */
480 static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
483 #ifdef CONFIG_NF_CONNTRACK_LABELS
485 nf_connlabels_put(ctx->net);
488 #ifdef CONFIG_NF_CONNTRACK_ZONES
/* last user gone: release the per-CPU templates */
490 if (--nft_ct_pcpu_template_refcnt == 0)
491 nft_ct_tmpl_put_pcpu();
/*
 * Initialise a "ct set" expression from its netlink attributes.
 *
 * Steps visible in this function:
 *  - preset the direction to IP_CT_DIR_MAX and read the key from
 *    NFTA_CT_KEY;
 *  - mark key: check the direction attribute, length is the size of
 *    nf_conn's mark field;
 *  - labels key: check the direction attribute, length is
 *    NF_CT_LABELS_MAX_SIZE, and take a connlabels reference covering the
 *    highest label bit;
 *  - zone key: allocate the per-CPU templates and bump their user count;
 *  - if a direction attribute was given, read it and accept
 *    IP_CT_DIR_ORIGINAL or IP_CT_DIR_REPLY;
 *  - parse NFTA_CT_SREG and validate it as a load source of that length;
 *  - take the conntrack netns references through nft_ct_netns_get().
 * The function also contains an unwind path that calls
 * __nft_ct_set_destroy().
 */
498 static int nft_ct_set_init(const struct nft_ctx *ctx,
499 const struct nft_expr *expr,
500 const struct nlattr * const tb[])
502 struct nft_ct *priv = nft_expr_priv(expr);
506 priv->dir = IP_CT_DIR_MAX;
507 priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
509 #ifdef CONFIG_NF_CONNTRACK_MARK
511 if (tb[NFTA_CT_DIRECTION])
513 len = FIELD_SIZEOF(struct nf_conn, mark);
516 #ifdef CONFIG_NF_CONNTRACK_LABELS
518 if (tb[NFTA_CT_DIRECTION])
520 len = NF_CT_LABELS_MAX_SIZE;
/* request enough label bits to cover the whole label area */
521 err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
526 #ifdef CONFIG_NF_CONNTRACK_ZONES
528 if (!nft_ct_tmpl_alloc_pcpu())
530 nft_ct_pcpu_template_refcnt++;
537 if (tb[NFTA_CT_DIRECTION]) {
538 priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
540 case IP_CT_DIR_ORIGINAL:
541 case IP_CT_DIR_REPLY:
548 priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
549 err = nft_validate_register_load(priv->sreg, len);
553 err = nft_ct_netns_get(ctx->net, ctx->afi->family);
/* unwind: give back what the key-specific setup acquired */
560 __nft_ct_set_destroy(ctx, priv);
564 static void nft_ct_get_destroy(const struct nft_ctx *ctx,
565 const struct nft_expr *expr)
567 nf_ct_netns_put(ctx->net, ctx->afi->family);
/*
 * Tear down a "ct set" expression: release the key-specific resources via
 * __nft_ct_set_destroy(), then drop the conntrack netns references for the
 * table's address family.
 */
570 static void nft_ct_set_destroy(const struct nft_ctx *ctx,
571 const struct nft_expr *expr)
573 struct nft_ct *priv = nft_expr_priv(expr);
575 __nft_ct_set_destroy(ctx, priv);
576 nft_ct_netns_put(ctx->net, ctx->afi->family);
/*
 * Dump a "ct get" expression to netlink: the destination register, the
 * key (converted to big-endian) and the direction.
 *
 * For NFT_CT_PROTO_SRC and NFT_CT_PROTO_DST the direction is emitted
 * without a range check; another path emits it only when a concrete
 * direction is stored (priv->dir < IP_CT_DIR_MAX).  Any failing put jumps
 * to the nla_put_failure label.
 */
579 static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
581 const struct nft_ct *priv = nft_expr_priv(expr);
583 if (nft_dump_register(skb, NFTA_CT_DREG, priv->dreg))
584 goto nla_put_failure;
585 if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
586 goto nla_put_failure;
591 case NFT_CT_PROTO_SRC:
592 case NFT_CT_PROTO_DST:
593 if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
594 goto nla_put_failure;
/* direction is optional here: emit it only when one was configured */
600 if (priv->dir < IP_CT_DIR_MAX &&
601 nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
602 goto nla_put_failure;
/*
 * Dump a "ct set" expression to netlink: the source register, the key
 * (converted to big-endian) and, when a concrete direction is stored
 * (priv->dir < IP_CT_DIR_MAX), the direction.  Any failing put jumps to
 * the nla_put_failure label.
 */
614 static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
616 const struct nft_ct *priv = nft_expr_priv(expr);
618 if (nft_dump_register(skb, NFTA_CT_SREG, priv->sreg))
619 goto nla_put_failure;
620 if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
621 goto nla_put_failure;
625 if (priv->dir < IP_CT_DIR_MAX &&
626 nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
627 goto nla_put_failure;
/* Forward declaration: the ops tables below point back at the type. */
639 static struct nft_expr_type nft_ct_type;
/*
 * Operations for the "get" flavour of the ct expression; returned by
 * nft_ct_select_ops() when NFTA_CT_DREG is present.
 */
640 static const struct nft_expr_ops nft_ct_get_ops = {
641 .type = &nft_ct_type,
642 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
643 .eval = nft_ct_get_eval,
644 .init = nft_ct_get_init,
645 .destroy = nft_ct_get_destroy,
646 .dump = nft_ct_get_dump,
/*
 * Operations for the "set" flavour of the ct expression; returned by
 * nft_ct_select_ops() when NFTA_CT_SREG is present and the zone-specific
 * ops are not selected.
 */
649 static const struct nft_expr_ops nft_ct_set_ops = {
650 .type = &nft_ct_type,
651 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
652 .eval = nft_ct_set_eval,
653 .init = nft_ct_set_init,
654 .destroy = nft_ct_set_destroy,
655 .dump = nft_ct_set_dump,
658 #ifdef CONFIG_NF_CONNTRACK_ZONES
/*
 * Operations for setting the conntrack zone.  Identical to nft_ct_set_ops
 * except for the eval callback, which is nft_ct_set_zone_eval().
 */
659 static const struct nft_expr_ops nft_ct_set_zone_ops = {
660 .type = &nft_ct_type,
661 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
662 .eval = nft_ct_set_zone_eval,
663 .init = nft_ct_set_init,
664 .destroy = nft_ct_set_destroy,
665 .dump = nft_ct_set_dump,
/*
 * Choose the ops table for a new ct expression from its attributes.
 *
 * Returns ERR_PTR(-EINVAL) when NFTA_CT_KEY is missing, when both
 * NFTA_CT_DREG and NFTA_CT_SREG are given, or when neither is given.
 * A destination register selects nft_ct_get_ops.  A source register
 * selects nft_ct_set_zone_ops when zones are compiled in and the key is
 * NFT_CT_ZONE, and nft_ct_set_ops otherwise.
 */
669 static const struct nft_expr_ops *
670 nft_ct_select_ops(const struct nft_ctx *ctx,
671 const struct nlattr * const tb[])
673 if (tb[NFTA_CT_KEY] == NULL)
674 return ERR_PTR(-EINVAL);
/* an expression is either a getter or a setter, never both */
676 if (tb[NFTA_CT_DREG] && tb[NFTA_CT_SREG])
677 return ERR_PTR(-EINVAL);
679 if (tb[NFTA_CT_DREG])
680 return &nft_ct_get_ops;
682 if (tb[NFTA_CT_SREG]) {
683 #ifdef CONFIG_NF_CONNTRACK_ZONES
/* compare in wire (big-endian) byte order */
684 if (nla_get_be32(tb[NFTA_CT_KEY]) == htonl(NFT_CT_ZONE))
685 return &nft_ct_set_zone_ops;
687 return &nft_ct_set_ops;
690 return ERR_PTR(-EINVAL);
/*
 * Expression type descriptor for "ct".  The ops table is chosen per
 * expression by nft_ct_select_ops(); attributes are validated against
 * nft_ct_policy up to NFTA_CT_MAX.
 */
693 static struct nft_expr_type nft_ct_type __read_mostly = {
695 .select_ops = &nft_ct_select_ops,
696 .policy = nft_ct_policy,
697 .maxattr = NFTA_CT_MAX,
698 .owner = THIS_MODULE,
/*
 * Evaluate the "notrack" expression: unless the packet is skipped by the
 * check following the nf_ct_get() lookup, attach the untracked conntrack
 * object to the skb with state IP_CT_NEW, taking a reference on it first.
 */
701 static void nft_notrack_eval(const struct nft_expr *expr,
702 struct nft_regs *regs,
703 const struct nft_pktinfo *pkt)
705 struct sk_buff *skb = pkt->skb;
706 enum ip_conntrack_info ctinfo;
709 ct = nf_ct_get(pkt->skb, &ctinfo);
710 /* Previously seen (loopback or untracked)? Ignore. */
/* reference the untracked object on behalf of the skb, then attach it */
714 ct = nf_ct_untracked_get();
715 atomic_inc(&ct->ct_general.use);
716 nf_ct_set(skb, ct, IP_CT_NEW);
/* Forward declaration: the ops table below points back at the type. */
719 static struct nft_expr_type nft_notrack_type;
/*
 * Operations for "notrack".  The expression has no private data (size is
 * NFT_EXPR_SIZE(0)) and sets only an eval callback here.
 */
720 static const struct nft_expr_ops nft_notrack_ops = {
721 .type = &nft_notrack_type,
722 .size = NFT_EXPR_SIZE(0),
723 .eval = nft_notrack_eval,
/* Expression type descriptor for "notrack", using a fixed ops table. */
726 static struct nft_expr_type nft_notrack_type __read_mostly = {
728 .ops = &nft_notrack_ops,
729 .owner = THIS_MODULE,
/*
 * Module entry point: register the "ct" expression type, then the
 * "notrack" expression type.  The function also contains a path that
 * unregisters "ct" again (presumably the rollback used when registering
 * "notrack" fails -- the guarding condition is not shown).
 */
732 static int __init nft_ct_module_init(void)
/* compile-time check: the label area must fit into NFT_REG_SIZE */
736 BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE);
738 err = nft_register_expr(&nft_ct_type);
742 err = nft_register_expr(&nft_notrack_type);
748 nft_unregister_expr(&nft_ct_type);
/*
 * Module exit: unregister both expression types, in the reverse order of
 * their registration in nft_ct_module_init().
 */
752 static void __exit nft_ct_module_exit(void)
754 nft_unregister_expr(&nft_notrack_type);
755 nft_unregister_expr(&nft_ct_type);
/* Hook the init/exit functions into the module loader. */
758 module_init(nft_ct_module_init);
759 module_exit(nft_ct_module_exit);
/* Module metadata and the aliases for the "ct" and "notrack" expressions. */
761 MODULE_LICENSE("GPL");
762 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
763 MODULE_ALIAS_NFT_EXPR("ct");
764 MODULE_ALIAS_NFT_EXPR("notrack");