2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
9 * Development of this code funded by Astaro AG (http://www.astaro.com/)
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/netlink.h>
16 #include <linux/netfilter.h>
17 #include <linux/netfilter/nf_tables.h>
18 #include <net/netfilter/nf_tables.h>
19 #include <net/netfilter/nf_conntrack.h>
20 #include <net/netfilter/nf_conntrack_acct.h>
21 #include <net/netfilter/nf_conntrack_tuple.h>
22 #include <net/netfilter/nf_conntrack_helper.h>
23 #include <net/netfilter/nf_conntrack_ecache.h>
24 #include <net/netfilter/nf_conntrack_labels.h>
27 enum nft_ct_keys key:8;
28 enum ip_conntrack_dir dir:8;
30 enum nft_registers dreg:8;
31 enum nft_registers sreg:8;
35 #ifdef CONFIG_NF_CONNTRACK_ZONES
36 static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
37 static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
40 static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
42 enum ip_conntrack_dir d)
44 if (d < IP_CT_DIR_MAX)
45 return k == NFT_CT_BYTES ? atomic64_read(&c[d].bytes) :
46 atomic64_read(&c[d].packets);
48 return nft_ct_get_eval_counter(c, k, IP_CT_DIR_ORIGINAL) +
49 nft_ct_get_eval_counter(c, k, IP_CT_DIR_REPLY);
52 static void nft_ct_get_eval(const struct nft_expr *expr,
53 struct nft_regs *regs,
54 const struct nft_pktinfo *pkt)
56 const struct nft_ct *priv = nft_expr_priv(expr);
57 u32 *dest = ®s->data[priv->dreg];
58 enum ip_conntrack_info ctinfo;
59 const struct nf_conn *ct;
60 const struct nf_conn_help *help;
61 const struct nf_conntrack_tuple *tuple;
62 const struct nf_conntrack_helper *helper;
65 ct = nf_ct_get(pkt->skb, &ctinfo);
70 state = NF_CT_STATE_INVALID_BIT;
71 else if (nf_ct_is_untracked(ct))
72 state = NF_CT_STATE_UNTRACKED_BIT;
74 state = NF_CT_STATE_BIT(ctinfo);
85 case NFT_CT_DIRECTION:
86 nft_reg_store8(dest, CTINFO2DIR(ctinfo));
91 #ifdef CONFIG_NF_CONNTRACK_MARK
96 #ifdef CONFIG_NF_CONNTRACK_SECMARK
101 case NFT_CT_EXPIRATION:
102 *dest = jiffies_to_msecs(nf_ct_expires(ct));
105 if (ct->master == NULL)
107 help = nfct_help(ct->master);
110 helper = rcu_dereference(help->helper);
113 strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
115 #ifdef CONFIG_NF_CONNTRACK_LABELS
116 case NFT_CT_LABELS: {
117 struct nf_conn_labels *labels = nf_ct_labels_find(ct);
120 memcpy(dest, labels->bits, NF_CT_LABELS_MAX_SIZE);
122 memset(dest, 0, NF_CT_LABELS_MAX_SIZE);
126 case NFT_CT_BYTES: /* fallthrough */
128 const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
132 count = nft_ct_get_eval_counter(acct->counter,
133 priv->key, priv->dir);
134 memcpy(dest, &count, sizeof(count));
137 case NFT_CT_AVGPKT: {
138 const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
139 u64 avgcnt = 0, bcnt = 0, pcnt = 0;
142 pcnt = nft_ct_get_eval_counter(acct->counter,
143 NFT_CT_PKTS, priv->dir);
144 bcnt = nft_ct_get_eval_counter(acct->counter,
145 NFT_CT_BYTES, priv->dir);
147 avgcnt = div64_u64(bcnt, pcnt);
150 memcpy(dest, &avgcnt, sizeof(avgcnt));
153 case NFT_CT_L3PROTOCOL:
154 nft_reg_store8(dest, nf_ct_l3num(ct));
156 case NFT_CT_PROTOCOL:
157 nft_reg_store8(dest, nf_ct_protonum(ct));
159 #ifdef CONFIG_NF_CONNTRACK_ZONES
161 const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
164 if (priv->dir < IP_CT_DIR_MAX)
165 zoneid = nf_ct_zone_id(zone, priv->dir);
169 nft_reg_store16(dest, zoneid);
177 tuple = &ct->tuplehash[priv->dir].tuple;
180 memcpy(dest, tuple->src.u3.all,
181 nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
184 memcpy(dest, tuple->dst.u3.all,
185 nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
187 case NFT_CT_PROTO_SRC:
188 nft_reg_store16(dest, (__force u16)tuple->src.u.all);
190 case NFT_CT_PROTO_DST:
191 nft_reg_store16(dest, (__force u16)tuple->dst.u.all);
198 regs->verdict.code = NFT_BREAK;
201 #ifdef CONFIG_NF_CONNTRACK_ZONES
202 static void nft_ct_set_zone_eval(const struct nft_expr *expr,
203 struct nft_regs *regs,
204 const struct nft_pktinfo *pkt)
206 struct nf_conntrack_zone zone = { .dir = NF_CT_DEFAULT_ZONE_DIR };
207 const struct nft_ct *priv = nft_expr_priv(expr);
208 struct sk_buff *skb = pkt->skb;
209 enum ip_conntrack_info ctinfo;
210 u16 value = nft_reg_load16(®s->data[priv->sreg]);
213 ct = nf_ct_get(skb, &ctinfo);
214 if (ct) /* already tracked */
220 case IP_CT_DIR_ORIGINAL:
221 zone.dir = NF_CT_ZONE_DIR_ORIG;
223 case IP_CT_DIR_REPLY:
224 zone.dir = NF_CT_ZONE_DIR_REPL;
230 ct = this_cpu_read(nft_ct_pcpu_template);
232 if (likely(atomic_read(&ct->ct_general.use) == 1)) {
233 nf_ct_zone_add(ct, &zone);
235 /* previous skb got queued to userspace */
236 ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
238 regs->verdict.code = NF_DROP;
243 atomic_inc(&ct->ct_general.use);
244 nf_ct_set(skb, ct, IP_CT_NEW);
248 static void nft_ct_set_eval(const struct nft_expr *expr,
249 struct nft_regs *regs,
250 const struct nft_pktinfo *pkt)
252 const struct nft_ct *priv = nft_expr_priv(expr);
253 struct sk_buff *skb = pkt->skb;
254 #ifdef CONFIG_NF_CONNTRACK_MARK
255 u32 value = regs->data[priv->sreg];
257 enum ip_conntrack_info ctinfo;
260 ct = nf_ct_get(skb, &ctinfo);
265 #ifdef CONFIG_NF_CONNTRACK_MARK
267 if (ct->mark != value) {
269 nf_conntrack_event_cache(IPCT_MARK, ct);
273 #ifdef CONFIG_NF_CONNTRACK_LABELS
275 nf_connlabels_replace(ct,
276 ®s->data[priv->sreg],
277 ®s->data[priv->sreg],
278 NF_CT_LABELS_MAX_SIZE / sizeof(u32));
286 static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
287 [NFTA_CT_DREG] = { .type = NLA_U32 },
288 [NFTA_CT_KEY] = { .type = NLA_U32 },
289 [NFTA_CT_DIRECTION] = { .type = NLA_U8 },
290 [NFTA_CT_SREG] = { .type = NLA_U32 },
293 static int nft_ct_netns_get(struct net *net, uint8_t family)
297 if (family == NFPROTO_INET) {
298 err = nf_ct_netns_get(net, NFPROTO_IPV4);
301 err = nf_ct_netns_get(net, NFPROTO_IPV6);
305 err = nf_ct_netns_get(net, family);
312 nf_ct_netns_put(net, NFPROTO_IPV4);
317 static void nft_ct_netns_put(struct net *net, uint8_t family)
319 if (family == NFPROTO_INET) {
320 nf_ct_netns_put(net, NFPROTO_IPV4);
321 nf_ct_netns_put(net, NFPROTO_IPV6);
323 nf_ct_netns_put(net, family);
326 #ifdef CONFIG_NF_CONNTRACK_ZONES
327 static void nft_ct_tmpl_put_pcpu(void)
332 for_each_possible_cpu(cpu) {
333 ct = per_cpu(nft_ct_pcpu_template, cpu);
337 per_cpu(nft_ct_pcpu_template, cpu) = NULL;
341 static bool nft_ct_tmpl_alloc_pcpu(void)
343 struct nf_conntrack_zone zone = { .id = 0 };
347 if (nft_ct_pcpu_template_refcnt)
350 for_each_possible_cpu(cpu) {
351 tmp = nf_ct_tmpl_alloc(&init_net, &zone, GFP_KERNEL);
353 nft_ct_tmpl_put_pcpu();
357 atomic_set(&tmp->ct_general.use, 1);
358 per_cpu(nft_ct_pcpu_template, cpu) = tmp;
365 static int nft_ct_get_init(const struct nft_ctx *ctx,
366 const struct nft_expr *expr,
367 const struct nlattr * const tb[])
369 struct nft_ct *priv = nft_expr_priv(expr);
373 priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
374 priv->dir = IP_CT_DIR_MAX;
376 case NFT_CT_DIRECTION:
377 if (tb[NFTA_CT_DIRECTION] != NULL)
383 #ifdef CONFIG_NF_CONNTRACK_MARK
386 #ifdef CONFIG_NF_CONNTRACK_SECMARK
389 case NFT_CT_EXPIRATION:
390 if (tb[NFTA_CT_DIRECTION] != NULL)
394 #ifdef CONFIG_NF_CONNTRACK_LABELS
396 if (tb[NFTA_CT_DIRECTION] != NULL)
398 len = NF_CT_LABELS_MAX_SIZE;
402 if (tb[NFTA_CT_DIRECTION] != NULL)
404 len = NF_CT_HELPER_NAME_LEN;
407 case NFT_CT_L3PROTOCOL:
408 case NFT_CT_PROTOCOL:
409 /* For compatibility, do not report error if NFTA_CT_DIRECTION
410 * attribute is specified.
416 if (tb[NFTA_CT_DIRECTION] == NULL)
419 switch (ctx->afi->family) {
421 len = FIELD_SIZEOF(struct nf_conntrack_tuple,
426 len = FIELD_SIZEOF(struct nf_conntrack_tuple,
430 return -EAFNOSUPPORT;
433 case NFT_CT_PROTO_SRC:
434 case NFT_CT_PROTO_DST:
435 if (tb[NFTA_CT_DIRECTION] == NULL)
437 len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all);
444 #ifdef CONFIG_NF_CONNTRACK_ZONES
453 if (tb[NFTA_CT_DIRECTION] != NULL) {
454 priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
456 case IP_CT_DIR_ORIGINAL:
457 case IP_CT_DIR_REPLY:
464 priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]);
465 err = nft_validate_register_store(ctx, priv->dreg, NULL,
466 NFT_DATA_VALUE, len);
470 err = nft_ct_netns_get(ctx->net, ctx->afi->family);
474 if (priv->key == NFT_CT_BYTES ||
475 priv->key == NFT_CT_PKTS ||
476 priv->key == NFT_CT_AVGPKT)
477 nf_ct_set_acct(ctx->net, true);
482 static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
485 #ifdef CONFIG_NF_CONNTRACK_LABELS
487 nf_connlabels_put(ctx->net);
490 #ifdef CONFIG_NF_CONNTRACK_ZONES
492 if (--nft_ct_pcpu_template_refcnt == 0)
493 nft_ct_tmpl_put_pcpu();
500 static int nft_ct_set_init(const struct nft_ctx *ctx,
501 const struct nft_expr *expr,
502 const struct nlattr * const tb[])
504 struct nft_ct *priv = nft_expr_priv(expr);
508 priv->dir = IP_CT_DIR_MAX;
509 priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
511 #ifdef CONFIG_NF_CONNTRACK_MARK
513 if (tb[NFTA_CT_DIRECTION])
515 len = FIELD_SIZEOF(struct nf_conn, mark);
518 #ifdef CONFIG_NF_CONNTRACK_LABELS
520 if (tb[NFTA_CT_DIRECTION])
522 len = NF_CT_LABELS_MAX_SIZE;
523 err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
528 #ifdef CONFIG_NF_CONNTRACK_ZONES
530 if (!nft_ct_tmpl_alloc_pcpu())
532 nft_ct_pcpu_template_refcnt++;
540 if (tb[NFTA_CT_DIRECTION]) {
541 priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
543 case IP_CT_DIR_ORIGINAL:
544 case IP_CT_DIR_REPLY:
552 priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
553 err = nft_validate_register_load(priv->sreg, len);
557 err = nft_ct_netns_get(ctx->net, ctx->afi->family);
564 __nft_ct_set_destroy(ctx, priv);
568 static void nft_ct_get_destroy(const struct nft_ctx *ctx,
569 const struct nft_expr *expr)
571 nf_ct_netns_put(ctx->net, ctx->afi->family);
574 static void nft_ct_set_destroy(const struct nft_ctx *ctx,
575 const struct nft_expr *expr)
577 struct nft_ct *priv = nft_expr_priv(expr);
579 __nft_ct_set_destroy(ctx, priv);
580 nft_ct_netns_put(ctx->net, ctx->afi->family);
583 static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
585 const struct nft_ct *priv = nft_expr_priv(expr);
587 if (nft_dump_register(skb, NFTA_CT_DREG, priv->dreg))
588 goto nla_put_failure;
589 if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
590 goto nla_put_failure;
595 case NFT_CT_PROTO_SRC:
596 case NFT_CT_PROTO_DST:
597 if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
598 goto nla_put_failure;
604 if (priv->dir < IP_CT_DIR_MAX &&
605 nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
606 goto nla_put_failure;
618 static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
620 const struct nft_ct *priv = nft_expr_priv(expr);
622 if (nft_dump_register(skb, NFTA_CT_SREG, priv->sreg))
623 goto nla_put_failure;
624 if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
625 goto nla_put_failure;
629 if (priv->dir < IP_CT_DIR_MAX &&
630 nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
631 goto nla_put_failure;
643 static struct nft_expr_type nft_ct_type;
644 static const struct nft_expr_ops nft_ct_get_ops = {
645 .type = &nft_ct_type,
646 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
647 .eval = nft_ct_get_eval,
648 .init = nft_ct_get_init,
649 .destroy = nft_ct_get_destroy,
650 .dump = nft_ct_get_dump,
653 static const struct nft_expr_ops nft_ct_set_ops = {
654 .type = &nft_ct_type,
655 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
656 .eval = nft_ct_set_eval,
657 .init = nft_ct_set_init,
658 .destroy = nft_ct_set_destroy,
659 .dump = nft_ct_set_dump,
#ifdef CONFIG_NF_CONNTRACK_ZONES
/*
 * Operations for "ct set" with key NFT_CT_ZONE.  Identical to nft_ct_set_ops
 * except for the eval hook, which is the zone-specific
 * nft_ct_set_zone_eval(); init, destroy and dump are shared.
 */
static const struct nft_expr_ops nft_ct_set_zone_ops = {
	.type		= &nft_ct_type,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ct)),
	.init		= nft_ct_set_init,
	.eval		= nft_ct_set_zone_eval,
	.dump		= nft_ct_set_dump,
	.destroy	= nft_ct_set_destroy,
};
#endif
673 static const struct nft_expr_ops *
674 nft_ct_select_ops(const struct nft_ctx *ctx,
675 const struct nlattr * const tb[])
677 if (tb[NFTA_CT_KEY] == NULL)
678 return ERR_PTR(-EINVAL);
680 if (tb[NFTA_CT_DREG] && tb[NFTA_CT_SREG])
681 return ERR_PTR(-EINVAL);
683 if (tb[NFTA_CT_DREG])
684 return &nft_ct_get_ops;
686 if (tb[NFTA_CT_SREG]) {
687 #ifdef CONFIG_NF_CONNTRACK_ZONES
688 if (nla_get_be32(tb[NFTA_CT_KEY]) == htonl(NFT_CT_ZONE))
689 return &nft_ct_set_zone_ops;
691 return &nft_ct_set_ops;
694 return ERR_PTR(-EINVAL);
697 static struct nft_expr_type nft_ct_type __read_mostly = {
699 .select_ops = &nft_ct_select_ops,
700 .policy = nft_ct_policy,
701 .maxattr = NFTA_CT_MAX,
702 .owner = THIS_MODULE,
705 static void nft_notrack_eval(const struct nft_expr *expr,
706 struct nft_regs *regs,
707 const struct nft_pktinfo *pkt)
709 struct sk_buff *skb = pkt->skb;
710 enum ip_conntrack_info ctinfo;
713 ct = nf_ct_get(pkt->skb, &ctinfo);
714 /* Previously seen (loopback or untracked)? Ignore. */
718 ct = nf_ct_untracked_get();
719 atomic_inc(&ct->ct_general.use);
720 nf_ct_set(skb, ct, IP_CT_NEW);
723 static struct nft_expr_type nft_notrack_type;
724 static const struct nft_expr_ops nft_notrack_ops = {
725 .type = &nft_notrack_type,
726 .size = NFT_EXPR_SIZE(0),
727 .eval = nft_notrack_eval,
730 static struct nft_expr_type nft_notrack_type __read_mostly = {
732 .ops = &nft_notrack_ops,
733 .owner = THIS_MODULE,
736 static int __init nft_ct_module_init(void)
740 BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE);
742 err = nft_register_expr(&nft_ct_type);
746 err = nft_register_expr(&nft_notrack_type);
752 nft_unregister_expr(&nft_ct_type);
756 static void __exit nft_ct_module_exit(void)
758 nft_unregister_expr(&nft_notrack_type);
759 nft_unregister_expr(&nft_ct_type);
762 module_init(nft_ct_module_init);
763 module_exit(nft_ct_module_exit);
765 MODULE_LICENSE("GPL");
766 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
767 MODULE_ALIAS_NFT_EXPR("ct");
768 MODULE_ALIAS_NFT_EXPR("notrack");