regulator: lochnagar: Use a consistent comment style for SPDX header
[linux-2.6-block.git] / net / netfilter / nft_ct.c
1 /*
2  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3  * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License version 2 as
7  * published by the Free Software Foundation.
8  *
9  * Development of this code funded by Astaro AG (http://www.astaro.com/)
10  */
11
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/netlink.h>
16 #include <linux/netfilter.h>
17 #include <linux/netfilter/nf_tables.h>
18 #include <net/netfilter/nf_tables.h>
19 #include <net/netfilter/nf_conntrack.h>
20 #include <net/netfilter/nf_conntrack_acct.h>
21 #include <net/netfilter/nf_conntrack_tuple.h>
22 #include <net/netfilter/nf_conntrack_helper.h>
23 #include <net/netfilter/nf_conntrack_ecache.h>
24 #include <net/netfilter/nf_conntrack_labels.h>
25 #include <net/netfilter/nf_conntrack_timeout.h>
26 #include <net/netfilter/nf_conntrack_l4proto.h>
27
/* Private data of one "ct" expression: the conntrack key it operates on,
 * the optional direction, and the register it reads from or writes to.
 * The register field is a union because an expression is either a getter
 * (dreg) or a setter (sreg), never both.
 */
28 struct nft_ct {
29         enum nft_ct_keys        key:8;   /* parsed from NFTA_CT_KEY */
30         enum ip_conntrack_dir   dir:8;   /* IP_CT_DIR_MAX when no direction was given */
31         union {
32                 enum nft_registers      dreg:8;  /* destination register (get) */
33                 enum nft_registers      sreg:8;  /* source register (set) */
34         };
35 };
36
/* State of a conntrack helper object.
 * NOTE(review): the users of this struct are not visible in this part of
 * the file; helper4/helper6 are presumably the IPv4 and IPv6 helper
 * instances and l4proto the layer 4 protocol number - confirm against the
 * helper object code.
 */
37 struct nft_ct_helper_obj  {
38         struct nf_conntrack_helper *helper4;
39         struct nf_conntrack_helper *helper6;
40         u8 l4proto;
41 };
42
43 #ifdef CONFIG_NF_CONNTRACK_ZONES
/* One conntrack template per CPU, attached to packets by
 * nft_ct_set_zone_eval() to carry the zone to use.
 */
44 static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
/* Number of "ct zone set" expressions using the per-CPU templates; it is
 * incremented in nft_ct_set_init() and the templates are released when it
 * drops back to zero in __nft_ct_set_destroy().
 */
45 static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
46 #endif
47
48 static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
49                                    enum nft_ct_keys k,
50                                    enum ip_conntrack_dir d)
51 {
52         if (d < IP_CT_DIR_MAX)
53                 return k == NFT_CT_BYTES ? atomic64_read(&c[d].bytes) :
54                                            atomic64_read(&c[d].packets);
55
56         return nft_ct_get_eval_counter(c, k, IP_CT_DIR_ORIGINAL) +
57                nft_ct_get_eval_counter(c, k, IP_CT_DIR_REPLY);
58 }
59
60 static void nft_ct_get_eval(const struct nft_expr *expr,
61                             struct nft_regs *regs,
62                             const struct nft_pktinfo *pkt)
63 {
64         const struct nft_ct *priv = nft_expr_priv(expr);
65         u32 *dest = &regs->data[priv->dreg];
66         enum ip_conntrack_info ctinfo;
67         const struct nf_conn *ct;
68         const struct nf_conn_help *help;
69         const struct nf_conntrack_tuple *tuple;
70         const struct nf_conntrack_helper *helper;
71         unsigned int state;
72
73         ct = nf_ct_get(pkt->skb, &ctinfo);
74
75         switch (priv->key) {
76         case NFT_CT_STATE:
77                 if (ct)
78                         state = NF_CT_STATE_BIT(ctinfo);
79                 else if (ctinfo == IP_CT_UNTRACKED)
80                         state = NF_CT_STATE_UNTRACKED_BIT;
81                 else
82                         state = NF_CT_STATE_INVALID_BIT;
83                 *dest = state;
84                 return;
85         default:
86                 break;
87         }
88
89         if (ct == NULL)
90                 goto err;
91
92         switch (priv->key) {
93         case NFT_CT_DIRECTION:
94                 nft_reg_store8(dest, CTINFO2DIR(ctinfo));
95                 return;
96         case NFT_CT_STATUS:
97                 *dest = ct->status;
98                 return;
99 #ifdef CONFIG_NF_CONNTRACK_MARK
100         case NFT_CT_MARK:
101                 *dest = ct->mark;
102                 return;
103 #endif
104 #ifdef CONFIG_NF_CONNTRACK_SECMARK
105         case NFT_CT_SECMARK:
106                 *dest = ct->secmark;
107                 return;
108 #endif
109         case NFT_CT_EXPIRATION:
110                 *dest = jiffies_to_msecs(nf_ct_expires(ct));
111                 return;
112         case NFT_CT_HELPER:
113                 if (ct->master == NULL)
114                         goto err;
115                 help = nfct_help(ct->master);
116                 if (help == NULL)
117                         goto err;
118                 helper = rcu_dereference(help->helper);
119                 if (helper == NULL)
120                         goto err;
121                 strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
122                 return;
123 #ifdef CONFIG_NF_CONNTRACK_LABELS
124         case NFT_CT_LABELS: {
125                 struct nf_conn_labels *labels = nf_ct_labels_find(ct);
126
127                 if (labels)
128                         memcpy(dest, labels->bits, NF_CT_LABELS_MAX_SIZE);
129                 else
130                         memset(dest, 0, NF_CT_LABELS_MAX_SIZE);
131                 return;
132         }
133 #endif
134         case NFT_CT_BYTES: /* fallthrough */
135         case NFT_CT_PKTS: {
136                 const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
137                 u64 count = 0;
138
139                 if (acct)
140                         count = nft_ct_get_eval_counter(acct->counter,
141                                                         priv->key, priv->dir);
142                 memcpy(dest, &count, sizeof(count));
143                 return;
144         }
145         case NFT_CT_AVGPKT: {
146                 const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
147                 u64 avgcnt = 0, bcnt = 0, pcnt = 0;
148
149                 if (acct) {
150                         pcnt = nft_ct_get_eval_counter(acct->counter,
151                                                        NFT_CT_PKTS, priv->dir);
152                         bcnt = nft_ct_get_eval_counter(acct->counter,
153                                                        NFT_CT_BYTES, priv->dir);
154                         if (pcnt != 0)
155                                 avgcnt = div64_u64(bcnt, pcnt);
156                 }
157
158                 memcpy(dest, &avgcnt, sizeof(avgcnt));
159                 return;
160         }
161         case NFT_CT_L3PROTOCOL:
162                 nft_reg_store8(dest, nf_ct_l3num(ct));
163                 return;
164         case NFT_CT_PROTOCOL:
165                 nft_reg_store8(dest, nf_ct_protonum(ct));
166                 return;
167 #ifdef CONFIG_NF_CONNTRACK_ZONES
168         case NFT_CT_ZONE: {
169                 const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
170                 u16 zoneid;
171
172                 if (priv->dir < IP_CT_DIR_MAX)
173                         zoneid = nf_ct_zone_id(zone, priv->dir);
174                 else
175                         zoneid = zone->id;
176
177                 nft_reg_store16(dest, zoneid);
178                 return;
179         }
180 #endif
181         default:
182                 break;
183         }
184
185         tuple = &ct->tuplehash[priv->dir].tuple;
186         switch (priv->key) {
187         case NFT_CT_SRC:
188                 memcpy(dest, tuple->src.u3.all,
189                        nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
190                 return;
191         case NFT_CT_DST:
192                 memcpy(dest, tuple->dst.u3.all,
193                        nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
194                 return;
195         case NFT_CT_PROTO_SRC:
196                 nft_reg_store16(dest, (__force u16)tuple->src.u.all);
197                 return;
198         case NFT_CT_PROTO_DST:
199                 nft_reg_store16(dest, (__force u16)tuple->dst.u.all);
200                 return;
201         case NFT_CT_SRC_IP:
202                 if (nf_ct_l3num(ct) != NFPROTO_IPV4)
203                         goto err;
204                 *dest = tuple->src.u3.ip;
205                 return;
206         case NFT_CT_DST_IP:
207                 if (nf_ct_l3num(ct) != NFPROTO_IPV4)
208                         goto err;
209                 *dest = tuple->dst.u3.ip;
210                 return;
211         case NFT_CT_SRC_IP6:
212                 if (nf_ct_l3num(ct) != NFPROTO_IPV6)
213                         goto err;
214                 memcpy(dest, tuple->src.u3.ip6, sizeof(struct in6_addr));
215                 return;
216         case NFT_CT_DST_IP6:
217                 if (nf_ct_l3num(ct) != NFPROTO_IPV6)
218                         goto err;
219                 memcpy(dest, tuple->dst.u3.ip6, sizeof(struct in6_addr));
220                 return;
221         default:
222                 break;
223         }
224         return;
225 err:
226         regs->verdict.code = NFT_BREAK;
227 }
228
229 #ifdef CONFIG_NF_CONNTRACK_ZONES
230 static void nft_ct_set_zone_eval(const struct nft_expr *expr,
231                                  struct nft_regs *regs,
232                                  const struct nft_pktinfo *pkt)
233 {
234         struct nf_conntrack_zone zone = { .dir = NF_CT_DEFAULT_ZONE_DIR };
235         const struct nft_ct *priv = nft_expr_priv(expr);
236         struct sk_buff *skb = pkt->skb;
237         enum ip_conntrack_info ctinfo;
238         u16 value = nft_reg_load16(&regs->data[priv->sreg]);
239         struct nf_conn *ct;
240
241         ct = nf_ct_get(skb, &ctinfo);
242         if (ct) /* already tracked */
243                 return;
244
245         zone.id = value;
246
247         switch (priv->dir) {
248         case IP_CT_DIR_ORIGINAL:
249                 zone.dir = NF_CT_ZONE_DIR_ORIG;
250                 break;
251         case IP_CT_DIR_REPLY:
252                 zone.dir = NF_CT_ZONE_DIR_REPL;
253                 break;
254         default:
255                 break;
256         }
257
258         ct = this_cpu_read(nft_ct_pcpu_template);
259
260         if (likely(atomic_read(&ct->ct_general.use) == 1)) {
261                 nf_ct_zone_add(ct, &zone);
262         } else {
263                 /* previous skb got queued to userspace */
264                 ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC);
265                 if (!ct) {
266                         regs->verdict.code = NF_DROP;
267                         return;
268                 }
269         }
270
271         atomic_inc(&ct->ct_general.use);
272         nf_ct_set(skb, ct, IP_CT_NEW);
273 }
274 #endif
275
276 static void nft_ct_set_eval(const struct nft_expr *expr,
277                             struct nft_regs *regs,
278                             const struct nft_pktinfo *pkt)
279 {
280         const struct nft_ct *priv = nft_expr_priv(expr);
281         struct sk_buff *skb = pkt->skb;
282 #ifdef CONFIG_NF_CONNTRACK_MARK
283         u32 value = regs->data[priv->sreg];
284 #endif
285         enum ip_conntrack_info ctinfo;
286         struct nf_conn *ct;
287
288         ct = nf_ct_get(skb, &ctinfo);
289         if (ct == NULL || nf_ct_is_template(ct))
290                 return;
291
292         switch (priv->key) {
293 #ifdef CONFIG_NF_CONNTRACK_MARK
294         case NFT_CT_MARK:
295                 if (ct->mark != value) {
296                         ct->mark = value;
297                         nf_conntrack_event_cache(IPCT_MARK, ct);
298                 }
299                 break;
300 #endif
301 #ifdef CONFIG_NF_CONNTRACK_LABELS
302         case NFT_CT_LABELS:
303                 nf_connlabels_replace(ct,
304                                       &regs->data[priv->sreg],
305                                       &regs->data[priv->sreg],
306                                       NF_CT_LABELS_MAX_SIZE / sizeof(u32));
307                 break;
308 #endif
309 #ifdef CONFIG_NF_CONNTRACK_EVENTS
310         case NFT_CT_EVENTMASK: {
311                 struct nf_conntrack_ecache *e = nf_ct_ecache_find(ct);
312                 u32 ctmask = regs->data[priv->sreg];
313
314                 if (e) {
315                         if (e->ctmask != ctmask)
316                                 e->ctmask = ctmask;
317                         break;
318                 }
319
320                 if (ctmask && !nf_ct_is_confirmed(ct))
321                         nf_ct_ecache_ext_add(ct, ctmask, 0, GFP_ATOMIC);
322                 break;
323         }
324 #endif
325         default:
326                 break;
327         }
328 }
329
/* Netlink attribute policy of the "ct" expression: the register and key
 * attributes are 32 bit values, the direction is a single byte.
 */
330 static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
331         [NFTA_CT_DREG]          = { .type = NLA_U32 },
332         [NFTA_CT_KEY]           = { .type = NLA_U32 },
333         [NFTA_CT_DIRECTION]     = { .type = NLA_U8 },
334         [NFTA_CT_SREG]          = { .type = NLA_U32 },
335 };
336
337 #ifdef CONFIG_NF_CONNTRACK_ZONES
338 static void nft_ct_tmpl_put_pcpu(void)
339 {
340         struct nf_conn *ct;
341         int cpu;
342
343         for_each_possible_cpu(cpu) {
344                 ct = per_cpu(nft_ct_pcpu_template, cpu);
345                 if (!ct)
346                         break;
347                 nf_ct_put(ct);
348                 per_cpu(nft_ct_pcpu_template, cpu) = NULL;
349         }
350 }
351
352 static bool nft_ct_tmpl_alloc_pcpu(void)
353 {
354         struct nf_conntrack_zone zone = { .id = 0 };
355         struct nf_conn *tmp;
356         int cpu;
357
358         if (nft_ct_pcpu_template_refcnt)
359                 return true;
360
361         for_each_possible_cpu(cpu) {
362                 tmp = nf_ct_tmpl_alloc(&init_net, &zone, GFP_KERNEL);
363                 if (!tmp) {
364                         nft_ct_tmpl_put_pcpu();
365                         return false;
366                 }
367
368                 atomic_set(&tmp->ct_general.use, 1);
369                 per_cpu(nft_ct_pcpu_template, cpu) = tmp;
370         }
371
372         return true;
373 }
374 #endif
375
376 static int nft_ct_get_init(const struct nft_ctx *ctx,
377                            const struct nft_expr *expr,
378                            const struct nlattr * const tb[])
379 {
380         struct nft_ct *priv = nft_expr_priv(expr);
381         unsigned int len;
382         int err;
383
384         priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
385         priv->dir = IP_CT_DIR_MAX;
386         switch (priv->key) {
387         case NFT_CT_DIRECTION:
388                 if (tb[NFTA_CT_DIRECTION] != NULL)
389                         return -EINVAL;
390                 len = sizeof(u8);
391                 break;
392         case NFT_CT_STATE:
393         case NFT_CT_STATUS:
394 #ifdef CONFIG_NF_CONNTRACK_MARK
395         case NFT_CT_MARK:
396 #endif
397 #ifdef CONFIG_NF_CONNTRACK_SECMARK
398         case NFT_CT_SECMARK:
399 #endif
400         case NFT_CT_EXPIRATION:
401                 if (tb[NFTA_CT_DIRECTION] != NULL)
402                         return -EINVAL;
403                 len = sizeof(u32);
404                 break;
405 #ifdef CONFIG_NF_CONNTRACK_LABELS
406         case NFT_CT_LABELS:
407                 if (tb[NFTA_CT_DIRECTION] != NULL)
408                         return -EINVAL;
409                 len = NF_CT_LABELS_MAX_SIZE;
410                 break;
411 #endif
412         case NFT_CT_HELPER:
413                 if (tb[NFTA_CT_DIRECTION] != NULL)
414                         return -EINVAL;
415                 len = NF_CT_HELPER_NAME_LEN;
416                 break;
417
418         case NFT_CT_L3PROTOCOL:
419         case NFT_CT_PROTOCOL:
420                 /* For compatibility, do not report error if NFTA_CT_DIRECTION
421                  * attribute is specified.
422                  */
423                 len = sizeof(u8);
424                 break;
425         case NFT_CT_SRC:
426         case NFT_CT_DST:
427                 if (tb[NFTA_CT_DIRECTION] == NULL)
428                         return -EINVAL;
429
430                 switch (ctx->family) {
431                 case NFPROTO_IPV4:
432                         len = FIELD_SIZEOF(struct nf_conntrack_tuple,
433                                            src.u3.ip);
434                         break;
435                 case NFPROTO_IPV6:
436                 case NFPROTO_INET:
437                         len = FIELD_SIZEOF(struct nf_conntrack_tuple,
438                                            src.u3.ip6);
439                         break;
440                 default:
441                         return -EAFNOSUPPORT;
442                 }
443                 break;
444         case NFT_CT_SRC_IP:
445         case NFT_CT_DST_IP:
446                 if (tb[NFTA_CT_DIRECTION] == NULL)
447                         return -EINVAL;
448
449                 len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip);
450                 break;
451         case NFT_CT_SRC_IP6:
452         case NFT_CT_DST_IP6:
453                 if (tb[NFTA_CT_DIRECTION] == NULL)
454                         return -EINVAL;
455
456                 len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip6);
457                 break;
458         case NFT_CT_PROTO_SRC:
459         case NFT_CT_PROTO_DST:
460                 if (tb[NFTA_CT_DIRECTION] == NULL)
461                         return -EINVAL;
462                 len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all);
463                 break;
464         case NFT_CT_BYTES:
465         case NFT_CT_PKTS:
466         case NFT_CT_AVGPKT:
467                 len = sizeof(u64);
468                 break;
469 #ifdef CONFIG_NF_CONNTRACK_ZONES
470         case NFT_CT_ZONE:
471                 len = sizeof(u16);
472                 break;
473 #endif
474         default:
475                 return -EOPNOTSUPP;
476         }
477
478         if (tb[NFTA_CT_DIRECTION] != NULL) {
479                 priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
480                 switch (priv->dir) {
481                 case IP_CT_DIR_ORIGINAL:
482                 case IP_CT_DIR_REPLY:
483                         break;
484                 default:
485                         return -EINVAL;
486                 }
487         }
488
489         priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]);
490         err = nft_validate_register_store(ctx, priv->dreg, NULL,
491                                           NFT_DATA_VALUE, len);
492         if (err < 0)
493                 return err;
494
495         err = nf_ct_netns_get(ctx->net, ctx->family);
496         if (err < 0)
497                 return err;
498
499         if (priv->key == NFT_CT_BYTES ||
500             priv->key == NFT_CT_PKTS  ||
501             priv->key == NFT_CT_AVGPKT)
502                 nf_ct_set_acct(ctx->net, true);
503
504         return 0;
505 }
506
507 static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
508 {
509         switch (priv->key) {
510 #ifdef CONFIG_NF_CONNTRACK_LABELS
511         case NFT_CT_LABELS:
512                 nf_connlabels_put(ctx->net);
513                 break;
514 #endif
515 #ifdef CONFIG_NF_CONNTRACK_ZONES
516         case NFT_CT_ZONE:
517                 if (--nft_ct_pcpu_template_refcnt == 0)
518                         nft_ct_tmpl_put_pcpu();
519 #endif
520         default:
521                 break;
522         }
523 }
524
525 static int nft_ct_set_init(const struct nft_ctx *ctx,
526                            const struct nft_expr *expr,
527                            const struct nlattr * const tb[])
528 {
529         struct nft_ct *priv = nft_expr_priv(expr);
530         unsigned int len;
531         int err;
532
533         priv->dir = IP_CT_DIR_MAX;
534         priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
535         switch (priv->key) {
536 #ifdef CONFIG_NF_CONNTRACK_MARK
537         case NFT_CT_MARK:
538                 if (tb[NFTA_CT_DIRECTION])
539                         return -EINVAL;
540                 len = FIELD_SIZEOF(struct nf_conn, mark);
541                 break;
542 #endif
543 #ifdef CONFIG_NF_CONNTRACK_LABELS
544         case NFT_CT_LABELS:
545                 if (tb[NFTA_CT_DIRECTION])
546                         return -EINVAL;
547                 len = NF_CT_LABELS_MAX_SIZE;
548                 err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
549                 if (err)
550                         return err;
551                 break;
552 #endif
553 #ifdef CONFIG_NF_CONNTRACK_ZONES
554         case NFT_CT_ZONE:
555                 if (!nft_ct_tmpl_alloc_pcpu())
556                         return -ENOMEM;
557                 nft_ct_pcpu_template_refcnt++;
558                 len = sizeof(u16);
559                 break;
560 #endif
561 #ifdef CONFIG_NF_CONNTRACK_EVENTS
562         case NFT_CT_EVENTMASK:
563                 if (tb[NFTA_CT_DIRECTION])
564                         return -EINVAL;
565                 len = sizeof(u32);
566                 break;
567 #endif
568         default:
569                 return -EOPNOTSUPP;
570         }
571
572         if (tb[NFTA_CT_DIRECTION]) {
573                 priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
574                 switch (priv->dir) {
575                 case IP_CT_DIR_ORIGINAL:
576                 case IP_CT_DIR_REPLY:
577                         break;
578                 default:
579                         err = -EINVAL;
580                         goto err1;
581                 }
582         }
583
584         priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
585         err = nft_validate_register_load(priv->sreg, len);
586         if (err < 0)
587                 goto err1;
588
589         err = nf_ct_netns_get(ctx->net, ctx->family);
590         if (err < 0)
591                 goto err1;
592
593         return 0;
594
595 err1:
596         __nft_ct_set_destroy(ctx, priv);
597         return err;
598 }
599
/* Destroy a "ct" getter: give back the conntrack netns reference that
 * nft_ct_get_init() took with nf_ct_netns_get(). @expr is not used.
 */
600 static void nft_ct_get_destroy(const struct nft_ctx *ctx,
601                                const struct nft_expr *expr)
602 {
603         nf_ct_netns_put(ctx->net, ctx->family);
604 }
605
/* Destroy a "ct" setter: release the key specific resources first, then
 * give back the conntrack netns reference taken in nft_ct_set_init().
 */
606 static void nft_ct_set_destroy(const struct nft_ctx *ctx,
607                                const struct nft_expr *expr)
608 {
609         struct nft_ct *priv = nft_expr_priv(expr);
610
611         __nft_ct_set_destroy(ctx, priv);
612         nf_ct_netns_put(ctx->net, ctx->family);
613 }
614
615 static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
616 {
617         const struct nft_ct *priv = nft_expr_priv(expr);
618
619         if (nft_dump_register(skb, NFTA_CT_DREG, priv->dreg))
620                 goto nla_put_failure;
621         if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
622                 goto nla_put_failure;
623
624         switch (priv->key) {
625         case NFT_CT_SRC:
626         case NFT_CT_DST:
627         case NFT_CT_SRC_IP:
628         case NFT_CT_DST_IP:
629         case NFT_CT_SRC_IP6:
630         case NFT_CT_DST_IP6:
631         case NFT_CT_PROTO_SRC:
632         case NFT_CT_PROTO_DST:
633                 if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
634                         goto nla_put_failure;
635                 break;
636         case NFT_CT_BYTES:
637         case NFT_CT_PKTS:
638         case NFT_CT_AVGPKT:
639         case NFT_CT_ZONE:
640                 if (priv->dir < IP_CT_DIR_MAX &&
641                     nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
642                         goto nla_put_failure;
643                 break;
644         default:
645                 break;
646         }
647
648         return 0;
649
650 nla_put_failure:
651         return -1;
652 }
653
654 static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
655 {
656         const struct nft_ct *priv = nft_expr_priv(expr);
657
658         if (nft_dump_register(skb, NFTA_CT_SREG, priv->sreg))
659                 goto nla_put_failure;
660         if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
661                 goto nla_put_failure;
662
663         switch (priv->key) {
664         case NFT_CT_ZONE:
665                 if (priv->dir < IP_CT_DIR_MAX &&
666                     nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
667                         goto nla_put_failure;
668                 break;
669         default:
670                 break;
671         }
672
673         return 0;
674
675 nla_put_failure:
676         return -1;
677 }
678
/* Forward declaration: the ops tables refer to the type via .type, and the
 * type's select_ops callback in turn returns those tables.
 */
679 static struct nft_expr_type nft_ct_type;
/* Operations of the "ct" getter (selected when NFTA_CT_DREG is present). */
680 static const struct nft_expr_ops nft_ct_get_ops = {
681         .type           = &nft_ct_type,
682         .size           = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
683         .eval           = nft_ct_get_eval,
684         .init           = nft_ct_get_init,
685         .destroy        = nft_ct_get_destroy,
686         .dump           = nft_ct_get_dump,
687 };
688
/* Operations of the generic "ct" setter (selected when NFTA_CT_SREG is
 * present and the key is not handled by the zone specific ops).
 */
689 static const struct nft_expr_ops nft_ct_set_ops = {
690         .type           = &nft_ct_type,
691         .size           = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
692         .eval           = nft_ct_set_eval,
693         .init           = nft_ct_set_init,
694         .destroy        = nft_ct_set_destroy,
695         .dump           = nft_ct_set_dump,
696 };
697
698 #ifdef CONFIG_NF_CONNTRACK_ZONES
/* Operations of the "ct zone set" expression. Only .eval differs from
 * nft_ct_set_ops; init, destroy and dump are shared with it.
 */
699 static const struct nft_expr_ops nft_ct_set_zone_ops = {
700         .type           = &nft_ct_type,
701         .size           = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
702         .eval           = nft_ct_set_zone_eval,
703         .init           = nft_ct_set_init,
704         .destroy        = nft_ct_set_destroy,
705         .dump           = nft_ct_set_dump,
706 };
707 #endif
708
709 static const struct nft_expr_ops *
710 nft_ct_select_ops(const struct nft_ctx *ctx,
711                     const struct nlattr * const tb[])
712 {
713         if (tb[NFTA_CT_KEY] == NULL)
714                 return ERR_PTR(-EINVAL);
715
716         if (tb[NFTA_CT_DREG] && tb[NFTA_CT_SREG])
717                 return ERR_PTR(-EINVAL);
718
719         if (tb[NFTA_CT_DREG])
720                 return &nft_ct_get_ops;
721
722         if (tb[NFTA_CT_SREG]) {
723 #ifdef CONFIG_NF_CONNTRACK_ZONES
724                 if (nla_get_be32(tb[NFTA_CT_KEY]) == htonl(NFT_CT_ZONE))
725                         return &nft_ct_set_zone_ops;
726 #endif
727                 return &nft_ct_set_ops;
728         }
729
730         return ERR_PTR(-EINVAL);
731 }
732
/* Expression type "ct". There is no fixed ops table: nft_ct_select_ops()
 * chooses between the getter and setter variants per expression.
 */
733 static struct nft_expr_type nft_ct_type __read_mostly = {
734         .name           = "ct",
735         .select_ops     = nft_ct_select_ops,
736         .policy         = nft_ct_policy,
737         .maxattr        = NFTA_CT_MAX,
738         .owner          = THIS_MODULE,
739 };
740
741 static void nft_notrack_eval(const struct nft_expr *expr,
742                              struct nft_regs *regs,
743                              const struct nft_pktinfo *pkt)
744 {
745         struct sk_buff *skb = pkt->skb;
746         enum ip_conntrack_info ctinfo;
747         struct nf_conn *ct;
748
749         ct = nf_ct_get(pkt->skb, &ctinfo);
750         /* Previously seen (loopback or untracked)?  Ignore. */
751         if (ct || ctinfo == IP_CT_UNTRACKED)
752                 return;
753
754         nf_ct_set(skb, ct, IP_CT_UNTRACKED);
755 }
756
/* Forward declaration: the ops table and the type reference each other. */
757 static struct nft_expr_type nft_notrack_type;
/* Operations of "notrack": it keeps no private data (size 0) and only
 * provides an eval callback.
 */
758 static const struct nft_expr_ops nft_notrack_ops = {
759         .type           = &nft_notrack_type,
760         .size           = NFT_EXPR_SIZE(0),
761         .eval           = nft_notrack_eval,
762 };
763
/* Expression type "notrack", with a single fixed ops table. */
764 static struct nft_expr_type nft_notrack_type __read_mostly = {
765         .name           = "notrack",
766         .ops            = &nft_notrack_ops,
767         .owner          = THIS_MODULE,
768 };
769
770 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
771 static int
772 nft_ct_timeout_parse_policy(void *timeouts,
773                             const struct nf_conntrack_l4proto *l4proto,
774                             struct net *net, const struct nlattr *attr)
775 {
776         struct nlattr **tb;
777         int ret = 0;
778
779         if (!l4proto->ctnl_timeout.nlattr_to_obj)
780                 return 0;
781
782         tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
783                      GFP_KERNEL);
784
785         if (!tb)
786                 return -ENOMEM;
787
788         ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
789                                attr, l4proto->ctnl_timeout.nla_policy,
790                                NULL);
791         if (ret < 0)
792                 goto err;
793
794         ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
795
796 err:
797         kfree(tb);
798         return ret;
799 }
800
/* Private data of a ct timeout object. */
801 struct nft_ct_timeout_obj {
802         struct nf_conn          *tmpl;     /* template carrying the timeout extension */
803         u8                      l4proto;   /* layer 4 protocol the policy applies to */
804 };
805
806 static void nft_ct_timeout_obj_eval(struct nft_object *obj,
807                                     struct nft_regs *regs,
808                                     const struct nft_pktinfo *pkt)
809 {
810         const struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
811         struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb);
812         struct sk_buff *skb = pkt->skb;
813
814         if (ct ||
815             priv->l4proto != pkt->tprot)
816                 return;
817
818         nf_ct_set(skb, priv->tmpl, IP_CT_NEW);
819 }
820
821 static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
822                                    const struct nlattr * const tb[],
823                                    struct nft_object *obj)
824 {
825         const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
826         struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
827         const struct nf_conntrack_l4proto *l4proto;
828         struct nf_conn_timeout *timeout_ext;
829         struct nf_ct_timeout *timeout;
830         int l3num = ctx->family;
831         struct nf_conn *tmpl;
832         __u8 l4num;
833         int ret;
834
835         if (!tb[NFTA_CT_TIMEOUT_L4PROTO] ||
836             !tb[NFTA_CT_TIMEOUT_DATA])
837                 return -EINVAL;
838
839         if (tb[NFTA_CT_TIMEOUT_L3PROTO])
840                 l3num = ntohs(nla_get_be16(tb[NFTA_CT_TIMEOUT_L3PROTO]));
841
842         l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]);
843         priv->l4proto = l4num;
844
845         l4proto = nf_ct_l4proto_find_get(l3num, l4num);
846
847         if (l4proto->l4proto != l4num) {
848                 ret = -EOPNOTSUPP;
849                 goto err_proto_put;
850         }
851
852         timeout = kzalloc(sizeof(struct nf_ct_timeout) +
853                           l4proto->ctnl_timeout.obj_size, GFP_KERNEL);
854         if (timeout == NULL) {
855                 ret = -ENOMEM;
856                 goto err_proto_put;
857         }
858
859         ret = nft_ct_timeout_parse_policy(&timeout->data, l4proto, ctx->net,
860                                           tb[NFTA_CT_TIMEOUT_DATA]);
861         if (ret < 0)
862                 goto err_free_timeout;
863
864         timeout->l3num = l3num;
865         timeout->l4proto = l4proto;
866         tmpl = nf_ct_tmpl_alloc(ctx->net, zone, GFP_ATOMIC);
867         if (!tmpl) {
868                 ret = -ENOMEM;
869                 goto err_free_timeout;
870         }
871
872         timeout_ext = nf_ct_timeout_ext_add(tmpl, timeout, GFP_ATOMIC);
873         if (!timeout_ext) {
874                 ret = -ENOMEM;
875                 goto err_free_tmpl;
876         }
877
878         ret = nf_ct_netns_get(ctx->net, ctx->family);
879         if (ret < 0)
880                 goto err_free_tmpl;
881
882         priv->tmpl = tmpl;
883
884         return 0;
885
886 err_free_tmpl:
887         nf_ct_tmpl_free(tmpl);
888 err_free_timeout:
889         kfree(timeout);
890 err_proto_put:
891         nf_ct_l4proto_put(l4proto);
892         return ret;
893 }
894
895 static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx,
896                                        struct nft_object *obj)
897 {
898         struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
899         struct nf_conn_timeout *t = nf_ct_timeout_find(priv->tmpl);
900         struct nf_ct_timeout *timeout;
901
902         timeout = rcu_dereference_raw(t->timeout);
903         nf_ct_untimeout(ctx->net, timeout);
904         nf_ct_l4proto_put(timeout->l4proto);
905         nf_ct_netns_put(ctx->net, ctx->family);
906         nf_ct_tmpl_free(priv->tmpl);
907 }
908
909 static int nft_ct_timeout_obj_dump(struct sk_buff *skb,
910                                    struct nft_object *obj, bool reset)
911 {
912         const struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
913         const struct nf_conn_timeout *t = nf_ct_timeout_find(priv->tmpl);
914         const struct nf_ct_timeout *timeout = rcu_dereference_raw(t->timeout);
915         struct nlattr *nest_params;
916         int ret;
917
918         if (nla_put_u8(skb, NFTA_CT_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) ||
919             nla_put_be16(skb, NFTA_CT_TIMEOUT_L3PROTO, htons(timeout->l3num)))
920                 return -1;
921
922         nest_params = nla_nest_start(skb, NFTA_CT_TIMEOUT_DATA | NLA_F_NESTED);
923         if (!nest_params)
924                 return -1;
925
926         ret = timeout->l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->data);
927         if (ret < 0)
928                 return -1;
929         nla_nest_end(skb, nest_params);
930         return 0;
931 }
932
933 static const struct nla_policy nft_ct_timeout_policy[NFTA_CT_TIMEOUT_MAX + 1] = {
934         [NFTA_CT_TIMEOUT_L3PROTO] = {.type = NLA_U16 },
935         [NFTA_CT_TIMEOUT_L4PROTO] = {.type = NLA_U8 },
936         [NFTA_CT_TIMEOUT_DATA]    = {.type = NLA_NESTED },
937 };
938
939 static struct nft_object_type nft_ct_timeout_obj_type;
940
941 static const struct nft_object_ops nft_ct_timeout_obj_ops = {
942         .type           = &nft_ct_timeout_obj_type,
943         .size           = sizeof(struct nft_ct_timeout_obj),
944         .eval           = nft_ct_timeout_obj_eval,
945         .init           = nft_ct_timeout_obj_init,
946         .destroy        = nft_ct_timeout_obj_destroy,
947         .dump           = nft_ct_timeout_obj_dump,
948 };
949
950 static struct nft_object_type nft_ct_timeout_obj_type __read_mostly = {
951         .type           = NFT_OBJECT_CT_TIMEOUT,
952         .ops            = &nft_ct_timeout_obj_ops,
953         .maxattr        = NFTA_CT_TIMEOUT_MAX,
954         .policy         = nft_ct_timeout_policy,
955         .owner          = THIS_MODULE,
956 };
957 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
958
959 static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
960                                   const struct nlattr * const tb[],
961                                   struct nft_object *obj)
962 {
963         struct nft_ct_helper_obj *priv = nft_obj_data(obj);
964         struct nf_conntrack_helper *help4, *help6;
965         char name[NF_CT_HELPER_NAME_LEN];
966         int family = ctx->family;
967         int err;
968
969         if (!tb[NFTA_CT_HELPER_NAME] || !tb[NFTA_CT_HELPER_L4PROTO])
970                 return -EINVAL;
971
972         priv->l4proto = nla_get_u8(tb[NFTA_CT_HELPER_L4PROTO]);
973         if (!priv->l4proto)
974                 return -ENOENT;
975
976         nla_strlcpy(name, tb[NFTA_CT_HELPER_NAME], sizeof(name));
977
978         if (tb[NFTA_CT_HELPER_L3PROTO])
979                 family = ntohs(nla_get_be16(tb[NFTA_CT_HELPER_L3PROTO]));
980
981         help4 = NULL;
982         help6 = NULL;
983
984         switch (family) {
985         case NFPROTO_IPV4:
986                 if (ctx->family == NFPROTO_IPV6)
987                         return -EINVAL;
988
989                 help4 = nf_conntrack_helper_try_module_get(name, family,
990                                                            priv->l4proto);
991                 break;
992         case NFPROTO_IPV6:
993                 if (ctx->family == NFPROTO_IPV4)
994                         return -EINVAL;
995
996                 help6 = nf_conntrack_helper_try_module_get(name, family,
997                                                            priv->l4proto);
998                 break;
999         case NFPROTO_NETDEV: /* fallthrough */
1000         case NFPROTO_BRIDGE: /* same */
1001         case NFPROTO_INET:
1002                 help4 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV4,
1003                                                            priv->l4proto);
1004                 help6 = nf_conntrack_helper_try_module_get(name, NFPROTO_IPV6,
1005                                                            priv->l4proto);
1006                 break;
1007         default:
1008                 return -EAFNOSUPPORT;
1009         }
1010
1011         /* && is intentional; only error if INET found neither ipv4 or ipv6 */
1012         if (!help4 && !help6)
1013                 return -ENOENT;
1014
1015         priv->helper4 = help4;
1016         priv->helper6 = help6;
1017
1018         err = nf_ct_netns_get(ctx->net, ctx->family);
1019         if (err < 0)
1020                 goto err_put_helper;
1021
1022         return 0;
1023
1024 err_put_helper:
1025         if (priv->helper4)
1026                 nf_conntrack_helper_put(priv->helper4);
1027         if (priv->helper6)
1028                 nf_conntrack_helper_put(priv->helper6);
1029         return err;
1030 }
1031
1032 static void nft_ct_helper_obj_destroy(const struct nft_ctx *ctx,
1033                                       struct nft_object *obj)
1034 {
1035         struct nft_ct_helper_obj *priv = nft_obj_data(obj);
1036
1037         if (priv->helper4)
1038                 nf_conntrack_helper_put(priv->helper4);
1039         if (priv->helper6)
1040                 nf_conntrack_helper_put(priv->helper6);
1041
1042         nf_ct_netns_put(ctx->net, ctx->family);
1043 }
1044
1045 static void nft_ct_helper_obj_eval(struct nft_object *obj,
1046                                    struct nft_regs *regs,
1047                                    const struct nft_pktinfo *pkt)
1048 {
1049         const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
1050         struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb);
1051         struct nf_conntrack_helper *to_assign = NULL;
1052         struct nf_conn_help *help;
1053
1054         if (!ct ||
1055             nf_ct_is_confirmed(ct) ||
1056             nf_ct_is_template(ct) ||
1057             priv->l4proto != nf_ct_protonum(ct))
1058                 return;
1059
1060         switch (nf_ct_l3num(ct)) {
1061         case NFPROTO_IPV4:
1062                 to_assign = priv->helper4;
1063                 break;
1064         case NFPROTO_IPV6:
1065                 to_assign = priv->helper6;
1066                 break;
1067         default:
1068                 WARN_ON_ONCE(1);
1069                 return;
1070         }
1071
1072         if (!to_assign)
1073                 return;
1074
1075         if (test_bit(IPS_HELPER_BIT, &ct->status))
1076                 return;
1077
1078         help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
1079         if (help) {
1080                 rcu_assign_pointer(help->helper, to_assign);
1081                 set_bit(IPS_HELPER_BIT, &ct->status);
1082         }
1083 }
1084
1085 static int nft_ct_helper_obj_dump(struct sk_buff *skb,
1086                                   struct nft_object *obj, bool reset)
1087 {
1088         const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
1089         const struct nf_conntrack_helper *helper;
1090         u16 family;
1091
1092         if (priv->helper4 && priv->helper6) {
1093                 family = NFPROTO_INET;
1094                 helper = priv->helper4;
1095         } else if (priv->helper6) {
1096                 family = NFPROTO_IPV6;
1097                 helper = priv->helper6;
1098         } else {
1099                 family = NFPROTO_IPV4;
1100                 helper = priv->helper4;
1101         }
1102
1103         if (nla_put_string(skb, NFTA_CT_HELPER_NAME, helper->name))
1104                 return -1;
1105
1106         if (nla_put_u8(skb, NFTA_CT_HELPER_L4PROTO, priv->l4proto))
1107                 return -1;
1108
1109         if (nla_put_be16(skb, NFTA_CT_HELPER_L3PROTO, htons(family)))
1110                 return -1;
1111
1112         return 0;
1113 }
1114
1115 static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = {
1116         [NFTA_CT_HELPER_NAME] = { .type = NLA_STRING,
1117                                   .len = NF_CT_HELPER_NAME_LEN - 1 },
1118         [NFTA_CT_HELPER_L3PROTO] = { .type = NLA_U16 },
1119         [NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 },
1120 };
1121
1122 static struct nft_object_type nft_ct_helper_obj_type;
1123 static const struct nft_object_ops nft_ct_helper_obj_ops = {
1124         .type           = &nft_ct_helper_obj_type,
1125         .size           = sizeof(struct nft_ct_helper_obj),
1126         .eval           = nft_ct_helper_obj_eval,
1127         .init           = nft_ct_helper_obj_init,
1128         .destroy        = nft_ct_helper_obj_destroy,
1129         .dump           = nft_ct_helper_obj_dump,
1130 };
1131
1132 static struct nft_object_type nft_ct_helper_obj_type __read_mostly = {
1133         .type           = NFT_OBJECT_CT_HELPER,
1134         .ops            = &nft_ct_helper_obj_ops,
1135         .maxattr        = NFTA_CT_HELPER_MAX,
1136         .policy         = nft_ct_helper_policy,
1137         .owner          = THIS_MODULE,
1138 };
1139
1140 static int __init nft_ct_module_init(void)
1141 {
1142         int err;
1143
1144         BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE);
1145
1146         err = nft_register_expr(&nft_ct_type);
1147         if (err < 0)
1148                 return err;
1149
1150         err = nft_register_expr(&nft_notrack_type);
1151         if (err < 0)
1152                 goto err1;
1153
1154         err = nft_register_obj(&nft_ct_helper_obj_type);
1155         if (err < 0)
1156                 goto err2;
1157 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1158         err = nft_register_obj(&nft_ct_timeout_obj_type);
1159         if (err < 0)
1160                 goto err3;
1161 #endif
1162         return 0;
1163
1164 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1165 err3:
1166         nft_unregister_obj(&nft_ct_helper_obj_type);
1167 #endif
1168 err2:
1169         nft_unregister_expr(&nft_notrack_type);
1170 err1:
1171         nft_unregister_expr(&nft_ct_type);
1172         return err;
1173 }
1174
/*
 * Module exit point: unregister the object types and expressions in the
 * reverse of the order nft_ct_module_init() registered them.
 */
static void __exit nft_ct_module_exit(void)
{
        /* The ct timeout object type only exists with timeout support. */
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
        nft_unregister_obj(&nft_ct_timeout_obj_type);
#endif
        nft_unregister_obj(&nft_ct_helper_obj_type);
        nft_unregister_expr(&nft_notrack_type);
        nft_unregister_expr(&nft_ct_type);
}
1184
/* Hook the init/exit routines above into module load and unload. */
module_init(nft_ct_module_init);
module_exit(nft_ct_module_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
/*
 * Aliases for the expressions and object types this module provides
 * (presumably what lets nf_tables load it on demand -- confirm against the
 * MODULE_ALIAS_NFT_* definitions).
 */
MODULE_ALIAS_NFT_EXPR("ct");
MODULE_ALIAS_NFT_EXPR("notrack");
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER);
MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_TIMEOUT);