Commit | Line | Data |
---|---|---|
b57dc7c1 PB |
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* -
 * net/sched/act_ct.c  Connection Tracking action
 *
 * Authors:   Paul Blakey <paulb@mellanox.com>
 *            Yossi Kuperman <yossiku@mellanox.com>
 *            Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
 */
9 | ||
10 | #include <linux/module.h> | |
11 | #include <linux/init.h> | |
12 | #include <linux/kernel.h> | |
13 | #include <linux/skbuff.h> | |
14 | #include <linux/rtnetlink.h> | |
15 | #include <linux/pkt_cls.h> | |
16 | #include <linux/ip.h> | |
17 | #include <linux/ipv6.h> | |
18 | #include <net/netlink.h> | |
19 | #include <net/pkt_sched.h> | |
20 | #include <net/pkt_cls.h> | |
21 | #include <net/act_api.h> | |
22 | #include <net/ip.h> | |
23 | #include <net/ipv6_frag.h> | |
24 | #include <uapi/linux/tc_act/tc_ct.h> | |
25 | #include <net/tc_act/tc_ct.h> | |
26 | ||
b57dc7c1 PB |
27 | #include <net/netfilter/nf_conntrack.h> |
28 | #include <net/netfilter/nf_conntrack_core.h> | |
29 | #include <net/netfilter/nf_conntrack_zones.h> | |
30 | #include <net/netfilter/nf_conntrack_helper.h> | |
31 | #include <net/netfilter/ipv6/nf_defrag_ipv6.h> | |
40d102cd | 32 | #include <uapi/linux/netfilter/nf_nat.h> |
b57dc7c1 PB |
33 | |
34 | static struct tc_action_ops act_ct_ops; | |
35 | static unsigned int ct_net_id; | |
36 | ||
37 | struct tc_ct_action_net { | |
38 | struct tc_action_net tn; /* Must be first */ | |
39 | bool labels; | |
40 | }; | |
41 | ||
42 | /* Determine whether skb->_nfct is equal to the result of conntrack lookup. */ | |
43 | static bool tcf_ct_skb_nfct_cached(struct net *net, struct sk_buff *skb, | |
44 | u16 zone_id, bool force) | |
45 | { | |
46 | enum ip_conntrack_info ctinfo; | |
47 | struct nf_conn *ct; | |
48 | ||
49 | ct = nf_ct_get(skb, &ctinfo); | |
50 | if (!ct) | |
51 | return false; | |
52 | if (!net_eq(net, read_pnet(&ct->ct_net))) | |
53 | return false; | |
54 | if (nf_ct_zone(ct)->id != zone_id) | |
55 | return false; | |
56 | ||
57 | /* Force conntrack entry direction. */ | |
58 | if (force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { | |
59 | if (nf_ct_is_confirmed(ct)) | |
60 | nf_ct_kill(ct); | |
61 | ||
62 | nf_conntrack_put(&ct->ct_general); | |
63 | nf_ct_set(skb, NULL, IP_CT_UNTRACKED); | |
64 | ||
65 | return false; | |
66 | } | |
67 | ||
68 | return true; | |
69 | } | |
70 | ||
71 | /* Trim the skb to the length specified by the IP/IPv6 header, | |
72 | * removing any trailing lower-layer padding. This prepares the skb | |
73 | * for higher-layer processing that assumes skb->len excludes padding | |
74 | * (such as nf_ip_checksum). The caller needs to pull the skb to the | |
75 | * network header, and ensure ip_hdr/ipv6_hdr points to valid data. | |
76 | */ | |
77 | static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family) | |
78 | { | |
79 | unsigned int len; | |
80 | int err; | |
81 | ||
82 | switch (family) { | |
83 | case NFPROTO_IPV4: | |
84 | len = ntohs(ip_hdr(skb)->tot_len); | |
85 | break; | |
86 | case NFPROTO_IPV6: | |
87 | len = sizeof(struct ipv6hdr) | |
88 | + ntohs(ipv6_hdr(skb)->payload_len); | |
89 | break; | |
90 | default: | |
91 | len = skb->len; | |
92 | } | |
93 | ||
94 | err = pskb_trim_rcsum(skb, len); | |
95 | ||
96 | return err; | |
97 | } | |
98 | ||
99 | static u8 tcf_ct_skb_nf_family(struct sk_buff *skb) | |
100 | { | |
101 | u8 family = NFPROTO_UNSPEC; | |
102 | ||
103 | switch (skb->protocol) { | |
104 | case htons(ETH_P_IP): | |
105 | family = NFPROTO_IPV4; | |
106 | break; | |
107 | case htons(ETH_P_IPV6): | |
108 | family = NFPROTO_IPV6; | |
109 | break; | |
110 | default: | |
111 | break; | |
112 | } | |
113 | ||
114 | return family; | |
115 | } | |
116 | ||
117 | static int tcf_ct_ipv4_is_fragment(struct sk_buff *skb, bool *frag) | |
118 | { | |
119 | unsigned int len; | |
120 | ||
121 | len = skb_network_offset(skb) + sizeof(struct iphdr); | |
122 | if (unlikely(skb->len < len)) | |
123 | return -EINVAL; | |
124 | if (unlikely(!pskb_may_pull(skb, len))) | |
125 | return -ENOMEM; | |
126 | ||
127 | *frag = ip_is_fragment(ip_hdr(skb)); | |
128 | return 0; | |
129 | } | |
130 | ||
131 | static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag) | |
132 | { | |
133 | unsigned int flags = 0, len, payload_ofs = 0; | |
134 | unsigned short frag_off; | |
135 | int nexthdr; | |
136 | ||
137 | len = skb_network_offset(skb) + sizeof(struct ipv6hdr); | |
138 | if (unlikely(skb->len < len)) | |
139 | return -EINVAL; | |
140 | if (unlikely(!pskb_may_pull(skb, len))) | |
141 | return -ENOMEM; | |
142 | ||
143 | nexthdr = ipv6_find_hdr(skb, &payload_ofs, -1, &frag_off, &flags); | |
144 | if (unlikely(nexthdr < 0)) | |
145 | return -EPROTO; | |
146 | ||
147 | *frag = flags & IP6_FH_F_FRAG; | |
148 | return 0; | |
149 | } | |
150 | ||
151 | static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, | |
152 | u8 family, u16 zone) | |
153 | { | |
154 | enum ip_conntrack_info ctinfo; | |
155 | struct nf_conn *ct; | |
156 | int err = 0; | |
157 | bool frag; | |
158 | ||
159 | /* Previously seen (loopback)? Ignore. */ | |
160 | ct = nf_ct_get(skb, &ctinfo); | |
161 | if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED) | |
162 | return 0; | |
163 | ||
164 | if (family == NFPROTO_IPV4) | |
165 | err = tcf_ct_ipv4_is_fragment(skb, &frag); | |
166 | else | |
167 | err = tcf_ct_ipv6_is_fragment(skb, &frag); | |
168 | if (err || !frag) | |
169 | return err; | |
170 | ||
171 | skb_get(skb); | |
172 | ||
173 | if (family == NFPROTO_IPV4) { | |
174 | enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; | |
175 | ||
176 | memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); | |
177 | local_bh_disable(); | |
178 | err = ip_defrag(net, skb, user); | |
179 | local_bh_enable(); | |
180 | if (err && err != -EINPROGRESS) | |
181 | goto out_free; | |
182 | } else { /* NFPROTO_IPV6 */ | |
183 | #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) | |
184 | enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; | |
185 | ||
186 | memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); | |
187 | err = nf_ct_frag6_gather(net, skb, user); | |
188 | if (err && err != -EINPROGRESS) | |
189 | goto out_free; | |
190 | #else | |
191 | err = -EOPNOTSUPP; | |
192 | goto out_free; | |
193 | #endif | |
194 | } | |
195 | ||
196 | skb_clear_hash(skb); | |
197 | skb->ignore_df = 1; | |
198 | return err; | |
199 | ||
200 | out_free: | |
201 | kfree_skb(skb); | |
202 | return err; | |
203 | } | |
204 | ||
205 | static void tcf_ct_params_free(struct rcu_head *head) | |
206 | { | |
207 | struct tcf_ct_params *params = container_of(head, | |
208 | struct tcf_ct_params, rcu); | |
209 | ||
210 | if (params->tmpl) | |
211 | nf_conntrack_put(¶ms->tmpl->ct_general); | |
212 | kfree(params); | |
213 | } | |
214 | ||
215 | #if IS_ENABLED(CONFIG_NF_NAT) | |
216 | /* Modelled after nf_nat_ipv[46]_fn(). | |
217 | * range is only used for new, uninitialized NAT state. | |
218 | * Returns either NF_ACCEPT or NF_DROP. | |
219 | */ | |
220 | static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, | |
221 | enum ip_conntrack_info ctinfo, | |
222 | const struct nf_nat_range2 *range, | |
223 | enum nf_nat_manip_type maniptype) | |
224 | { | |
225 | int hooknum, err = NF_ACCEPT; | |
226 | ||
227 | /* See HOOK2MANIP(). */ | |
228 | if (maniptype == NF_NAT_MANIP_SRC) | |
229 | hooknum = NF_INET_LOCAL_IN; /* Source NAT */ | |
230 | else | |
231 | hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */ | |
232 | ||
233 | switch (ctinfo) { | |
234 | case IP_CT_RELATED: | |
235 | case IP_CT_RELATED_REPLY: | |
236 | if (skb->protocol == htons(ETH_P_IP) && | |
237 | ip_hdr(skb)->protocol == IPPROTO_ICMP) { | |
238 | if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, | |
239 | hooknum)) | |
240 | err = NF_DROP; | |
241 | goto out; | |
242 | } else if (IS_ENABLED(CONFIG_IPV6) && | |
243 | skb->protocol == htons(ETH_P_IPV6)) { | |
244 | __be16 frag_off; | |
245 | u8 nexthdr = ipv6_hdr(skb)->nexthdr; | |
246 | int hdrlen = ipv6_skip_exthdr(skb, | |
247 | sizeof(struct ipv6hdr), | |
248 | &nexthdr, &frag_off); | |
249 | ||
250 | if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { | |
251 | if (!nf_nat_icmpv6_reply_translation(skb, ct, | |
252 | ctinfo, | |
253 | hooknum, | |
254 | hdrlen)) | |
255 | err = NF_DROP; | |
256 | goto out; | |
257 | } | |
258 | } | |
259 | /* Non-ICMP, fall thru to initialize if needed. */ | |
260 | /* fall through */ | |
261 | case IP_CT_NEW: | |
262 | /* Seen it before? This can happen for loopback, retrans, | |
263 | * or local packets. | |
264 | */ | |
265 | if (!nf_nat_initialized(ct, maniptype)) { | |
266 | /* Initialize according to the NAT action. */ | |
267 | err = (range && range->flags & NF_NAT_RANGE_MAP_IPS) | |
268 | /* Action is set up to establish a new | |
269 | * mapping. | |
270 | */ | |
271 | ? nf_nat_setup_info(ct, range, maniptype) | |
272 | : nf_nat_alloc_null_binding(ct, hooknum); | |
273 | if (err != NF_ACCEPT) | |
274 | goto out; | |
275 | } | |
276 | break; | |
277 | ||
278 | case IP_CT_ESTABLISHED: | |
279 | case IP_CT_ESTABLISHED_REPLY: | |
280 | break; | |
281 | ||
282 | default: | |
283 | err = NF_DROP; | |
284 | goto out; | |
285 | } | |
286 | ||
287 | err = nf_nat_packet(ct, ctinfo, hooknum, skb); | |
288 | out: | |
289 | return err; | |
290 | } | |
291 | #endif /* CONFIG_NF_NAT */ | |
292 | ||
293 | static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask) | |
294 | { | |
295 | #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) | |
296 | u32 new_mark; | |
297 | ||
298 | if (!mask) | |
299 | return; | |
300 | ||
301 | new_mark = mark | (ct->mark & ~(mask)); | |
302 | if (ct->mark != new_mark) { | |
303 | ct->mark = new_mark; | |
304 | if (nf_ct_is_confirmed(ct)) | |
305 | nf_conntrack_event_cache(IPCT_MARK, ct); | |
306 | } | |
307 | #endif | |
308 | } | |
309 | ||
310 | static void tcf_ct_act_set_labels(struct nf_conn *ct, | |
311 | u32 *labels, | |
312 | u32 *labels_m) | |
313 | { | |
314 | #if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) | |
c593642c | 315 | size_t labels_sz = sizeof_field(struct tcf_ct_params, labels); |
b57dc7c1 PB |
316 | |
317 | if (!memchr_inv(labels_m, 0, labels_sz)) | |
318 | return; | |
319 | ||
320 | nf_connlabels_replace(ct, labels, labels_m, 4); | |
321 | #endif | |
322 | } | |
323 | ||
324 | static int tcf_ct_act_nat(struct sk_buff *skb, | |
325 | struct nf_conn *ct, | |
326 | enum ip_conntrack_info ctinfo, | |
327 | int ct_action, | |
328 | struct nf_nat_range2 *range, | |
329 | bool commit) | |
330 | { | |
331 | #if IS_ENABLED(CONFIG_NF_NAT) | |
95219afb | 332 | int err; |
b57dc7c1 PB |
333 | enum nf_nat_manip_type maniptype; |
334 | ||
335 | if (!(ct_action & TCA_CT_ACT_NAT)) | |
336 | return NF_ACCEPT; | |
337 | ||
338 | /* Add NAT extension if not confirmed yet. */ | |
339 | if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) | |
340 | return NF_DROP; /* Can't NAT. */ | |
341 | ||
342 | if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) && | |
343 | (ctinfo != IP_CT_RELATED || commit)) { | |
344 | /* NAT an established or related connection like before. */ | |
345 | if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) | |
346 | /* This is the REPLY direction for a connection | |
347 | * for which NAT was applied in the forward | |
348 | * direction. Do the reverse NAT. | |
349 | */ | |
350 | maniptype = ct->status & IPS_SRC_NAT | |
351 | ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC; | |
352 | else | |
353 | maniptype = ct->status & IPS_SRC_NAT | |
354 | ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST; | |
355 | } else if (ct_action & TCA_CT_ACT_NAT_SRC) { | |
356 | maniptype = NF_NAT_MANIP_SRC; | |
357 | } else if (ct_action & TCA_CT_ACT_NAT_DST) { | |
358 | maniptype = NF_NAT_MANIP_DST; | |
359 | } else { | |
360 | return NF_ACCEPT; | |
361 | } | |
362 | ||
95219afb AC |
363 | err = ct_nat_execute(skb, ct, ctinfo, range, maniptype); |
364 | if (err == NF_ACCEPT && | |
365 | ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) { | |
366 | if (maniptype == NF_NAT_MANIP_SRC) | |
367 | maniptype = NF_NAT_MANIP_DST; | |
368 | else | |
369 | maniptype = NF_NAT_MANIP_SRC; | |
370 | ||
371 | err = ct_nat_execute(skb, ct, ctinfo, range, maniptype); | |
372 | } | |
373 | return err; | |
b57dc7c1 PB |
374 | #else |
375 | return NF_ACCEPT; | |
376 | #endif | |
377 | } | |
378 | ||
379 | static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, | |
380 | struct tcf_result *res) | |
381 | { | |
382 | struct net *net = dev_net(skb->dev); | |
383 | bool cached, commit, clear, force; | |
384 | enum ip_conntrack_info ctinfo; | |
385 | struct tcf_ct *c = to_ct(a); | |
386 | struct nf_conn *tmpl = NULL; | |
387 | struct nf_hook_state state; | |
388 | int nh_ofs, err, retval; | |
389 | struct tcf_ct_params *p; | |
390 | struct nf_conn *ct; | |
391 | u8 family; | |
392 | ||
393 | p = rcu_dereference_bh(c->params); | |
394 | ||
395 | retval = READ_ONCE(c->tcf_action); | |
396 | commit = p->ct_action & TCA_CT_ACT_COMMIT; | |
397 | clear = p->ct_action & TCA_CT_ACT_CLEAR; | |
398 | force = p->ct_action & TCA_CT_ACT_FORCE; | |
399 | tmpl = p->tmpl; | |
400 | ||
401 | if (clear) { | |
402 | ct = nf_ct_get(skb, &ctinfo); | |
403 | if (ct) { | |
404 | nf_conntrack_put(&ct->ct_general); | |
405 | nf_ct_set(skb, NULL, IP_CT_UNTRACKED); | |
406 | } | |
407 | ||
408 | goto out; | |
409 | } | |
410 | ||
411 | family = tcf_ct_skb_nf_family(skb); | |
412 | if (family == NFPROTO_UNSPEC) | |
413 | goto drop; | |
414 | ||
415 | /* The conntrack module expects to be working at L3. | |
416 | * We also try to pull the IPv4/6 header to linear area | |
417 | */ | |
418 | nh_ofs = skb_network_offset(skb); | |
419 | skb_pull_rcsum(skb, nh_ofs); | |
420 | err = tcf_ct_handle_fragments(net, skb, family, p->zone); | |
421 | if (err == -EINPROGRESS) { | |
422 | retval = TC_ACT_STOLEN; | |
423 | goto out; | |
424 | } | |
425 | if (err) | |
426 | goto drop; | |
427 | ||
428 | err = tcf_ct_skb_network_trim(skb, family); | |
429 | if (err) | |
430 | goto drop; | |
431 | ||
432 | /* If we are recirculating packets to match on ct fields and | |
433 | * committing with a separate ct action, then we don't need to | |
434 | * actually run the packet through conntrack twice unless it's for a | |
435 | * different zone. | |
436 | */ | |
437 | cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force); | |
438 | if (!cached) { | |
439 | /* Associate skb with specified zone. */ | |
440 | if (tmpl) { | |
441 | ct = nf_ct_get(skb, &ctinfo); | |
442 | if (skb_nfct(skb)) | |
443 | nf_conntrack_put(skb_nfct(skb)); | |
444 | nf_conntrack_get(&tmpl->ct_general); | |
445 | nf_ct_set(skb, tmpl, IP_CT_NEW); | |
446 | } | |
447 | ||
448 | state.hook = NF_INET_PRE_ROUTING; | |
449 | state.net = net; | |
450 | state.pf = family; | |
451 | err = nf_conntrack_in(skb, &state); | |
452 | if (err != NF_ACCEPT) | |
453 | goto out_push; | |
454 | } | |
455 | ||
456 | ct = nf_ct_get(skb, &ctinfo); | |
457 | if (!ct) | |
458 | goto out_push; | |
459 | nf_ct_deliver_cached_events(ct); | |
460 | ||
461 | err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit); | |
462 | if (err != NF_ACCEPT) | |
463 | goto drop; | |
464 | ||
465 | if (commit) { | |
466 | tcf_ct_act_set_mark(ct, p->mark, p->mark_mask); | |
467 | tcf_ct_act_set_labels(ct, p->labels, p->labels_mask); | |
468 | ||
469 | /* This will take care of sending queued events | |
470 | * even if the connection is already confirmed. | |
471 | */ | |
472 | nf_conntrack_confirm(skb); | |
473 | } | |
474 | ||
475 | out_push: | |
476 | skb_push_rcsum(skb, nh_ofs); | |
477 | ||
478 | out: | |
5e1ad95b | 479 | tcf_action_update_bstats(&c->common, skb); |
b57dc7c1 PB |
480 | return retval; |
481 | ||
482 | drop: | |
26b537a8 | 483 | tcf_action_inc_drop_qstats(&c->common); |
b57dc7c1 PB |
484 | return TC_ACT_SHOT; |
485 | } | |
486 | ||
487 | static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = { | |
b57dc7c1 PB |
488 | [TCA_CT_ACTION] = { .type = NLA_U16 }, |
489 | [TCA_CT_PARMS] = { .type = NLA_EXACT_LEN, .len = sizeof(struct tc_ct) }, | |
490 | [TCA_CT_ZONE] = { .type = NLA_U16 }, | |
491 | [TCA_CT_MARK] = { .type = NLA_U32 }, | |
492 | [TCA_CT_MARK_MASK] = { .type = NLA_U32 }, | |
493 | [TCA_CT_LABELS] = { .type = NLA_BINARY, | |
494 | .len = 128 / BITS_PER_BYTE }, | |
495 | [TCA_CT_LABELS_MASK] = { .type = NLA_BINARY, | |
496 | .len = 128 / BITS_PER_BYTE }, | |
497 | [TCA_CT_NAT_IPV4_MIN] = { .type = NLA_U32 }, | |
498 | [TCA_CT_NAT_IPV4_MAX] = { .type = NLA_U32 }, | |
499 | [TCA_CT_NAT_IPV6_MIN] = { .type = NLA_EXACT_LEN, | |
500 | .len = sizeof(struct in6_addr) }, | |
501 | [TCA_CT_NAT_IPV6_MAX] = { .type = NLA_EXACT_LEN, | |
502 | .len = sizeof(struct in6_addr) }, | |
503 | [TCA_CT_NAT_PORT_MIN] = { .type = NLA_U16 }, | |
504 | [TCA_CT_NAT_PORT_MAX] = { .type = NLA_U16 }, | |
505 | }; | |
506 | ||
507 | static int tcf_ct_fill_params_nat(struct tcf_ct_params *p, | |
508 | struct tc_ct *parm, | |
509 | struct nlattr **tb, | |
510 | struct netlink_ext_ack *extack) | |
511 | { | |
512 | struct nf_nat_range2 *range; | |
513 | ||
514 | if (!(p->ct_action & TCA_CT_ACT_NAT)) | |
515 | return 0; | |
516 | ||
517 | if (!IS_ENABLED(CONFIG_NF_NAT)) { | |
518 | NL_SET_ERR_MSG_MOD(extack, "Netfilter nat isn't enabled in kernel"); | |
519 | return -EOPNOTSUPP; | |
520 | } | |
521 | ||
522 | if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST))) | |
523 | return 0; | |
524 | ||
525 | if ((p->ct_action & TCA_CT_ACT_NAT_SRC) && | |
526 | (p->ct_action & TCA_CT_ACT_NAT_DST)) { | |
527 | NL_SET_ERR_MSG_MOD(extack, "dnat and snat can't be enabled at the same time"); | |
528 | return -EOPNOTSUPP; | |
529 | } | |
530 | ||
531 | range = &p->range; | |
532 | if (tb[TCA_CT_NAT_IPV4_MIN]) { | |
533 | struct nlattr *max_attr = tb[TCA_CT_NAT_IPV4_MAX]; | |
534 | ||
535 | p->ipv4_range = true; | |
536 | range->flags |= NF_NAT_RANGE_MAP_IPS; | |
537 | range->min_addr.ip = | |
538 | nla_get_in_addr(tb[TCA_CT_NAT_IPV4_MIN]); | |
539 | ||
540 | range->max_addr.ip = max_attr ? | |
541 | nla_get_in_addr(max_attr) : | |
542 | range->min_addr.ip; | |
543 | } else if (tb[TCA_CT_NAT_IPV6_MIN]) { | |
544 | struct nlattr *max_attr = tb[TCA_CT_NAT_IPV6_MAX]; | |
545 | ||
546 | p->ipv4_range = false; | |
547 | range->flags |= NF_NAT_RANGE_MAP_IPS; | |
548 | range->min_addr.in6 = | |
549 | nla_get_in6_addr(tb[TCA_CT_NAT_IPV6_MIN]); | |
550 | ||
551 | range->max_addr.in6 = max_attr ? | |
552 | nla_get_in6_addr(max_attr) : | |
553 | range->min_addr.in6; | |
554 | } | |
555 | ||
556 | if (tb[TCA_CT_NAT_PORT_MIN]) { | |
557 | range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; | |
558 | range->min_proto.all = nla_get_be16(tb[TCA_CT_NAT_PORT_MIN]); | |
559 | ||
560 | range->max_proto.all = tb[TCA_CT_NAT_PORT_MAX] ? | |
561 | nla_get_be16(tb[TCA_CT_NAT_PORT_MAX]) : | |
562 | range->min_proto.all; | |
563 | } | |
564 | ||
565 | return 0; | |
566 | } | |
567 | ||
568 | static void tcf_ct_set_key_val(struct nlattr **tb, | |
569 | void *val, int val_type, | |
570 | void *mask, int mask_type, | |
571 | int len) | |
572 | { | |
573 | if (!tb[val_type]) | |
574 | return; | |
575 | nla_memcpy(val, tb[val_type], len); | |
576 | ||
577 | if (!mask) | |
578 | return; | |
579 | ||
580 | if (mask_type == TCA_CT_UNSPEC || !tb[mask_type]) | |
581 | memset(mask, 0xff, len); | |
582 | else | |
583 | nla_memcpy(mask, tb[mask_type], len); | |
584 | } | |
585 | ||
586 | static int tcf_ct_fill_params(struct net *net, | |
587 | struct tcf_ct_params *p, | |
588 | struct tc_ct *parm, | |
589 | struct nlattr **tb, | |
590 | struct netlink_ext_ack *extack) | |
591 | { | |
592 | struct tc_ct_action_net *tn = net_generic(net, ct_net_id); | |
593 | struct nf_conntrack_zone zone; | |
594 | struct nf_conn *tmpl; | |
595 | int err; | |
596 | ||
597 | p->zone = NF_CT_DEFAULT_ZONE_ID; | |
598 | ||
599 | tcf_ct_set_key_val(tb, | |
600 | &p->ct_action, TCA_CT_ACTION, | |
601 | NULL, TCA_CT_UNSPEC, | |
602 | sizeof(p->ct_action)); | |
603 | ||
604 | if (p->ct_action & TCA_CT_ACT_CLEAR) | |
605 | return 0; | |
606 | ||
607 | err = tcf_ct_fill_params_nat(p, parm, tb, extack); | |
608 | if (err) | |
609 | return err; | |
610 | ||
611 | if (tb[TCA_CT_MARK]) { | |
612 | if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)) { | |
613 | NL_SET_ERR_MSG_MOD(extack, "Conntrack mark isn't enabled."); | |
614 | return -EOPNOTSUPP; | |
615 | } | |
616 | tcf_ct_set_key_val(tb, | |
617 | &p->mark, TCA_CT_MARK, | |
618 | &p->mark_mask, TCA_CT_MARK_MASK, | |
619 | sizeof(p->mark)); | |
620 | } | |
621 | ||
622 | if (tb[TCA_CT_LABELS]) { | |
623 | if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) { | |
624 | NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled."); | |
625 | return -EOPNOTSUPP; | |
626 | } | |
627 | ||
628 | if (!tn->labels) { | |
629 | NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length"); | |
630 | return -EOPNOTSUPP; | |
631 | } | |
632 | tcf_ct_set_key_val(tb, | |
633 | p->labels, TCA_CT_LABELS, | |
634 | p->labels_mask, TCA_CT_LABELS_MASK, | |
635 | sizeof(p->labels)); | |
636 | } | |
637 | ||
638 | if (tb[TCA_CT_ZONE]) { | |
639 | if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) { | |
640 | NL_SET_ERR_MSG_MOD(extack, "Conntrack zones isn't enabled."); | |
641 | return -EOPNOTSUPP; | |
642 | } | |
643 | ||
644 | tcf_ct_set_key_val(tb, | |
645 | &p->zone, TCA_CT_ZONE, | |
646 | NULL, TCA_CT_UNSPEC, | |
647 | sizeof(p->zone)); | |
648 | } | |
649 | ||
650 | if (p->zone == NF_CT_DEFAULT_ZONE_ID) | |
651 | return 0; | |
652 | ||
653 | nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0); | |
654 | tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL); | |
655 | if (!tmpl) { | |
656 | NL_SET_ERR_MSG_MOD(extack, "Failed to allocate conntrack template"); | |
657 | return -ENOMEM; | |
658 | } | |
659 | __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); | |
660 | nf_conntrack_get(&tmpl->ct_general); | |
661 | p->tmpl = tmpl; | |
662 | ||
663 | return 0; | |
664 | } | |
665 | ||
666 | static int tcf_ct_init(struct net *net, struct nlattr *nla, | |
667 | struct nlattr *est, struct tc_action **a, | |
668 | int replace, int bind, bool rtnl_held, | |
abbb0d33 | 669 | struct tcf_proto *tp, u32 flags, |
b57dc7c1 PB |
670 | struct netlink_ext_ack *extack) |
671 | { | |
672 | struct tc_action_net *tn = net_generic(net, ct_net_id); | |
673 | struct tcf_ct_params *params = NULL; | |
674 | struct nlattr *tb[TCA_CT_MAX + 1]; | |
675 | struct tcf_chain *goto_ch = NULL; | |
676 | struct tc_ct *parm; | |
677 | struct tcf_ct *c; | |
678 | int err, res = 0; | |
7be8ef2c | 679 | u32 index; |
b57dc7c1 PB |
680 | |
681 | if (!nla) { | |
682 | NL_SET_ERR_MSG_MOD(extack, "Ct requires attributes to be passed"); | |
683 | return -EINVAL; | |
684 | } | |
685 | ||
686 | err = nla_parse_nested(tb, TCA_CT_MAX, nla, ct_policy, extack); | |
687 | if (err < 0) | |
688 | return err; | |
689 | ||
690 | if (!tb[TCA_CT_PARMS]) { | |
691 | NL_SET_ERR_MSG_MOD(extack, "Missing required ct parameters"); | |
692 | return -EINVAL; | |
693 | } | |
694 | parm = nla_data(tb[TCA_CT_PARMS]); | |
7be8ef2c DL |
695 | index = parm->index; |
696 | err = tcf_idr_check_alloc(tn, &index, a, bind); | |
b57dc7c1 PB |
697 | if (err < 0) |
698 | return err; | |
699 | ||
700 | if (!err) { | |
e3822678 VB |
701 | err = tcf_idr_create_from_flags(tn, index, est, a, |
702 | &act_ct_ops, bind, flags); | |
b57dc7c1 | 703 | if (err) { |
7be8ef2c | 704 | tcf_idr_cleanup(tn, index); |
b57dc7c1 PB |
705 | return err; |
706 | } | |
707 | res = ACT_P_CREATED; | |
708 | } else { | |
709 | if (bind) | |
710 | return 0; | |
711 | ||
712 | if (!replace) { | |
713 | tcf_idr_release(*a, bind); | |
714 | return -EEXIST; | |
715 | } | |
716 | } | |
717 | err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); | |
718 | if (err < 0) | |
719 | goto cleanup; | |
720 | ||
721 | c = to_ct(*a); | |
722 | ||
723 | params = kzalloc(sizeof(*params), GFP_KERNEL); | |
724 | if (unlikely(!params)) { | |
725 | err = -ENOMEM; | |
726 | goto cleanup; | |
727 | } | |
728 | ||
729 | err = tcf_ct_fill_params(net, params, parm, tb, extack); | |
730 | if (err) | |
731 | goto cleanup; | |
732 | ||
733 | spin_lock_bh(&c->tcf_lock); | |
734 | goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); | |
445d3749 PM |
735 | params = rcu_replace_pointer(c->params, params, |
736 | lockdep_is_held(&c->tcf_lock)); | |
b57dc7c1 PB |
737 | spin_unlock_bh(&c->tcf_lock); |
738 | ||
739 | if (goto_ch) | |
740 | tcf_chain_put_by_act(goto_ch); | |
741 | if (params) | |
dd2af104 | 742 | call_rcu(¶ms->rcu, tcf_ct_params_free); |
b57dc7c1 PB |
743 | if (res == ACT_P_CREATED) |
744 | tcf_idr_insert(tn, *a); | |
745 | ||
746 | return res; | |
747 | ||
748 | cleanup: | |
749 | if (goto_ch) | |
750 | tcf_chain_put_by_act(goto_ch); | |
751 | kfree(params); | |
752 | tcf_idr_release(*a, bind); | |
753 | return err; | |
754 | } | |
755 | ||
756 | static void tcf_ct_cleanup(struct tc_action *a) | |
757 | { | |
758 | struct tcf_ct_params *params; | |
759 | struct tcf_ct *c = to_ct(a); | |
760 | ||
761 | params = rcu_dereference_protected(c->params, 1); | |
762 | if (params) | |
763 | call_rcu(¶ms->rcu, tcf_ct_params_free); | |
764 | } | |
765 | ||
766 | static int tcf_ct_dump_key_val(struct sk_buff *skb, | |
767 | void *val, int val_type, | |
768 | void *mask, int mask_type, | |
769 | int len) | |
770 | { | |
771 | int err; | |
772 | ||
773 | if (mask && !memchr_inv(mask, 0, len)) | |
774 | return 0; | |
775 | ||
776 | err = nla_put(skb, val_type, len, val); | |
777 | if (err) | |
778 | return err; | |
779 | ||
780 | if (mask_type != TCA_CT_UNSPEC) { | |
781 | err = nla_put(skb, mask_type, len, mask); | |
782 | if (err) | |
783 | return err; | |
784 | } | |
785 | ||
786 | return 0; | |
787 | } | |
788 | ||
789 | static int tcf_ct_dump_nat(struct sk_buff *skb, struct tcf_ct_params *p) | |
790 | { | |
791 | struct nf_nat_range2 *range = &p->range; | |
792 | ||
793 | if (!(p->ct_action & TCA_CT_ACT_NAT)) | |
794 | return 0; | |
795 | ||
796 | if (!(p->ct_action & (TCA_CT_ACT_NAT_SRC | TCA_CT_ACT_NAT_DST))) | |
797 | return 0; | |
798 | ||
799 | if (range->flags & NF_NAT_RANGE_MAP_IPS) { | |
800 | if (p->ipv4_range) { | |
801 | if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MIN, | |
802 | range->min_addr.ip)) | |
803 | return -1; | |
804 | if (nla_put_in_addr(skb, TCA_CT_NAT_IPV4_MAX, | |
805 | range->max_addr.ip)) | |
806 | return -1; | |
807 | } else { | |
808 | if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MIN, | |
809 | &range->min_addr.in6)) | |
810 | return -1; | |
811 | if (nla_put_in6_addr(skb, TCA_CT_NAT_IPV6_MAX, | |
812 | &range->max_addr.in6)) | |
813 | return -1; | |
814 | } | |
815 | } | |
816 | ||
817 | if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { | |
818 | if (nla_put_be16(skb, TCA_CT_NAT_PORT_MIN, | |
819 | range->min_proto.all)) | |
820 | return -1; | |
821 | if (nla_put_be16(skb, TCA_CT_NAT_PORT_MAX, | |
822 | range->max_proto.all)) | |
823 | return -1; | |
824 | } | |
825 | ||
826 | return 0; | |
827 | } | |
828 | ||
829 | static inline int tcf_ct_dump(struct sk_buff *skb, struct tc_action *a, | |
830 | int bind, int ref) | |
831 | { | |
832 | unsigned char *b = skb_tail_pointer(skb); | |
833 | struct tcf_ct *c = to_ct(a); | |
834 | struct tcf_ct_params *p; | |
835 | ||
836 | struct tc_ct opt = { | |
837 | .index = c->tcf_index, | |
838 | .refcnt = refcount_read(&c->tcf_refcnt) - ref, | |
839 | .bindcnt = atomic_read(&c->tcf_bindcnt) - bind, | |
840 | }; | |
841 | struct tcf_t t; | |
842 | ||
843 | spin_lock_bh(&c->tcf_lock); | |
844 | p = rcu_dereference_protected(c->params, | |
845 | lockdep_is_held(&c->tcf_lock)); | |
846 | opt.action = c->tcf_action; | |
847 | ||
848 | if (tcf_ct_dump_key_val(skb, | |
849 | &p->ct_action, TCA_CT_ACTION, | |
850 | NULL, TCA_CT_UNSPEC, | |
851 | sizeof(p->ct_action))) | |
852 | goto nla_put_failure; | |
853 | ||
854 | if (p->ct_action & TCA_CT_ACT_CLEAR) | |
855 | goto skip_dump; | |
856 | ||
857 | if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && | |
858 | tcf_ct_dump_key_val(skb, | |
859 | &p->mark, TCA_CT_MARK, | |
860 | &p->mark_mask, TCA_CT_MARK_MASK, | |
861 | sizeof(p->mark))) | |
862 | goto nla_put_failure; | |
863 | ||
864 | if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && | |
865 | tcf_ct_dump_key_val(skb, | |
866 | p->labels, TCA_CT_LABELS, | |
867 | p->labels_mask, TCA_CT_LABELS_MASK, | |
868 | sizeof(p->labels))) | |
869 | goto nla_put_failure; | |
870 | ||
871 | if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) && | |
872 | tcf_ct_dump_key_val(skb, | |
873 | &p->zone, TCA_CT_ZONE, | |
874 | NULL, TCA_CT_UNSPEC, | |
875 | sizeof(p->zone))) | |
876 | goto nla_put_failure; | |
877 | ||
878 | if (tcf_ct_dump_nat(skb, p)) | |
879 | goto nla_put_failure; | |
880 | ||
881 | skip_dump: | |
882 | if (nla_put(skb, TCA_CT_PARMS, sizeof(opt), &opt)) | |
883 | goto nla_put_failure; | |
884 | ||
885 | tcf_tm_dump(&t, &c->tcf_tm); | |
886 | if (nla_put_64bit(skb, TCA_CT_TM, sizeof(t), &t, TCA_CT_PAD)) | |
887 | goto nla_put_failure; | |
888 | spin_unlock_bh(&c->tcf_lock); | |
889 | ||
890 | return skb->len; | |
891 | nla_put_failure: | |
892 | spin_unlock_bh(&c->tcf_lock); | |
893 | nlmsg_trim(skb, b); | |
894 | return -1; | |
895 | } | |
896 | ||
897 | static int tcf_ct_walker(struct net *net, struct sk_buff *skb, | |
898 | struct netlink_callback *cb, int type, | |
899 | const struct tc_action_ops *ops, | |
900 | struct netlink_ext_ack *extack) | |
901 | { | |
902 | struct tc_action_net *tn = net_generic(net, ct_net_id); | |
903 | ||
904 | return tcf_generic_walker(tn, skb, cb, type, ops, extack); | |
905 | } | |
906 | ||
907 | static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index) | |
908 | { | |
909 | struct tc_action_net *tn = net_generic(net, ct_net_id); | |
910 | ||
911 | return tcf_idr_search(tn, a, index); | |
912 | } | |
913 | ||
914 | static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, | |
915 | u64 lastuse, bool hw) | |
916 | { | |
917 | struct tcf_ct *c = to_ct(a); | |
918 | ||
c8ecebd0 | 919 | tcf_action_update_stats(a, bytes, packets, false, hw); |
b57dc7c1 PB |
920 | c->tcf_tm.lastuse = max_t(u64, c->tcf_tm.lastuse, lastuse); |
921 | } | |
922 | ||
923 | static struct tc_action_ops act_ct_ops = { | |
924 | .kind = "ct", | |
925 | .id = TCA_ID_CT, | |
926 | .owner = THIS_MODULE, | |
927 | .act = tcf_ct_act, | |
928 | .dump = tcf_ct_dump, | |
929 | .init = tcf_ct_init, | |
930 | .cleanup = tcf_ct_cleanup, | |
931 | .walk = tcf_ct_walker, | |
932 | .lookup = tcf_ct_search, | |
933 | .stats_update = tcf_stats_update, | |
934 | .size = sizeof(struct tcf_ct), | |
935 | }; | |
936 | ||
937 | static __net_init int ct_init_net(struct net *net) | |
938 | { | |
c593642c | 939 | unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8; |
b57dc7c1 PB |
940 | struct tc_ct_action_net *tn = net_generic(net, ct_net_id); |
941 | ||
942 | if (nf_connlabels_get(net, n_bits - 1)) { | |
943 | tn->labels = false; | |
944 | pr_err("act_ct: Failed to set connlabels length"); | |
945 | } else { | |
946 | tn->labels = true; | |
947 | } | |
948 | ||
981471bd | 949 | return tc_action_net_init(net, &tn->tn, &act_ct_ops); |
b57dc7c1 PB |
950 | } |
951 | ||
952 | static void __net_exit ct_exit_net(struct list_head *net_list) | |
953 | { | |
954 | struct net *net; | |
955 | ||
956 | rtnl_lock(); | |
957 | list_for_each_entry(net, net_list, exit_list) { | |
958 | struct tc_ct_action_net *tn = net_generic(net, ct_net_id); | |
959 | ||
960 | if (tn->labels) | |
961 | nf_connlabels_put(net); | |
962 | } | |
963 | rtnl_unlock(); | |
964 | ||
965 | tc_action_net_exit(net_list, ct_net_id); | |
966 | } | |
967 | ||
968 | static struct pernet_operations ct_net_ops = { | |
969 | .init = ct_init_net, | |
970 | .exit_batch = ct_exit_net, | |
971 | .id = &ct_net_id, | |
972 | .size = sizeof(struct tc_ct_action_net), | |
973 | }; | |
974 | ||
975 | static int __init ct_init_module(void) | |
976 | { | |
977 | return tcf_register_action(&act_ct_ops, &ct_net_ops); | |
978 | } | |
979 | ||
980 | static void __exit ct_cleanup_module(void) | |
981 | { | |
982 | tcf_unregister_action(&act_ct_ops, &ct_net_ops); | |
983 | } | |
984 | ||
985 | module_init(ct_init_module); | |
986 | module_exit(ct_cleanup_module); | |
987 | MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>"); | |
988 | MODULE_AUTHOR("Yossi Kuperman <yossiku@mellanox.com>"); | |
989 | MODULE_AUTHOR("Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>"); | |
990 | MODULE_DESCRIPTION("Connection tracking action"); | |
991 | MODULE_LICENSE("GPL v2"); | |
992 |