Commit | Line | Data |
---|---|---|
3c171f49 PNA |
1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #include <linux/types.h> | |
3 | #include <linux/ip.h> | |
4 | #include <linux/netfilter.h> | |
5 | #include <linux/netfilter_ipv6.h> | |
6 | #include <linux/netfilter_bridge.h> | |
7 | #include <linux/module.h> | |
8 | #include <linux/skbuff.h> | |
9 | #include <linux/icmp.h> | |
10 | #include <linux/sysctl.h> | |
11 | #include <net/route.h> | |
12 | #include <net/ip.h> | |
13 | ||
14 | #include <net/netfilter/nf_conntrack.h> | |
15 | #include <net/netfilter/nf_conntrack_core.h> | |
16 | #include <net/netfilter/nf_conntrack_helper.h> | |
17 | #include <net/netfilter/nf_conntrack_bridge.h> | |
18 | ||
19 | #include <linux/netfilter/nf_tables.h> | |
3c171f49 PNA |
20 | #include <net/netfilter/nf_tables.h> |
21 | ||
22 | #include "../br_private.h" | |
23 | ||
24 | /* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff | |
25 | * has been linearized or cloned. | |
26 | */ | |
27 | static int nf_br_ip_fragment(struct net *net, struct sock *sk, | |
28 | struct sk_buff *skb, | |
46705b07 | 29 | struct nf_bridge_frag_data *data, |
3c171f49 | 30 | int (*output)(struct net *, struct sock *sk, |
46705b07 | 31 | const struct nf_bridge_frag_data *data, |
3c171f49 PNA |
32 | struct sk_buff *)) |
33 | { | |
34 | int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; | |
a1ac9c8a | 35 | bool mono_delivery_time = skb->mono_delivery_time; |
3c171f49 | 36 | unsigned int hlen, ll_rs, mtu; |
9669fffc | 37 | ktime_t tstamp = skb->tstamp; |
3c171f49 PNA |
38 | struct ip_frag_state state; |
39 | struct iphdr *iph; | |
a44af08e | 40 | int err = 0; |
3c171f49 PNA |
41 | |
42 | /* for offloaded checksums cleanup checksum before fragmentation */ | |
43 | if (skb->ip_summed == CHECKSUM_PARTIAL && | |
44 | (err = skb_checksum_help(skb))) | |
45 | goto blackhole; | |
46 | ||
47 | iph = ip_hdr(skb); | |
48 | ||
49 | /* | |
50 | * Setup starting values | |
51 | */ | |
52 | ||
53 | hlen = iph->ihl * 4; | |
54 | frag_max_size -= hlen; | |
55 | ll_rs = LL_RESERVED_SPACE(skb->dev); | |
56 | mtu = skb->dev->mtu; | |
57 | ||
58 | if (skb_has_frag_list(skb)) { | |
59 | unsigned int first_len = skb_pagelen(skb); | |
60 | struct ip_fraglist_iter iter; | |
61 | struct sk_buff *frag; | |
62 | ||
63 | if (first_len - hlen > mtu || | |
64 | skb_headroom(skb) < ll_rs) | |
65 | goto blackhole; | |
66 | ||
67 | if (skb_cloned(skb)) | |
68 | goto slow_path; | |
69 | ||
70 | skb_walk_frags(skb, frag) { | |
71 | if (frag->len > mtu || | |
72 | skb_headroom(frag) < hlen + ll_rs) | |
73 | goto blackhole; | |
74 | ||
75 | if (skb_shared(frag)) | |
76 | goto slow_path; | |
77 | } | |
78 | ||
79 | ip_fraglist_init(skb, iph, hlen, &iter); | |
80 | ||
81 | for (;;) { | |
82 | if (iter.frag) | |
83 | ip_fraglist_prepare(skb, &iter); | |
84 | ||
a1ac9c8a | 85 | skb_set_delivery_time(skb, tstamp, mono_delivery_time); |
3c171f49 PNA |
86 | err = output(net, sk, data, skb); |
87 | if (err || !iter.frag) | |
88 | break; | |
89 | ||
90 | skb = ip_fraglist_next(&iter); | |
91 | } | |
38ea9def YD |
92 | |
93 | if (!err) | |
94 | return 0; | |
95 | ||
96 | kfree_skb_list(iter.frag); | |
97 | ||
3c171f49 PNA |
98 | return err; |
99 | } | |
100 | slow_path: | |
101 | /* This is a linearized skbuff, the original geometry is lost for us. | |
102 | * This may also be a clone skbuff, we could preserve the geometry for | |
103 | * the copies but probably not worth the effort. | |
104 | */ | |
e7a409c3 | 105 | ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state); |
3c171f49 PNA |
106 | |
107 | while (state.left > 0) { | |
108 | struct sk_buff *skb2; | |
109 | ||
110 | skb2 = ip_frag_next(skb, &state); | |
111 | if (IS_ERR(skb2)) { | |
112 | err = PTR_ERR(skb2); | |
113 | goto blackhole; | |
114 | } | |
115 | ||
a1ac9c8a | 116 | skb_set_delivery_time(skb2, tstamp, mono_delivery_time); |
3c171f49 PNA |
117 | err = output(net, sk, data, skb2); |
118 | if (err) | |
119 | goto blackhole; | |
120 | } | |
121 | consume_skb(skb); | |
122 | return err; | |
123 | ||
124 | blackhole: | |
125 | kfree_skb(skb); | |
126 | return 0; | |
127 | } | |
128 | ||
129 | /* ip_defrag() expects IPCB() in place. */ | |
130 | static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb, | |
131 | size_t inet_skb_parm_size) | |
132 | { | |
133 | memcpy(cb, skb->cb, sizeof(*cb)); | |
134 | memset(skb->cb, 0, inet_skb_parm_size); | |
135 | } | |
136 | ||
137 | static void br_skb_cb_restore(struct sk_buff *skb, | |
138 | const struct br_input_skb_cb *cb, | |
139 | u16 fragsz) | |
140 | { | |
141 | memcpy(skb->cb, cb, sizeof(*cb)); | |
142 | BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz; | |
143 | } | |
144 | ||
145 | static unsigned int nf_ct_br_defrag4(struct sk_buff *skb, | |
146 | const struct nf_hook_state *state) | |
147 | { | |
148 | u16 zone_id = NF_CT_DEFAULT_ZONE_ID; | |
149 | enum ip_conntrack_info ctinfo; | |
150 | struct br_input_skb_cb cb; | |
151 | const struct nf_conn *ct; | |
152 | int err; | |
153 | ||
154 | if (!ip_is_fragment(ip_hdr(skb))) | |
155 | return NF_ACCEPT; | |
156 | ||
157 | ct = nf_ct_get(skb, &ctinfo); | |
158 | if (ct) | |
159 | zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); | |
160 | ||
161 | br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm)); | |
162 | local_bh_disable(); | |
163 | err = ip_defrag(state->net, skb, | |
164 | IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); | |
165 | local_bh_enable(); | |
166 | if (!err) { | |
167 | br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size); | |
168 | skb->ignore_df = 1; | |
169 | return NF_ACCEPT; | |
170 | } | |
171 | ||
172 | return NF_STOLEN; | |
173 | } | |
174 | ||
764dd163 PNA |
175 | static unsigned int nf_ct_br_defrag6(struct sk_buff *skb, |
176 | const struct nf_hook_state *state) | |
177 | { | |
2404b73c | 178 | #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) |
764dd163 PNA |
179 | u16 zone_id = NF_CT_DEFAULT_ZONE_ID; |
180 | enum ip_conntrack_info ctinfo; | |
181 | struct br_input_skb_cb cb; | |
182 | const struct nf_conn *ct; | |
183 | int err; | |
184 | ||
185 | ct = nf_ct_get(skb, &ctinfo); | |
186 | if (ct) | |
187 | zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo)); | |
188 | ||
189 | br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm)); | |
190 | ||
2404b73c FW |
191 | err = nf_ct_frag6_gather(state->net, skb, |
192 | IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id); | |
764dd163 PNA |
193 | /* queued */ |
194 | if (err == -EINPROGRESS) | |
195 | return NF_STOLEN; | |
196 | ||
197 | br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size); | |
198 | return err == 0 ? NF_ACCEPT : NF_DROP; | |
2404b73c FW |
199 | #else |
200 | return NF_ACCEPT; | |
201 | #endif | |
764dd163 PNA |
202 | } |
203 | ||
3c171f49 PNA |
204 | static int nf_ct_br_ip_check(const struct sk_buff *skb) |
205 | { | |
206 | const struct iphdr *iph; | |
207 | int nhoff, len; | |
208 | ||
209 | nhoff = skb_network_offset(skb); | |
210 | iph = ip_hdr(skb); | |
211 | if (iph->ihl < 5 || | |
212 | iph->version != 4) | |
213 | return -1; | |
214 | ||
46abd173 | 215 | len = skb_ip_totlen(skb); |
3c171f49 PNA |
216 | if (skb->len < nhoff + len || |
217 | len < (iph->ihl * 4)) | |
764dd163 PNA |
218 | return -1; |
219 | ||
220 | return 0; | |
221 | } | |
222 | ||
223 | static int nf_ct_br_ipv6_check(const struct sk_buff *skb) | |
224 | { | |
225 | const struct ipv6hdr *hdr; | |
226 | int nhoff, len; | |
227 | ||
228 | nhoff = skb_network_offset(skb); | |
229 | hdr = ipv6_hdr(skb); | |
230 | if (hdr->version != 6) | |
231 | return -1; | |
232 | ||
233 | len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff; | |
234 | if (skb->len < len) | |
3c171f49 PNA |
235 | return -1; |
236 | ||
237 | return 0; | |
238 | } | |
239 | ||
240 | static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, | |
241 | const struct nf_hook_state *state) | |
242 | { | |
243 | struct nf_hook_state bridge_state = *state; | |
244 | enum ip_conntrack_info ctinfo; | |
245 | struct nf_conn *ct; | |
246 | u32 len; | |
247 | int ret; | |
248 | ||
249 | ct = nf_ct_get(skb, &ctinfo); | |
250 | if ((ct && !nf_ct_is_template(ct)) || | |
251 | ctinfo == IP_CT_UNTRACKED) | |
252 | return NF_ACCEPT; | |
253 | ||
254 | switch (skb->protocol) { | |
255 | case htons(ETH_P_IP): | |
256 | if (!pskb_may_pull(skb, sizeof(struct iphdr))) | |
257 | return NF_ACCEPT; | |
258 | ||
46abd173 | 259 | len = skb_ip_totlen(skb); |
3c171f49 PNA |
260 | if (pskb_trim_rcsum(skb, len)) |
261 | return NF_ACCEPT; | |
262 | ||
263 | if (nf_ct_br_ip_check(skb)) | |
264 | return NF_ACCEPT; | |
265 | ||
266 | bridge_state.pf = NFPROTO_IPV4; | |
267 | ret = nf_ct_br_defrag4(skb, &bridge_state); | |
268 | break; | |
269 | case htons(ETH_P_IPV6): | |
764dd163 PNA |
270 | if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) |
271 | return NF_ACCEPT; | |
272 | ||
273 | len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len); | |
274 | if (pskb_trim_rcsum(skb, len)) | |
275 | return NF_ACCEPT; | |
276 | ||
277 | if (nf_ct_br_ipv6_check(skb)) | |
278 | return NF_ACCEPT; | |
279 | ||
280 | bridge_state.pf = NFPROTO_IPV6; | |
281 | ret = nf_ct_br_defrag6(skb, &bridge_state); | |
282 | break; | |
3c171f49 PNA |
283 | default: |
284 | nf_ct_set(skb, NULL, IP_CT_UNTRACKED); | |
285 | return NF_ACCEPT; | |
286 | } | |
287 | ||
288 | if (ret != NF_ACCEPT) | |
289 | return ret; | |
290 | ||
291 | return nf_conntrack_in(skb, &bridge_state); | |
292 | } | |
293 | ||
62e7151a FW |
294 | static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb, |
295 | const struct nf_hook_state *state) | |
296 | { | |
751de201 PNA |
297 | bool promisc = BR_INPUT_SKB_CB(skb)->promisc; |
298 | struct nf_conntrack *nfct = skb_nfct(skb); | |
62e7151a FW |
299 | struct nf_conn *ct; |
300 | ||
751de201 PNA |
301 | if (promisc) { |
302 | nf_reset_ct(skb); | |
303 | return NF_ACCEPT; | |
304 | } | |
305 | ||
306 | if (!nfct || skb->pkt_type == PACKET_HOST) | |
62e7151a FW |
307 | return NF_ACCEPT; |
308 | ||
309 | /* nf_conntrack_confirm() cannot handle concurrent clones, | |
310 | * this happens for broad/multicast frames with e.g. macvlan on top | |
311 | * of the bridge device. | |
312 | */ | |
751de201 PNA |
313 | ct = container_of(nfct, struct nf_conn, ct_general); |
314 | if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) | |
62e7151a FW |
315 | return NF_ACCEPT; |
316 | ||
317 | /* let inet prerouting call conntrack again */ | |
318 | skb->_nfct = 0; | |
319 | nf_ct_put(ct); | |
320 | ||
321 | return NF_ACCEPT; | |
322 | } | |
323 | ||
3c171f49 | 324 | static void nf_ct_bridge_frag_save(struct sk_buff *skb, |
46705b07 | 325 | struct nf_bridge_frag_data *data) |
3c171f49 PNA |
326 | { |
327 | if (skb_vlan_tag_present(skb)) { | |
328 | data->vlan_present = true; | |
329 | data->vlan_tci = skb->vlan_tci; | |
330 | data->vlan_proto = skb->vlan_proto; | |
331 | } else { | |
332 | data->vlan_present = false; | |
333 | } | |
334 | skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN); | |
335 | } | |
336 | ||
337 | static unsigned int | |
338 | nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state, | |
339 | int (*output)(struct net *, struct sock *sk, | |
46705b07 | 340 | const struct nf_bridge_frag_data *data, |
3c171f49 PNA |
341 | struct sk_buff *)) |
342 | { | |
46705b07 | 343 | struct nf_bridge_frag_data data; |
3c171f49 PNA |
344 | |
345 | if (!BR_INPUT_SKB_CB(skb)->frag_max_size) | |
346 | return NF_ACCEPT; | |
347 | ||
348 | nf_ct_bridge_frag_save(skb, &data); | |
349 | switch (skb->protocol) { | |
350 | case htons(ETH_P_IP): | |
351 | nf_br_ip_fragment(state->net, state->sk, skb, &data, output); | |
352 | break; | |
353 | case htons(ETH_P_IPV6): | |
764dd163 PNA |
354 | nf_br_ip6_fragment(state->net, state->sk, skb, &data, output); |
355 | break; | |
3c171f49 PNA |
356 | default: |
357 | WARN_ON_ONCE(1); | |
358 | return NF_DROP; | |
359 | } | |
360 | ||
361 | return NF_STOLEN; | |
362 | } | |
363 | ||
364 | /* Actually only slow path refragmentation needs this. */ | |
365 | static int nf_ct_bridge_frag_restore(struct sk_buff *skb, | |
46705b07 | 366 | const struct nf_bridge_frag_data *data) |
3c171f49 PNA |
367 | { |
368 | int err; | |
369 | ||
370 | err = skb_cow_head(skb, ETH_HLEN); | |
371 | if (err) { | |
372 | kfree_skb(skb); | |
373 | return -ENOMEM; | |
374 | } | |
375 | if (data->vlan_present) | |
376 | __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci); | |
29099462 | 377 | else if (skb_vlan_tag_present(skb)) |
378 | __vlan_hwaccel_clear_tag(skb); | |
3c171f49 PNA |
379 | |
380 | skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN); | |
381 | skb_reset_mac_header(skb); | |
382 | ||
383 | return 0; | |
384 | } | |
385 | ||
/* Per-fragment output callback: restore the saved link-layer data on @skb
 * and pass it to br_dev_queue_push_xmit().  A negative restore result is
 * returned without transmitting.
 */
static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
				    const struct nf_bridge_frag_data *data,
				    struct sk_buff *skb)
{
	int err = nf_ct_bridge_frag_restore(skb, data);

	if (err >= 0)
		return br_dev_queue_push_xmit(net, sk, skb);

	return err;
}
398 | ||
3c171f49 PNA |
399 | static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb, |
400 | const struct nf_hook_state *state) | |
401 | { | |
402 | int ret; | |
403 | ||
a70e4834 | 404 | ret = nf_confirm(priv, skb, state); |
3c171f49 PNA |
405 | if (ret != NF_ACCEPT) |
406 | return ret; | |
407 | ||
408 | return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post); | |
409 | } | |
410 | ||
411 | static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = { | |
412 | { | |
413 | .hook = nf_ct_bridge_pre, | |
414 | .pf = NFPROTO_BRIDGE, | |
415 | .hooknum = NF_BR_PRE_ROUTING, | |
416 | .priority = NF_IP_PRI_CONNTRACK, | |
417 | }, | |
62e7151a FW |
418 | { |
419 | .hook = nf_ct_bridge_in, | |
420 | .pf = NFPROTO_BRIDGE, | |
421 | .hooknum = NF_BR_LOCAL_IN, | |
422 | .priority = NF_IP_PRI_CONNTRACK_CONFIRM, | |
423 | }, | |
3c171f49 PNA |
424 | { |
425 | .hook = nf_ct_bridge_post, | |
426 | .pf = NFPROTO_BRIDGE, | |
427 | .hooknum = NF_BR_POST_ROUTING, | |
428 | .priority = NF_IP_PRI_CONNTRACK_CONFIRM, | |
429 | }, | |
430 | }; | |
431 | ||
432 | static struct nf_ct_bridge_info bridge_info = { | |
433 | .ops = nf_ct_bridge_hook_ops, | |
434 | .ops_size = ARRAY_SIZE(nf_ct_bridge_hook_ops), | |
435 | .me = THIS_MODULE, | |
436 | }; | |
437 | ||
438 | static int __init nf_conntrack_l3proto_bridge_init(void) | |
439 | { | |
440 | nf_ct_bridge_register(&bridge_info); | |
441 | ||
442 | return 0; | |
443 | } | |
444 | ||
445 | static void __exit nf_conntrack_l3proto_bridge_fini(void) | |
446 | { | |
447 | nf_ct_bridge_unregister(&bridge_info); | |
448 | } | |
449 | ||
450 | module_init(nf_conntrack_l3proto_bridge_init); | |
451 | module_exit(nf_conntrack_l3proto_bridge_fini); | |
452 | ||
453 | MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE)); | |
454 | MODULE_LICENSE("GPL"); | |
94090b23 | 455 | MODULE_DESCRIPTION("Bridge IPv4 and IPv6 connection tracking"); |