506d6141e44eea67ccc34123ad4250a19bcc7ab7
[linux-2.6-block.git] / net / bridge / netfilter / nf_conntrack_bridge.c
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #include <linux/types.h>
3 #include <linux/ip.h>
4 #include <linux/netfilter.h>
5 #include <linux/netfilter_ipv6.h>
6 #include <linux/netfilter_bridge.h>
7 #include <linux/module.h>
8 #include <linux/skbuff.h>
9 #include <linux/icmp.h>
10 #include <linux/sysctl.h>
11 #include <net/route.h>
12 #include <net/ip.h>
13
14 #include <net/netfilter/nf_conntrack.h>
15 #include <net/netfilter/nf_conntrack_core.h>
16 #include <net/netfilter/nf_conntrack_helper.h>
17 #include <net/netfilter/nf_conntrack_bridge.h>
18
19 #include <linux/netfilter/nf_tables.h>
20 #include <net/netfilter/nf_tables.h>
21
22 #include "../br_private.h"
23
24 /* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff
25  * has been linearized or cloned.
26  */
27 static int nf_br_ip_fragment(struct net *net, struct sock *sk,
28                              struct sk_buff *skb,
29                              struct nf_bridge_frag_data *data,
30                              int (*output)(struct net *, struct sock *sk,
31                                            const struct nf_bridge_frag_data *data,
32                                            struct sk_buff *))
33 {
34         int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
35         unsigned int hlen, ll_rs, mtu;
36         ktime_t tstamp = skb->tstamp;
37         struct ip_frag_state state;
38         struct iphdr *iph;
39         int err;
40
41         /* for offloaded checksums cleanup checksum before fragmentation */
42         if (skb->ip_summed == CHECKSUM_PARTIAL &&
43             (err = skb_checksum_help(skb)))
44                 goto blackhole;
45
46         iph = ip_hdr(skb);
47
48         /*
49          *      Setup starting values
50          */
51
52         hlen = iph->ihl * 4;
53         frag_max_size -= hlen;
54         ll_rs = LL_RESERVED_SPACE(skb->dev);
55         mtu = skb->dev->mtu;
56
57         if (skb_has_frag_list(skb)) {
58                 unsigned int first_len = skb_pagelen(skb);
59                 struct ip_fraglist_iter iter;
60                 struct sk_buff *frag;
61
62                 if (first_len - hlen > mtu ||
63                     skb_headroom(skb) < ll_rs)
64                         goto blackhole;
65
66                 if (skb_cloned(skb))
67                         goto slow_path;
68
69                 skb_walk_frags(skb, frag) {
70                         if (frag->len > mtu ||
71                             skb_headroom(frag) < hlen + ll_rs)
72                                 goto blackhole;
73
74                         if (skb_shared(frag))
75                                 goto slow_path;
76                 }
77
78                 ip_fraglist_init(skb, iph, hlen, &iter);
79
80                 for (;;) {
81                         if (iter.frag)
82                                 ip_fraglist_prepare(skb, &iter);
83
84                         skb->tstamp = tstamp;
85                         err = output(net, sk, data, skb);
86                         if (err || !iter.frag)
87                                 break;
88
89                         skb = ip_fraglist_next(&iter);
90                 }
91                 return err;
92         }
93 slow_path:
94         /* This is a linearized skbuff, the original geometry is lost for us.
95          * This may also be a clone skbuff, we could preserve the geometry for
96          * the copies but probably not worth the effort.
97          */
98         ip_frag_init(skb, hlen, ll_rs, frag_max_size, &state);
99
100         while (state.left > 0) {
101                 struct sk_buff *skb2;
102
103                 skb2 = ip_frag_next(skb, &state);
104                 if (IS_ERR(skb2)) {
105                         err = PTR_ERR(skb2);
106                         goto blackhole;
107                 }
108
109                 skb2->tstamp = tstamp;
110                 err = output(net, sk, data, skb2);
111                 if (err)
112                         goto blackhole;
113         }
114         consume_skb(skb);
115         return err;
116
117 blackhole:
118         kfree_skb(skb);
119         return 0;
120 }
121
/* ip_defrag() expects IPCB() in place. */
static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
			   size_t inet_skb_parm_size)
{
	/* Stash the bridge control block away, then zero the first
	 * @inet_skb_parm_size bytes of skb->cb so the inet defrag code
	 * sees a clean IPCB()/IP6CB().
	 */
	memcpy(cb, skb->cb, sizeof(*cb));
	memset(skb->cb, 0, inet_skb_parm_size);
}
129
static void br_skb_cb_restore(struct sk_buff *skb,
			      const struct br_input_skb_cb *cb,
			      u16 fragsz)
{
	/* Put the saved bridge cb back, then record @fragsz (the largest
	 * fragment size seen by defrag) so the egress path knows whether
	 * and how to refragment. Order matters: the memcpy would clobber
	 * frag_max_size if done second.
	 */
	memcpy(skb->cb, cb, sizeof(*cb));
	BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz;
}
137
138 static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
139                                      const struct nf_hook_state *state)
140 {
141         u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
142         enum ip_conntrack_info ctinfo;
143         struct br_input_skb_cb cb;
144         const struct nf_conn *ct;
145         int err;
146
147         if (!ip_is_fragment(ip_hdr(skb)))
148                 return NF_ACCEPT;
149
150         ct = nf_ct_get(skb, &ctinfo);
151         if (ct)
152                 zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
153
154         br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm));
155         local_bh_disable();
156         err = ip_defrag(state->net, skb,
157                         IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
158         local_bh_enable();
159         if (!err) {
160                 br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size);
161                 skb->ignore_df = 1;
162                 return NF_ACCEPT;
163         }
164
165         return NF_STOLEN;
166 }
167
168 static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
169                                      const struct nf_hook_state *state)
170 {
171         u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
172         enum ip_conntrack_info ctinfo;
173         struct br_input_skb_cb cb;
174         const struct nf_conn *ct;
175         int err;
176
177         ct = nf_ct_get(skb, &ctinfo);
178         if (ct)
179                 zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
180
181         br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm));
182
183         err = nf_ipv6_br_defrag(state->net, skb,
184                                 IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
185         /* queued */
186         if (err == -EINPROGRESS)
187                 return NF_STOLEN;
188
189         br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size);
190         return err == 0 ? NF_ACCEPT : NF_DROP;
191 }
192
193 static int nf_ct_br_ip_check(const struct sk_buff *skb)
194 {
195         const struct iphdr *iph;
196         int nhoff, len;
197
198         nhoff = skb_network_offset(skb);
199         iph = ip_hdr(skb);
200         if (iph->ihl < 5 ||
201             iph->version != 4)
202                 return -1;
203
204         len = ntohs(iph->tot_len);
205         if (skb->len < nhoff + len ||
206             len < (iph->ihl * 4))
207                 return -1;
208
209         return 0;
210 }
211
212 static int nf_ct_br_ipv6_check(const struct sk_buff *skb)
213 {
214         const struct ipv6hdr *hdr;
215         int nhoff, len;
216
217         nhoff = skb_network_offset(skb);
218         hdr = ipv6_hdr(skb);
219         if (hdr->version != 6)
220                 return -1;
221
222         len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff;
223         if (skb->len < len)
224                 return -1;
225
226         return 0;
227 }
228
229 static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
230                                      const struct nf_hook_state *state)
231 {
232         struct nf_hook_state bridge_state = *state;
233         enum ip_conntrack_info ctinfo;
234         struct nf_conn *ct;
235         u32 len;
236         int ret;
237
238         ct = nf_ct_get(skb, &ctinfo);
239         if ((ct && !nf_ct_is_template(ct)) ||
240             ctinfo == IP_CT_UNTRACKED)
241                 return NF_ACCEPT;
242
243         switch (skb->protocol) {
244         case htons(ETH_P_IP):
245                 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
246                         return NF_ACCEPT;
247
248                 len = ntohs(ip_hdr(skb)->tot_len);
249                 if (pskb_trim_rcsum(skb, len))
250                         return NF_ACCEPT;
251
252                 if (nf_ct_br_ip_check(skb))
253                         return NF_ACCEPT;
254
255                 bridge_state.pf = NFPROTO_IPV4;
256                 ret = nf_ct_br_defrag4(skb, &bridge_state);
257                 break;
258         case htons(ETH_P_IPV6):
259                 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
260                         return NF_ACCEPT;
261
262                 len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
263                 if (pskb_trim_rcsum(skb, len))
264                         return NF_ACCEPT;
265
266                 if (nf_ct_br_ipv6_check(skb))
267                         return NF_ACCEPT;
268
269                 bridge_state.pf = NFPROTO_IPV6;
270                 ret = nf_ct_br_defrag6(skb, &bridge_state);
271                 break;
272         default:
273                 nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
274                 return NF_ACCEPT;
275         }
276
277         if (ret != NF_ACCEPT)
278                 return ret;
279
280         return nf_conntrack_in(skb, &bridge_state);
281 }
282
283 static void nf_ct_bridge_frag_save(struct sk_buff *skb,
284                                    struct nf_bridge_frag_data *data)
285 {
286         if (skb_vlan_tag_present(skb)) {
287                 data->vlan_present = true;
288                 data->vlan_tci = skb->vlan_tci;
289                 data->vlan_proto = skb->vlan_proto;
290         } else {
291                 data->vlan_present = false;
292         }
293         skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
294 }
295
296 static unsigned int
297 nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
298                     int (*output)(struct net *, struct sock *sk,
299                                   const struct nf_bridge_frag_data *data,
300                                   struct sk_buff *))
301 {
302         struct nf_bridge_frag_data data;
303
304         if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
305                 return NF_ACCEPT;
306
307         nf_ct_bridge_frag_save(skb, &data);
308         switch (skb->protocol) {
309         case htons(ETH_P_IP):
310                 nf_br_ip_fragment(state->net, state->sk, skb, &data, output);
311                 break;
312         case htons(ETH_P_IPV6):
313                 nf_br_ip6_fragment(state->net, state->sk, skb, &data, output);
314                 break;
315         default:
316                 WARN_ON_ONCE(1);
317                 return NF_DROP;
318         }
319
320         return NF_STOLEN;
321 }
322
323 /* Actually only slow path refragmentation needs this. */
324 static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
325                                      const struct nf_bridge_frag_data *data)
326 {
327         int err;
328
329         err = skb_cow_head(skb, ETH_HLEN);
330         if (err) {
331                 kfree_skb(skb);
332                 return -ENOMEM;
333         }
334         if (data->vlan_present)
335                 __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
336         else if (skb_vlan_tag_present(skb))
337                 __vlan_hwaccel_clear_tag(skb);
338
339         skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
340         skb_reset_mac_header(skb);
341
342         return 0;
343 }
344
/* Per-fragment output callback: restore the link-layer header, then
 * queue the fragment for bridge transmission.
 */
static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
				    const struct nf_bridge_frag_data *data,
				    struct sk_buff *skb)
{
	int err = nf_ct_bridge_frag_restore(skb, data);

	if (err < 0)
		return err;

	return br_dev_queue_push_xmit(net, sk, skb);
}
357
358 static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb)
359 {
360         enum ip_conntrack_info ctinfo;
361         struct nf_conn *ct;
362         int protoff;
363
364         ct = nf_ct_get(skb, &ctinfo);
365         if (!ct || ctinfo == IP_CT_RELATED_REPLY)
366                 return nf_conntrack_confirm(skb);
367
368         switch (skb->protocol) {
369         case htons(ETH_P_IP):
370                 protoff = skb_network_offset(skb) + ip_hdrlen(skb);
371                 break;
372         case htons(ETH_P_IPV6): {
373                  unsigned char pnum = ipv6_hdr(skb)->nexthdr;
374                 __be16 frag_off;
375
376                 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
377                                            &frag_off);
378                 if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
379                         return nf_conntrack_confirm(skb);
380                 }
381                 break;
382         default:
383                 return NF_ACCEPT;
384         }
385         return nf_confirm(skb, protoff, ct, ctinfo);
386 }
387
388 static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb,
389                                       const struct nf_hook_state *state)
390 {
391         int ret;
392
393         ret = nf_ct_bridge_confirm(skb);
394         if (ret != NF_ACCEPT)
395                 return ret;
396
397         return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post);
398 }
399
/* Bridge-family conntrack hooks: defrag + track at PRE_ROUTING,
 * confirm + refragment at POST_ROUTING.
 */
static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
	{
		.hook		= nf_ct_bridge_pre,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_PRE_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK,
	},
	{
		.hook		= nf_ct_bridge_post,
		.pf		= NFPROTO_BRIDGE,
		.hooknum	= NF_BR_POST_ROUTING,
		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
	},
};
414
/* Descriptor handed to the conntrack core at module init/exit. */
static struct nf_ct_bridge_info bridge_info = {
	.ops		= nf_ct_bridge_hook_ops,
	.ops_size	= ARRAY_SIZE(nf_ct_bridge_hook_ops),
	.me		= THIS_MODULE,
};
420
static int __init nf_conntrack_l3proto_bridge_init(void)
{
	/* Hand our hook table to the conntrack core. */
	nf_ct_bridge_register(&bridge_info);

	return 0;
}
427
static void __exit nf_conntrack_l3proto_bridge_fini(void)
{
	/* Undo the registration performed at module init. */
	nf_ct_bridge_unregister(&bridge_info);
}
432
module_init(nf_conntrack_l3proto_bridge_init);
module_exit(nf_conntrack_l3proto_bridge_fini);

/* Allow on-demand loading via the "nf_conntrack-<AF_BRIDGE>" alias. */
MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
MODULE_LICENSE("GPL");