// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch>
 */

#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/bpf.h>
#include <net/lwtunnel.h>
#include <net/gre.h>
#include <net/ip6_route.h>
#include <net/ipv6_stubs.h>

struct bpf_lwt_prog {
	struct bpf_prog *prog;
	char *name;
};

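/* Per-route LWT BPF state: one optional program for each of the input,
 * output and xmit lwtunnel hooks, attached via the LWT_BPF_IN, LWT_BPF_OUT
 * and LWT_BPF_XMIT netlink attributes parsed in bpf_build_state() below.
 */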
struct bpf_lwt {
	struct bpf_lwt_prog in;
	struct bpf_lwt_prog out;
	struct bpf_lwt_prog xmit;
	int family;
};

#define MAX_PROG_NAME 256

static inline struct bpf_lwt *bpf_lwt_lwtunnel(struct lwtunnel_state *lwt)
{
	return (struct bpf_lwt *)lwt->data;
}

#define NO_REDIRECT false
#define CAN_REDIRECT true

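/* Run one LWT BPF program on @skb and map its verdict for the caller:
 * BPF_OK and BPF_LWT_REROUTE are passed through unchanged, BPF_REDIRECT
 * either redirects the skb (only the xmit hook passes CAN_REDIRECT) or is
 * downgraded to BPF_OK, BPF_DROP frees the skb and returns -EPERM, and any
 * other value frees the skb and returns -EINVAL.
 */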
static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
		       struct dst_entry *dst, bool can_redirect)
{
	int ret;

	/* Migration disable and BH disable are needed to protect per-cpu
	 * redirect_info between BPF prog and skb_do_redirect().
	 */
	migrate_disable();
	local_bh_disable();
	bpf_compute_data_pointers(skb);
	ret = bpf_prog_run_save_cb(lwt->prog, skb);

	switch (ret) {
	case BPF_OK:
	case BPF_LWT_REROUTE:
		break;

	case BPF_REDIRECT:
		if (unlikely(!can_redirect)) {
			pr_warn_once("Illegal redirect return code in prog %s\n",
				     lwt->name ? : "<unknown>");
			ret = BPF_OK;
		} else {
			skb_reset_mac_header(skb);
			skb_do_redirect(skb);
			ret = BPF_REDIRECT;
		}
		break;

	case BPF_DROP:
		kfree_skb(skb);
		ret = -EPERM;
		break;

	default:
		pr_warn_once("bpf-lwt: Illegal return value %u, expect packet loss\n", ret);
		kfree_skb(skb);
		ret = -EINVAL;
		break;
	}

	local_bh_enable();
	migrate_enable();

	return ret;
}

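/* BPF_LWT_REROUTE from an input program means the program modified the IP
 * header (e.g. pushed a new outer header), so the dst attached to the skb
 * no longer matches: drop it and redo the input route lookup.
 */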
static int bpf_lwt_input_reroute(struct sk_buff *skb)
{
	int err = -EINVAL;

	if (skb->protocol == htons(ETH_P_IP)) {
		struct net_device *dev = skb_dst(skb)->dev;
		struct iphdr *iph = ip_hdr(skb);

		dev_hold(dev);
		skb_dst_drop(skb);
		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
					   iph->tos, dev);
		dev_put(dev);
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		skb_dst_drop(skb);
		err = ipv6_stub->ipv6_route_input(skb);
	} else {
		err = -EAFNOSUPPORT;
	}

	if (err)
		goto err;
	return dst_input(skb);

err:
	kfree_skb(skb);
	return err;
}

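/* lwtunnel input hook: run the LWT_BPF_IN program, then hand the skb back
 * to the original input handler unless the program dropped the packet or
 * requested a reroute.
 */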
static int bpf_input(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct bpf_lwt *bpf;
	int ret;

	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
	if (bpf->in.prog) {
		ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
		if (ret < 0)
			return ret;
		if (ret == BPF_LWT_REROUTE)
			return bpf_lwt_input_reroute(skb);
	}

	if (unlikely(!dst->lwtstate->orig_input)) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return dst->lwtstate->orig_input(skb);
}

static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct bpf_lwt *bpf;
	int ret;

	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
	if (bpf->out.prog) {
		ret = run_lwt_bpf(skb, &bpf->out, dst, NO_REDIRECT);
		if (ret < 0)
			return ret;
	}

	if (unlikely(!dst->lwtstate->orig_output)) {
		pr_warn_once("orig_output not set on dst for prog %s\n",
			     bpf->out.name);
		kfree_skb(skb);
		return -EINVAL;
	}

	return dst->lwtstate->orig_output(net, sk, skb);
}

static int xmit_check_hhlen(struct sk_buff *skb, int hh_len)
{
	if (skb_headroom(skb) < hh_len) {
		int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));

		if (pskb_expand_head(skb, nhead, 0, GFP_ATOMIC))
			return -ENOMEM;
	}

	return 0;
}

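/* BPF_LWT_REROUTE on the xmit hook: the program changed (typically pushed)
 * the outer IP header, so look up a fresh output route for the new header,
 * attach it to the skb and transmit via dst_output().
 */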
static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
{
	struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
	int oif = l3mdev ? l3mdev->ifindex : 0;
	struct dst_entry *dst = NULL;
	int err = -EAFNOSUPPORT;
	struct sock *sk;
	struct net *net;
	bool ipv4;

	if (skb->protocol == htons(ETH_P_IP))
		ipv4 = true;
	else if (skb->protocol == htons(ETH_P_IPV6))
		ipv4 = false;
	else
		goto err;

	sk = sk_to_full_sk(skb->sk);
	if (sk) {
		if (sk->sk_bound_dev_if)
			oif = sk->sk_bound_dev_if;
		net = sock_net(sk);
	} else {
		net = dev_net(skb_dst(skb)->dev);
	}

	if (ipv4) {
		struct iphdr *iph = ip_hdr(skb);
		struct flowi4 fl4 = {};
		struct rtable *rt;

		fl4.flowi4_oif = oif;
		fl4.flowi4_mark = skb->mark;
		fl4.flowi4_uid = sock_net_uid(net, sk);
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
		fl4.flowi4_proto = iph->protocol;
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;

		rt = ip_route_output_key(net, &fl4);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			goto err;
		}
		dst = &rt->dst;
	} else {
		struct ipv6hdr *iph6 = ipv6_hdr(skb);
		struct flowi6 fl6 = {};

		fl6.flowi6_oif = oif;
		fl6.flowi6_mark = skb->mark;
		fl6.flowi6_uid = sock_net_uid(net, sk);
		fl6.flowlabel = ip6_flowinfo(iph6);
		fl6.flowi6_proto = iph6->nexthdr;
		fl6.daddr = iph6->daddr;
		fl6.saddr = iph6->saddr;

		dst = ipv6_stub->ipv6_dst_lookup_flow(net, skb->sk, &fl6, NULL);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err;
		}
	}
	if (unlikely(dst->error)) {
		err = dst->error;
		dst_release(dst);
		goto err;
	}

	/* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
	 * was done for the previous dst, so we are doing it here again, in
	 * case the new dst needs much more space. The call below is a noop
	 * if there is enough header space in skb.
	 */
	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
	if (unlikely(err))
		goto err;

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
	if (unlikely(err))
		return net_xmit_errno(err);

	/* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
	return LWTUNNEL_XMIT_DONE;

err:
	kfree_skb(skb);
	return err;
}

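/* lwtunnel xmit hook: returns LWTUNNEL_XMIT_CONTINUE to let the caller keep
 * transmitting the skb, LWTUNNEL_XMIT_DONE when the skb has already been
 * consumed (redirected or rerouted), or a negative error.
 */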
static int bpf_xmit(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct bpf_lwt *bpf;

	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
	if (bpf->xmit.prog) {
		int hh_len = dst->dev->hard_header_len;
		__be16 proto = skb->protocol;
		int ret;

		ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
		switch (ret) {
		case BPF_OK:
			/* If the header changed, e.g. via bpf_lwt_push_encap,
			 * BPF_LWT_REROUTE below should have been used if the
			 * protocol was also changed.
			 */
			if (skb->protocol != proto) {
				kfree_skb(skb);
				return -EINVAL;
			}
			/* If the header was expanded, headroom might be too
			 * small for L2 header to come, expand as needed.
			 */
			ret = xmit_check_hhlen(skb, hh_len);
			if (unlikely(ret))
				return ret;

			return LWTUNNEL_XMIT_CONTINUE;
		case BPF_REDIRECT:
			return LWTUNNEL_XMIT_DONE;
		case BPF_LWT_REROUTE:
			return bpf_lwt_xmit_reroute(skb);
		default:
			return ret;
		}
	}

	return LWTUNNEL_XMIT_CONTINUE;
}

static void bpf_lwt_prog_destroy(struct bpf_lwt_prog *prog)
{
	if (prog->prog)
		bpf_prog_put(prog->prog);

	kfree(prog->name);
}

static void bpf_destroy_state(struct lwtunnel_state *lwt)
{
	struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt);

	bpf_lwt_prog_destroy(&bpf->in);
	bpf_lwt_prog_destroy(&bpf->out);
	bpf_lwt_prog_destroy(&bpf->xmit);
}

static const struct nla_policy bpf_prog_policy[LWT_BPF_PROG_MAX + 1] = {
	[LWT_BPF_PROG_FD]   = { .type = NLA_U32, },
	[LWT_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
				.len = MAX_PROG_NAME },
};

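/* Parse one nested LWT_BPF_{IN,OUT,XMIT} attribute: LWT_BPF_PROG_NAME is
 * kept for dumps and comparisons, LWT_BPF_PROG_FD is resolved to a
 * bpf_prog reference of the expected program type.
 */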
static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog,
			  enum bpf_prog_type type)
{
	struct nlattr *tb[LWT_BPF_PROG_MAX + 1];
	struct bpf_prog *p;
	int ret;
	u32 fd;

	ret = nla_parse_nested_deprecated(tb, LWT_BPF_PROG_MAX, attr,
					  bpf_prog_policy, NULL);
	if (ret < 0)
		return ret;

	if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME])
		return -EINVAL;

	prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_ATOMIC);
	if (!prog->name)
		return -ENOMEM;

	fd = nla_get_u32(tb[LWT_BPF_PROG_FD]);
	p = bpf_prog_get_type(fd, type);
	if (IS_ERR(p))
		return PTR_ERR(p);

	prog->prog = p;

	return 0;
}

static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = {
	[LWT_BPF_IN]		= { .type = NLA_NESTED, },
	[LWT_BPF_OUT]		= { .type = NLA_NESTED, },
	[LWT_BPF_XMIT]		= { .type = NLA_NESTED, },
	[LWT_BPF_XMIT_HEADROOM]	= { .type = NLA_U32 },
};

static int bpf_build_state(struct net *net, struct nlattr *nla,
			   unsigned int family, const void *cfg,
			   struct lwtunnel_state **ts,
			   struct netlink_ext_ack *extack)
{
	struct nlattr *tb[LWT_BPF_MAX + 1];
	struct lwtunnel_state *newts;
	struct bpf_lwt *bpf;
	int ret;

	if (family != AF_INET && family != AF_INET6)
		return -EAFNOSUPPORT;

	ret = nla_parse_nested_deprecated(tb, LWT_BPF_MAX, nla, bpf_nl_policy,
					  extack);
	if (ret < 0)
		return ret;

	if (!tb[LWT_BPF_IN] && !tb[LWT_BPF_OUT] && !tb[LWT_BPF_XMIT])
		return -EINVAL;

	newts = lwtunnel_state_alloc(sizeof(*bpf));
	if (!newts)
		return -ENOMEM;

	newts->type = LWTUNNEL_ENCAP_BPF;
	bpf = bpf_lwt_lwtunnel(newts);

	if (tb[LWT_BPF_IN]) {
		newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
		ret = bpf_parse_prog(tb[LWT_BPF_IN], &bpf->in,
				     BPF_PROG_TYPE_LWT_IN);
		if (ret < 0)
			goto errout;
	}

	if (tb[LWT_BPF_OUT]) {
		newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
		ret = bpf_parse_prog(tb[LWT_BPF_OUT], &bpf->out,
				     BPF_PROG_TYPE_LWT_OUT);
		if (ret < 0)
			goto errout;
	}

	if (tb[LWT_BPF_XMIT]) {
		newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
		ret = bpf_parse_prog(tb[LWT_BPF_XMIT], &bpf->xmit,
				     BPF_PROG_TYPE_LWT_XMIT);
		if (ret < 0)
			goto errout;
	}

	if (tb[LWT_BPF_XMIT_HEADROOM]) {
		u32 headroom = nla_get_u32(tb[LWT_BPF_XMIT_HEADROOM]);

		if (headroom > LWT_BPF_MAX_HEADROOM) {
			ret = -ERANGE;
			goto errout;
		}

		newts->headroom = headroom;
	}

	bpf->family = family;
	*ts = newts;

	return 0;

errout:
	bpf_destroy_state(newts);
	kfree(newts);
	return ret;
}

static int bpf_fill_lwt_prog(struct sk_buff *skb, int attr,
			     struct bpf_lwt_prog *prog)
{
	struct nlattr *nest;

	if (!prog->prog)
		return 0;

	nest = nla_nest_start_noflag(skb, attr);
	if (!nest)
		return -EMSGSIZE;

	if (prog->name &&
	    nla_put_string(skb, LWT_BPF_PROG_NAME, prog->name))
		return -EMSGSIZE;

	return nla_nest_end(skb, nest);
}

static int bpf_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwt)
{
	struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt);

	if (bpf_fill_lwt_prog(skb, LWT_BPF_IN, &bpf->in) < 0 ||
	    bpf_fill_lwt_prog(skb, LWT_BPF_OUT, &bpf->out) < 0 ||
	    bpf_fill_lwt_prog(skb, LWT_BPF_XMIT, &bpf->xmit) < 0)
		return -EMSGSIZE;

	return 0;
}

static int bpf_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	int nest_len = nla_total_size(sizeof(struct nlattr)) +
		       nla_total_size(MAX_PROG_NAME) + /* LWT_BPF_PROG_NAME */
		       0;

	return nest_len + /* LWT_BPF_IN */
	       nest_len + /* LWT_BPF_OUT */
	       nest_len + /* LWT_BPF_XMIT */
	       0;
}

static int bpf_lwt_prog_cmp(struct bpf_lwt_prog *a, struct bpf_lwt_prog *b)
{
	/* FIXME:
	 * The LWT state is currently rebuilt for delete requests which
	 * results in a new bpf_prog instance. Comparing names for now.
	 */
	if (!a->name && !b->name)
		return 0;

	if (!a->name || !b->name)
		return 1;

	return strcmp(a->name, b->name);
}

static int bpf_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	struct bpf_lwt *a_bpf = bpf_lwt_lwtunnel(a);
	struct bpf_lwt *b_bpf = bpf_lwt_lwtunnel(b);

	return bpf_lwt_prog_cmp(&a_bpf->in, &b_bpf->in) ||
	       bpf_lwt_prog_cmp(&a_bpf->out, &b_bpf->out) ||
	       bpf_lwt_prog_cmp(&a_bpf->xmit, &b_bpf->xmit);
}

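/* Ops registered for LWTUNNEL_ENCAP_BPF in bpf_lwt_init() below. With
 * iproute2 this encap is configured along the lines of (illustrative only,
 * exact syntax depends on the iproute2 version):
 *
 *   ip route add 192.0.2.0/24 encap bpf xmit obj prog.o section lwt_xmit \
 *           headroom 20 dev eth0
 */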
static const struct lwtunnel_encap_ops bpf_encap_ops = {
	.build_state	= bpf_build_state,
	.destroy_state	= bpf_destroy_state,
	.input		= bpf_input,
	.output		= bpf_output,
	.xmit		= bpf_xmit,
	.fill_encap	= bpf_fill_encap_info,
	.get_encap_size	= bpf_encap_nlsize,
	.cmp_encap	= bpf_encap_cmp,
	.owner		= THIS_MODULE,
};

static int handle_gso_type(struct sk_buff *skb, unsigned int gso_type,
			   int encap_len)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	gso_type |= SKB_GSO_DODGY;
	shinfo->gso_type |= gso_type;
	skb_decrease_gso_size(shinfo, encap_len);
	shinfo->gso_segs = 0;
	return 0;
}

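/* After pushing an outer IP header on a GSO skb, mark the matching tunnel
 * GSO type (GRE, UDP tunnel or IP-in-IP, with or without checksum) and let
 * handle_gso_type() above shrink gso_size by the encap length.
 */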
static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len)
{
	int next_hdr_offset;
	void *next_hdr;
	__u8 protocol;

	/* SCTP and UDP_L4 gso need more nuanced handling than what
	 * handle_gso_type() does above: skb_decrease_gso_size() is not enough.
	 * So at the moment only TCP GSO packets are let through.
	 */
	if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
		return -ENOTSUPP;

	if (ipv4) {
		protocol = ip_hdr(skb)->protocol;
		next_hdr_offset = sizeof(struct iphdr);
		next_hdr = skb_network_header(skb) + next_hdr_offset;
	} else {
		protocol = ipv6_hdr(skb)->nexthdr;
		next_hdr_offset = sizeof(struct ipv6hdr);
		next_hdr = skb_network_header(skb) + next_hdr_offset;
	}

	switch (protocol) {
	case IPPROTO_GRE:
		next_hdr_offset += sizeof(struct gre_base_hdr);
		if (next_hdr_offset > encap_len)
			return -EINVAL;

		if (((struct gre_base_hdr *)next_hdr)->flags & GRE_CSUM)
			return handle_gso_type(skb, SKB_GSO_GRE_CSUM,
					       encap_len);
		return handle_gso_type(skb, SKB_GSO_GRE, encap_len);

	case IPPROTO_UDP:
		next_hdr_offset += sizeof(struct udphdr);
		if (next_hdr_offset > encap_len)
			return -EINVAL;

		if (((struct udphdr *)next_hdr)->check)
			return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL_CSUM,
					       encap_len);
		return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL, encap_len);

	case IPPROTO_IP:
	case IPPROTO_IPV6:
		if (ipv4)
			return handle_gso_type(skb, SKB_GSO_IPXIP4, encap_len);
		else
			return handle_gso_type(skb, SKB_GSO_IPXIP6, encap_len);

	default:
		return -EPROTONOSUPPORT;
	}
}

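/* Push a caller-supplied outer IPv4/IPv6 header of @len bytes onto @skb and
 * fix up skb metadata, checksums and GSO info. Used for the IP encap mode
 * of the bpf_lwt_push_encap() helper (the helper itself lives outside this
 * file).
 */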
int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
{
	struct iphdr *iph;
	bool ipv4;
	int err;

	if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM))
		return -EINVAL;

	/* validate protocol and length */
	iph = (struct iphdr *)hdr;
	if (iph->version == 4) {
		ipv4 = true;
		if (unlikely(len < iph->ihl * 4))
			return -EINVAL;
	} else if (iph->version == 6) {
		ipv4 = false;
		if (unlikely(len < sizeof(struct ipv6hdr)))
			return -EINVAL;
	} else {
		return -EINVAL;
	}

	if (ingress)
		err = skb_cow_head(skb, len + skb->mac_len);
	else
		err = skb_cow_head(skb,
				   len + LL_RESERVED_SPACE(skb_dst(skb)->dev));
	if (unlikely(err))
		return err;

	/* push the encap headers and fix pointers */
	skb_reset_inner_headers(skb);
	skb_reset_inner_mac_header(skb);  /* mac header is not yet set */
	skb_set_inner_protocol(skb, skb->protocol);
	skb->encapsulation = 1;
	skb_push(skb, len);
	if (ingress)
		skb_postpush_rcsum(skb, iph, len);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), hdr, len);
	bpf_compute_data_pointers(skb);
	skb_clear_hash(skb);

	if (ipv4) {
		skb->protocol = htons(ETH_P_IP);
		iph = ip_hdr(skb);

		if (!iph->check)
			iph->check = ip_fast_csum((unsigned char *)iph,
						  iph->ihl);
	} else {
		skb->protocol = htons(ETH_P_IPV6);
	}

	if (skb_is_gso(skb))
		return handle_gso_encap(skb, ipv4, len);

	return 0;
}

static int __init bpf_lwt_init(void)
{
	return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);
}

subsys_initcall(bpf_lwt_init)