Commit | Line | Data |
---|---|---|
5b497af4 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
3a0af8fd | 2 | /* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch> |
3a0af8fd TG |
3 | */ |
4 | ||
5 | #include <linux/kernel.h> | |
6 | #include <linux/module.h> | |
7 | #include <linux/skbuff.h> | |
8 | #include <linux/types.h> | |
9 | #include <linux/bpf.h> | |
10 | #include <net/lwtunnel.h> | |
ca78801a | 11 | #include <net/gre.h> |
3bd0b152 | 12 | #include <net/ip6_route.h> |
3616d08b | 13 | #include <net/ipv6_stubs.h> |
3a0af8fd TG |
14 | |
/* One LWT hook slot: the attached BPF program and the name it was given. */
struct bpf_lwt_prog {
	/* Program to run for this hook; the hooks test it for NULL before use. */
	struct bpf_prog *prog;
	/* Heap copy of the netlink-supplied name; released with kfree(). */
	char *name;
};
19 | ||
20 | struct bpf_lwt { | |
21 | struct bpf_lwt_prog in; | |
22 | struct bpf_lwt_prog out; | |
23 | struct bpf_lwt_prog xmit; | |
24 | int family; | |
25 | }; | |
26 | ||
/* Length limit applied to the LWT_BPF_PROG_NAME netlink attribute. */
#define MAX_PROG_NAME 256
28 | ||
29 | static inline struct bpf_lwt *bpf_lwt_lwtunnel(struct lwtunnel_state *lwt) | |
30 | { | |
31 | return (struct bpf_lwt *)lwt->data; | |
32 | } | |
33 | ||
/* Readable values for the can_redirect argument of run_lwt_bpf(). */
#define NO_REDIRECT false
#define CAN_REDIRECT true
36 | ||
37 | static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, | |
38 | struct dst_entry *dst, bool can_redirect) | |
39 | { | |
40 | int ret; | |
41 | ||
42 | /* Preempt disable is needed to protect per-cpu redirect_info between | |
43 | * BPF prog and skb_do_redirect(). The call_rcu in bpf_prog_put() and | |
44 | * access to maps strictly require a rcu_read_lock() for protection, | |
45 | * mixing with BH RCU lock doesn't work. | |
46 | */ | |
47 | preempt_disable(); | |
6aaae2b6 | 48 | bpf_compute_data_pointers(skb); |
3a0af8fd | 49 | ret = bpf_prog_run_save_cb(lwt->prog, skb); |
3a0af8fd TG |
50 | |
51 | switch (ret) { | |
52 | case BPF_OK: | |
3bd0b152 | 53 | case BPF_LWT_REROUTE: |
3a0af8fd TG |
54 | break; |
55 | ||
56 | case BPF_REDIRECT: | |
57 | if (unlikely(!can_redirect)) { | |
58 | pr_warn_once("Illegal redirect return code in prog %s\n", | |
59 | lwt->name ? : "<unknown>"); | |
60 | ret = BPF_OK; | |
61 | } else { | |
e7c87bd6 | 62 | skb_reset_mac_header(skb); |
3a0af8fd TG |
63 | ret = skb_do_redirect(skb); |
64 | if (ret == 0) | |
65 | ret = BPF_REDIRECT; | |
66 | } | |
67 | break; | |
68 | ||
69 | case BPF_DROP: | |
70 | kfree_skb(skb); | |
71 | ret = -EPERM; | |
72 | break; | |
73 | ||
74 | default: | |
75 | pr_warn_once("bpf-lwt: Illegal return value %u, expect packet loss\n", ret); | |
76 | kfree_skb(skb); | |
77 | ret = -EINVAL; | |
78 | break; | |
79 | } | |
80 | ||
81 | preempt_enable(); | |
82 | ||
83 | return ret; | |
84 | } | |
85 | ||
3bd0b152 PO |
86 | static int bpf_lwt_input_reroute(struct sk_buff *skb) |
87 | { | |
88 | int err = -EINVAL; | |
89 | ||
90 | if (skb->protocol == htons(ETH_P_IP)) { | |
91 | struct iphdr *iph = ip_hdr(skb); | |
92 | ||
93 | err = ip_route_input_noref(skb, iph->daddr, iph->saddr, | |
94 | iph->tos, skb_dst(skb)->dev); | |
95 | } else if (skb->protocol == htons(ETH_P_IPV6)) { | |
96 | err = ipv6_stub->ipv6_route_input(skb); | |
97 | } else { | |
98 | err = -EAFNOSUPPORT; | |
99 | } | |
100 | ||
101 | if (err) | |
102 | goto err; | |
103 | return dst_input(skb); | |
104 | ||
105 | err: | |
106 | kfree_skb(skb); | |
107 | return err; | |
108 | } | |
109 | ||
3a0af8fd TG |
110 | static int bpf_input(struct sk_buff *skb) |
111 | { | |
112 | struct dst_entry *dst = skb_dst(skb); | |
113 | struct bpf_lwt *bpf; | |
114 | int ret; | |
115 | ||
116 | bpf = bpf_lwt_lwtunnel(dst->lwtstate); | |
117 | if (bpf->in.prog) { | |
118 | ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT); | |
119 | if (ret < 0) | |
120 | return ret; | |
3bd0b152 PO |
121 | if (ret == BPF_LWT_REROUTE) |
122 | return bpf_lwt_input_reroute(skb); | |
3a0af8fd TG |
123 | } |
124 | ||
125 | if (unlikely(!dst->lwtstate->orig_input)) { | |
3a0af8fd TG |
126 | kfree_skb(skb); |
127 | return -EINVAL; | |
128 | } | |
129 | ||
130 | return dst->lwtstate->orig_input(skb); | |
131 | } | |
132 | ||
133 | static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb) | |
134 | { | |
135 | struct dst_entry *dst = skb_dst(skb); | |
136 | struct bpf_lwt *bpf; | |
137 | int ret; | |
138 | ||
139 | bpf = bpf_lwt_lwtunnel(dst->lwtstate); | |
140 | if (bpf->out.prog) { | |
141 | ret = run_lwt_bpf(skb, &bpf->out, dst, NO_REDIRECT); | |
142 | if (ret < 0) | |
143 | return ret; | |
144 | } | |
145 | ||
146 | if (unlikely(!dst->lwtstate->orig_output)) { | |
147 | pr_warn_once("orig_output not set on dst for prog %s\n", | |
148 | bpf->out.name); | |
149 | kfree_skb(skb); | |
150 | return -EINVAL; | |
151 | } | |
152 | ||
153 | return dst->lwtstate->orig_output(net, sk, skb); | |
154 | } | |
155 | ||
156 | static int xmit_check_hhlen(struct sk_buff *skb) | |
157 | { | |
158 | int hh_len = skb_dst(skb)->dev->hard_header_len; | |
159 | ||
160 | if (skb_headroom(skb) < hh_len) { | |
161 | int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); | |
162 | ||
163 | if (pskb_expand_head(skb, nhead, 0, GFP_ATOMIC)) | |
164 | return -ENOMEM; | |
165 | } | |
166 | ||
167 | return 0; | |
168 | } | |
169 | ||
3bd0b152 PO |
170 | static int bpf_lwt_xmit_reroute(struct sk_buff *skb) |
171 | { | |
172 | struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev); | |
173 | int oif = l3mdev ? l3mdev->ifindex : 0; | |
174 | struct dst_entry *dst = NULL; | |
fb405883 | 175 | int err = -EAFNOSUPPORT; |
3bd0b152 PO |
176 | struct sock *sk; |
177 | struct net *net; | |
178 | bool ipv4; | |
3bd0b152 PO |
179 | |
180 | if (skb->protocol == htons(ETH_P_IP)) | |
181 | ipv4 = true; | |
182 | else if (skb->protocol == htons(ETH_P_IPV6)) | |
183 | ipv4 = false; | |
184 | else | |
fb405883 | 185 | goto err; |
3bd0b152 PO |
186 | |
187 | sk = sk_to_full_sk(skb->sk); | |
188 | if (sk) { | |
189 | if (sk->sk_bound_dev_if) | |
190 | oif = sk->sk_bound_dev_if; | |
191 | net = sock_net(sk); | |
192 | } else { | |
193 | net = dev_net(skb_dst(skb)->dev); | |
194 | } | |
195 | ||
196 | if (ipv4) { | |
197 | struct iphdr *iph = ip_hdr(skb); | |
198 | struct flowi4 fl4 = {}; | |
199 | struct rtable *rt; | |
200 | ||
201 | fl4.flowi4_oif = oif; | |
202 | fl4.flowi4_mark = skb->mark; | |
203 | fl4.flowi4_uid = sock_net_uid(net, sk); | |
204 | fl4.flowi4_tos = RT_TOS(iph->tos); | |
205 | fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; | |
206 | fl4.flowi4_proto = iph->protocol; | |
207 | fl4.daddr = iph->daddr; | |
208 | fl4.saddr = iph->saddr; | |
209 | ||
210 | rt = ip_route_output_key(net, &fl4); | |
fb405883 PO |
211 | if (IS_ERR(rt)) { |
212 | err = PTR_ERR(rt); | |
213 | goto err; | |
214 | } | |
3bd0b152 PO |
215 | dst = &rt->dst; |
216 | } else { | |
217 | struct ipv6hdr *iph6 = ipv6_hdr(skb); | |
218 | struct flowi6 fl6 = {}; | |
219 | ||
220 | fl6.flowi6_oif = oif; | |
221 | fl6.flowi6_mark = skb->mark; | |
222 | fl6.flowi6_uid = sock_net_uid(net, sk); | |
223 | fl6.flowlabel = ip6_flowinfo(iph6); | |
224 | fl6.flowi6_proto = iph6->nexthdr; | |
225 | fl6.daddr = iph6->daddr; | |
226 | fl6.saddr = iph6->saddr; | |
227 | ||
228 | err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6); | |
fb405883 PO |
229 | if (unlikely(err)) |
230 | goto err; | |
231 | if (IS_ERR(dst)) { | |
232 | err = PTR_ERR(dst); | |
233 | goto err; | |
234 | } | |
3bd0b152 PO |
235 | } |
236 | if (unlikely(dst->error)) { | |
fb405883 | 237 | err = dst->error; |
3bd0b152 | 238 | dst_release(dst); |
fb405883 | 239 | goto err; |
3bd0b152 PO |
240 | } |
241 | ||
242 | /* Although skb header was reserved in bpf_lwt_push_ip_encap(), it | |
243 | * was done for the previous dst, so we are doing it here again, in | |
244 | * case the new dst needs much more space. The call below is a noop | |
245 | * if there is enough header space in skb. | |
246 | */ | |
247 | err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); | |
248 | if (unlikely(err)) | |
fb405883 | 249 | goto err; |
3bd0b152 PO |
250 | |
251 | skb_dst_drop(skb); | |
252 | skb_dst_set(skb, dst); | |
253 | ||
254 | err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb); | |
255 | if (unlikely(err)) | |
bd16693f | 256 | return err; |
3bd0b152 PO |
257 | |
258 | /* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */ | |
259 | return LWTUNNEL_XMIT_DONE; | |
fb405883 PO |
260 | |
261 | err: | |
262 | kfree_skb(skb); | |
263 | return err; | |
3bd0b152 PO |
264 | } |
265 | ||
3a0af8fd TG |
266 | static int bpf_xmit(struct sk_buff *skb) |
267 | { | |
268 | struct dst_entry *dst = skb_dst(skb); | |
269 | struct bpf_lwt *bpf; | |
270 | ||
271 | bpf = bpf_lwt_lwtunnel(dst->lwtstate); | |
272 | if (bpf->xmit.prog) { | |
3bd0b152 | 273 | __be16 proto = skb->protocol; |
3a0af8fd TG |
274 | int ret; |
275 | ||
276 | ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT); | |
277 | switch (ret) { | |
278 | case BPF_OK: | |
3bd0b152 PO |
279 | /* If the header changed, e.g. via bpf_lwt_push_encap, |
280 | * BPF_LWT_REROUTE below should have been used if the | |
281 | * protocol was also changed. | |
282 | */ | |
283 | if (skb->protocol != proto) { | |
284 | kfree_skb(skb); | |
285 | return -EINVAL; | |
286 | } | |
3a0af8fd TG |
287 | /* If the header was expanded, headroom might be too |
288 | * small for L2 header to come, expand as needed. | |
289 | */ | |
290 | ret = xmit_check_hhlen(skb); | |
291 | if (unlikely(ret)) | |
292 | return ret; | |
293 | ||
294 | return LWTUNNEL_XMIT_CONTINUE; | |
295 | case BPF_REDIRECT: | |
296 | return LWTUNNEL_XMIT_DONE; | |
3bd0b152 PO |
297 | case BPF_LWT_REROUTE: |
298 | return bpf_lwt_xmit_reroute(skb); | |
3a0af8fd TG |
299 | default: |
300 | return ret; | |
301 | } | |
302 | } | |
303 | ||
304 | return LWTUNNEL_XMIT_CONTINUE; | |
305 | } | |
306 | ||
307 | static void bpf_lwt_prog_destroy(struct bpf_lwt_prog *prog) | |
308 | { | |
309 | if (prog->prog) | |
310 | bpf_prog_put(prog->prog); | |
311 | ||
312 | kfree(prog->name); | |
313 | } | |
314 | ||
315 | static void bpf_destroy_state(struct lwtunnel_state *lwt) | |
316 | { | |
317 | struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt); | |
318 | ||
319 | bpf_lwt_prog_destroy(&bpf->in); | |
320 | bpf_lwt_prog_destroy(&bpf->out); | |
321 | bpf_lwt_prog_destroy(&bpf->xmit); | |
322 | } | |
323 | ||
324 | static const struct nla_policy bpf_prog_policy[LWT_BPF_PROG_MAX + 1] = { | |
325 | [LWT_BPF_PROG_FD] = { .type = NLA_U32, }, | |
326 | [LWT_BPF_PROG_NAME] = { .type = NLA_NUL_STRING, | |
327 | .len = MAX_PROG_NAME }, | |
328 | }; | |
329 | ||
330 | static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog, | |
331 | enum bpf_prog_type type) | |
332 | { | |
333 | struct nlattr *tb[LWT_BPF_PROG_MAX + 1]; | |
334 | struct bpf_prog *p; | |
335 | int ret; | |
336 | u32 fd; | |
337 | ||
8cb08174 JB |
338 | ret = nla_parse_nested_deprecated(tb, LWT_BPF_PROG_MAX, attr, |
339 | bpf_prog_policy, NULL); | |
3a0af8fd TG |
340 | if (ret < 0) |
341 | return ret; | |
342 | ||
343 | if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME]) | |
344 | return -EINVAL; | |
345 | ||
71eb5255 | 346 | prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_ATOMIC); |
3a0af8fd TG |
347 | if (!prog->name) |
348 | return -ENOMEM; | |
349 | ||
350 | fd = nla_get_u32(tb[LWT_BPF_PROG_FD]); | |
351 | p = bpf_prog_get_type(fd, type); | |
352 | if (IS_ERR(p)) | |
353 | return PTR_ERR(p); | |
354 | ||
355 | prog->prog = p; | |
356 | ||
357 | return 0; | |
358 | } | |
359 | ||
360 | static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = { | |
361 | [LWT_BPF_IN] = { .type = NLA_NESTED, }, | |
362 | [LWT_BPF_OUT] = { .type = NLA_NESTED, }, | |
363 | [LWT_BPF_XMIT] = { .type = NLA_NESTED, }, | |
364 | [LWT_BPF_XMIT_HEADROOM] = { .type = NLA_U32 }, | |
365 | }; | |
366 | ||
30357d7d | 367 | static int bpf_build_state(struct nlattr *nla, |
3a0af8fd | 368 | unsigned int family, const void *cfg, |
9ae28727 DA |
369 | struct lwtunnel_state **ts, |
370 | struct netlink_ext_ack *extack) | |
3a0af8fd TG |
371 | { |
372 | struct nlattr *tb[LWT_BPF_MAX + 1]; | |
373 | struct lwtunnel_state *newts; | |
374 | struct bpf_lwt *bpf; | |
375 | int ret; | |
376 | ||
377 | if (family != AF_INET && family != AF_INET6) | |
378 | return -EAFNOSUPPORT; | |
379 | ||
8cb08174 JB |
380 | ret = nla_parse_nested_deprecated(tb, LWT_BPF_MAX, nla, bpf_nl_policy, |
381 | extack); | |
3a0af8fd TG |
382 | if (ret < 0) |
383 | return ret; | |
384 | ||
385 | if (!tb[LWT_BPF_IN] && !tb[LWT_BPF_OUT] && !tb[LWT_BPF_XMIT]) | |
386 | return -EINVAL; | |
387 | ||
388 | newts = lwtunnel_state_alloc(sizeof(*bpf)); | |
389 | if (!newts) | |
390 | return -ENOMEM; | |
391 | ||
392 | newts->type = LWTUNNEL_ENCAP_BPF; | |
393 | bpf = bpf_lwt_lwtunnel(newts); | |
394 | ||
395 | if (tb[LWT_BPF_IN]) { | |
396 | newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; | |
397 | ret = bpf_parse_prog(tb[LWT_BPF_IN], &bpf->in, | |
398 | BPF_PROG_TYPE_LWT_IN); | |
399 | if (ret < 0) | |
400 | goto errout; | |
401 | } | |
402 | ||
403 | if (tb[LWT_BPF_OUT]) { | |
404 | newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; | |
405 | ret = bpf_parse_prog(tb[LWT_BPF_OUT], &bpf->out, | |
406 | BPF_PROG_TYPE_LWT_OUT); | |
407 | if (ret < 0) | |
408 | goto errout; | |
409 | } | |
410 | ||
411 | if (tb[LWT_BPF_XMIT]) { | |
412 | newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT; | |
413 | ret = bpf_parse_prog(tb[LWT_BPF_XMIT], &bpf->xmit, | |
414 | BPF_PROG_TYPE_LWT_XMIT); | |
415 | if (ret < 0) | |
416 | goto errout; | |
417 | } | |
418 | ||
419 | if (tb[LWT_BPF_XMIT_HEADROOM]) { | |
420 | u32 headroom = nla_get_u32(tb[LWT_BPF_XMIT_HEADROOM]); | |
421 | ||
422 | if (headroom > LWT_BPF_MAX_HEADROOM) { | |
423 | ret = -ERANGE; | |
424 | goto errout; | |
425 | } | |
426 | ||
427 | newts->headroom = headroom; | |
428 | } | |
429 | ||
430 | bpf->family = family; | |
431 | *ts = newts; | |
432 | ||
433 | return 0; | |
434 | ||
435 | errout: | |
436 | bpf_destroy_state(newts); | |
437 | kfree(newts); | |
438 | return ret; | |
439 | } | |
440 | ||
441 | static int bpf_fill_lwt_prog(struct sk_buff *skb, int attr, | |
442 | struct bpf_lwt_prog *prog) | |
443 | { | |
444 | struct nlattr *nest; | |
445 | ||
446 | if (!prog->prog) | |
447 | return 0; | |
448 | ||
ae0be8de | 449 | nest = nla_nest_start_noflag(skb, attr); |
3a0af8fd TG |
450 | if (!nest) |
451 | return -EMSGSIZE; | |
452 | ||
453 | if (prog->name && | |
454 | nla_put_string(skb, LWT_BPF_PROG_NAME, prog->name)) | |
455 | return -EMSGSIZE; | |
456 | ||
457 | return nla_nest_end(skb, nest); | |
458 | } | |
459 | ||
460 | static int bpf_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwt) | |
461 | { | |
462 | struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt); | |
463 | ||
464 | if (bpf_fill_lwt_prog(skb, LWT_BPF_IN, &bpf->in) < 0 || | |
465 | bpf_fill_lwt_prog(skb, LWT_BPF_OUT, &bpf->out) < 0 || | |
466 | bpf_fill_lwt_prog(skb, LWT_BPF_XMIT, &bpf->xmit) < 0) | |
467 | return -EMSGSIZE; | |
468 | ||
469 | return 0; | |
470 | } | |
471 | ||
472 | static int bpf_encap_nlsize(struct lwtunnel_state *lwtstate) | |
473 | { | |
474 | int nest_len = nla_total_size(sizeof(struct nlattr)) + | |
475 | nla_total_size(MAX_PROG_NAME) + /* LWT_BPF_PROG_NAME */ | |
476 | 0; | |
477 | ||
478 | return nest_len + /* LWT_BPF_IN */ | |
479 | nest_len + /* LWT_BPF_OUT */ | |
480 | nest_len + /* LWT_BPF_XMIT */ | |
481 | 0; | |
482 | } | |
483 | ||
79471b10 | 484 | static int bpf_lwt_prog_cmp(struct bpf_lwt_prog *a, struct bpf_lwt_prog *b) |
3a0af8fd TG |
485 | { |
486 | /* FIXME: | |
487 | * The LWT state is currently rebuilt for delete requests which | |
488 | * results in a new bpf_prog instance. Comparing names for now. | |
489 | */ | |
490 | if (!a->name && !b->name) | |
491 | return 0; | |
492 | ||
493 | if (!a->name || !b->name) | |
494 | return 1; | |
495 | ||
496 | return strcmp(a->name, b->name); | |
497 | } | |
498 | ||
499 | static int bpf_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) | |
500 | { | |
501 | struct bpf_lwt *a_bpf = bpf_lwt_lwtunnel(a); | |
502 | struct bpf_lwt *b_bpf = bpf_lwt_lwtunnel(b); | |
503 | ||
504 | return bpf_lwt_prog_cmp(&a_bpf->in, &b_bpf->in) || | |
505 | bpf_lwt_prog_cmp(&a_bpf->out, &b_bpf->out) || | |
506 | bpf_lwt_prog_cmp(&a_bpf->xmit, &b_bpf->xmit); | |
507 | } | |
508 | ||
509 | static const struct lwtunnel_encap_ops bpf_encap_ops = { | |
510 | .build_state = bpf_build_state, | |
511 | .destroy_state = bpf_destroy_state, | |
512 | .input = bpf_input, | |
513 | .output = bpf_output, | |
514 | .xmit = bpf_xmit, | |
515 | .fill_encap = bpf_fill_encap_info, | |
516 | .get_encap_size = bpf_encap_nlsize, | |
517 | .cmp_encap = bpf_encap_cmp, | |
88ff7334 | 518 | .owner = THIS_MODULE, |
3a0af8fd TG |
519 | }; |
520 | ||
ca78801a PO |
521 | static int handle_gso_type(struct sk_buff *skb, unsigned int gso_type, |
522 | int encap_len) | |
523 | { | |
524 | struct skb_shared_info *shinfo = skb_shinfo(skb); | |
525 | ||
526 | gso_type |= SKB_GSO_DODGY; | |
527 | shinfo->gso_type |= gso_type; | |
528 | skb_decrease_gso_size(shinfo, encap_len); | |
529 | shinfo->gso_segs = 0; | |
530 | return 0; | |
531 | } | |
532 | ||
52f27877 PO |
533 | static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len) |
534 | { | |
ca78801a PO |
535 | int next_hdr_offset; |
536 | void *next_hdr; | |
537 | __u8 protocol; | |
538 | ||
539 | /* SCTP and UDP_L4 gso need more nuanced handling than what | |
540 | * handle_gso_type() does above: skb_decrease_gso_size() is not enough. | |
541 | * So at the moment only TCP GSO packets are let through. | |
542 | */ | |
543 | if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) | |
544 | return -ENOTSUPP; | |
545 | ||
546 | if (ipv4) { | |
547 | protocol = ip_hdr(skb)->protocol; | |
548 | next_hdr_offset = sizeof(struct iphdr); | |
549 | next_hdr = skb_network_header(skb) + next_hdr_offset; | |
550 | } else { | |
551 | protocol = ipv6_hdr(skb)->nexthdr; | |
552 | next_hdr_offset = sizeof(struct ipv6hdr); | |
553 | next_hdr = skb_network_header(skb) + next_hdr_offset; | |
554 | } | |
555 | ||
556 | switch (protocol) { | |
557 | case IPPROTO_GRE: | |
558 | next_hdr_offset += sizeof(struct gre_base_hdr); | |
559 | if (next_hdr_offset > encap_len) | |
560 | return -EINVAL; | |
561 | ||
562 | if (((struct gre_base_hdr *)next_hdr)->flags & GRE_CSUM) | |
563 | return handle_gso_type(skb, SKB_GSO_GRE_CSUM, | |
564 | encap_len); | |
565 | return handle_gso_type(skb, SKB_GSO_GRE, encap_len); | |
566 | ||
567 | case IPPROTO_UDP: | |
568 | next_hdr_offset += sizeof(struct udphdr); | |
569 | if (next_hdr_offset > encap_len) | |
570 | return -EINVAL; | |
571 | ||
572 | if (((struct udphdr *)next_hdr)->check) | |
573 | return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL_CSUM, | |
574 | encap_len); | |
575 | return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL, encap_len); | |
576 | ||
577 | case IPPROTO_IP: | |
578 | case IPPROTO_IPV6: | |
579 | if (ipv4) | |
580 | return handle_gso_type(skb, SKB_GSO_IPXIP4, encap_len); | |
581 | else | |
582 | return handle_gso_type(skb, SKB_GSO_IPXIP6, encap_len); | |
583 | ||
584 | default: | |
585 | return -EPROTONOSUPPORT; | |
586 | } | |
52f27877 PO |
587 | } |
588 | ||
589 | int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress) | |
590 | { | |
591 | struct iphdr *iph; | |
592 | bool ipv4; | |
593 | int err; | |
594 | ||
595 | if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM)) | |
596 | return -EINVAL; | |
597 | ||
598 | /* validate protocol and length */ | |
599 | iph = (struct iphdr *)hdr; | |
600 | if (iph->version == 4) { | |
601 | ipv4 = true; | |
602 | if (unlikely(len < iph->ihl * 4)) | |
603 | return -EINVAL; | |
604 | } else if (iph->version == 6) { | |
605 | ipv4 = false; | |
606 | if (unlikely(len < sizeof(struct ipv6hdr))) | |
607 | return -EINVAL; | |
608 | } else { | |
609 | return -EINVAL; | |
610 | } | |
611 | ||
612 | if (ingress) | |
613 | err = skb_cow_head(skb, len + skb->mac_len); | |
614 | else | |
615 | err = skb_cow_head(skb, | |
616 | len + LL_RESERVED_SPACE(skb_dst(skb)->dev)); | |
617 | if (unlikely(err)) | |
618 | return err; | |
619 | ||
620 | /* push the encap headers and fix pointers */ | |
621 | skb_reset_inner_headers(skb); | |
ea0371f7 PO |
622 | skb_reset_inner_mac_header(skb); /* mac header is not yet set */ |
623 | skb_set_inner_protocol(skb, skb->protocol); | |
52f27877 PO |
624 | skb->encapsulation = 1; |
625 | skb_push(skb, len); | |
626 | if (ingress) | |
627 | skb_postpush_rcsum(skb, iph, len); | |
628 | skb_reset_network_header(skb); | |
629 | memcpy(skb_network_header(skb), hdr, len); | |
630 | bpf_compute_data_pointers(skb); | |
631 | skb_clear_hash(skb); | |
632 | ||
633 | if (ipv4) { | |
634 | skb->protocol = htons(ETH_P_IP); | |
635 | iph = ip_hdr(skb); | |
636 | ||
637 | if (!iph->check) | |
638 | iph->check = ip_fast_csum((unsigned char *)iph, | |
639 | iph->ihl); | |
640 | } else { | |
641 | skb->protocol = htons(ETH_P_IPV6); | |
642 | } | |
643 | ||
644 | if (skb_is_gso(skb)) | |
645 | return handle_gso_encap(skb, ipv4, len); | |
646 | ||
647 | return 0; | |
648 | } | |
649 | ||
3a0af8fd TG |
650 | static int __init bpf_lwt_init(void) |
651 | { | |
652 | return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF); | |
653 | } | |
654 | ||
655 | subsys_initcall(bpf_lwt_init) |