Commit | Line | Data |
---|---|---|
fe616055 DA |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com> | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of version 2 of the GNU General Public | |
6 | * License as published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, but | |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | */ | |
13 | #define KBUILD_MODNAME "foo" | |
14 | #include <uapi/linux/bpf.h> | |
15 | #include <linux/in.h> | |
16 | #include <linux/if_ether.h> | |
17 | #include <linux/if_packet.h> | |
18 | #include <linux/if_vlan.h> | |
19 | #include <linux/ip.h> | |
20 | #include <linux/ipv6.h> | |
21 | ||
22 | #include "bpf_helpers.h" | |
23 | ||
24 | #define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) | |
25 | ||
3783d437 JDB |
26 | /* For TX-traffic redirect requires net_device ifindex to be in this devmap */ |
27 | struct bpf_map_def SEC("maps") xdp_tx_ports = { | |
fe616055 DA |
28 | .type = BPF_MAP_TYPE_DEVMAP, |
29 | .key_size = sizeof(int), | |
30 | .value_size = sizeof(int), | |
31 | .max_entries = 64, | |
32 | }; | |
33 | ||
44edef77 DA |
34 | /* from include/net/ip.h */ |
35 | static __always_inline int ip_decrease_ttl(struct iphdr *iph) | |
36 | { | |
37 | u32 check = (__force u32)iph->check; | |
38 | ||
39 | check += (__force u32)htons(0x0100); | |
40 | iph->check = (__force __sum16)(check + (check >= 0xFFFF)); | |
41 | return --iph->ttl; | |
42 | } | |
43 | ||
fe616055 DA |
44 | static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) |
45 | { | |
46 | void *data_end = (void *)(long)ctx->data_end; | |
47 | void *data = (void *)(long)ctx->data; | |
48 | struct bpf_fib_lookup fib_params; | |
49 | struct ethhdr *eth = data; | |
44edef77 DA |
50 | struct ipv6hdr *ip6h; |
51 | struct iphdr *iph; | |
fe616055 DA |
52 | u16 h_proto; |
53 | u64 nh_off; | |
4c79579b | 54 | int rc; |
fe616055 DA |
55 | |
56 | nh_off = sizeof(*eth); | |
57 | if (data + nh_off > data_end) | |
58 | return XDP_DROP; | |
59 | ||
60 | __builtin_memset(&fib_params, 0, sizeof(fib_params)); | |
61 | ||
62 | h_proto = eth->h_proto; | |
63 | if (h_proto == htons(ETH_P_IP)) { | |
44edef77 | 64 | iph = data + nh_off; |
fe616055 DA |
65 | |
66 | if (iph + 1 > data_end) | |
67 | return XDP_DROP; | |
68 | ||
44edef77 DA |
69 | if (iph->ttl <= 1) |
70 | return XDP_PASS; | |
71 | ||
fe616055 DA |
72 | fib_params.family = AF_INET; |
73 | fib_params.tos = iph->tos; | |
74 | fib_params.l4_protocol = iph->protocol; | |
75 | fib_params.sport = 0; | |
76 | fib_params.dport = 0; | |
77 | fib_params.tot_len = ntohs(iph->tot_len); | |
78 | fib_params.ipv4_src = iph->saddr; | |
79 | fib_params.ipv4_dst = iph->daddr; | |
80 | } else if (h_proto == htons(ETH_P_IPV6)) { | |
81 | struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src; | |
82 | struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst; | |
fe616055 | 83 | |
44edef77 DA |
84 | ip6h = data + nh_off; |
85 | if (ip6h + 1 > data_end) | |
fe616055 DA |
86 | return XDP_DROP; |
87 | ||
44edef77 DA |
88 | if (ip6h->hop_limit <= 1) |
89 | return XDP_PASS; | |
90 | ||
fe616055 | 91 | fib_params.family = AF_INET6; |
bd3a08aa | 92 | fib_params.flowinfo = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK; |
44edef77 | 93 | fib_params.l4_protocol = ip6h->nexthdr; |
fe616055 DA |
94 | fib_params.sport = 0; |
95 | fib_params.dport = 0; | |
44edef77 DA |
96 | fib_params.tot_len = ntohs(ip6h->payload_len); |
97 | *src = ip6h->saddr; | |
98 | *dst = ip6h->daddr; | |
fe616055 DA |
99 | } else { |
100 | return XDP_PASS; | |
101 | } | |
102 | ||
103 | fib_params.ifindex = ctx->ingress_ifindex; | |
104 | ||
4c79579b | 105 | rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags); |
abcce733 JDB |
106 | /* |
107 | * Some rc (return codes) from bpf_fib_lookup() are important, | |
108 | * to understand how this XDP-prog interacts with network stack. | |
109 | * | |
110 | * BPF_FIB_LKUP_RET_NO_NEIGH: | |
111 | * Even if route lookup was a success, then the MAC-addresses are also | |
112 | * needed. This is obtained from arp/neighbour table, but if table is | |
113 | * (still) empty then BPF_FIB_LKUP_RET_NO_NEIGH is returned. To avoid | |
114 | * doing ARP lookup directly from XDP, then send packet to normal | |
115 | * network stack via XDP_PASS and expect it will do ARP resolution. | |
116 | * | |
117 | * BPF_FIB_LKUP_RET_FWD_DISABLED: | |
118 | * The bpf_fib_lookup respect sysctl net.ipv{4,6}.conf.all.forwarding | |
119 | * setting, and will return BPF_FIB_LKUP_RET_FWD_DISABLED if not | |
120 | * enabled this on ingress device. | |
121 | */ | |
122 | if (rc == BPF_FIB_LKUP_RET_SUCCESS) { | |
a32a32cb JDB |
123 | /* Verify egress index has been configured as TX-port. |
124 | * (Note: User can still have inserted an egress ifindex that | |
125 | * doesn't support XDP xmit, which will result in packet drops). | |
126 | * | |
127 | * Note: lookup in devmap supported since 0cdbb4b09a0. | |
128 | * If not supported will fail with: | |
129 | * cannot pass map_type 14 into func bpf_map_lookup_elem#1: | |
130 | */ | |
131 | if (!bpf_map_lookup_elem(&xdp_tx_ports, &fib_params.ifindex)) | |
132 | return XDP_PASS; | |
133 | ||
44edef77 DA |
134 | if (h_proto == htons(ETH_P_IP)) |
135 | ip_decrease_ttl(iph); | |
136 | else if (h_proto == htons(ETH_P_IPV6)) | |
137 | ip6h->hop_limit--; | |
138 | ||
fe616055 DA |
139 | memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); |
140 | memcpy(eth->h_source, fib_params.smac, ETH_ALEN); | |
3783d437 | 141 | return bpf_redirect_map(&xdp_tx_ports, fib_params.ifindex, 0); |
fe616055 DA |
142 | } |
143 | ||
144 | return XDP_PASS; | |
145 | } | |
146 | ||
147 | SEC("xdp_fwd") | |
148 | int xdp_fwd_prog(struct xdp_md *ctx) | |
149 | { | |
150 | return xdp_fwd_flags(ctx, 0); | |
151 | } | |
152 | ||
153 | SEC("xdp_fwd_direct") | |
154 | int xdp_fwd_direct_prog(struct xdp_md *ctx) | |
155 | { | |
156 | return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT); | |
157 | } | |
158 | ||
159 | char _license[] SEC("license") = "GPL"; |