/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include "vmlinux.h"
#include "xdp_sample.bpf.h"
#include "xdp_sample_shared.h"
#include "hash_func01.h"

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
        __uint(type, BPF_MAP_TYPE_CPUMAP);
        __uint(key_size, sizeof(u32));
        __uint(value_size, sizeof(struct bpf_cpumap_val));
} cpu_map SEC(".maps");
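
/* Note: cpu_map entries are created from user space before any redirect
 * can succeed.  A minimal sketch of that side (illustrative only, not
 * part of this file; the qsize and CPU number are placeholder values):
 *
 *      struct bpf_cpumap_val val = { .qsize = 2048 };
 *      __u32 cpu = 2;
 *      bpf_map_update_elem(cpu_map_fd, &cpu, &val, 0);
 *
 * Optionally val.bpf_prog.fd can point at one of the "xdp/cpumap"
 * programs further down, which then runs on the destination CPU.
 */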
/* Set of maps controlling available CPU, and for iterating through
 * selectable redirect CPUs.
 */
struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __type(key, u32);
        __type(value, u32);
} cpus_available SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __type(key, u32);
        __type(value, u32);
        __uint(max_entries, 1);
} cpus_count SEC(".maps");

struct {
        __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
        __type(key, u32);
        __type(value, u32);
        __uint(max_entries, 1);
} cpus_iterator SEC(".maps");
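
/* How these maps fit together: cpus_available[i] holds the CPU number
 * for redirect slot i, cpus_count says how many slots are valid, and
 * cpus_iterator remembers the current round-robin position.  Since
 * cpus_iterator is a PERCPU_ARRAY, each RX CPU rotates independently.
 * All of them are expected to be sized and filled from user space.
 */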
struct {
        __uint(type, BPF_MAP_TYPE_DEVMAP);
        __uint(key_size, sizeof(int));
        __uint(value_size, sizeof(struct bpf_devmap_val));
        __uint(max_entries, 1);
} tx_port SEC(".maps");
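
/* tx_port is a single-slot devmap.  User space is expected to fill slot
 * 0 with a struct bpf_devmap_val naming the egress ifindex (and, if
 * wanted, an egress program fd) before xdp_redirect_cpu_devmap is used;
 * wiring that up is the loader's job, not this file's.
 */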
char tx_mac_addr[ETH_ALEN];

/* Helper parse functions */

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
               u16 *eth_proto, u64 *l3_offset)
{
        u16 eth_type;
        u64 offset;

        offset = sizeof(*eth);
        if ((void *)eth + offset > data_end)
                return false;

        eth_type = eth->h_proto;

        /* Skip non 802.3 Ethertypes */
        if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
                return false;

        /* Handle VLAN tagged packet */
        if (eth_type == bpf_htons(ETH_P_8021Q) ||
            eth_type == bpf_htons(ETH_P_8021AD)) {
                struct vlan_hdr *vlan_hdr;

                vlan_hdr = (void *)eth + offset;
                offset += sizeof(*vlan_hdr);
                if ((void *)eth + offset > data_end)
                        return false;
                eth_type = vlan_hdr->h_vlan_encapsulated_proto;
        }
        /* Handle double VLAN tagged packet */
        if (eth_type == bpf_htons(ETH_P_8021Q) ||
            eth_type == bpf_htons(ETH_P_8021AD)) {
                struct vlan_hdr *vlan_hdr;

                vlan_hdr = (void *)eth + offset;
                offset += sizeof(*vlan_hdr);
                if ((void *)eth + offset > data_end)
                        return false;
                eth_type = vlan_hdr->h_vlan_encapsulated_proto;
        }

        *eth_proto = bpf_ntohs(eth_type);
        *l3_offset = offset;
        return true;
}
static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct iphdr *iph = data + nh_off;
        struct udphdr *udph;

        if (iph + 1 > data_end)
                return 0;
        if (!(iph->protocol == IPPROTO_UDP))
                return 0;

        udph = (void *)(iph + 1);
        if (udph + 1 > data_end)
                return 0;

        return bpf_ntohs(udph->dest);
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct iphdr *iph = data + nh_off;

        if (iph + 1 > data_end)
                return 0;
        return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct ipv6hdr *ip6h = data + nh_off;

        if (ip6h + 1 > data_end)
                return 0;
        return ip6h->nexthdr;
}
SEC("xdp")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
        u32 key = bpf_get_smp_processor_id();
        struct datarec *rec;
        u32 *cpu_selected;
        u32 cpu_dest = 0;
        u32 key0 = 0;

        /* Only use first entry in cpus_available */
        cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
        if (!cpu_selected)
                return XDP_ABORTED;
        cpu_dest = *cpu_selected;

        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_PASS;
        NO_TEAR_INC(rec->processed);

        if (cpu_dest >= nr_cpus) {
                NO_TEAR_INC(rec->issue);
                return XDP_ABORTED;
        }
        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
SEC("xdp")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        u32 key = bpf_get_smp_processor_id();
        struct ethhdr *eth = data;
        struct datarec *rec;
        u32 *cpu_selected;
        u32 cpu_dest = 0;
        u32 key0 = 0;
        u16 eth_type;

        /* Only use first entry in cpus_available */
        cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
        if (!cpu_selected)
                return XDP_ABORTED;
        cpu_dest = *cpu_selected;

        /* Validate packet length is minimum Eth header size */
        if (eth + 1 > data_end)
                return XDP_ABORTED;

        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_PASS;
        NO_TEAR_INC(rec->processed);

        /* Read packet data, and use it (drop non 802.3 Ethertypes) */
        eth_type = eth->h_proto;
        if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
                NO_TEAR_INC(rec->dropped);
                return XDP_DROP;
        }

        if (cpu_dest >= nr_cpus) {
                NO_TEAR_INC(rec->issue);
                return XDP_ABORTED;
        }
        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
SEC("xdp")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        u32 key = bpf_get_smp_processor_id();
        struct datarec *rec;
        u32 cpu_dest = 0;
        u32 key0 = 0;

        u32 *cpu_selected;
        u32 *cpu_iterator;
        u32 *cpu_max;
        u32 cpu_idx;

        cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
        if (!cpu_max)
                return XDP_ABORTED;

        cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
        if (!cpu_iterator)
                return XDP_ABORTED;
        cpu_idx = *cpu_iterator;

        *cpu_iterator += 1;
        if (*cpu_iterator == *cpu_max)
                *cpu_iterator = 0;

        cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_selected)
                return XDP_ABORTED;
        cpu_dest = *cpu_selected;

        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_PASS;
        NO_TEAR_INC(rec->processed);

        if (cpu_dest >= nr_cpus) {
                NO_TEAR_INC(rec->issue);
                return XDP_ABORTED;
        }
        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
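
/* Note on the round-robin state above: cpus_iterator is a PERCPU_ARRAY,
 * so the read-modify-write of *cpu_iterator is only ever done by the CPU
 * that owns that per-CPU slot; no atomics are needed, and every RX CPU
 * cycles through the configured redirect CPUs on its own.
 */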
SEC("xdp")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        u32 key = bpf_get_smp_processor_id();
        struct ethhdr *eth = data;
        u8 ip_proto = IPPROTO_UDP;
        struct datarec *rec;
        u16 eth_proto = 0;
        u64 l3_offset = 0;
        u32 cpu_dest = 0;
        u32 *cpu_lookup;
        u32 cpu_idx = 0;

        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_PASS;
        NO_TEAR_INC(rec->processed);

        if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
                return XDP_PASS; /* Just skip */

        /* Extract L4 protocol */
        switch (eth_proto) {
        case ETH_P_IP:
                ip_proto = get_proto_ipv4(ctx, l3_offset);
                break;
        case ETH_P_IPV6:
                ip_proto = get_proto_ipv6(ctx, l3_offset);
                break;
        case ETH_P_ARP:
                cpu_idx = 0; /* ARP packet handled on separate CPU */
                break;
        default:
                cpu_idx = 0;
        }

        /* Choose CPU based on L4 protocol */
        switch (ip_proto) {
        case IPPROTO_ICMP:
        case IPPROTO_ICMPV6:
                cpu_idx = 2;
                break;
        case IPPROTO_TCP:
                cpu_idx = 0;
                break;
        case IPPROTO_UDP:
                cpu_idx = 1;
                break;
        default:
                cpu_idx = 0;
        }

        cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_lookup)
                return XDP_ABORTED;
        cpu_dest = *cpu_lookup;

        if (cpu_dest >= nr_cpus) {
                NO_TEAR_INC(rec->issue);
                return XDP_ABORTED;
        }
        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
SEC("xdp")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        u32 key = bpf_get_smp_processor_id();
        struct ethhdr *eth = data;
        u8 ip_proto = IPPROTO_UDP;
        struct datarec *rec;
        u16 eth_proto = 0;
        u64 l3_offset = 0;
        u32 cpu_dest = 0;
        u32 *cpu_lookup;
        u32 cpu_idx = 0;
        u16 dest_port;

        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_PASS;
        NO_TEAR_INC(rec->processed);

        if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
                return XDP_PASS; /* Just skip */

        /* Extract L4 protocol */
        switch (eth_proto) {
        case ETH_P_IP:
                ip_proto = get_proto_ipv4(ctx, l3_offset);
                break;
        case ETH_P_IPV6:
                ip_proto = get_proto_ipv6(ctx, l3_offset);
                break;
        case ETH_P_ARP:
                cpu_idx = 0; /* ARP packet handled on separate CPU */
                break;
        default:
                cpu_idx = 0;
        }

        /* Choose CPU based on L4 protocol */
        switch (ip_proto) {
        case IPPROTO_ICMP:
        case IPPROTO_ICMPV6:
                cpu_idx = 2;
                break;
        case IPPROTO_TCP:
                cpu_idx = 0;
                break;
        case IPPROTO_UDP:
                cpu_idx = 1;
                /* DDoS filter UDP port 9 (pktgen) */
                dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
                if (dest_port == 9) {
                        NO_TEAR_INC(rec->dropped);
                        return XDP_DROP;
                }
                break;
        default:
                cpu_idx = 0;
        }

        cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_lookup)
                return XDP_ABORTED;
        cpu_dest = *cpu_lookup;

        if (cpu_dest >= nr_cpus) {
                NO_TEAR_INC(rec->issue);
                return XDP_ABORTED;
        }
        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
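
/* About the UDP dest-port 9 match above: port 9 is the "discard"
 * service and is the default destination port used by the kernel pktgen
 * tool, so this program models absorbing a pktgen-generated flood while
 * still spreading the remaining traffic across CPUs.
 */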
/* Hashing initval */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct iphdr *iph = data + nh_off;
        u32 cpu_hash;

        if (iph + 1 > data_end)
                return 0;

        cpu_hash = iph->saddr + iph->daddr;
        cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

        return cpu_hash;
}
static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct ipv6hdr *ip6h = data + nh_off;
        u32 cpu_hash;

        if (ip6h + 1 > data_end)
                return 0;

        cpu_hash  = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
        cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
        cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
        cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
        cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

        return cpu_hash;
}
/* Load-Balance traffic based on hashing IP-addrs + L4-proto.  The
 * hashing scheme is symmetric, meaning swapping IP src/dest still hit
 * same CPU.
 */
SEC("xdp")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        u32 key = bpf_get_smp_processor_id();
        struct ethhdr *eth = data;
        struct datarec *rec;
        u16 eth_proto = 0;
        u64 l3_offset = 0;
        u32 cpu_dest = 0;
        u32 cpu_idx = 0;
        u32 *cpu_lookup;
        u32 key0 = 0;
        u32 *cpu_max;
        u32 cpu_hash;

        rec = bpf_map_lookup_elem(&rx_cnt, &key);
        if (!rec)
                return XDP_PASS;
        NO_TEAR_INC(rec->processed);

        cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
        if (!cpu_max)
                return XDP_ABORTED;

        if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
                return XDP_PASS; /* Just skip */

        /* Hash for IPv4 and IPv6 */
        switch (eth_proto) {
        case ETH_P_IP:
                cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
                break;
        case ETH_P_IPV6:
                cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
                break;
        case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
        default:
                cpu_hash = 0;
        }

        /* Choose CPU based on hash */
        cpu_idx = cpu_hash % *cpu_max;

        cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
        if (!cpu_lookup)
                return XDP_ABORTED;
        cpu_dest = *cpu_lookup;

        if (cpu_dest >= nr_cpus) {
                NO_TEAR_INC(rec->issue);
                return XDP_ABORTED;
        }
        return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
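
/* Because the hash covers only the IP addresses plus the IP protocol
 * (no L4 ports), all flows between the same pair of hosts land on the
 * same CPU, and the symmetric saddr + daddr sum means both traffic
 * directions hash to the same value.
 */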
/* Second-stage programs: these run in cpumap/devmap context, i.e. only
 * after a frame has been redirected there by one of the programs above.
 */
SEC("xdp/cpumap")
int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct ethhdr *eth = data;
        u64 nh_off;

        nh_off = sizeof(*eth);
        if (data + nh_off > data_end)
                return XDP_DROP;

        swap_src_dst_mac(data);
        return bpf_redirect_map(&tx_port, 0, 0);
}

SEC("xdp/cpumap")
int xdp_redirect_cpu_pass(struct xdp_md *ctx)
{
        return XDP_PASS;
}

SEC("xdp/cpumap")
int xdp_redirect_cpu_drop(struct xdp_md *ctx)
{
        return XDP_DROP;
}

SEC("xdp/devmap")
int xdp_redirect_egress_prog(struct xdp_md *ctx)
{
        void *data_end = (void *)(long)ctx->data_end;
        void *data = (void *)(long)ctx->data;
        struct ethhdr *eth = data;
        u64 nh_off;

        nh_off = sizeof(*eth);
        if (data + nh_off > data_end)
                return XDP_DROP;

        __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);

        return XDP_PASS;
}
char _license[] SEC("license") = "GPL";
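
/* Illustrative only: one way to load and attach a program from this
 * object with plain libbpf, outside of the sample's own loader.  The
 * object name, program choice, n_cpus and ifindex below are
 * placeholders, and error handling is omitted:
 *
 *      struct bpf_object *obj = bpf_object__open_file("xdp_redirect_cpu.bpf.o", NULL);
 *      bpf_map__set_max_entries(bpf_object__find_map_by_name(obj, "cpu_map"), n_cpus);
 *      bpf_map__set_max_entries(bpf_object__find_map_by_name(obj, "cpus_available"), n_cpus);
 *      bpf_object__load(obj);
 *      struct bpf_program *prog =
 *              bpf_object__find_program_by_name(obj, "xdp_prognum5_lb_hash_ip_pairs");
 *      bpf_xdp_attach(ifindex, bpf_program__fd(prog), XDP_FLAGS_DRV_MODE, NULL);
 *
 * cpu_map, cpus_available, cpus_count and tx_port still need their
 * entries written (see the notes next to the map definitions) before
 * redirect starts working.
 */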