Merge branch 'for-linus' into for-next
[linux-2.6-block.git] / samples / bpf / xdp_fwd_kern.c
CommitLineData
fe616055
DA
1// SPDX-License-Identifier: GPL-2.0
2/* Copyright (c) 2017-18 David Ahern <dsahern@gmail.com>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 */
13#define KBUILD_MODNAME "foo"
14#include <uapi/linux/bpf.h>
15#include <linux/in.h>
16#include <linux/if_ether.h>
17#include <linux/if_packet.h>
18#include <linux/if_vlan.h>
19#include <linux/ip.h>
20#include <linux/ipv6.h>
21
22#include "bpf_helpers.h"
23
24#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF)
25
3783d437
JDB
26/* For TX-traffic redirect requires net_device ifindex to be in this devmap */
27struct bpf_map_def SEC("maps") xdp_tx_ports = {
fe616055
DA
28 .type = BPF_MAP_TYPE_DEVMAP,
29 .key_size = sizeof(int),
30 .value_size = sizeof(int),
31 .max_entries = 64,
32};
33
44edef77
DA
34/* from include/net/ip.h */
35static __always_inline int ip_decrease_ttl(struct iphdr *iph)
36{
37 u32 check = (__force u32)iph->check;
38
39 check += (__force u32)htons(0x0100);
40 iph->check = (__force __sum16)(check + (check >= 0xFFFF));
41 return --iph->ttl;
42}
43
fe616055
DA
44static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
45{
46 void *data_end = (void *)(long)ctx->data_end;
47 void *data = (void *)(long)ctx->data;
48 struct bpf_fib_lookup fib_params;
49 struct ethhdr *eth = data;
44edef77
DA
50 struct ipv6hdr *ip6h;
51 struct iphdr *iph;
fe616055
DA
52 u16 h_proto;
53 u64 nh_off;
4c79579b 54 int rc;
fe616055
DA
55
56 nh_off = sizeof(*eth);
57 if (data + nh_off > data_end)
58 return XDP_DROP;
59
60 __builtin_memset(&fib_params, 0, sizeof(fib_params));
61
62 h_proto = eth->h_proto;
63 if (h_proto == htons(ETH_P_IP)) {
44edef77 64 iph = data + nh_off;
fe616055
DA
65
66 if (iph + 1 > data_end)
67 return XDP_DROP;
68
44edef77
DA
69 if (iph->ttl <= 1)
70 return XDP_PASS;
71
fe616055
DA
72 fib_params.family = AF_INET;
73 fib_params.tos = iph->tos;
74 fib_params.l4_protocol = iph->protocol;
75 fib_params.sport = 0;
76 fib_params.dport = 0;
77 fib_params.tot_len = ntohs(iph->tot_len);
78 fib_params.ipv4_src = iph->saddr;
79 fib_params.ipv4_dst = iph->daddr;
80 } else if (h_proto == htons(ETH_P_IPV6)) {
81 struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src;
82 struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst;
fe616055 83
44edef77
DA
84 ip6h = data + nh_off;
85 if (ip6h + 1 > data_end)
fe616055
DA
86 return XDP_DROP;
87
44edef77
DA
88 if (ip6h->hop_limit <= 1)
89 return XDP_PASS;
90
fe616055 91 fib_params.family = AF_INET6;
bd3a08aa 92 fib_params.flowinfo = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK;
44edef77 93 fib_params.l4_protocol = ip6h->nexthdr;
fe616055
DA
94 fib_params.sport = 0;
95 fib_params.dport = 0;
44edef77
DA
96 fib_params.tot_len = ntohs(ip6h->payload_len);
97 *src = ip6h->saddr;
98 *dst = ip6h->daddr;
fe616055
DA
99 } else {
100 return XDP_PASS;
101 }
102
103 fib_params.ifindex = ctx->ingress_ifindex;
104
4c79579b 105 rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
abcce733
JDB
106 /*
107 * Some rc (return codes) from bpf_fib_lookup() are important,
108 * to understand how this XDP-prog interacts with network stack.
109 *
110 * BPF_FIB_LKUP_RET_NO_NEIGH:
111 * Even if route lookup was a success, then the MAC-addresses are also
112 * needed. This is obtained from arp/neighbour table, but if table is
113 * (still) empty then BPF_FIB_LKUP_RET_NO_NEIGH is returned. To avoid
114 * doing ARP lookup directly from XDP, then send packet to normal
115 * network stack via XDP_PASS and expect it will do ARP resolution.
116 *
117 * BPF_FIB_LKUP_RET_FWD_DISABLED:
118 * The bpf_fib_lookup respect sysctl net.ipv{4,6}.conf.all.forwarding
119 * setting, and will return BPF_FIB_LKUP_RET_FWD_DISABLED if not
120 * enabled this on ingress device.
121 */
122 if (rc == BPF_FIB_LKUP_RET_SUCCESS) {
a32a32cb
JDB
123 /* Verify egress index has been configured as TX-port.
124 * (Note: User can still have inserted an egress ifindex that
125 * doesn't support XDP xmit, which will result in packet drops).
126 *
127 * Note: lookup in devmap supported since 0cdbb4b09a0.
128 * If not supported will fail with:
129 * cannot pass map_type 14 into func bpf_map_lookup_elem#1:
130 */
131 if (!bpf_map_lookup_elem(&xdp_tx_ports, &fib_params.ifindex))
132 return XDP_PASS;
133
44edef77
DA
134 if (h_proto == htons(ETH_P_IP))
135 ip_decrease_ttl(iph);
136 else if (h_proto == htons(ETH_P_IPV6))
137 ip6h->hop_limit--;
138
fe616055
DA
139 memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
140 memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
3783d437 141 return bpf_redirect_map(&xdp_tx_ports, fib_params.ifindex, 0);
fe616055
DA
142 }
143
144 return XDP_PASS;
145}
146
147SEC("xdp_fwd")
148int xdp_fwd_prog(struct xdp_md *ctx)
149{
150 return xdp_fwd_flags(ctx, 0);
151}
152
153SEC("xdp_fwd_direct")
154int xdp_fwd_direct_prog(struct xdp_md *ctx)
155{
156 return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT);
157}
158
159char _license[] SEC("license") = "GPL";