Commit | Line | Data |
---|---|---|
6c77997b YZ |
1 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause |
2 | ||
3 | /* | |
4 | * Test suite of lwt BPF programs that reroute packets. | |
5 | * The tests check not only whether these programs work as expected normally, | |
6 | * but also if they can handle abnormal situations gracefully. This test | |
7 | * suite currently only covers lwt_xmit hook. lwt_in tests have not been | |
8 | * implemented. | |
9 | * | |
10 | * WARNING | |
11 | * ------- | |
12 | * This test suite can crash the kernel, thus should be run in a VM. | |
13 | * | |
14 | * Setup: | |
15 | * --------- | |
16 | * all tests are performed in a single netns. A lwt encap route is setup for | |
17 | * each subtest: | |
18 | * | |
19 | * ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<section_N>" dev link_err | |
20 | * | |
21 | * Here <obj> is statically defined to test_lwt_reroute.bpf.o, and it contains | |
22 | * a single test program entry. This program sets packet mark by last byte of | |
23 | * the IPv4 daddr. For example, a packet going to 1.2.3.4 will receive a skb | |
24 | * mark 4. A packet will only be marked once, and IP x.x.x.0 will be skipped | |
25 | * to avoid route loop. We didn't use generated BPF skeleton since the | |
26 | * attachment for lwt programs is not supported by libbpf yet. | |
27 | * | |
28 | * The test program will bring up a tun device, and sets up the following | |
29 | * routes: | |
30 | * | |
31 | * ip rule add pref 100 from all fwmark <tun_index> lookup 100 | |
32 | * ip route add table 100 default dev tun0 | |
33 | * | |
34 | * For normal testing, a ping command is running in the test netns: | |
35 | * | |
36 | * ping 10.0.0.<tun_index> -c1 -W1 -s 100 | |
37 | * | |
38 | * For abnormal testing, fq is used as the qdisc of the tun device. Then a UDP | |
39 | * socket will try to overflow the fq queue and trigger qdisc drop error. | |
40 | * | |
41 | * Scenarios: | |
42 | * -------------------------------- | |
43 | * 1. Reroute to a running tun device | |
44 | * 2. Reroute to a device where qdisc drop | |
45 | * | |
46 | * For case 1, ping packets should be received by the tun device. | |
47 | * | |
48 | * For case 2, force UDP packets to overflow fq limit. As long as kernel | |
49 | * is not crashed, it is considered successful. | |
50 | */ | |
e7f31873 | 51 | #define NETNS "ns_lwt_reroute" |
6c77997b YZ |
52 | #include "lwt_helpers.h" |
53 | #include "network_helpers.h" | |
54 | #include <linux/net_tstamp.h> | |
55 | ||
56 | #define BPF_OBJECT "test_lwt_reroute.bpf.o" | |
57 | #define LOCAL_SRC "10.0.0.1" | |
58 | #define TEST_CIDR "10.0.0.0/24" | |
59 | #define XMIT_HOOK "xmit" | |
60 | #define XMIT_SECTION "lwt_xmit" | |
61 | #define NSEC_PER_SEC 1000000000ULL | |
62 | ||
63 | /* send a ping to be rerouted to the target device */ | |
64 | static void ping_once(const char *ip) | |
65 | { | |
66 | /* We won't get a reply. Don't fail here */ | |
fbaf59a9 | 67 | SYS_NOFAIL("ping %s -c1 -W1 -s %d", |
6c77997b YZ |
68 | ip, ICMP_PAYLOAD_SIZE); |
69 | } | |
70 | ||
71 | /* Send snd_target UDP packets to overflow the fq queue and trigger qdisc drop | |
72 | * error. This is done via TX tstamp to force buffering delayed packets. | |
73 | */ | |
74 | static int overflow_fq(int snd_target, const char *target_ip) | |
75 | { | |
76 | struct sockaddr_in addr = { | |
77 | .sin_family = AF_INET, | |
78 | .sin_port = htons(1234), | |
79 | }; | |
80 | ||
81 | char data_buf[8]; /* only #pkts matter, so use a random small buffer */ | |
82 | char control_buf[CMSG_SPACE(sizeof(uint64_t))]; | |
83 | struct iovec iov = { | |
84 | .iov_base = data_buf, | |
85 | .iov_len = sizeof(data_buf), | |
86 | }; | |
87 | int err = -1; | |
88 | int s = -1; | |
89 | struct sock_txtime txtime_on = { | |
90 | .clockid = CLOCK_MONOTONIC, | |
91 | .flags = 0, | |
92 | }; | |
93 | struct msghdr msg = { | |
94 | .msg_name = &addr, | |
95 | .msg_namelen = sizeof(addr), | |
96 | .msg_control = control_buf, | |
97 | .msg_controllen = sizeof(control_buf), | |
98 | .msg_iovlen = 1, | |
99 | .msg_iov = &iov, | |
100 | }; | |
101 | struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); | |
102 | ||
103 | memset(data_buf, 0, sizeof(data_buf)); | |
104 | ||
105 | s = socket(AF_INET, SOCK_DGRAM, 0); | |
106 | if (!ASSERT_GE(s, 0, "socket")) | |
107 | goto out; | |
108 | ||
109 | err = setsockopt(s, SOL_SOCKET, SO_TXTIME, &txtime_on, sizeof(txtime_on)); | |
110 | if (!ASSERT_OK(err, "setsockopt(SO_TXTIME)")) | |
111 | goto out; | |
112 | ||
113 | err = inet_pton(AF_INET, target_ip, &addr.sin_addr); | |
114 | if (!ASSERT_EQ(err, 1, "inet_pton")) | |
115 | goto out; | |
116 | ||
117 | while (snd_target > 0) { | |
118 | struct timespec now; | |
119 | ||
120 | memset(control_buf, 0, sizeof(control_buf)); | |
121 | cmsg->cmsg_type = SCM_TXTIME; | |
122 | cmsg->cmsg_level = SOL_SOCKET; | |
123 | cmsg->cmsg_len = CMSG_LEN(sizeof(uint64_t)); | |
124 | ||
125 | err = clock_gettime(CLOCK_MONOTONIC, &now); | |
126 | if (!ASSERT_OK(err, "clock_gettime(CLOCK_MONOTONIC)")) { | |
127 | err = -1; | |
128 | goto out; | |
129 | } | |
130 | ||
131 | *(uint64_t *)CMSG_DATA(cmsg) = (now.tv_nsec + 1) * NSEC_PER_SEC + | |
132 | now.tv_nsec; | |
133 | ||
134 | /* we will intentionally send more than fq limit, so ignore | |
135 | * the error here. | |
136 | */ | |
137 | sendmsg(s, &msg, MSG_NOSIGNAL); | |
138 | snd_target--; | |
139 | } | |
140 | ||
141 | /* no kernel crash so far is considered success */ | |
142 | err = 0; | |
143 | ||
144 | out: | |
145 | if (s >= 0) | |
146 | close(s); | |
147 | ||
148 | return err; | |
149 | } | |
150 | ||
151 | static int setup(const char *tun_dev) | |
152 | { | |
153 | int target_index = -1; | |
154 | int tap_fd = -1; | |
155 | ||
156 | tap_fd = open_tuntap(tun_dev, false); | |
157 | if (!ASSERT_GE(tap_fd, 0, "open_tun")) | |
158 | return -1; | |
159 | ||
160 | target_index = if_nametoindex(tun_dev); | |
161 | if (!ASSERT_GE(target_index, 0, "if_nametoindex")) | |
162 | return -1; | |
163 | ||
164 | SYS(fail, "ip link add link_err type dummy"); | |
165 | SYS(fail, "ip link set lo up"); | |
166 | SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32"); | |
167 | SYS(fail, "ip link set link_err up"); | |
168 | SYS(fail, "ip link set %s up", tun_dev); | |
169 | ||
170 | SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec lwt_xmit", | |
171 | TEST_CIDR, BPF_OBJECT); | |
172 | ||
173 | SYS(fail, "ip rule add pref 100 from all fwmark %d lookup 100", | |
174 | target_index); | |
175 | SYS(fail, "ip route add t 100 default dev %s", tun_dev); | |
176 | ||
177 | return tap_fd; | |
178 | ||
179 | fail: | |
180 | if (tap_fd >= 0) | |
181 | close(tap_fd); | |
182 | return -1; | |
183 | } | |
184 | ||
185 | static void test_lwt_reroute_normal_xmit(void) | |
186 | { | |
187 | const char *tun_dev = "tun0"; | |
188 | int tun_fd = -1; | |
189 | int ifindex = -1; | |
190 | char ip[256]; | |
191 | struct timeval timeo = { | |
192 | .tv_sec = 0, | |
193 | .tv_usec = 250000, | |
194 | }; | |
195 | ||
196 | tun_fd = setup(tun_dev); | |
197 | if (!ASSERT_GE(tun_fd, 0, "setup_reroute")) | |
198 | return; | |
199 | ||
200 | ifindex = if_nametoindex(tun_dev); | |
201 | if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) | |
202 | return; | |
203 | ||
204 | snprintf(ip, 256, "10.0.0.%d", ifindex); | |
205 | ||
206 | /* ping packets should be received by the tun device */ | |
207 | ping_once(ip); | |
208 | ||
209 | if (!ASSERT_EQ(wait_for_packet(tun_fd, __expect_icmp_ipv4, &timeo), 1, | |
210 | "wait_for_packet")) | |
211 | log_err("%s xmit", __func__); | |
212 | } | |
213 | ||
214 | /* | |
215 | * Test the failure case when the skb is dropped at the qdisc. This is a | |
216 | * regression prevention at the xmit hook only. | |
217 | */ | |
218 | static void test_lwt_reroute_qdisc_dropped(void) | |
219 | { | |
220 | const char *tun_dev = "tun0"; | |
221 | int tun_fd = -1; | |
222 | int ifindex = -1; | |
223 | char ip[256]; | |
224 | ||
225 | tun_fd = setup(tun_dev); | |
226 | if (!ASSERT_GE(tun_fd, 0, "setup_reroute")) | |
227 | goto fail; | |
228 | ||
229 | SYS(fail, "tc qdisc replace dev %s root fq limit 5 flow_limit 5", tun_dev); | |
230 | ||
231 | ifindex = if_nametoindex(tun_dev); | |
232 | if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) | |
233 | return; | |
234 | ||
235 | snprintf(ip, 256, "10.0.0.%d", ifindex); | |
236 | ASSERT_EQ(overflow_fq(10, ip), 0, "overflow_fq"); | |
237 | ||
238 | fail: | |
239 | if (tun_fd >= 0) | |
240 | close(tun_fd); | |
241 | } | |
242 | ||
243 | static void *test_lwt_reroute_run(void *arg) | |
244 | { | |
245 | netns_delete(); | |
246 | RUN_TEST(lwt_reroute_normal_xmit); | |
247 | RUN_TEST(lwt_reroute_qdisc_dropped); | |
248 | return NULL; | |
249 | } | |
250 | ||
251 | void test_lwt_reroute(void) | |
252 | { | |
253 | pthread_t test_thread; | |
254 | int err; | |
255 | ||
256 | /* Run the tests in their own thread to isolate the namespace changes | |
257 | * so they do not affect the environment of other tests. | |
258 | * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) | |
259 | */ | |
260 | err = pthread_create(&test_thread, NULL, &test_lwt_reroute_run, NULL); | |
261 | if (ASSERT_OK(err, "pthread_create")) | |
262 | ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); | |
263 | } |