Merge tag 'net-6.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
[linux-2.6-block.git] / tools / testing / selftests / bpf / xdp_hw_metadata.c
CommitLineData
297a3f12
SF
// SPDX-License-Identifier: GPL-2.0

/* Reference program for verifying XDP metadata on real HW. Functional test
 * only, doesn't test the performance.
 *
 * RX:
 * - UDP 9091 packets are diverted into AF_XDP
 * - Metadata verified:
 *   - rx_timestamp
 *   - rx_hash
 *
 * TX:
 * - TBD
 */
15
16#include <test_progs.h>
17#include <network_helpers.h>
18#include "xdp_hw_metadata.skel.h"
19#include "xsk.h"
20
21#include <error.h>
22#include <linux/errqueue.h>
23#include <linux/if_link.h>
24#include <linux/net_tstamp.h>
25#include <linux/udp.h>
26#include <linux/sockios.h>
27#include <sys/mman.h>
28#include <net/if.h>
bb6a8888 29#include <ctype.h>
297a3f12 30#include <poll.h>
bb323478 31#include <time.h>
297a3f12
SF
32
33#include "xdp_metadata.h"
34
/* Number of frames in the UMEM of each AF_XDP socket. */
#define UMEM_NUM 16
/* Size in bytes of a single UMEM frame. */
#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
/* Size in bytes of the whole UMEM mapping (all frames). */
#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
/* Flags used both when attaching and when detaching the XDP program. */
#define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE)
39
40struct xsk {
41 void *umem_area;
42 struct xsk_umem *umem;
43 struct xsk_ring_prod fill;
44 struct xsk_ring_cons comp;
45 struct xsk_ring_prod tx;
46 struct xsk_ring_cons rx;
47 struct xsk_socket *socket;
48};
49
50struct xdp_hw_metadata *bpf_obj;
bb6a8888 51__u16 bind_flags = XDP_COPY;
297a3f12
SF
52struct xsk *rx_xsk;
53const char *ifname;
54int ifindex;
55int rxq;
56
/* Empty stub: the symbol is needed by network_helpers.c. */
void test__fail(void)
{
	/* for network_helpers.c */
}
58
59static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
60{
61 int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
62 const struct xsk_socket_config socket_config = {
63 .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
64 .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
bb6a8888 65 .bind_flags = bind_flags,
297a3f12
SF
66 };
67 const struct xsk_umem_config umem_config = {
68 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
69 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
70 .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
71 .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
72 };
73 __u32 idx;
74 u64 addr;
75 int ret;
76 int i;
77
78 xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
79 if (xsk->umem_area == MAP_FAILED)
80 return -ENOMEM;
81
82 ret = xsk_umem__create(&xsk->umem,
83 xsk->umem_area, UMEM_SIZE,
84 &xsk->fill,
85 &xsk->comp,
86 &umem_config);
87 if (ret)
88 return ret;
89
90 ret = xsk_socket__create(&xsk->socket, ifindex, queue_id,
91 xsk->umem,
92 &xsk->rx,
93 &xsk->tx,
94 &socket_config);
95 if (ret)
96 return ret;
97
98 /* First half of umem is for TX. This way address matches 1-to-1
99 * to the completion queue index.
100 */
101
102 for (i = 0; i < UMEM_NUM / 2; i++) {
103 addr = i * UMEM_FRAME_SIZE;
104 printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
105 }
106
107 /* Second half of umem is for RX. */
108
109 ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
110 for (i = 0; i < UMEM_NUM / 2; i++) {
111 addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
112 printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
113 *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
114 }
115 xsk_ring_prod__submit(&xsk->fill, ret);
116
117 return 0;
118}
119
120static void close_xsk(struct xsk *xsk)
121{
122 if (xsk->umem)
123 xsk_umem__delete(xsk->umem);
124 if (xsk->socket)
125 xsk_socket__delete(xsk->socket);
a19a62e5 126 munmap(xsk->umem_area, UMEM_SIZE);
297a3f12
SF
127}
128
129static void refill_rx(struct xsk *xsk, __u64 addr)
130{
131 __u32 idx;
132
133 if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) {
134 printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
135 *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
136 xsk_ring_prod__submit(&xsk->fill, 1);
137 }
138}
139
bb323478
JDB
140#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
141static __u64 gettime(clockid_t clock_id)
142{
143 struct timespec t;
144 int res;
145
146 /* See man clock_gettime(2) for type of clock_id's */
147 res = clock_gettime(clock_id, &t);
148
149 if (res < 0)
150 error(res, errno, "Error with clock_gettime()");
151
152 return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
153}
154
155static void verify_xdp_metadata(void *data, clockid_t clock_id)
297a3f12
SF
156{
157 struct xdp_meta *meta;
158
159 meta = data - sizeof(*meta);
160
0f26b74e
JDB
161 if (meta->rx_hash_err < 0)
162 printf("No rx_hash err=%d\n", meta->rx_hash_err);
163 else
164 printf("rx_hash: 0x%X with RSS type:0x%X\n",
165 meta->rx_hash, meta->rx_hash_type);
bb323478
JDB
166
167 printf("rx_timestamp: %llu (sec:%0.4f)\n", meta->rx_timestamp,
168 (double)meta->rx_timestamp / NANOSEC_PER_SEC);
169 if (meta->rx_timestamp) {
170 __u64 usr_clock = gettime(clock_id);
171 __u64 xdp_clock = meta->xdp_timestamp;
172 __s64 delta_X = xdp_clock - meta->rx_timestamp;
173 __s64 delta_X2U = usr_clock - xdp_clock;
174
175 printf("XDP RX-time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
176 xdp_clock, (double)xdp_clock / NANOSEC_PER_SEC,
177 (double)delta_X / NANOSEC_PER_SEC,
178 (double)delta_X / 1000);
179
180 printf("AF_XDP time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n",
181 usr_clock, (double)usr_clock / NANOSEC_PER_SEC,
182 (double)delta_X2U / NANOSEC_PER_SEC,
183 (double)delta_X2U / 1000);
184 }
185
297a3f12
SF
186}
187
188static void verify_skb_metadata(int fd)
189{
190 char cmsg_buf[1024];
191 char packet_buf[128];
192
193 struct scm_timestamping *ts;
194 struct iovec packet_iov;
195 struct cmsghdr *cmsg;
196 struct msghdr hdr;
197
198 memset(&hdr, 0, sizeof(hdr));
199 hdr.msg_iov = &packet_iov;
200 hdr.msg_iovlen = 1;
201 packet_iov.iov_base = packet_buf;
202 packet_iov.iov_len = sizeof(packet_buf);
203
204 hdr.msg_control = cmsg_buf;
205 hdr.msg_controllen = sizeof(cmsg_buf);
206
207 if (recvmsg(fd, &hdr, 0) < 0)
7bd4224d 208 error(1, errno, "recvmsg");
297a3f12
SF
209
210 for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
211 cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
212
213 if (cmsg->cmsg_level != SOL_SOCKET)
214 continue;
215
216 switch (cmsg->cmsg_type) {
217 case SCM_TIMESTAMPING:
218 ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
219 if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) {
220 printf("found skb hwtstamp = %lu.%lu\n",
221 ts->ts[2].tv_sec, ts->ts[2].tv_nsec);
222 return;
223 }
224 break;
225 default:
226 break;
227 }
228 }
229
230 printf("skb hwtstamp is not found!\n");
231}
232
bb323478 233static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
297a3f12
SF
234{
235 const struct xdp_desc *rx_desc;
236 struct pollfd fds[rxq + 1];
237 __u64 comp_addr;
238 __u64 addr;
925a0157 239 __u32 idx = 0;
297a3f12
SF
240 int ret;
241 int i;
242
243 for (i = 0; i < rxq; i++) {
244 fds[i].fd = xsk_socket__fd(rx_xsk[i].socket);
245 fds[i].events = POLLIN;
246 fds[i].revents = 0;
247 }
248
249 fds[rxq].fd = server_fd;
250 fds[rxq].events = POLLIN;
251 fds[rxq].revents = 0;
252
253 while (true) {
254 errno = 0;
255 ret = poll(fds, rxq + 1, 1000);
e8163b98
JDB
256 printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n",
257 ret, errno, bpf_obj->bss->pkts_skip,
258 bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir);
297a3f12
SF
259 if (ret < 0)
260 break;
261 if (ret == 0)
262 continue;
263
264 if (fds[rxq].revents)
265 verify_skb_metadata(server_fd);
266
267 for (i = 0; i < rxq; i++) {
bb6a8888
LZ
268 bool first_seg = true;
269 bool is_eop = true;
270
297a3f12
SF
271 if (fds[i].revents == 0)
272 continue;
273
274 struct xsk *xsk = &rx_xsk[i];
bb6a8888 275peek:
297a3f12
SF
276 ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
277 printf("xsk_ring_cons__peek: %d\n", ret);
278 if (ret != 1)
279 continue;
280
281 rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
282 comp_addr = xsk_umem__extract_addr(rx_desc->addr);
283 addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
bb6a8888
LZ
284 is_eop = !(rx_desc->options & XDP_PKT_CONTD);
285 printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx%s\n",
286 xsk, idx, rx_desc->addr, addr, comp_addr, is_eop ? " EoP" : "");
287 if (first_seg) {
288 verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
289 clock_id);
290 first_seg = false;
291 }
292
297a3f12
SF
293 xsk_ring_cons__release(&xsk->rx, 1);
294 refill_rx(xsk, comp_addr);
bb6a8888
LZ
295 if (!is_eop)
296 goto peek;
297a3f12
SF
297 }
298 }
299
300 return 0;
301}
302
/*
 * Local copy of the structure exchanged with the ETHTOOL_GCHANNELS command.
 * rxq_num() sets cmd and reads rx_count and combined_count; the other fields
 * are presumably filled in by the kernel as well (TODO confirm against the
 * ethtool UAPI header).
 */
struct ethtool_channels {
	__u32	cmd;
	__u32	max_rx;
	__u32	max_tx;
	__u32	max_other;
	__u32	max_combined;
	__u32	rx_count;
	__u32	tx_count;
	__u32	other_count;
	__u32	combined_count;
};

#define ETHTOOL_GCHANNELS	0x0000003c /* Get no of channels */
316
317static int rxq_num(const char *ifname)
318{
319 struct ethtool_channels ch = {
320 .cmd = ETHTOOL_GCHANNELS,
321 };
322
323 struct ifreq ifr = {
324 .ifr_data = (void *)&ch,
325 };
e8a3c8bd 326 strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
297a3f12
SF
327 int fd, ret;
328
329 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
330 if (fd < 0)
7bd4224d 331 error(1, errno, "socket");
297a3f12
SF
332
333 ret = ioctl(fd, SIOCETHTOOL, &ifr);
334 if (ret < 0)
7bd4224d 335 error(1, errno, "ioctl(SIOCETHTOOL)");
297a3f12
SF
336
337 close(fd);
338
339 return ch.rx_count + ch.combined_count;
340}
341
a5f3a3f7
SF
342static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg)
343{
344 struct ifreq ifr = {
345 .ifr_data = (void *)cfg,
346 };
e8a3c8bd 347 strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
a5f3a3f7
SF
348 int fd, ret;
349
350 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
351 if (fd < 0)
7bd4224d 352 error(1, errno, "socket");
a5f3a3f7
SF
353
354 ret = ioctl(fd, op, &ifr);
355 if (ret < 0)
7bd4224d 356 error(1, errno, "ioctl(%d)", op);
a5f3a3f7
SF
357
358 close(fd);
359}
360
/* Timestamping configuration found on the interface before it was changed. */
static struct hwtstamp_config saved_hwtstamp_cfg;
/* Private copy of the interface name the saved configuration belongs to. */
static const char *saved_hwtstamp_ifname;

/* atexit() handler: write the saved timestamping configuration back. */
static void hwtstamp_restore(void)
{
	hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, &saved_hwtstamp_cfg);
}

/*
 * Switch @ifname to timestamping of all received packets.
 *
 * The current configuration is read first and a handler restoring it is
 * registered with atexit(). Only once that is in place is the new
 * configuration written, so the interface is never left modified without a
 * way back.
 */
static void hwtstamp_enable(const char *ifname)
{
	struct hwtstamp_config cfg = {
		.rx_filter = HWTSTAMP_FILTER_ALL,
	};

	hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg);

	/* The restore handler dereferences this name, so it must not be NULL. */
	saved_hwtstamp_ifname = strdup(ifname);
	if (!saved_hwtstamp_ifname)
		error(1, errno, "strdup");

	if (atexit(hwtstamp_restore))
		error(1, 0, "atexit");

	hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg);
}
381
297a3f12
SF
382static void cleanup(void)
383{
384 LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
385 int ret;
386 int i;
387
388 if (bpf_obj) {
389 opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx);
390 if (opts.old_prog_fd >= 0) {
391 printf("detaching bpf program....\n");
392 ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts);
393 if (ret)
394 printf("failed to detach XDP program: %d\n", ret);
395 }
396 }
397
398 for (i = 0; i < rxq; i++)
399 close_xsk(&rx_xsk[i]);
400
401 if (bpf_obj)
402 xdp_hw_metadata__destroy(bpf_obj);
403}
404
/* Signal handler that deliberately does nothing with @sig. */
static void handle_signal(int sig)
{
	/* interrupting poll() is all we need */
	(void)sig;
}
409
/*
 * Set SO_TIMESTAMPING on socket @fd to the flag mask @val.
 *
 * A setsockopt() failure terminates the program.
 */
static void timestamping_enable(int fd, int val)
{
	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)) < 0)
		error(1, errno, "setsockopt(SO_TIMESTAMPING)");
}
418
bb6a8888
LZ
/* Write the command line help text to stdout. */
static void print_usage(void)
{
	static const char usage[] =
		"Usage: xdp_hw_metadata [OPTIONS] [IFNAME]\n"
		"  -m            Enable multi-buffer XDP for larger MTU\n"
		"  -h            Display this help and exit\n\n"
		"Generate test packets on the other machine with:\n"
		"  echo -n xdp | nc -u -q1 <dst_ip> 9091\n";

	fputs(usage, stdout);
}
430
431static void read_args(int argc, char *argv[])
432{
d84b139f 433 int opt;
bb6a8888
LZ
434
435 while ((opt = getopt(argc, argv, "mh")) != -1) {
436 switch (opt) {
437 case 'm':
438 bind_flags |= XDP_USE_SG;
439 break;
440 case 'h':
441 print_usage();
442 exit(0);
443 case '?':
444 if (isprint(optopt))
445 fprintf(stderr, "Unknown option: -%c\n", optopt);
446 fallthrough;
447 default:
448 print_usage();
449 error(-1, opterr, "Command line options error");
450 }
451 }
452
453 if (optind >= argc) {
454 fprintf(stderr, "No device name provided\n");
455 print_usage();
456 exit(-1);
457 }
458
459 ifname = argv[optind];
460 ifindex = if_nametoindex(ifname);
461
462 if (!ifname)
463 error(-1, errno, "Invalid interface name");
464}
465
297a3f12
SF
466int main(int argc, char *argv[])
467{
bb323478 468 clockid_t clock_id = CLOCK_TAI;
297a3f12
SF
469 int server_fd = -1;
470 int ret;
471 int i;
472
473 struct bpf_program *prog;
474
bb6a8888 475 read_args(argc, argv);
297a3f12 476
297a3f12
SF
477 rxq = rxq_num(ifname);
478
479 printf("rxq: %d\n", rxq);
480
a5f3a3f7
SF
481 hwtstamp_enable(ifname);
482
297a3f12
SF
483 rx_xsk = malloc(sizeof(struct xsk) * rxq);
484 if (!rx_xsk)
7bd4224d 485 error(1, ENOMEM, "malloc");
297a3f12
SF
486
487 for (i = 0; i < rxq; i++) {
488 printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i);
489 ret = open_xsk(ifindex, &rx_xsk[i], i);
490 if (ret)
7bd4224d 491 error(1, -ret, "open_xsk");
297a3f12
SF
492
493 printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket));
494 }
495
496 printf("open bpf program...\n");
497 bpf_obj = xdp_hw_metadata__open();
498 if (libbpf_get_error(bpf_obj))
7bd4224d 499 error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open");
297a3f12
SF
500
501 prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
502 bpf_program__set_ifindex(prog, ifindex);
503 bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
504
505 printf("load bpf program...\n");
506 ret = xdp_hw_metadata__load(bpf_obj);
507 if (ret)
7bd4224d 508 error(1, -ret, "xdp_hw_metadata__load");
297a3f12
SF
509
510 printf("prepare skb endpoint...\n");
511 server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000);
512 if (server_fd < 0)
7bd4224d 513 error(1, errno, "start_server");
297a3f12
SF
514 timestamping_enable(server_fd,
515 SOF_TIMESTAMPING_SOFTWARE |
516 SOF_TIMESTAMPING_RAW_HARDWARE);
517
518 printf("prepare xsk map...\n");
519 for (i = 0; i < rxq; i++) {
520 int sock_fd = xsk_socket__fd(rx_xsk[i].socket);
521 __u32 queue_id = i;
522
523 printf("map[%d] = %d\n", queue_id, sock_fd);
524 ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
525 if (ret)
7bd4224d 526 error(1, -ret, "bpf_map_update_elem");
297a3f12
SF
527 }
528
529 printf("attach bpf program...\n");
530 ret = bpf_xdp_attach(ifindex,
531 bpf_program__fd(bpf_obj->progs.rx),
532 XDP_FLAGS, NULL);
533 if (ret)
7bd4224d 534 error(1, -ret, "bpf_xdp_attach");
297a3f12
SF
535
536 signal(SIGINT, handle_signal);
bb323478 537 ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id);
297a3f12
SF
538 close(server_fd);
539 cleanup();
540 if (ret)
7bd4224d 541 error(1, -ret, "verify_metadata");
297a3f12 542}