1 /* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
7 * This program is distributed in the hope that it will be useful, but
8 * WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 * General Public License for more details.
14 #include <sys/socket.h>
15 #include <sys/ioctl.h>
16 #include <sys/select.h>
17 #include <netinet/in.h>
18 #include <arpa/inet.h>
22 #include <sys/ioctl.h>
30 #include <sys/resource.h>
31 #include <sys/types.h>
32 #include <sys/sendfile.h>
34 #include <linux/netlink.h>
35 #include <linux/socket.h>
36 #include <linux/sock_diag.h>
37 #include <linux/bpf.h>
38 #include <linux/if_link.h>
44 #include "../bpf/bpf_load.h"
45 #include "../bpf/bpf_util.h"
46 #include "../bpf/libbpf.h"
/* Handler that main() installs for SIGINT; its definition appears later in
 * this file.
 */
49 void running_handler(int a);
51 /* randomly selected ports for testing on lo */
/* Test socket descriptors set up by sockmap_init_sockets(): s1/s2 are bound
 * and listened on, c1/c2 connect to them, and p1/p2 are the descriptors
 * returned by accept() on s1/s2.
 */
56 int s1, s2, c1, c2, p1, p2;
/* Stored as 1 by the --txmsg_redir_noisy long option; main() consults it
 * when choosing the txmsg program (prog_fd[6]) and the redirect target.
 */
61 int txmsg_redir_noisy;
/* Long-option table handed to getopt_long() in main() and printed by usage().
 *
 * Entries with a NULL flag pointer carry an option character in .val.
 * Entries with a non-NULL flag pointer name one of the txmsg_* int globals
 * together with the value 1 (see getopt_long(3) for how flag/val are applied).
 */
70 static const struct option long_options[] = {
71 {"help", no_argument, NULL, 'h' },
72 {"cgroup", required_argument, NULL, 'c' },
73 {"rate", required_argument, NULL, 'r' },
74 {"verbose", no_argument, NULL, 'v' },
75 {"iov_count", required_argument, NULL, 'i' },
76 {"length", required_argument, NULL, 'l' },
77 {"test", required_argument, NULL, 't' },
78 {"data_test", no_argument, NULL, 'd' },
/* Flag-style switches: each points at an int global and stores 1. */
79 {"txmsg", no_argument, &txmsg_pass, 1 },
80 {"txmsg_noisy", no_argument, &txmsg_noisy, 1 },
81 {"txmsg_redir", no_argument, &txmsg_redir, 1 },
82 {"txmsg_redir_noisy", no_argument, &txmsg_redir_noisy, 1},
83 {"txmsg_drop", no_argument, &txmsg_drop, 1 },
/* 'a', 'k', 's' and 'e' do not appear in the short-option string that main()
 * passes to getopt_long(), so these four are reachable by long name only.
 */
84 {"txmsg_apply", required_argument, NULL, 'a'},
85 {"txmsg_cork", required_argument, NULL, 'k'},
86 {"txmsg_start", required_argument, NULL, 's'},
87 {"txmsg_end", required_argument, NULL, 'e'},
88 {"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
89 {"txmsg_skb", no_argument, &txmsg_skb, 1 },
/* Print a usage banner for argv[0] followed by one line per long_options
 * entry, stopping at the first entry whose name is 0.
 */
93 static void usage(char *argv[])
97 printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
98 printf(" options:\n");
99 for (i = 0; long_options[i].name != 0; i++) {
/* Option name, left-justified in a 12-character column. */
100 printf(" --%-12s", long_options[i].name);
/* Flag-style options show the current value of the int they point at. */
101 if (long_options[i].flag != NULL)
102 printf(" flag (internal value:%d)\n",
103 *long_options[i].flag);
/* NOTE(review): presumably the else branch - prints .val as the option
 * character for entries without a flag pointer; confirm against the full file.
 */
105 printf(" -%c\n", long_options[i].val);
/* Set up the six test descriptors on 127.0.0.1.
 *
 * s1, s2, c1 and c2 are created with socket(); s1/s2 get SO_REUSEADDR and
 * FIONBIO, are bound to S1_PORT/S2_PORT and put into listen state; c1/c2
 * connect to those ports (EINPROGRESS is tolerated); p1/p2 are the
 * descriptors accepted from s1/s2.
 *
 * Failures are reported with perror(). perror() itself appends
 * ": <error text>" and a newline, so the messages carry no trailing "\n",
 * and each message names the socket the failing call operated on.
 */
110 static int sockmap_init_sockets(void)
113 struct sockaddr_in addr;
/* Only the four descriptors created with socket(); p1/p2 come from accept(). */
114 int *fds[4] = {&s1, &s2, &c1, &c2};
116 s1 = s2 = p1 = p2 = c1 = c2 = 0;
119 for (i = 0; i < 4; i++) {
120 *fds[i] = socket(AF_INET, SOCK_STREAM, 0);
122 perror("socket s1 failed()");
/* The next two loops touch fds[0..1] only, i.e. the listening sockets. */
128 for (i = 0; i < 2; i++) {
129 err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
130 (char *)&one, sizeof(one));
132 perror("setsockopt failed()");
137 /* Non-blocking sockets */
138 for (i = 0; i < 2; i++) {
139 err = ioctl(*fds[i], FIONBIO, (char *)&one);
141 perror("ioctl s1 failed()");
146 /* Bind server sockets */
147 memset(&addr, 0, sizeof(struct sockaddr_in));
148 addr.sin_family = AF_INET;
149 addr.sin_addr.s_addr = inet_addr("127.0.0.1");
151 addr.sin_port = htons(S1_PORT);
152 err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
154 perror("bind s1 failed()");
158 addr.sin_port = htons(S2_PORT);
159 err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
161 perror("bind s2 failed()");
165 /* Listen server sockets */
166 addr.sin_port = htons(S1_PORT);
167 err = listen(s1, 32);
169 perror("listen s1 failed()");
173 addr.sin_port = htons(S2_PORT);
174 err = listen(s2, 32);
176 perror("listen s2 failed()");
180 /* Initiate Connect */
181 addr.sin_port = htons(S1_PORT);
182 err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
/* EINPROGRESS is not treated as a connect failure here. */
183 if (err < 0 && errno != EINPROGRESS) {
184 perror("connect c1 failed()");
188 addr.sin_port = htons(S2_PORT);
189 err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
190 if (err < 0 && errno != EINPROGRESS) {
191 perror("connect c2 failed()");
193 } else if (err < 0) {
197 /* Accept Connections */
198 p1 = accept(s1, NULL, NULL);
200 perror("accept s1 failed()");
204 p2 = accept(s2, NULL, NULL);
206 perror("accept s2 failed()");
210 printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
211 printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
/* Timestamp filled in by clock_gettime(CLOCK_MONOTONIC, ...) when a send or
 * receive loop begins (presumably a member of struct msg_stats - the struct
 * header is not visible here).
 */
219 struct timespec start;
/* Per-run test options handed to the message loops.
 * Members referenced elsewhere in this file: drop_expected, data_test, base
 * and sendpage.
 */
223 struct sockmap_options {
231 static int msg_loop_sendpage(int fd, int iov_length, int cnt,
233 struct sockmap_options *opt)
235 bool drop = opt->drop_expected;
240 file = fopen(".sendpage_tst.tmp", "w+");
241 for (i = 0; i < iov_length * cnt; i++, k++)
242 fwrite(&k, sizeof(char), 1, file);
244 fseek(file, 0, SEEK_SET);
247 fp = open(".sendpage_tst.tmp", O_RDONLY);
248 clock_gettime(CLOCK_MONOTONIC, &s->start);
249 for (i = 0; i < cnt; i++) {
250 int sent = sendfile(fd, fp, NULL, iov_length);
252 if (!drop && sent < 0) {
253 perror("send loop error:");
256 } else if (drop && sent >= 0) {
257 printf("sendpage loop error expected: %i\n", sent);
263 s->bytes_sent += sent;
265 clock_gettime(CLOCK_MONOTONIC, &s->end);
/* Send or receive test traffic on fd using an iovec array of iov_count
 * buffers, each iov_length bytes long.
 *
 * The visible code allocates the iovec array and its buffers with calloc(),
 * then either calls sendmsg() cnt times (accumulating s->bytes_sent) or
 * loops on select()/recvmsg() until s->bytes_recvd reaches
 * iov_count * iov_length * cnt, optionally checking the received bytes.
 * Start/end timestamps are recorded in *s with CLOCK_MONOTONIC, and the
 * buffers are freed on the way out.
 *
 * NOTE(review): which path runs is presumably selected by tx; the branch
 * statement itself is not visible here.
 */
270 static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
271 struct msg_stats *s, bool tx,
272 struct sockmap_options *opt)
274 struct msghdr msg = {0};
275 int err, i, flags = MSG_NOSIGNAL;
278 bool data_test = opt->data_test;
279 bool drop = opt->drop_expected;
281 iov = calloc(iov_count, sizeof(struct iovec));
286 for (i = 0; i < iov_count; i++) {
287 unsigned char *d = calloc(iov_length, sizeof(char));
290 fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
294 iov[i].iov_len = iov_length;
/* A data pattern is only written into the buffers on the sending side. */
296 if (data_test && tx) {
299 for (j = 0; j < iov_length; j++)
305 msg.msg_iovlen = iov_count;
309 clock_gettime(CLOCK_MONOTONIC, &s->start);
310 for (i = 0; i < cnt; i++) {
311 int sent = sendmsg(fd, &msg, flags);
/* With drop expected, a successful send is the error case. */
313 if (!drop && sent < 0) {
314 perror("send loop error:");
316 } else if (drop && sent >= 0) {
317 printf("send loop error expected: %i\n", sent);
322 s->bytes_sent += sent;
324 clock_gettime(CLOCK_MONOTONIC, &s->end);
326 int slct, recv, max_fd = fd;
327 struct timeval timeout;
/* NOTE(review): the byte target is computed in float, which cannot represent
 * every large integer exactly - confirm this is acceptable for big runs.
 */
331 total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
332 err = clock_gettime(CLOCK_MONOTONIC, &s->start);
334 perror("recv start time: ");
335 while (s->bytes_recvd < total_bytes) {
343 slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
346 clock_gettime(CLOCK_MONOTONIC, &s->end);
349 fprintf(stderr, "unexpected timeout\n");
351 clock_gettime(CLOCK_MONOTONIC, &s->end);
355 recv = recvmsg(fd, &msg, flags);
357 if (errno != EWOULDBLOCK) {
358 clock_gettime(CLOCK_MONOTONIC, &s->end);
/* perror() appends ": <error text>\n" itself, so no newline in the prefix. */
359 perror("recv failed()");
364 s->bytes_recvd += recv;
369 for (i = 0; i < msg.msg_iovlen; i++) {
370 unsigned char *d = iov[i].iov_base;
373 j < iov[i].iov_len && recv; j++) {
377 "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
/* Only peek at the following byte when it is still inside this buffer;
 * at the last index 0 is printed instead of reading past the allocation.
 */
378 i, j, d[j], k - 1, j + 1 < iov[i].iov_len ? d[j+1] : 0, k + 1);
386 clock_gettime(CLOCK_MONOTONIC, &s->end);
389 for (i = 0; i < iov_count; i++)
390 free(iov[i].iov_base);
394 for (i = 0; i < iov_count; i++)
395 free(iov[i].iov_base);
/* Divisor used by sendmsg_test() to turn a bytes-per-second figure into the
 * GB/s figure it prints (decimal giga, 10^9).
 */
400 static float giga = 1000000000;
/* Bytes sent divided by the elapsed time between s.start and s.end.
 * Only the tv_sec members are used, so the elapsed time is in whole seconds
 * and tv_nsec is ignored. The call sites in sendmsg_test() check that the
 * tv_sec difference is non-zero before calling.
 */
402 static inline float sentBps(struct msg_stats s)
404 return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
/* Bytes received divided by the elapsed time between s.start and s.end.
 * Only the tv_sec members are used, so the elapsed time is in whole seconds
 * and tv_nsec is ignored. The call sites in sendmsg_test() check that the
 * tv_sec difference is non-zero before calling.
 */
407 static inline float recvdBps(struct msg_stats s)
409 return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
/* Run one send/receive throughput test.
 *
 * The visible code runs a receive loop (msg_loop() with tx == false on rx_fd)
 * and a send loop on c1 (msg_loop_sendpage() or msg_loop()), each of which
 * prints its parameters and, when at least one whole second elapsed, the
 * B/s and GB/s figures. rxpid and txpid identify two child processes that
 * are reaped with waitpid() at the end.
 *
 * The waitpid() calls are made outside assert() so that the children are
 * still reaped when NDEBUG compiles the assertions out.
 */
412 static int sendmsg_test(int iov_count, int iov_buf, int cnt,
413 struct sockmap_options *opt)
415 float sent_Bps = 0, recvd_Bps = 0;
416 int rx_fd, txpid, rxpid, reaped, err = 0;
417 struct msg_stats s = {0};
429 if (opt->drop_expected)
434 err = msg_loop(rx_fd, iov_count, iov_buf,
435 cnt, &s, false, opt);
438 "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
439 iov_count, iov_buf, cnt, err);
440 shutdown(p2, SHUT_RDWR);
441 shutdown(p1, SHUT_RDWR);
/* Rates are only computed when the whole-second difference is non-zero,
 * which also keeps sentBps()/recvdBps() from dividing by zero.
 */
442 if (s.end.tv_sec - s.start.tv_sec) {
443 sent_Bps = sentBps(s);
444 recvd_Bps = recvdBps(s);
447 "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
448 s.bytes_sent, sent_Bps, sent_Bps/giga,
449 s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
451 } else if (rxpid == -1) {
452 perror("msg_loop_rx: ");
459 err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
461 err = msg_loop(c1, iov_count, iov_buf,
466 "msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
467 iov_count, iov_buf, cnt, err);
468 shutdown(c1, SHUT_RDWR);
469 if (s.end.tv_sec - s.start.tv_sec) {
470 sent_Bps = sentBps(s);
471 recvd_Bps = recvdBps(s);
474 "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
475 s.bytes_sent, sent_Bps, sent_Bps/giga,
476 s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
478 } else if (txpid == -1) {
479 perror("msg_loop_tx: ");
/* Reap both children; the result is asserted separately from the call. */
483 reaped = waitpid(rxpid, &status, 0);
assert(reaped == rxpid);
484 reaped = waitpid(txpid, &status, 0);
assert(reaped == txpid);
/* Keeps NDEBUG builds free of a set-but-unused warning. */
(void)reaped;
/* Ping-pong test: send one 1024-byte zeroed buffer on c1, then repeatedly
 * select() on descriptors up to p2 and, for each ready descriptor, recv()
 * into buf and send the received bytes back on that same descriptor.
 *
 * Failures are reported with perror(). perror() appends ": <error text>" and
 * a newline itself, so the prefixes carry no trailing "\n".
 */
488 static int forever_ping_pong(int rate, struct sockmap_options *opt)
490 struct timeval timeout;
491 char buf[1024] = {0};
497 /* Ping/Pong data from client to server */
498 sc = send(c1, buf, sizeof(buf), 0);
500 perror("send failed()");
/* p2 is used as the highest descriptor handed to select(). */
505 int s, rc, i, max_fd = p2;
515 s = select(max_fd + 1, &w, NULL, NULL, &timeout);
520 fprintf(stderr, "unexpected timeout\n");
/* Scan every descriptor up to max_fd while select() reported ready ones. */
524 for (i = 0; i <= max_fd && s > 0; ++i) {
525 if (!FD_ISSET(i, &w))
530 rc = recv(i, buf, sizeof(buf), 0);
532 if (errno != EWOULDBLOCK) {
533 perror("recv failed()");
/* Echo exactly the rc bytes that were just received. */
543 sc = send(i, buf, rc, 0);
545 perror("send failed()");
/* Program entry point.
 *
 * Parses the command line with getopt_long() and long_options, raises
 * RLIMIT_MEMLOCK, loads the BPF object "<argv[0]>_kern.o", attaches the
 * sockmap and cgroup programs (skipped for the BASE and BASE_SENDPAGE tests),
 * creates the test sockets, attaches the selected txmsg program, fills the
 * auxiliary maps and finally runs the chosen test before detaching the
 * cgroup program.
 *
 * The cgroup path is opened with O_DIRECTORY | O_RDONLY as the flags
 * argument; the mode argument of open() is only meaningful with O_CREAT.
 */
571 int main(int argc, char **argv)
573 int iov_count = 1, length = 1024, rate = 1, tx_prog_fd;
/* Soft limit 10 MiB, hard limit unlimited; applied to RLIMIT_MEMLOCK below. */
574 struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
575 int opt, longindex, err, cg_fd = 0;
576 struct sockmap_options options = {0};
577 int test = PING_PONG;
580 while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
581 long_options, &longindex)) != -1) {
584 txmsg_start = atoi(optarg);
587 txmsg_end = atoi(optarg);
590 txmsg_apply = atoi(optarg);
593 txmsg_cork = atoi(optarg);
596 cg_fd = open(optarg, O_DIRECTORY | O_RDONLY);
599 "ERROR: (%i) open cg path failed: %s\n",
611 iov_count = atoi(optarg);
614 length = atoi(optarg);
617 options.data_test = true;
/* Map the --test argument onto one of the test identifiers. */
620 if (strcmp(optarg, "ping") == 0) {
622 } else if (strcmp(optarg, "sendmsg") == 0) {
624 } else if (strcmp(optarg, "base") == 0) {
626 } else if (strcmp(optarg, "base_sendpage") == 0) {
627 test = BASE_SENDPAGE;
628 } else if (strcmp(optarg, "sendpage") == 0) {
645 fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
650 if (setrlimit(RLIMIT_MEMLOCK, &r)) {
651 perror("setrlimit(RLIMIT_MEMLOCK)");
/* The BPF object name is derived from the program's own invocation name. */
655 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
660 signal(SIGINT, running_handler);
662 if (load_bpf_file(filename)) {
663 fprintf(stderr, "load_bpf_file: (%s) %s\n",
664 filename, strerror(errno));
668 /* If base test skip BPF setup */
669 if (test == BASE || test == BASE_SENDPAGE)
672 /* Attach programs to sockmap */
673 err = bpf_prog_attach(prog_fd[0], map_fd[0],
674 BPF_SK_SKB_STREAM_PARSER, 0);
676 fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
677 err, strerror(errno));
681 err = bpf_prog_attach(prog_fd[1], map_fd[0],
682 BPF_SK_SKB_STREAM_VERDICT, 0);
684 fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
685 err, strerror(errno));
689 /* Attach to cgroups */
690 err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
692 fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
693 err, strerror(errno));
698 err = sockmap_init_sockets();
700 fprintf(stderr, "ERROR: test socket failed: %d\n", err);
704 /* Attach txmsg program to sockmap */
/* One program index is chosen per txmsg_* flag via an if/else-if chain. */
706 tx_prog_fd = prog_fd[3];
707 else if (txmsg_noisy)
708 tx_prog_fd = prog_fd[4];
709 else if (txmsg_redir)
710 tx_prog_fd = prog_fd[5];
711 else if (txmsg_redir_noisy)
712 tx_prog_fd = prog_fd[6];
714 tx_prog_fd = prog_fd[9];
715 /* apply and cork must be last */
716 else if (txmsg_apply)
717 tx_prog_fd = prog_fd[7];
719 tx_prog_fd = prog_fd[8];
726 err = bpf_prog_attach(tx_prog_fd,
727 map_fd[1], BPF_SK_MSG_VERDICT, 0);
730 "ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
731 err, strerror(errno));
/* c1 is stored in the map the txmsg program was attached to. */
735 err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
738 "ERROR: bpf_map_update_elem (txmsg): %d (%s)\n",
739 err, strerror(errno));
743 if (txmsg_redir || txmsg_redir_noisy)
748 err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
751 "ERROR: bpf_map_update_elem (txmsg): %d (%s)\n",
752 err, strerror(errno));
757 err = bpf_map_update_elem(map_fd[3],
758 &i, &txmsg_apply, BPF_ANY);
761 "ERROR: bpf_map_update_elem (apply_bytes): %d (%s)\n",
762 err, strerror(errno));
768 err = bpf_map_update_elem(map_fd[4],
769 &i, &txmsg_cork, BPF_ANY);
772 "ERROR: bpf_map_update_elem (cork_bytes): %d (%s)\n",
773 err, strerror(errno));
/* NOTE(review): txmsg_start and txmsg_end are both written to map_fd[5];
 * presumably the key i differs between the two calls - confirm.
 */
779 err = bpf_map_update_elem(map_fd[5],
780 &i, &txmsg_start, BPF_ANY);
783 "ERROR: bpf_map_update_elem (txmsg_start): %d (%s)\n",
784 err, strerror(errno));
791 err = bpf_map_update_elem(map_fd[5],
792 &i, &txmsg_end, BPF_ANY);
795 "ERROR: bpf_map_update_elem (txmsg_end): %d (%s)\n",
796 err, strerror(errno));
802 int in = BPF_F_INGRESS;
805 err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
808 "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
809 err, strerror(errno));
812 err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
815 "ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
816 err, strerror(errno));
818 err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
821 "ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
822 err, strerror(errno));
826 err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
829 "ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
830 err, strerror(errno));
/* The socket stored in map_fd[0] depends on the test: p2 for the
 * sendmsg/sendpage tests, p1 otherwise.
 */
835 int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1;
836 int ingress = BPF_F_INGRESS;
839 err = bpf_map_update_elem(map_fd[7], &i, &ingress, BPF_ANY);
842 "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
843 err, strerror(errno));
847 err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
850 "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
851 err, strerror(errno));
857 options.drop_expected = true;
/* Dispatch on the selected test; the sendmsg_test() variants differ only in
 * the base/sendpage option flags.
 */
859 if (test == PING_PONG)
860 err = forever_ping_pong(rate, &options);
861 else if (test == SENDMSG) {
862 options.base = false;
863 options.sendpage = false;
864 err = sendmsg_test(iov_count, length, rate, &options);
865 } else if (test == SENDPAGE) {
866 options.base = false;
867 options.sendpage = true;
868 err = sendmsg_test(iov_count, length, rate, &options);
869 } else if (test == BASE) {
871 options.sendpage = false;
872 err = sendmsg_test(iov_count, length, rate, &options);
873 } else if (test == BASE_SENDPAGE) {
875 options.sendpage = true;
876 err = sendmsg_test(iov_count, length, rate, &options);
878 fprintf(stderr, "unknown test\n");
880 bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
891 void running_handler(int a)