+++ /dev/null
-# List of programs to build
-# A single host (user-space) program named "sockmap".
-hostprogs-y := sockmap
-
-# Libbpf dependencies
-# Object files from tools/lib/bpf that are linked into the sockmap binary
-# through sockmap-objs below.
-LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
-
-# Header search paths for the host compiler.
-# NOTE(review): -I$(srctree)/tools/lib/ is added twice (second and fourth
-# line); harmless, but one of them looks redundant.
-HOSTCFLAGS += -I$(objtree)/usr/include
-HOSTCFLAGS += -I$(srctree)/tools/lib/
-HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
-HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
-HOSTCFLAGS += -I$(srctree)/tools/perf
-
-# Objects that make up the sockmap binary: the shared BPF loader from the
-# sibling ../bpf directory, the libbpf objects, and this sample's user code.
-sockmap-objs := ../bpf/bpf_load.o $(LIBBPF) sockmap_user.o
-
-# Tell kbuild to always build the programs
-# (the host binary plus the BPF object sockmap_kern.o)
-always := $(hostprogs-y)
-always += sockmap_kern.o
-
-# Extra libraries for linking sockmap.
-HOSTLOADLIBES_sockmap += -lelf -lpthread
-
-# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
-# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
-# ?= keeps any value already supplied by the environment or command line.
-LLC ?= llc
-CLANG ?= clang
-
-# Trick to allow make to be run from this directory
-# Both targets re-invoke make two directories up (../../) and point it back
-# at this directory through $(CURDIR).
-all:
- $(MAKE) -C ../../ $(CURDIR)/
-
-# In addition to the delegated clean, remove editor backup files (*~) from
-# the directory the recipe runs in.
-clean:
- $(MAKE) -C ../../ M=$(CURDIR) clean
- @rm -f *~
-
-# NOTE(review): nothing else visible in this Makefile references the
-# syscall_nrs files; presumably these rules were carried over from a sibling
-# Makefile - confirm they are still needed here.
-# Step 1: produce assembly from syscall_nrs.c.
-$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c
- $(call if_changed_dep,cc_s_c)
-
-# Step 2: generate the header from that assembly; the FORCE prerequisite
-# makes the recipe run on every build.
-$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE
- $(call filechk,offsets,__SYSCALL_NRS_H__)
-
-clean-files += syscall_nrs.h
-
-# Empty target used as an always-out-of-date prerequisite.
-FORCE:
-
-# Verify LLVM compiler tools are available and bpf target is supported by llc
-# $(CLANG) and $(LLC) are listed as phony so make treats the tool names as
-# plain prerequisites instead of looking for files with those names.
-.PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC)
-
-# Fail with exit status 1 if any tool in the prerequisite list ($^) cannot
-# be located with `which`.
-verify_cmds: $(CLANG) $(LLC)
- @for TOOL in $^ ; do \
- if ! (which -- "$${TOOL}" > /dev/null 2>&1); then \
- echo "*** ERROR: Cannot find LLVM tool $${TOOL}" ;\
- exit 1; \
- else true; fi; \
- done
-
-# Fail with exit status 2 if `llc -march=bpf -mattr=help` does not succeed,
-# i.e. the installed llc has no bpf backend.
-verify_target_bpf: verify_cmds
- @if ! (${LLC} -march=bpf -mattr=help > /dev/null 2>&1); then \
- echo "*** ERROR: LLVM (${LLC}) does not support 'bpf' target" ;\
- echo " NOTICE: LLVM version >= 3.7.1 required" ;\
- exit 2; \
- else true; fi
-
-# Make every C source in this directory depend on the tool check above.
-$(src)/*.c: verify_target_bpf
-
-# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
-# But, there is no easy way to fix it, so just exclude it since it is
-# useless for BPF samples.
-#
-# Pattern rule for BPF objects: clang compiles the C source to LLVM bitcode
-# written to stdout (-emit-llvm -c ... -o -), which is piped into llc to emit
-# the final object for the bpf target. Defining __ASM_SYSREG_H on the command
-# line is what excludes the header mentioned above.
-$(obj)/%.o: $(src)/%.c
- $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
- -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
- -Wno-compare-distinct-pointer-types \
- -Wno-gnu-variable-sized-type-not-at-end \
- -Wno-address-of-packed-member -Wno-tautological-compare \
- -Wno-unknown-warning-option \
- -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
+++ /dev/null
-/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/if_packet.h>
-#include <uapi/linux/ip.h>
-#include "../../tools/testing/selftests/bpf/bpf_helpers.h"
-#include "../../tools/testing/selftests/bpf/bpf_endian.h"
-
-/* Sockmap sample program connects a client and a backend together
- * using cgroups.
- *
- * client:X <---> frontend:80 client:X <---> backend:80
- *
- * For simplicity we hard code values here and bind 1:1. The hard
- * coded values are part of the setup in sockmap.sh script that
- * is associated with this BPF program.
- *
- * The bpf_printk is verbose and prints information as connections
- * are established and verdicts are decided.
- */
-
-/* bpf_printk - printf-style convenience wrapper around bpf_trace_printk().
- *
- * The format string is first copied into a local char array; that array and
- * its size (sizeof, so the terminating NUL is included) are then passed to
- * bpf_trace_printk() followed by the optional variadic arguments.
- */
-#define bpf_printk(fmt, ...) \
-({ \
- char ____fmt[] = fmt; \
- bpf_trace_printk(____fmt, sizeof(____fmt), \
- ##__VA_ARGS__); \
-})
-
-/* Sockmap updated by the sockops program (bpf_sockmap) and used as the
- * redirect target map by the sk_skb2 program (bpf_prog2).
- */
-struct bpf_map_def SEC("maps") sock_map = {
- .type = BPF_MAP_TYPE_SOCKMAP,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 20,
-};
-
-/* NOTE(review): no program in this file references sock_map_txmsg;
- * presumably it is populated from user space - confirm.
- */
-struct bpf_map_def SEC("maps") sock_map_txmsg = {
- .type = BPF_MAP_TYPE_SOCKMAP,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 20,
-};
-
-/* Sockmap used as the target of bpf_msg_redirect_map() in the sk_msg3 and
- * sk_msg4 programs.
- */
-struct bpf_map_def SEC("maps") sock_map_redir = {
- .type = BPF_MAP_TYPE_SOCKMAP,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 20,
-};
-
-/* Single-slot array: entry 0 is the byte count handed to
- * bpf_msg_apply_bytes() by the sk_msg programs.
- */
-struct bpf_map_def SEC("maps") sock_apply_bytes = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 1
-};
-
-/* Single-slot array: entry 0 is the byte count handed to
- * bpf_msg_cork_bytes() by the sk_msg programs.
- */
-struct bpf_map_def SEC("maps") sock_cork_bytes = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 1
-};
-
-/* Two-slot array: entry 0 and entry 1 are passed as the start and end
- * arguments of bpf_msg_pull_data() by the sk_msg programs.
- */
-struct bpf_map_def SEC("maps") sock_pull_bytes = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 2
-};
-
-/* Single-slot array: a non-zero entry 0 is used as the flags argument of
- * bpf_msg_redirect_map() and switches the redirect key to 2.
- */
-struct bpf_map_def SEC("maps") sock_redir_flags = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 1
-};
-
-/* Single-slot array: a non-zero entry 0 is used as the flags argument of
- * bpf_sk_redirect_map() in bpf_prog2 and switches the redirect key to 3.
- */
-struct bpf_map_def SEC("maps") sock_skb_opts = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .max_entries = 1
-};
-
-/* sk_skb1: hand back the complete length of the skb (skb->len). */
-SEC("sk_skb1")
-int bpf_prog1(struct __sk_buff *skb)
-{
-	int whole_skb = skb->len;
-
-	return whole_skb;
-}
-
-/* sk_skb2: redirect the skb to a socket stored in sock_map.
- *
- * The map key is 10 when the skb's local port is 10000 and 1 otherwise.
- * When entry 0 of sock_skb_opts is non-zero the key becomes 3 and that
- * value is passed on as the redirect flags. The data length and the flags
- * are logged through bpf_printk before redirecting.
- *
- * Returns the result of bpf_sk_redirect_map().
- */
-SEC("sk_skb2")
-int bpf_prog2(struct __sk_buff *skb)
-{
-	__u32 lport = skb->local_port;
-	int len, *f, ret, zero = 0;
-	__u64 flags = 0;
-
-	if (lport == 10000)
-		ret = 10;
-	else
-		ret = 1;
-
-	len = (__u32)skb->data_end - (__u32)skb->data;
-	f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
-	if (f && *f) {
-		/* user-supplied flags select a different target slot */
-		ret = 3;
-		flags = *f;
-	}
-
-	bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
-		   len, flags);
-	return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
-}
-
-/* sockops: add newly established connections to sock_map.
- *
- * - Passive (accepted) connection whose local port is 10000: stored under
- *   key 1.
- * - Active (initiated) connection whose remote port, after bpf_ntohl(), is
- *   10001: stored under key 10.
- *
- * Both updates use BPF_NOEXIST and log the result through bpf_printk.
- * Every other callback type is ignored. Always returns 0.
- */
-SEC("sockops")
-int bpf_sockmap(struct bpf_sock_ops *skops)
-{
-	__u32 lport, rport;
-	int op, err = 0, ret;
-
-	op = (int) skops->op;
-
-	switch (op) {
-	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
-		lport = skops->local_port;
-		rport = skops->remote_port;
-
-		if (lport == 10000) {
-			ret = 1;
-			err = bpf_sock_map_update(skops, &sock_map, &ret,
-						  BPF_NOEXIST);
-			bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
-				   lport, bpf_ntohl(rport), err);
-		}
-		break;
-	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
-		lport = skops->local_port;
-		rport = skops->remote_port;
-
-		if (bpf_ntohl(rport) == 10001) {
-			ret = 10;
-			err = bpf_sock_map_update(skops, &sock_map, &ret,
-						  BPF_NOEXIST);
-			bpf_printk("active(%i -> %i) map ctx update err: %d\n",
-				   lport, bpf_ntohl(rport), err);
-		}
-		break;
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-/* sk_msg1: apply the user-configured apply/cork/pull settings to the
- * message (each only when its map lookup succeeds) and let it pass.
- * Helper return values are deliberately not inspected.
- */
-SEC("sk_msg1")
-int bpf_prog4(struct sk_msg_md *msg)
-{
-	int idx0 = 0, idx1 = 1;
-	int *apply, *cork, *pull_start, *pull_end;
-
-	apply = bpf_map_lookup_elem(&sock_apply_bytes, &idx0);
-	if (apply)
-		bpf_msg_apply_bytes(msg, *apply);
-
-	cork = bpf_map_lookup_elem(&sock_cork_bytes, &idx0);
-	if (cork)
-		bpf_msg_cork_bytes(msg, *cork);
-
-	pull_start = bpf_map_lookup_elem(&sock_pull_bytes, &idx0);
-	pull_end = bpf_map_lookup_elem(&sock_pull_bytes, &idx1);
-	if (pull_start && pull_end)
-		bpf_msg_pull_data(msg, *pull_start, *pull_end, 0);
-
-	return SK_PASS;
-}
-
-/* sk_msg2: same processing as sk_msg1, but the outcome of every step is
- * logged through bpf_printk. The message is always passed.
- */
-SEC("sk_msg2")
-int bpf_prog5(struct sk_msg_md *msg)
-{
-	int apply_err = -1, cork_err = -1;
-	int idx0 = 0, idx1 = 1;
-	int *val, *pull_start, *pull_end;
-	int len_before, len_after;
-
-	val = bpf_map_lookup_elem(&sock_apply_bytes, &idx0);
-	if (val)
-		apply_err = bpf_msg_apply_bytes(msg, *val);
-
-	val = bpf_map_lookup_elem(&sock_cork_bytes, &idx0);
-	if (val)
-		cork_err = bpf_msg_cork_bytes(msg, *val);
-
-	/* length as seen before any pull_data call */
-	len_before = (__u64)msg->data_end - (__u64)msg->data;
-
-	pull_start = bpf_map_lookup_elem(&sock_pull_bytes, &idx0);
-	pull_end = bpf_map_lookup_elem(&sock_pull_bytes, &idx1);
-	if (pull_start && pull_end) {
-		int pull_err;
-
-		bpf_printk("sk_msg2: pull(%i:%i)\n",
-			   *pull_start, *pull_end);
-		pull_err = bpf_msg_pull_data(msg, *pull_start, *pull_end, 0);
-		if (pull_err)
-			bpf_printk("sk_msg2: pull_data err %i\n",
-				   pull_err);
-		len_after = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg2: length update %i->%i\n",
-			   len_before, len_after);
-	}
-
-	bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
-		   len_before, apply_err, cork_err);
-	return SK_PASS;
-}
-
-/* sk_msg3: apply the configured apply/cork/pull settings, then redirect
- * the message through sock_map_redir. The redirect key is 0, or 2 when
- * entry 0 of sock_redir_flags is non-zero (that value is then also used as
- * the flags). Returns the result of bpf_msg_redirect_map().
- */
-SEC("sk_msg3")
-int bpf_prog6(struct sk_msg_md *msg)
-{
-	int idx0 = 0, idx1 = 1, redir_key = 0;
-	int *apply, *cork, *pull_start, *pull_end, *redir;
-	__u64 redir_flags = 0;
-
-	apply = bpf_map_lookup_elem(&sock_apply_bytes, &idx0);
-	if (apply)
-		bpf_msg_apply_bytes(msg, *apply);
-
-	cork = bpf_map_lookup_elem(&sock_cork_bytes, &idx0);
-	if (cork)
-		bpf_msg_cork_bytes(msg, *cork);
-
-	pull_start = bpf_map_lookup_elem(&sock_pull_bytes, &idx0);
-	pull_end = bpf_map_lookup_elem(&sock_pull_bytes, &idx1);
-	if (pull_start && pull_end)
-		bpf_msg_pull_data(msg, *pull_start, *pull_end, 0);
-
-	redir = bpf_map_lookup_elem(&sock_redir_flags, &idx0);
-	if (redir && *redir) {
-		redir_key = 2;
-		redir_flags = *redir;
-	}
-
-	return bpf_msg_redirect_map(msg, &sock_map_redir, redir_key,
-				    redir_flags);
-}
-
-/* sk_msg4: verbose variant of the redirect program.
- *
- * Applies the configured apply/cork/pull settings, logging each step, then
- * redirects the message through sock_map_redir (key 0, or key 2 with the
- * flags taken from sock_redir_flags[0] when that entry is non-zero).
- *
- * All trace lines are prefixed with "sk_msg4:" so they can be attributed to
- * this program; they previously carried prefixes copied from the sk_msg2
- * and sk_msg3 programs.
- *
- * Returns the result of bpf_msg_redirect_map().
- */
-SEC("sk_msg4")
-int bpf_prog7(struct sk_msg_md *msg)
-{
-	int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
-	int *f, *bytes, *start, *end, len1, len2;
-	__u64 flags = 0;
-	int err;
-
-	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-	if (bytes)
-		err1 = bpf_msg_apply_bytes(msg, *bytes);
-	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-	if (bytes)
-		err2 = bpf_msg_cork_bytes(msg, *bytes);
-	/* length before any pull_data call */
-	len1 = (__u64)msg->data_end - (__u64)msg->data;
-	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
-	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
-	if (start && end) {
-		bpf_printk("sk_msg4: pull(%i:%i)\n",
-			   start ? *start : 0, end ? *end : 0);
-		err = bpf_msg_pull_data(msg, *start, *end, 0);
-		if (err)
-			bpf_printk("sk_msg4: pull_data err %i\n",
-				   err);
-		len2 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg4: length update %i->%i\n",
-			   len1, len2);
-	}
-	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
-	if (f && *f) {
-		key = 2;
-		flags = *f;
-	}
-	bpf_printk("sk_msg4: redirect(%iB) flags=%i err=%i\n",
-		   len1, flags, err1 ? err1 : err2);
-	err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
-	bpf_printk("sk_msg4: err %i\n", err);
-	return err;
-}
-
-/* sk_msg5: pass the message only if an apply-bytes value is configured and
- * bpf_msg_apply_bytes() accepts it; drop it in every other case.
- */
-SEC("sk_msg5")
-int bpf_prog8(struct sk_msg_md *msg)
-{
-	int *bytes, zero = 0;
-
-	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-	if (!bytes)
-		return SK_DROP;
-
-	if (bpf_msg_apply_bytes(msg, *bytes))
-		return SK_DROP;
-
-	return SK_PASS;
-}
-/* sk_msg6: cork the message when less data than the configured cork size
- * is currently available.
- *
- * Passes when no cork value is configured, when the visible data already
- * covers the cork size, or when bpf_msg_cork_bytes() succeeds; drops only
- * when bpf_msg_cork_bytes() reports an error.
- */
-SEC("sk_msg6")
-int bpf_prog9(struct sk_msg_md *msg)
-{
-	void *data_end = (void *)(long) msg->data_end;
-	void *data = (void *)(long) msg->data;
-	int zero = 0, *cork;
-
-	cork = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-	if (!cork)
-		return SK_PASS;
-
-	if (((__u64)data_end - (__u64)data) >= *cork)
-		return SK_PASS;
-
-	if (bpf_msg_cork_bytes(msg, *cork))
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-/* sk_msg7: run the same apply/cork/pull steps as sk_msg1, then drop the
- * message unconditionally.
- */
-SEC("sk_msg7")
-int bpf_prog10(struct sk_msg_md *msg)
-{
-	int idx0 = 0, idx1 = 1;
-	int *apply, *cork, *pull_start, *pull_end;
-
-	apply = bpf_map_lookup_elem(&sock_apply_bytes, &idx0);
-	if (apply)
-		bpf_msg_apply_bytes(msg, *apply);
-
-	cork = bpf_map_lookup_elem(&sock_cork_bytes, &idx0);
-	if (cork)
-		bpf_msg_cork_bytes(msg, *cork);
-
-	pull_start = bpf_map_lookup_elem(&sock_pull_bytes, &idx0);
-	pull_end = bpf_map_lookup_elem(&sock_pull_bytes, &idx1);
-	if (pull_start && pull_end)
-		bpf_msg_pull_data(msg, *pull_start, *pull_end, 0);
-
-	return SK_DROP;
-}
-
-
-/* License string, placed in the "license" ELF section. */
-char _license[] SEC("license") = "GPL";
+++ /dev/null
-#Test a bunch of positive cases to verify basic functionality
-for prog in "--txmsg_redir --txmsg_skb" "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
-for t in "sendmsg" "sendpage"; do
-for r in 1 10 100; do
- for i in 1 10 100; do
- for l in 1 10 100; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
- done
- done
-done
-done
-done
-
-#Test max iov
-t="sendmsg"
-r=1
-i=1024
-l=1
-prog="--txmsg"
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-sleep 2
-prog="--txmsg_redir"
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-
-# Test max iov with 1k send
-
-t="sendmsg"
-r=1
-i=1024
-l=1024
-prog="--txmsg"
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-sleep 2
-prog="--txmsg_redir"
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-sleep 2
-
-# Test apply with 1B
-r=1
-i=1024
-l=1024
-prog="--txmsg_apply 1"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test apply with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test apply with apply that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test apply and redirect with 1B
-r=1
-i=1024
-l=1024
-prog="--txmsg_redir --txmsg_apply 1"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 1 --txmsg_ingress"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 1 --txmsg_skb"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-
-# Test apply and redirect with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_redir --txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 2048 --txmsg_ingress"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 2048 --txmsg_skb"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-
-# Test apply and redirect with apply that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test cork with 1B not really useful but test it anyways
-r=1
-i=1024
-l=1024
-prog="--txmsg_cork 1"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test cork with a more reasonable 100B
-r=1
-i=1000
-l=1000
-prog="--txmsg_cork 100"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test cork with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test cork with cork that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-r=1
-i=1024
-l=1024
-prog="--txmsg_redir --txmsg_cork 1"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test cork with a more reasonable 100B
-r=1
-i=1000
-l=1000
-prog="--txmsg_redir --txmsg_cork 100"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test cork with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_redir --txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test cork with cork that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-
-# mix and match cork and apply not really useful but valid programs
-
-# Test apply < cork
-r=100
-i=1
-l=5
-prog="--txmsg_apply 10 --txmsg_cork 100"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Try again with larger sizes so we hit overflow case
-r=100
-i=1000
-l=2048
-prog="--txmsg_apply 4096 --txmsg_cork 8096"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test apply > cork
-r=100
-i=1
-l=5
-prog="--txmsg_apply 100 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Again with larger sizes so we hit overflow cases
-r=100
-i=1000
-l=2048
-prog="--txmsg_apply 8096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-
-# Test apply = cork
-r=100
-i=1
-l=5
-prog="--txmsg_apply 10 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-r=100
-i=1000
-l=2048
-prog="--txmsg_apply 4096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test apply < cork
-r=100
-i=1
-l=5
-prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 100"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Try again with larger sizes so we hit overflow case
-r=100
-i=1000
-l=2048
-prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 8096"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Test apply > cork
-r=100
-i=1
-l=5
-prog="--txmsg_redir --txmsg_apply 100 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Again with larger sizes so we hit overflow cases
-r=100
-i=1000
-l=2048
-prog="--txmsg_redir --txmsg_apply 8096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-
-# Test apply = cork
-r=100
-i=1
-l=5
-prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-r=100
-i=1000
-l=2048
-prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
- echo $TEST
- $TEST
- sleep 2
-done
-
-# Tests for bpf_msg_pull_data()
-for i in `seq 99 100 1600`; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
- --txmsg --txmsg_start 0 --txmsg_end $i --txmsg_cork 1600"
- echo $TEST
- $TEST
- sleep 2
-done
-
-for i in `seq 199 100 1600`; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
- --txmsg --txmsg_start 100 --txmsg_end $i --txmsg_cork 1600"
- echo $TEST
- $TEST
- sleep 2
-done
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
- --txmsg --txmsg_start 1500 --txmsg_end 1600 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
- --txmsg --txmsg_start 1111 --txmsg_end 1112 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
- --txmsg --txmsg_start 1111 --txmsg_end 0 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
- --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
- --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1602"
-echo $TEST
-$TEST
-sleep 2
-
-# Run through gamut again with start and end
-for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
-for t in "sendmsg" "sendpage"; do
-for r in 1 10 100; do
- for i in 1 10 100; do
- for l in 1 10 100; do
- TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog --txmsg_start 1 --txmsg_end 2"
- echo $TEST
- $TEST
- sleep 2
- done
- done
-done
-done
-done
-
-# Some specific tests to cover specific code paths
-./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
- -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
-./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
- -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
-./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
- -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
-./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
- -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
+++ /dev/null
-/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <sys/select.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <string.h>
-#include <errno.h>
-#include <sys/ioctl.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <fcntl.h>
-#include <sys/wait.h>
-#include <time.h>
-
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <sys/types.h>
-#include <sys/sendfile.h>
-
-#include <linux/netlink.h>
-#include <linux/socket.h>
-#include <linux/sock_diag.h>
-#include <linux/bpf.h>
-#include <linux/if_link.h>
-#include <assert.h>
-#include <libgen.h>
-
-#include <getopt.h>
-
-#include "../bpf/bpf_load.h"
-#include "../bpf/bpf_util.h"
-#include "../bpf/libbpf.h"
-
-/* NOTE(review): neither symbol is used in the part of the file visible
- * here; presumably a run flag and its signal handler - confirm against the
- * definition of running_handler().
- */
-int running;
-void running_handler(int a);
-
-/* randomly selected ports for testing on lo */
-#define S1_PORT 10000
-#define S2_PORT 10001
-
-/* global sockets */
-/* s1/s2: listening sockets bound to S1_PORT/S2_PORT on 127.0.0.1
- * c1/c2: client sockets connected to s1/s2
- * p1/p2: sockets returned by accept() on s1/s2
- */
-int s1, s2, c1, c2, p1, p2;
-
-/* Test selection. txmsg_pass, txmsg_noisy, txmsg_redir, txmsg_redir_noisy,
- * txmsg_drop, txmsg_ingress and txmsg_skb are set to 1 directly by
- * getopt_long() through the flag pointers in long_options.
- * txmsg_apply, txmsg_cork, txmsg_start and txmsg_end correspond to options
- * that take an argument; where they are stored is not visible here.
- */
-int txmsg_pass;
-int txmsg_noisy;
-int txmsg_redir;
-int txmsg_redir_noisy;
-int txmsg_drop;
-int txmsg_apply;
-int txmsg_cork;
-int txmsg_start;
-int txmsg_end;
-int txmsg_ingress;
-int txmsg_skb;
-
-/* Command line options for getopt_long().
- *
- * Entries with a NULL flag pointer return the short option character given
- * in the last column. Entries with a non-NULL flag pointer store 1 into the
- * referenced txmsg_* variable instead. The all-zero entry terminates the
- * table.
- */
-static const struct option long_options[] = {
- {"help", no_argument, NULL, 'h' },
- {"cgroup", required_argument, NULL, 'c' },
- {"rate", required_argument, NULL, 'r' },
- {"verbose", no_argument, NULL, 'v' },
- {"iov_count", required_argument, NULL, 'i' },
- {"length", required_argument, NULL, 'l' },
- {"test", required_argument, NULL, 't' },
- {"data_test", no_argument, NULL, 'd' },
- {"txmsg", no_argument, &txmsg_pass, 1 },
- {"txmsg_noisy", no_argument, &txmsg_noisy, 1 },
- {"txmsg_redir", no_argument, &txmsg_redir, 1 },
- {"txmsg_redir_noisy", no_argument, &txmsg_redir_noisy, 1},
- {"txmsg_drop", no_argument, &txmsg_drop, 1 },
- {"txmsg_apply", required_argument, NULL, 'a'},
- {"txmsg_cork", required_argument, NULL, 'k'},
- {"txmsg_start", required_argument, NULL, 's'},
- {"txmsg_end", required_argument, NULL, 'e'},
- {"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
- {"txmsg_skb", no_argument, &txmsg_skb, 1 },
- {0, 0, NULL, 0 }
-};
-
-/* Print a usage banner followed by one line per entry of long_options:
- * the long name, then either the current value behind the flag pointer or
- * the short option character.
- */
-static void usage(char *argv[])
-{
-	const struct option *o;
-
-	printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
-	printf(" options:\n");
-	for (o = long_options; o->name != 0; o++) {
-		printf(" --%-12s", o->name);
-		if (o->flag == NULL) {
-			printf(" -%c\n", o->val);
-			continue;
-		}
-		printf(" flag (internal value:%d)\n", *o->flag);
-	}
-	printf("\n");
-}
-
-/* Print @what through perror() and return the errno value that was current
- * on entry, so that later library calls cannot clobber the error code.
- */
-static int sockmap_report_errno(const char *what)
-{
-	int saved = errno;
-
-	perror(what);
-	return saved;
-}
-
-/* sockmap_init_sockets - create and connect the sockets used by the tests.
- *
- * Creates the listeners s1/s2 (SO_REUSEADDR, non-blocking) on 127.0.0.1
- * ports S1_PORT/S2_PORT, connects the clients c1 -> s1 and c2 -> s2, and
- * accepts the peer sockets p1/p2.
- *
- * Return: 0 on success, otherwise the errno value of the call that failed
- * (a diagnostic naming that call has been printed). Sockets created before
- * the failure are left open.
- */
-static int sockmap_init_sockets(void)
-{
-	int i, err, one = 1;
-	struct sockaddr_in addr;
-	int *fds[4] = {&s1, &s2, &c1, &c2};
-
-	s1 = s2 = p1 = p2 = c1 = c2 = 0;
-
-	/* Init sockets */
-	for (i = 0; i < 4; i++) {
-		*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
-		if (*fds[i] < 0)
-			return sockmap_report_errno("socket failed()");
-	}
-
-	/* Allow reuse (first two entries of fds[]: the listeners) */
-	for (i = 0; i < 2; i++) {
-		err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
-				 (char *)&one, sizeof(one));
-		if (err)
-			return sockmap_report_errno("setsockopt failed()");
-	}
-
-	/* Non-blocking sockets (listeners only) */
-	for (i = 0; i < 2; i++) {
-		err = ioctl(*fds[i], FIONBIO, (char *)&one);
-		if (err < 0)
-			return sockmap_report_errno("ioctl failed()");
-	}
-
-	/* Bind server sockets */
-	memset(&addr, 0, sizeof(struct sockaddr_in));
-	addr.sin_family = AF_INET;
-	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
-
-	addr.sin_port = htons(S1_PORT);
-	err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
-	if (err < 0)
-		return sockmap_report_errno("bind s1 failed()");
-
-	addr.sin_port = htons(S2_PORT);
-	err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
-	if (err < 0)
-		return sockmap_report_errno("bind s2 failed()");
-
-	/* Listen server sockets */
-	err = listen(s1, 32);
-	if (err < 0)
-		return sockmap_report_errno("listen s1 failed()");
-
-	err = listen(s2, 32);
-	if (err < 0)
-		return sockmap_report_errno("listen s2 failed()");
-
-	/* Initiate Connect */
-	addr.sin_port = htons(S1_PORT);
-	err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
-	if (err < 0 && errno != EINPROGRESS)
-		return sockmap_report_errno("connect c1 failed()");
-
-	addr.sin_port = htons(S2_PORT);
-	err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
-	if (err < 0 && errno != EINPROGRESS)
-		return sockmap_report_errno("connect c2 failed()");
-
-	/* Accept Connections */
-	p1 = accept(s1, NULL, NULL);
-	if (p1 < 0)
-		return sockmap_report_errno("accept s1 failed()");
-
-	p2 = accept(s2, NULL, NULL);
-	if (p2 < 0)
-		return sockmap_report_errno("accept s2 failed()");
-
-	printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
-	printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
-		c1, s1, c2, s2);
-	return 0;
-}
-
-/* Byte counters and timestamps for one send or receive loop. */
-struct msg_stats {
- size_t bytes_sent; /* sum of positive sendmsg()/sendfile() results */
- size_t bytes_recvd; /* sum of recvmsg() results */
- struct timespec start; /* CLOCK_MONOTONIC time taken before the loop */
- struct timespec end; /* CLOCK_MONOTONIC time taken after the loop */
-};
-
-/* Per-run settings passed down to the test loops. */
-struct sockmap_options {
- int verbose; /* non-zero: print progress output */
- bool base; /* sendmsg_test() receives on p1 when set, else on p2 */
- bool sendpage; /* transmit with sendfile() instead of sendmsg() */
- bool data_test; /* fill tx buffers with a byte pattern and verify on rx */
- bool drop_expected; /* sends are expected to fail; a success is the error */
-};
-
-/* msg_loop_sendpage - transmit test data with sendfile().
- * @fd:         socket to send on
- * @iov_length: number of bytes requested per sendfile() call
- * @cnt:        number of sendfile() calls
- * @s:          statistics; bytes_sent, start and end are updated
- * @opt:        only drop_expected is read
- *
- * Writes iov_length * cnt bytes of an incrementing byte pattern to the
- * scratch file ".sendpage_tst.tmp" in the current directory, then sends it
- * in @cnt chunks. No file offset is passed to sendfile(), so each call
- * continues at the descriptor's current position.
- *
- * Return: 0 on success; a negative value if a send fails (or succeeds
- * although a drop was expected) or if the scratch file cannot be created,
- * written or reopened.
- */
-static int msg_loop_sendpage(int fd, int iov_length, int cnt,
-			     struct msg_stats *s,
-			     struct sockmap_options *opt)
-{
-	bool drop = opt->drop_expected;
-	unsigned char k = 0;
-	FILE *file;
-	int i, fp;
-
-	file = fopen(".sendpage_tst.tmp", "w+");
-	if (!file) {
-		perror("create file for sendpage");
-		return -EIO;
-	}
-	for (i = 0; i < iov_length * cnt; i++, k++)
-		fwrite(&k, sizeof(char), 1, file);
-	/* fclose() flushes; a failure here means the data is incomplete */
-	if (fclose(file)) {
-		perror("write file for sendpage");
-		return -EIO;
-	}
-
-	fp = open(".sendpage_tst.tmp", O_RDONLY);
-	if (fp < 0) {
-		perror("reopen file for sendpage");
-		return -EIO;
-	}
-
-	clock_gettime(CLOCK_MONOTONIC, &s->start);
-	for (i = 0; i < cnt; i++) {
-		int sent = sendfile(fd, fp, NULL, iov_length);
-
-		if (!drop && sent < 0) {
-			perror("send loop error:");
-			close(fp);
-			return sent;
-		} else if (drop && sent >= 0) {
-			printf("sendpage loop error expected: %i\n", sent);
-			close(fp);
-			return -EIO;
-		}
-
-		if (sent > 0)
-			s->bytes_sent += sent;
-	}
-	clock_gettime(CLOCK_MONOTONIC, &s->end);
-	close(fp);
-	return 0;
-}
-
-/* msg_loop - send or receive test data with sendmsg()/recvmsg().
- * @fd:         socket to use
- * @iov_count:  number of iovec entries per message
- * @iov_length: size in bytes of each iovec buffer
- * @cnt:        number of messages to send (tx) or expect (rx)
- * @s:          statistics; byte counter and start/end times are updated
- * @tx:         true to transmit, false to receive
- * @opt:        data_test and drop_expected are read
- *
- * Transmit: sends @cnt messages. With data_test the buffers carry an
- * incrementing byte pattern. With drop_expected a successful send is
- * treated as the failure.
- *
- * Receive: waits with select() (1 second timeout per wait) and reads until
- * iov_count * iov_length * cnt bytes have arrived. With data_test every
- * received byte is compared against the incrementing pattern.
- *
- * Return: 0 on success; the errno value of a failed system call; or -EIO
- * for an unexpected successful send, a receive timeout, or corrupted data.
- */
-static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
-		    struct msg_stats *s, bool tx,
-		    struct sockmap_options *opt)
-{
-	struct msghdr msg = {0};
-	int err, i, flags = MSG_NOSIGNAL;
-	struct iovec *iov;
-	unsigned char k;
-	bool data_test = opt->data_test;
-	bool drop = opt->drop_expected;
-
-	iov = calloc(iov_count, sizeof(struct iovec));
-	if (!iov)
-		return errno;
-
-	k = 0;
-	for (i = 0; i < iov_count; i++) {
-		unsigned char *d = calloc(iov_length, sizeof(char));
-
-		if (!d) {
-			/* capture errno before fprintf() can change it */
-			err = errno;
-			fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
-			goto out;
-		}
-		iov[i].iov_base = d;
-		iov[i].iov_len = iov_length;
-
-		if (data_test && tx) {
-			int j;
-
-			for (j = 0; j < iov_length; j++)
-				d[j] = k++;
-		}
-	}
-
-	msg.msg_iov = iov;
-	msg.msg_iovlen = iov_count;
-	k = 0;
-
-	if (tx) {
-		clock_gettime(CLOCK_MONOTONIC, &s->start);
-		for (i = 0; i < cnt; i++) {
-			int sent = sendmsg(fd, &msg, flags);
-
-			if (!drop && sent < 0) {
-				err = errno;
-				perror("send loop error:");
-				goto out;
-			} else if (drop && sent >= 0) {
-				printf("send loop error expected: %i\n", sent);
-				err = -EIO;
-				goto out;
-			}
-			if (sent > 0)
-				s->bytes_sent += sent;
-		}
-		clock_gettime(CLOCK_MONOTONIC, &s->end);
-	} else {
-		int slct, rcvd, max_fd = fd;
-		struct timeval timeout;
-		float total_bytes;
-		fd_set w;
-
-		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
-		if (clock_gettime(CLOCK_MONOTONIC, &s->start) < 0)
-			perror("recv start time: ");
-		while (s->bytes_recvd < total_bytes) {
-			timeout.tv_sec = 1;
-			timeout.tv_usec = 0;
-
-			/* FD sets */
-			FD_ZERO(&w);
-			FD_SET(fd, &w);
-
-			slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
-			if (slct == -1) {
-				err = errno;
-				perror("select()");
-				clock_gettime(CLOCK_MONOTONIC, &s->end);
-				goto out;
-			} else if (!slct) {
-				fprintf(stderr, "unexpected timeout\n");
-				err = -EIO;
-				clock_gettime(CLOCK_MONOTONIC, &s->end);
-				goto out;
-			}
-
-			rcvd = recvmsg(fd, &msg, flags);
-			if (rcvd < 0) {
-				if (errno != EWOULDBLOCK) {
-					err = errno;
-					clock_gettime(CLOCK_MONOTONIC, &s->end);
-					perror("recv failed()");
-					goto out;
-				}
-				/* Nothing was read: go back to select() rather
-				 * than adding -1 to the unsigned byte counter.
-				 */
-				continue;
-			}
-
-			s->bytes_recvd += rcvd;
-
-			if (data_test) {
-				int j;
-
-				for (i = 0; i < msg.msg_iovlen; i++) {
-					unsigned char *d = iov[i].iov_base;
-
-					for (j = 0;
-					     j < iov[i].iov_len && rcvd; j++) {
-						unsigned char want = k++;
-
-						if (d[j] != want) {
-							/* only peek at the next byte
-							 * if it is inside the buffer
-							 */
-							unsigned char next =
-								j + 1 < iov[i].iov_len ?
-								d[j + 1] : 0;
-
-							err = -EIO;
-							fprintf(stderr,
-								"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
-								i, j, d[j], want, next,
-								(unsigned char)(want + 1));
-							goto out;
-						}
-						rcvd--;
-					}
-				}
-			}
-		}
-		clock_gettime(CLOCK_MONOTONIC, &s->end);
-	}
-
-	err = 0;
-out:
-	/* iov[] came from calloc(), so unset entries are NULL and safe to free */
-	for (i = 0; i < iov_count; i++)
-		free(iov[i].iov_base);
-	free(iov);
-	return err;
-}
-
-/* Divisor used by callers to convert a bytes-per-second figure to GB/s. */
-static float giga = 1000000000;
-
-/* Bytes sent per whole second elapsed between s.start and s.end.
- * The division is carried out on integers before conversion to float, and
- * only tv_sec is considered. Callers must make sure the elapsed number of
- * seconds is non-zero.
- */
-static inline float sentBps(struct msg_stats s)
-{
-	time_t elapsed = s.end.tv_sec - s.start.tv_sec;
-
-	return s.bytes_sent / elapsed;
-}
-
-/* Bytes received per whole second elapsed between s.start and s.end.
- * Same integer-division and non-zero-interval caveats as sentBps().
- */
-static inline float recvdBps(struct msg_stats s)
-{
-	time_t elapsed = s.end.tv_sec - s.start.tv_sec;
-
-	return s.bytes_recvd / elapsed;
-}
-
-static int sendmsg_test(int iov_count, int iov_buf, int cnt,
- struct sockmap_options *opt)
-{
- float sent_Bps = 0, recvd_Bps = 0;
- int rx_fd, txpid, rxpid, err = 0;
- struct msg_stats s = {0};
- int status;
-
- errno = 0;
-
- if (opt->base)
- rx_fd = p1;
- else
- rx_fd = p2;
-
- rxpid = fork();
- if (rxpid == 0) {
- if (opt->drop_expected)
- exit(1);
-
- if (opt->sendpage)
- iov_count = 1;
- err = msg_loop(rx_fd, iov_count, iov_buf,
- cnt, &s, false, opt);
- if (err)
- fprintf(stderr,
- "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
- iov_count, iov_buf, cnt, err);
- shutdown(p2, SHUT_RDWR);
- shutdown(p1, SHUT_RDWR);
- if (s.end.tv_sec - s.start.tv_sec) {
- sent_Bps = sentBps(s);
- recvd_Bps = recvdBps(s);
- }
- fprintf(stdout,
- "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
- s.bytes_sent, sent_Bps, sent_Bps/giga,
- s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
- exit(1);
- } else if (rxpid == -1) {
- perror("msg_loop_rx: ");
- return errno;
- }
-
- txpid = fork();
- if (txpid == 0) {
- if (opt->sendpage)
- err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
- else
- err = msg_loop(c1, iov_count, iov_buf,
- cnt, &s, true, opt);
-
- if (err)
- fprintf(stderr,
- "msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
- iov_count, iov_buf, cnt, err);
- shutdown(c1, SHUT_RDWR);
- if (s.end.tv_sec - s.start.tv_sec) {
- sent_Bps = sentBps(s);
- recvd_Bps = recvdBps(s);
- }
- fprintf(stdout,
- "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
- s.bytes_sent, sent_Bps, sent_Bps/giga,
- s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
- exit(1);
- } else if (txpid == -1) {
- perror("msg_loop_tx: ");
- return errno;
- }
-
- assert(waitpid(rxpid, &status, 0) == rxpid);
- assert(waitpid(txpid, &status, 0) == txpid);
- return err;
-}
-
-static int forever_ping_pong(int rate, struct sockmap_options *opt)
-{
- struct timeval timeout;
- char buf[1024] = {0};
- int sc;
-
- timeout.tv_sec = 10;
- timeout.tv_usec = 0;
-
- /* Ping/Pong data from client to server */
- sc = send(c1, buf, sizeof(buf), 0);
- if (sc < 0) {
- perror("send failed()\n");
- return sc;
- }
-
- do {
- int s, rc, i, max_fd = p2;
- fd_set w;
-
- /* FD sets */
- FD_ZERO(&w);
- FD_SET(c1, &w);
- FD_SET(c2, &w);
- FD_SET(p1, &w);
- FD_SET(p2, &w);
-
- s = select(max_fd + 1, &w, NULL, NULL, &timeout);
- if (s == -1) {
- perror("select()");
- break;
- } else if (!s) {
- fprintf(stderr, "unexpected timeout\n");
- break;
- }
-
- for (i = 0; i <= max_fd && s > 0; ++i) {
- if (!FD_ISSET(i, &w))
- continue;
-
- s--;
-
- rc = recv(i, buf, sizeof(buf), 0);
- if (rc < 0) {
- if (errno != EWOULDBLOCK) {
- perror("recv failed()\n");
- return rc;
- }
- }
-
- if (rc == 0) {
- close(i);
- break;
- }
-
- sc = send(i, buf, rc, 0);
- if (sc < 0) {
- perror("send failed()\n");
- return sc;
- }
- }
-
- if (rate)
- sleep(rate);
-
- if (opt->verbose) {
- printf(".");
- fflush(stdout);
-
- }
- } while (running);
-
- return 0;
-}
-
-enum {
- PING_PONG,
- SENDMSG,
- BASE,
- BASE_SENDPAGE,
- SENDPAGE,
-};
-
-int main(int argc, char **argv)
-{
- int iov_count = 1, length = 1024, rate = 1, tx_prog_fd;
- struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
- int opt, longindex, err, cg_fd = 0;
- struct sockmap_options options = {0};
- int test = PING_PONG;
- char filename[256];
-
- while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
- long_options, &longindex)) != -1) {
- switch (opt) {
- case 's':
- txmsg_start = atoi(optarg);
- break;
- case 'e':
- txmsg_end = atoi(optarg);
- break;
- case 'a':
- txmsg_apply = atoi(optarg);
- break;
- case 'k':
- txmsg_cork = atoi(optarg);
- break;
- case 'c':
- cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
- if (cg_fd < 0) {
- fprintf(stderr,
- "ERROR: (%i) open cg path failed: %s\n",
- cg_fd, optarg);
- return cg_fd;
- }
- break;
- case 'r':
- rate = atoi(optarg);
- break;
- case 'v':
- options.verbose = 1;
- break;
- case 'i':
- iov_count = atoi(optarg);
- break;
- case 'l':
- length = atoi(optarg);
- break;
- case 'd':
- options.data_test = true;
- break;
- case 't':
- if (strcmp(optarg, "ping") == 0) {
- test = PING_PONG;
- } else if (strcmp(optarg, "sendmsg") == 0) {
- test = SENDMSG;
- } else if (strcmp(optarg, "base") == 0) {
- test = BASE;
- } else if (strcmp(optarg, "base_sendpage") == 0) {
- test = BASE_SENDPAGE;
- } else if (strcmp(optarg, "sendpage") == 0) {
- test = SENDPAGE;
- } else {
- usage(argv);
- return -1;
- }
- break;
- case 0:
- break;
- case 'h':
- default:
- usage(argv);
- return -1;
- }
- }
-
- if (!cg_fd) {
- fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
- argv[0]);
- return -1;
- }
-
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- running = 1;
-
- /* catch SIGINT */
- signal(SIGINT, running_handler);
-
- if (load_bpf_file(filename)) {
- fprintf(stderr, "load_bpf_file: (%s) %s\n",
- filename, strerror(errno));
- return 1;
- }
-
- /* If base test skip BPF setup */
- if (test == BASE || test == BASE_SENDPAGE)
- goto run;
-
- /* Attach programs to sockmap */
- err = bpf_prog_attach(prog_fd[0], map_fd[0],
- BPF_SK_SKB_STREAM_PARSER, 0);
- if (err) {
- fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
- err, strerror(errno));
- return err;
- }
-
- err = bpf_prog_attach(prog_fd[1], map_fd[0],
- BPF_SK_SKB_STREAM_VERDICT, 0);
- if (err) {
- fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
- err, strerror(errno));
- return err;
- }
-
- /* Attach to cgroups */
- err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
- if (err) {
- fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
- err, strerror(errno));
- return err;
- }
-
-run:
- err = sockmap_init_sockets();
- if (err) {
- fprintf(stderr, "ERROR: test socket failed: %d\n", err);
- goto out;
- }
-
- /* Attach txmsg program to sockmap */
- if (txmsg_pass)
- tx_prog_fd = prog_fd[3];
- else if (txmsg_noisy)
- tx_prog_fd = prog_fd[4];
- else if (txmsg_redir)
- tx_prog_fd = prog_fd[5];
- else if (txmsg_redir_noisy)
- tx_prog_fd = prog_fd[6];
- else if (txmsg_drop)
- tx_prog_fd = prog_fd[9];
- /* apply and cork must be last */
- else if (txmsg_apply)
- tx_prog_fd = prog_fd[7];
- else if (txmsg_cork)
- tx_prog_fd = prog_fd[8];
- else
- tx_prog_fd = 0;
-
- if (tx_prog_fd) {
- int redir_fd, i = 0;
-
- err = bpf_prog_attach(tx_prog_fd,
- map_fd[1], BPF_SK_MSG_VERDICT, 0);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
- err, strerror(errno));
- return err;
- }
-
- err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
- err, strerror(errno));
- return err;
- }
-
- if (txmsg_redir || txmsg_redir_noisy)
- redir_fd = c2;
- else
- redir_fd = c1;
-
- err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
- err, strerror(errno));
- return err;
- }
-
- if (txmsg_apply) {
- err = bpf_map_update_elem(map_fd[3],
- &i, &txmsg_apply, BPF_ANY);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_map_update_elem (apply_bytes): %d (%s\n",
- err, strerror(errno));
- return err;
- }
- }
-
- if (txmsg_cork) {
- err = bpf_map_update_elem(map_fd[4],
- &i, &txmsg_cork, BPF_ANY);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_map_update_elem (cork_bytes): %d (%s\n",
- err, strerror(errno));
- return err;
- }
- }
-
- if (txmsg_start) {
- err = bpf_map_update_elem(map_fd[5],
- &i, &txmsg_start, BPF_ANY);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_map_update_elem (txmsg_start): %d (%s)\n",
- err, strerror(errno));
- return err;
- }
- }
-
- if (txmsg_end) {
- i = 1;
- err = bpf_map_update_elem(map_fd[5],
- &i, &txmsg_end, BPF_ANY);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_map_update_elem (txmsg_end): %d (%s)\n",
- err, strerror(errno));
- return err;
- }
- }
-
- if (txmsg_ingress) {
- int in = BPF_F_INGRESS;
-
- i = 0;
- err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
- err, strerror(errno));
- }
- i = 1;
- err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
- err, strerror(errno));
- }
- err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
- err, strerror(errno));
- }
-
- i = 2;
- err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
- err, strerror(errno));
- }
- }
-
- if (txmsg_skb) {
- int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1;
- int ingress = BPF_F_INGRESS;
-
- i = 0;
- err = bpf_map_update_elem(map_fd[7], &i, &ingress, BPF_ANY);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
- err, strerror(errno));
- }
-
- i = 3;
- err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
- err, strerror(errno));
- }
- }
- }
-
- if (txmsg_drop)
- options.drop_expected = true;
-
- if (test == PING_PONG)
- err = forever_ping_pong(rate, &options);
- else if (test == SENDMSG) {
- options.base = false;
- options.sendpage = false;
- err = sendmsg_test(iov_count, length, rate, &options);
- } else if (test == SENDPAGE) {
- options.base = false;
- options.sendpage = true;
- err = sendmsg_test(iov_count, length, rate, &options);
- } else if (test == BASE) {
- options.base = true;
- options.sendpage = false;
- err = sendmsg_test(iov_count, length, rate, &options);
- } else if (test == BASE_SENDPAGE) {
- options.base = true;
- options.sendpage = true;
- err = sendmsg_test(iov_count, length, rate, &options);
- } else
- fprintf(stderr, "unknown test\n");
-out:
- bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
- close(s1);
- close(s2);
- close(p1);
- close(p2);
- close(c1);
- close(c2);
- close(cg_fd);
- return err;
-}
-
-void running_handler(int a)
-{
- running = 0;
-}
/* BPF_FUNC_skb_set_tunnel_key flags. */
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
+#define BPF_F_SEQ_NUMBER (1ULL << 3)
/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
* BPF_FUNC_perf_event_read_value flags.
IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */
};
+/* tun section */
+
+enum {
+ IFLA_TUN_UNSPEC,
+ IFLA_TUN_OWNER,
+ IFLA_TUN_GROUP,
+ IFLA_TUN_TYPE,
+ IFLA_TUN_PI,
+ IFLA_TUN_VNET_HDR,
+ IFLA_TUN_PERSIST,
+ IFLA_TUN_MULTI_QUEUE,
+ IFLA_TUN_NUM_QUEUES,
+ IFLA_TUN_NUM_DISABLED_QUEUES,
+ __IFLA_TUN_MAX,
+};
+
+#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
+
+/* rmnet section */
+
+#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0)
+#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3)
+
+enum {
+ IFLA_RMNET_UNSPEC,
+ IFLA_RMNET_MUX_ID,
+ IFLA_RMNET_FLAGS,
+ __IFLA_RMNET_MAX,
+};
+
+#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1)
+
+struct ifla_rmnet_flags {
+ __u32 flags;
+ __u32 mask;
+};
+
#endif /* _UAPI_LINUX_IF_LINK_H */
BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
-static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
- enum bpf_attach_type type)
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+ enum bpf_attach_type type)
{
prog->expected_attach_type = type;
}
int bpf_program__set_xdp(struct bpf_program *prog);
int bpf_program__set_perf_event(struct bpf_program *prog);
void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+ enum bpf_attach_type type);
bool bpf_program__is_socket_filter(struct bpf_program *prog);
bool bpf_program__is_tracepoint(struct bpf_program *prog);
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
- test_sock test_btf
+ test_sock test_btf test_sockmap
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
- test_btf_haskv.o test_btf_nokv.o
+ test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
$(OUTPUT)/test_sock: cgroup_helpers.c
$(OUTPUT)/test_sock_addr: cgroup_helpers.c
+$(OUTPUT)/test_sockmap: cgroup_helpers.c
.PHONY: force
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <sched.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/sendfile.h>
+
+#include <linux/netlink.h>
+#include <linux/socket.h>
+#include <linux/sock_diag.h>
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <libgen.h>
+
+#include <getopt.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+int running;
+static void running_handler(int a);
+
+/* randomly selected ports for testing on lo */
+#define S1_PORT 10000
+#define S2_PORT 10001
+
+#define BPF_FILENAME "test_sockmap_kern.o"
+#define CG_PATH "/sockmap"
+
+/* global sockets */
+int s1, s2, c1, c2, p1, p2;
+int test_cnt;
+int passed;
+int failed;
+int map_fd[8];
+struct bpf_map *maps[8];
+int prog_fd[11];
+
+int txmsg_pass;
+int txmsg_noisy;
+int txmsg_redir;
+int txmsg_redir_noisy;
+int txmsg_drop;
+int txmsg_apply;
+int txmsg_cork;
+int txmsg_start;
+int txmsg_end;
+int txmsg_ingress;
+int txmsg_skb;
+
+/*
+ * Long options understood by getopt_long().
+ *
+ * Entries with a NULL flag pointer make getopt_long() return the short
+ * option character in the last column. Entries with a non-NULL flag
+ * pointer store the last column (1) into the pointed-to txmsg_* global
+ * instead. usage() walks this table up to the all-zero terminator.
+ */
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"cgroup", required_argument, NULL, 'c' },
+ {"rate", required_argument, NULL, 'r' },
+ {"verbose", no_argument, NULL, 'v' },
+ {"iov_count", required_argument, NULL, 'i' },
+ {"length", required_argument, NULL, 'l' },
+ {"test", required_argument, NULL, 't' },
+ {"data_test", no_argument, NULL, 'd' },
+ {"txmsg", no_argument, &txmsg_pass, 1 },
+ {"txmsg_noisy", no_argument, &txmsg_noisy, 1 },
+ {"txmsg_redir", no_argument, &txmsg_redir, 1 },
+ {"txmsg_redir_noisy", no_argument, &txmsg_redir_noisy, 1},
+ {"txmsg_drop", no_argument, &txmsg_drop, 1 },
+ {"txmsg_apply", required_argument, NULL, 'a'},
+ {"txmsg_cork", required_argument, NULL, 'k'},
+ {"txmsg_start", required_argument, NULL, 's'},
+ {"txmsg_end", required_argument, NULL, 'e'},
+ {"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
+ {"txmsg_skb", no_argument, &txmsg_skb, 1 },
+ {0, 0, NULL, 0 }
+};
+
+/* Print the command synopsis followed by one line per long option. */
+static void usage(char *argv[])
+{
+	const struct option *o;
+
+	printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
+	printf(" options:\n");
+	/* The table is terminated by an entry whose name is NULL. */
+	for (o = long_options; o->name; o++) {
+		printf(" --%-12s", o->name);
+		if (!o->flag) {
+			/* Plain option: show its short-option character. */
+			printf(" -%c\n", o->val);
+			continue;
+		}
+		/* Flag option: show the current value of the int it sets. */
+		printf(" flag (internal value:%d)\n", *o->flag);
+	}
+	printf("\n");
+}
+
+/*
+ * Create and connect the six global TCP sockets used by the tests, all on
+ * 127.0.0.1: s1/s2 listen on S1_PORT/S2_PORT, c1/c2 connect to them and
+ * p1/p2 are the accepted peers of c1/c2.
+ *
+ * The globals are reset to -1 first so that a caller which close()s all
+ * six after a partial failure never closes descriptor 0 by accident.
+ *
+ * @verbose: when non-zero, print a summary of the socket pairing.
+ *
+ * Returns 0 on success, otherwise the errno left by the failing call.
+ */
+static int sockmap_init_sockets(int verbose)
+{
+	int i, err, one = 1;
+	struct sockaddr_in addr;
+	int *fds[4] = {&s1, &s2, &c1, &c2};
+
+	s1 = s2 = p1 = p2 = c1 = c2 = -1;
+
+	/* Init sockets */
+	for (i = 0; i < 4; i++) {
+		*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
+		if (*fds[i] < 0) {
+			perror("socket failed()");
+			return errno;
+		}
+	}
+
+	/* Allow address reuse on the two listening sockets (s1, s2) */
+	for (i = 0; i < 2; i++) {
+		err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
+				 (char *)&one, sizeof(one));
+		if (err) {
+			perror("setsockopt failed()");
+			return errno;
+		}
+	}
+
+	/* Make the two listening sockets (s1, s2) non-blocking */
+	for (i = 0; i < 2; i++) {
+		err = ioctl(*fds[i], FIONBIO, (char *)&one);
+		if (err < 0) {
+			perror("ioctl failed()");
+			return errno;
+		}
+	}
+
+	/* Bind server sockets */
+	memset(&addr, 0, sizeof(struct sockaddr_in));
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+	addr.sin_port = htons(S1_PORT);
+	err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0) {
+		perror("bind s1 failed()\n");
+		return errno;
+	}
+
+	addr.sin_port = htons(S2_PORT);
+	err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0) {
+		perror("bind s2 failed()\n");
+		return errno;
+	}
+
+	/* Listen server sockets */
+	err = listen(s1, 32);
+	if (err < 0) {
+		perror("listen s1 failed()\n");
+		return errno;
+	}
+
+	err = listen(s2, 32);
+	if (err < 0) {
+		perror("listen s2 failed()\n");
+		return errno;
+	}
+
+	/* Initiate Connect */
+	addr.sin_port = htons(S1_PORT);
+	err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0 && errno != EINPROGRESS) {
+		perror("connect c1 failed()\n");
+		return errno;
+	}
+
+	addr.sin_port = htons(S2_PORT);
+	err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0 && errno != EINPROGRESS) {
+		perror("connect c2 failed()\n");
+		return errno;
+	}
+
+	/* Accept Connections */
+	p1 = accept(s1, NULL, NULL);
+	if (p1 < 0) {
+		perror("accept s1 failed()\n");
+		return errno;
+	}
+
+	p2 = accept(s2, NULL, NULL);
+	if (p2 < 0) {
+		perror("accept s2 failed()\n");
+		return errno;
+	}
+
+	if (verbose) {
+		printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
+		printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
+		       c1, s1, c2, s2);
+	}
+	return 0;
+}
+
+/* Byte counters and timestamps filled in by one send or receive loop. */
+struct msg_stats {
+ size_t bytes_sent; /* sum of the positive sendmsg()/sendfile() return values */
+ size_t bytes_recvd; /* accumulated recvmsg() return values */
+ struct timespec start; /* CLOCK_MONOTONIC time taken before the loop starts */
+ struct timespec end; /* CLOCK_MONOTONIC time taken when the loop stops */
+};
+
+/* Per-run test parameters passed down from the test drivers. */
+struct sockmap_options {
+ int verbose; /* non-zero enables the extra diagnostic printouts */
+ bool base; /* sendmsg_test() receives on p1 instead of p2 */
+ bool sendpage; /* transmit with msg_loop_sendpage() instead of msg_loop() */
+ bool data_test; /* fill tx buffers with a byte pattern and verify it on rx */
+ bool drop_expected; /* sends are expected to fail; the rx child exits at once */
+ int iov_count; /* iovec entries per message */
+ int iov_length; /* bytes per iovec buffer (per sendfile() call for sendpage) */
+ int rate; /* send-call count in sendmsg_test(); sleep seconds in forever_ping_pong() */
+};
+
+/*
+ * Transmit side of the sendpage tests.
+ *
+ * Writes iov_length * cnt bytes of an incrementing (mod 256) pattern to
+ * the scratch file ".sendpage_tst.tmp" in the current directory, then
+ * issues @cnt sendfile() calls of @iov_length bytes each from that file
+ * to @fd. Positive return values are added to s->bytes_sent; s->start and
+ * s->end are CLOCK_MONOTONIC timestamps taken around the send loop.
+ *
+ * With opt->drop_expected set, every sendfile() is expected to fail and a
+ * call that succeeds is reported as an error.
+ *
+ * Returns 0 on success, 1 if the scratch file cannot be created or
+ * reopened, the negative sendfile() return value on an unexpected send
+ * failure, or -EIO when an expected drop did not happen.
+ */
+static int msg_loop_sendpage(int fd, int iov_length, int cnt,
+			     struct msg_stats *s,
+			     struct sockmap_options *opt)
+{
+	bool drop = opt->drop_expected;
+	unsigned char k = 0;
+	FILE *file;
+	int i, fp;
+
+	file = fopen(".sendpage_tst.tmp", "w+");
+	if (!file) {
+		perror("create file for sendpage");
+		return 1;
+	}
+	for (i = 0; i < iov_length * cnt; i++, k++)
+		fwrite(&k, sizeof(char), 1, file);
+	/* fclose() flushes the buffered pattern to the file. */
+	fclose(file);
+
+	fp = open(".sendpage_tst.tmp", O_RDONLY);
+	if (fp < 0) {
+		perror("reopen file for sendpage");
+		return 1;
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &s->start);
+	for (i = 0; i < cnt; i++) {
+		/* A NULL offset makes sendfile() advance the file position. */
+		int sent = sendfile(fd, fp, NULL, iov_length);
+
+		if (!drop && sent < 0) {
+			perror("send loop error:");
+			close(fp);
+			return sent;
+		} else if (drop && sent >= 0) {
+			printf("sendpage loop error expected: %i\n", sent);
+			close(fp);
+			return -EIO;
+		}
+
+		if (sent > 0)
+			s->bytes_sent += sent;
+	}
+	clock_gettime(CLOCK_MONOTONIC, &s->end);
+	close(fp);
+	return 0;
+}
+
+/*
+ * Run one direction of a sendmsg test on @fd.
+ *
+ * Allocates @iov_count zeroed buffers of @iov_length bytes each. With @tx
+ * set, the same message is sent @cnt times with sendmsg(). Otherwise data
+ * is received with select()/recvmsg() until
+ * iov_count * iov_length * cnt bytes have arrived or a one second
+ * select() timeout expires.
+ *
+ * With opt->data_test the transmit buffers carry an incrementing
+ * (mod 256) byte pattern and the receiver checks that the incoming stream
+ * continues that pattern. With opt->drop_expected every sendmsg() is
+ * expected to fail and a send that succeeds is an error.
+ *
+ * Byte counts and CLOCK_MONOTONIC timestamps are stored in @s.
+ *
+ * Returns 0 on success, otherwise a non-zero value: the errno left by the
+ * failing call, or -EIO for a receive timeout, detected corruption, or a
+ * send that should have been dropped.
+ */
+static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
+		    struct msg_stats *s, bool tx,
+		    struct sockmap_options *opt)
+{
+	struct msghdr msg = {0};
+	int err, ret, i, flags = MSG_NOSIGNAL;
+	struct iovec *iov;
+	unsigned char k;
+	bool data_test = opt->data_test;
+	bool drop = opt->drop_expected;
+
+	iov = calloc(iov_count, sizeof(struct iovec));
+	if (!iov)
+		return errno;
+
+	k = 0;
+	for (i = 0; i < iov_count; i++) {
+		unsigned char *d = calloc(iov_length, sizeof(char));
+
+		if (!d) {
+			fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
+			goto out_errno;
+		}
+		iov[i].iov_base = d;
+		iov[i].iov_len = iov_length;
+
+		if (data_test && tx) {
+			int j;
+
+			for (j = 0; j < iov_length; j++)
+				d[j] = k++;
+		}
+	}
+
+	msg.msg_iov = iov;
+	msg.msg_iovlen = iov_count;
+	k = 0;
+
+	if (tx) {
+		clock_gettime(CLOCK_MONOTONIC, &s->start);
+		for (i = 0; i < cnt; i++) {
+			int sent = sendmsg(fd, &msg, flags);
+
+			if (!drop && sent < 0) {
+				perror("send loop error:");
+				goto out_errno;
+			} else if (drop && sent >= 0) {
+				printf("send loop error expected: %i\n", sent);
+				errno = -EIO;
+				goto out_errno;
+			}
+			if (sent > 0)
+				s->bytes_sent += sent;
+		}
+		clock_gettime(CLOCK_MONOTONIC, &s->end);
+	} else {
+		int slct, recv, max_fd = fd;
+		struct timeval timeout;
+		float total_bytes;
+		int fd_flags;
+		fd_set w;
+
+		/*
+		 * Switch the socket to non-blocking mode. O_NONBLOCK is a
+		 * status flag, so it has to be set through F_SETFL on top
+		 * of the flags that are already there.
+		 */
+		fd_flags = fcntl(fd, F_GETFL);
+		if (fd_flags < 0 ||
+		    fcntl(fd, F_SETFL, fd_flags | O_NONBLOCK) < 0)
+			perror("fcntl(O_NONBLOCK)");
+
+		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
+		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
+		if (err < 0)
+			perror("recv start time: ");
+		while (s->bytes_recvd < total_bytes) {
+			/* select() may modify the timeout; re-arm it each round */
+			timeout.tv_sec = 1;
+			timeout.tv_usec = 0;
+
+			/* FD sets */
+			FD_ZERO(&w);
+			FD_SET(fd, &w);
+
+			slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
+			if (slct == -1) {
+				perror("select()");
+				clock_gettime(CLOCK_MONOTONIC, &s->end);
+				goto out_errno;
+			} else if (!slct) {
+				if (opt->verbose)
+					fprintf(stderr, "unexpected timeout\n");
+				clock_gettime(CLOCK_MONOTONIC, &s->end);
+				errno = -EIO;
+				goto out_errno;
+			}
+
+			recv = recvmsg(fd, &msg, flags);
+			if (recv < 0) {
+				if (errno != EWOULDBLOCK) {
+					clock_gettime(CLOCK_MONOTONIC, &s->end);
+					perror("recv failed()\n");
+					goto out_errno;
+				}
+				/*
+				 * Nothing to read yet: wait again. The -1 must
+				 * not reach the byte counter or the verifier.
+				 */
+				continue;
+			}
+
+			s->bytes_recvd += recv;
+
+			if (data_test) {
+				int j;
+
+				for (i = 0; i < msg.msg_iovlen; i++) {
+					unsigned char *d = iov[i].iov_base;
+
+					/* recv counts the bytes still to be verified */
+					for (j = 0;
+					     j < iov[i].iov_len && recv; j++) {
+						if (d[j] != k++) {
+							/* only peek at d[j + 1] inside the buffer */
+							unsigned char next =
+								j + 1 < iov[i].iov_len ?
+								d[j + 1] : 0;
+
+							fprintf(stderr,
+								"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
+								i, j, d[j], k - 1, next, k + 1);
+							errno = -EIO;
+							goto out_errno;
+						}
+						recv--;
+					}
+				}
+			}
+		}
+		clock_gettime(CLOCK_MONOTONIC, &s->end);
+	}
+
+	ret = 0;
+	goto out_free;
+out_errno:
+	/* Latch errno before free() gets a chance to change it. */
+	ret = errno;
+out_free:
+	/* Slots that were never allocated are NULL thanks to calloc(). */
+	for (i = 0; i < iov_count; i++)
+		free(iov[i].iov_base);
+	free(iov);
+	return ret;
+}
+
+static float giga = 1000000000;
+
+/*
+ * Bytes sent per elapsed whole second. The caller must make sure the
+ * start and end timestamps differ by at least one second.
+ */
+static inline float sentBps(struct msg_stats s)
+{
+	time_t elapsed = s.end.tv_sec - s.start.tv_sec;
+
+	return s.bytes_sent / elapsed;
+}
+
+/*
+ * Bytes received per elapsed whole second. The caller must make sure the
+ * start and end timestamps differ by at least one second.
+ */
+static inline float recvdBps(struct msg_stats s)
+{
+	time_t elapsed = s.end.tv_sec - s.start.tv_sec;
+
+	return s.bytes_recvd / elapsed;
+}
+
+/*
+ * Run one sendmsg/sendpage test with a forked receiver and a forked
+ * transmitter.
+ *
+ * The receiver reads from p1 when opt->base is set and from p2 otherwise;
+ * the transmitter always writes to c1. opt->iov_count, opt->iov_length
+ * and opt->rate give the iovec count, the buffer size and the number of
+ * send calls. When opt->drop_expected is set the receiver has nothing to
+ * wait for and exits successfully right away.
+ *
+ * Each child reports its result through its exit status (0 on success,
+ * 1 on failure) and the parent turns that into the return value.
+ *
+ * Returns 0 if both children succeeded, the errno of a failed fork(), or
+ * a non-zero value if a child failed or terminated abnormally.
+ */
+static int sendmsg_test(struct sockmap_options *opt)
+{
+	float sent_Bps = 0, recvd_Bps = 0;
+	int rx_fd, txpid, rxpid, err = 0;
+	struct msg_stats s = {0};
+	int iov_count = opt->iov_count;
+	int iov_buf = opt->iov_length;
+	int cnt = opt->rate;
+	int rx_status, tx_status;
+	pid_t reaped;
+
+	errno = 0;
+
+	if (opt->base)
+		rx_fd = p1;
+	else
+		rx_fd = p2;
+
+	rxpid = fork();
+	if (rxpid == 0) {
+		/* Nothing will arrive when every send is dropped. */
+		if (opt->drop_expected)
+			exit(0);
+
+		/* sendfile() data is read back through a single buffer */
+		if (opt->sendpage)
+			iov_count = 1;
+		err = msg_loop(rx_fd, iov_count, iov_buf,
+			       cnt, &s, false, opt);
+		if (err && opt->verbose)
+			fprintf(stderr,
+				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
+				iov_count, iov_buf, cnt, err);
+		shutdown(p2, SHUT_RDWR);
+		shutdown(p1, SHUT_RDWR);
+		/* The rate helpers divide by the elapsed whole seconds. */
+		if (s.end.tv_sec - s.start.tv_sec) {
+			sent_Bps = sentBps(s);
+			recvd_Bps = recvdBps(s);
+		}
+		if (opt->verbose)
+			fprintf(stdout,
+				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
+				s.bytes_sent, sent_Bps, sent_Bps/giga,
+				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+		/* Hand the result to the parent through the exit status. */
+		exit(err ? 1 : 0);
+	} else if (rxpid == -1) {
+		perror("msg_loop_rx: ");
+		return errno;
+	}
+
+	txpid = fork();
+	if (txpid == 0) {
+		if (opt->sendpage)
+			err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
+		else
+			err = msg_loop(c1, iov_count, iov_buf,
+				       cnt, &s, true, opt);
+
+		if (err)
+			fprintf(stderr,
+				"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
+				iov_count, iov_buf, cnt, err);
+		shutdown(c1, SHUT_RDWR);
+		if (s.end.tv_sec - s.start.tv_sec) {
+			sent_Bps = sentBps(s);
+			recvd_Bps = recvdBps(s);
+		}
+		if (opt->verbose)
+			fprintf(stdout,
+				"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
+				s.bytes_sent, sent_Bps, sent_Bps/giga,
+				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+		exit(err ? 1 : 0);
+	} else if (txpid == -1) {
+		perror("msg_loop_tx: ");
+		return errno;
+	}
+
+	/* Keep waitpid() outside assert() so it still runs under NDEBUG. */
+	reaped = waitpid(rxpid, &rx_status, 0);
+	assert(reaped == rxpid);
+	reaped = waitpid(txpid, &tx_status, 0);
+	assert(reaped == txpid);
+
+	/* A child that did not exit normally counts as a failure. */
+	err = WIFEXITED(rx_status) ? WEXITSTATUS(rx_status) : 1;
+	if (err) {
+		fprintf(stderr, "rx thread exited with err %d. ", err);
+		return err;
+	}
+
+	err = WIFEXITED(tx_status) ? WEXITSTATUS(tx_status) : 1;
+	if (err)
+		fprintf(stderr, "tx thread exited with err %d. ", err);
+	return err;
+}
+
+/*
+ * Bounce a 1024 byte buffer between the connected sockets until the
+ * global 'running' flag is cleared, select() fails, or nothing becomes
+ * readable for 10 seconds.
+ *
+ * One buffer is sent on c1 to start things off; afterwards whatever is
+ * read from any of c1, c2, p1 and p2 is sent back on the same socket.
+ *
+ * @rate: seconds to sleep after every round (0 disables the sleep).
+ * @opt:  only opt->verbose is used; it prints a '.' per round.
+ *
+ * Returns 0 when the loop ends, or the negative send()/recv() return
+ * value on an I/O error.
+ */
+static int forever_ping_pong(int rate, struct sockmap_options *opt)
+{
+	struct timeval timeout;
+	char buf[1024] = {0};
+	int sc;
+
+	/* Ping/Pong data from client to server */
+	sc = send(c1, buf, sizeof(buf), 0);
+	if (sc < 0) {
+		perror("send failed()\n");
+		return sc;
+	}
+
+	do {
+		int s, rc, i, max_fd;
+		fd_set w;
+
+		/* select() may modify the timeout; re-arm it each round */
+		timeout.tv_sec = 10;
+		timeout.tv_usec = 0;
+
+		/* FD sets */
+		FD_ZERO(&w);
+		FD_SET(c1, &w);
+		FD_SET(c2, &w);
+		FD_SET(p1, &w);
+		FD_SET(p2, &w);
+
+		/* select() needs the highest descriptor in the set */
+		max_fd = c1;
+		if (c2 > max_fd)
+			max_fd = c2;
+		if (p1 > max_fd)
+			max_fd = p1;
+		if (p2 > max_fd)
+			max_fd = p2;
+
+		s = select(max_fd + 1, &w, NULL, NULL, &timeout);
+		if (s == -1) {
+			perror("select()");
+			break;
+		} else if (!s) {
+			fprintf(stderr, "unexpected timeout\n");
+			break;
+		}
+
+		/* s counts the ready descriptors still to be serviced */
+		for (i = 0; i <= max_fd && s > 0; ++i) {
+			if (!FD_ISSET(i, &w))
+				continue;
+
+			s--;
+
+			rc = recv(i, buf, sizeof(buf), 0);
+			if (rc < 0) {
+				if (errno != EWOULDBLOCK) {
+					perror("recv failed()\n");
+					return rc;
+				}
+				/* nothing was read, so nothing to echo */
+				continue;
+			}
+
+			if (rc == 0) {
+				close(i);
+				break;
+			}
+
+			sc = send(i, buf, rc, 0);
+			if (sc < 0) {
+				perror("send failed()\n");
+				return sc;
+			}
+		}
+
+		if (rate)
+			sleep(rate);
+
+		if (opt->verbose) {
+			printf(".");
+			fflush(stdout);
+
+		}
+	} while (running);
+
+	return 0;
+}
+
+/* Test modes understood by run_options(). */
+enum {
+ PING_PONG, /* echo loop, see forever_ping_pong() */
+ SENDMSG, /* sendmsg_test() with base = false, sendpage = false */
+ BASE, /* base = true, sendpage = false; BPF attach is skipped */
+ BASE_SENDPAGE, /* base = true, sendpage = true; BPF attach is skipped */
+ SENDPAGE, /* sendmsg_test() with base = false, sendpage = true */
+};
+
+/*
+ * Set up the BPF programs and maps for one test, run it and tear down.
+ *
+ * Unless @test is BASE or BASE_SENDPAGE, prog_fd[0] and prog_fd[1] are
+ * attached to map_fd[0] as stream parser and stream verdict programs and
+ * prog_fd[2] is attached to @cg_fd as a sock_ops program. The test
+ * sockets are then created and, depending on the txmsg_* globals, one
+ * msg verdict program is attached to map_fd[1] and its parameters are
+ * written to the maps:
+ *
+ *   map_fd[1] key 0  c1 (sender); key 1 p1 with txmsg_ingress
+ *   map_fd[2] key 0  redirect target, c2 for the redir tests else c1;
+ *                    keys 1 and 2 get p1 and p2 with txmsg_ingress
+ *   map_fd[3] key 0  txmsg_apply
+ *   map_fd[4] key 0  txmsg_cork
+ *   map_fd[5] keys 0 and 1  txmsg_start and txmsg_end
+ *   map_fd[6] key 0  BPF_F_INGRESS with txmsg_ingress
+ *   map_fd[7] key 0  BPF_F_INGRESS with txmsg_skb
+ *   map_fd[0] key 3  p2 or p1 with txmsg_skb
+ *
+ * Afterwards the programs are detached, every map entry is overwritten
+ * with zero and the six test sockets are closed.
+ *
+ * Returns 0 on success or the first fatal error encountered.
+ */
+static int run_options(struct sockmap_options *options, int cg_fd, int test)
+{
+	int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
+
+	/* If base test skip BPF setup */
+	if (test == BASE || test == BASE_SENDPAGE)
+		goto run;
+
+	/* Attach programs to sockmap */
+	err = bpf_prog_attach(prog_fd[0], map_fd[0],
+			      BPF_SK_SKB_STREAM_PARSER, 0);
+	if (err) {
+		fprintf(stderr,
+			"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
+			prog_fd[0], map_fd[0], err, strerror(errno));
+		return err;
+	}
+
+	err = bpf_prog_attach(prog_fd[1], map_fd[0],
+			      BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (err) {
+		fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
+			err, strerror(errno));
+		return err;
+	}
+
+	/* Attach to cgroups */
+	err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (err) {
+		fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
+			err, strerror(errno));
+		return err;
+	}
+
+run:
+	err = sockmap_init_sockets(options->verbose);
+	if (err) {
+		fprintf(stderr, "ERROR: test socket failed: %d\n", err);
+		goto out;
+	}
+
+	/* Attach txmsg program to sockmap */
+	if (txmsg_pass)
+		tx_prog_fd = prog_fd[3];
+	else if (txmsg_noisy)
+		tx_prog_fd = prog_fd[4];
+	else if (txmsg_redir)
+		tx_prog_fd = prog_fd[5];
+	else if (txmsg_redir_noisy)
+		tx_prog_fd = prog_fd[6];
+	else if (txmsg_drop)
+		tx_prog_fd = prog_fd[9];
+	/* apply and cork must be last */
+	else if (txmsg_apply)
+		tx_prog_fd = prog_fd[7];
+	else if (txmsg_cork)
+		tx_prog_fd = prog_fd[8];
+	else
+		tx_prog_fd = 0;
+
+	/* 0 means that no msg verdict program was selected */
+	if (tx_prog_fd) {
+		int redir_fd;
+
+		i = 0;
+		err = bpf_prog_attach(tx_prog_fd,
+				      map_fd[1], BPF_SK_MSG_VERDICT, 0);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
+				err, strerror(errno));
+			goto out;
+		}
+
+		err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
+				err, strerror(errno));
+			goto out;
+		}
+
+		if (txmsg_redir || txmsg_redir_noisy)
+			redir_fd = c2;
+		else
+			redir_fd = c1;
+
+		err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
+				err, strerror(errno));
+			goto out;
+		}
+
+		if (txmsg_apply) {
+			err = bpf_map_update_elem(map_fd[3],
+						  &i, &txmsg_apply, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (apply_bytes): %d (%s\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_cork) {
+			err = bpf_map_update_elem(map_fd[4],
+						  &i, &txmsg_cork, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (cork_bytes): %d (%s\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_start) {
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_start, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_start): %d (%s)\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_end) {
+			i = 1;
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_end, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_end): %d (%s)\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		/* Failures below are reported but do not abort the run. */
+		if (txmsg_ingress) {
+			int in = BPF_F_INGRESS;
+
+			i = 0;
+			err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+					err, strerror(errno));
+			}
+			i = 1;
+			err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
+					err, strerror(errno));
+			}
+			err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
+					err, strerror(errno));
+			}
+
+			i = 2;
+			err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
+					err, strerror(errno));
+			}
+		}
+
+		if (txmsg_skb) {
+			int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
+					p2 : p1;
+			int ingress = BPF_F_INGRESS;
+
+			i = 0;
+			err = bpf_map_update_elem(map_fd[7],
+						  &i, &ingress, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+					err, strerror(errno));
+			}
+
+			i = 3;
+			err = bpf_map_update_elem(map_fd[0],
+						  &i, &skb_fd, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+					err, strerror(errno));
+			}
+		}
+	}
+
+	if (txmsg_drop)
+		options->drop_expected = true;
+
+	if (test == PING_PONG)
+		err = forever_ping_pong(options->rate, options);
+	else if (test == SENDMSG) {
+		options->base = false;
+		options->sendpage = false;
+		err = sendmsg_test(options);
+	} else if (test == SENDPAGE) {
+		options->base = false;
+		options->sendpage = true;
+		err = sendmsg_test(options);
+	} else if (test == BASE) {
+		options->base = true;
+		options->sendpage = false;
+		err = sendmsg_test(options);
+	} else if (test == BASE_SENDPAGE) {
+		options->base = true;
+		options->sendpage = true;
+		err = sendmsg_test(options);
+	} else {
+		fprintf(stderr, "unknown test\n");
+		/* an unrecognised mode must not be reported as a pass */
+		err = -EINVAL;
+	}
+out:
+	/* Detach and zero all the maps */
+	bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+	bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
+	bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+	/* -1: selection never ran, 0: no program was selected */
+	if (tx_prog_fd > 0)
+		bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
+
+	for (i = 0; i < 8; i++) {
+		key = next_key = 0;
+		bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+		/*
+		 * Key 0 is handled above; zero each following key as it is
+		 * discovered so that the last one is not left out.
+		 */
+		while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
+			bpf_map_update_elem(map_fd[i], &next_key, &zero,
+					    BPF_ANY);
+			key = next_key;
+		}
+	}
+
+	close(s1);
+	close(s2);
+	close(p1);
+	close(p2);
+	close(c1);
+	close(c2);
+	return err;
+}
+
+/* Name of a test mode for the result line; "unknown" for all other modes. */
+static char *test_to_str(int test)
+{
+	if (test == SENDMSG)
+		return "sendmsg";
+	if (test == SENDPAGE)
+		return "sendpage";
+	return "unknown";
+}
+
+#define OPTSTRING 60
+/*
+ * Append @name to the NUL-terminated string in @options without ever
+ * writing beyond the OPTSTRING bytes of the buffer; excess is truncated.
+ */
+static void test_options_append(char *options, const char *name)
+{
+	/*
+	 * strncat() limits the number of bytes taken from the source, so
+	 * the limit has to be the space left in the destination (minus
+	 * the terminating NUL), not the total buffer size.
+	 */
+	strncat(options, name, OPTSTRING - strlen(options) - 1);
+}
+
+/*
+ * Build a comma separated list of the txmsg_* options that are currently
+ * enabled.
+ *
+ * @options: buffer of at least OPTSTRING bytes; it is cleared first and
+ *           always left NUL-terminated.
+ */
+static void test_options(char *options)
+{
+	memset(options, 0, OPTSTRING);
+
+	if (txmsg_pass)
+		test_options_append(options, "pass,");
+	if (txmsg_noisy)
+		test_options_append(options, "pass_noisy,");
+	if (txmsg_redir)
+		test_options_append(options, "redir,");
+	if (txmsg_redir_noisy)
+		test_options_append(options, "redir_noisy,");
+	if (txmsg_drop)
+		test_options_append(options, "drop,");
+	if (txmsg_apply)
+		test_options_append(options, "apply,");
+	if (txmsg_cork)
+		test_options_append(options, "cork,");
+	if (txmsg_start)
+		test_options_append(options, "start,");
+	if (txmsg_end)
+		test_options_append(options, "end,");
+	if (txmsg_ingress)
+		test_options_append(options, "ingress,");
+	if (txmsg_skb)
+		test_options_append(options, "skb,");
+}
+
+/*
+ * Run a single test case and print its result line.
+ *
+ * Sets opt->sendpage from @test and opt->drop_expected from txmsg_drop,
+ * prints "[TEST n]: (rate, iov_count, iov_length, mode, options): ",
+ * runs the case through run_options() and appends PASS or FAILED. The
+ * global test_cnt and passed/failed counters are updated.
+ *
+ * Returns the run_options() result (0 on success).
+ */
+static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
+{
+	/* On the stack: no allocation to fail, sized like test_options() expects */
+	char options[OPTSTRING];
+	int err;
+
+	if (test == SENDPAGE)
+		opt->sendpage = true;
+	else
+		opt->sendpage = false;
+
+	if (txmsg_drop)
+		opt->drop_expected = true;
+	else
+		opt->drop_expected = false;
+
+	test_options(options);
+
+	fprintf(stdout,
+		"[TEST %i]: (%i, %i, %i, %s, %s): ",
+		test_cnt, opt->rate, opt->iov_count, opt->iov_length,
+		test_to_str(test), options);
+	fflush(stdout);
+	err = run_options(opt, cgrp, test);
+	fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
+	test_cnt++;
+	if (!err)
+		passed++;
+	else
+		failed++;
+	return err;
+}
+
+/* Run the current configuration in SENDMSG mode and, only if that passed,
+ * again in SENDPAGE mode. sched_yield() is called after each run. Returns
+ * the first non-zero result, or 0 when both runs pass.
+ */
+static int test_exec(int cgrp, struct sockmap_options *opt)
+{
+	int err;
+
+	err = __test_exec(cgrp, SENDMSG, opt);
+	sched_yield();
+	if (!err) {
+		err = __test_exec(cgrp, SENDPAGE, opt);
+		sched_yield();
+	}
+	return err;
+}
+
+/* Sweep rate, iov_count and iov_length over the values 1, 34 and 67 (every
+ * combination) and run test_exec() for each one with the txmsg_* knobs that
+ * are currently set.
+ *
+ * @cgrp: cgroup file descriptor passed through to test_exec()
+ *
+ * Returns 0 when every combination passes, otherwise the first error.
+ */
+static int test_loop(int cgrp)
+{
+	/* Zero the whole structure, as the sibling tests do, so no field is
+	 * ever left indeterminate; this covers verbose, base, sendpage,
+	 * data_test and drop_expected, which all start out as 0/false.
+	 */
+	struct sockmap_options opt = {0};
+	int err = 0, i, l, r;
+
+	for (r = 1; r < 100; r += 33) {
+		for (i = 1; i < 100; i += 33) {
+			for (l = 1; l < 100; l += 33) {
+				opt.rate = r;
+				opt.iov_count = i;
+				opt.iov_length = l;
+				err = test_exec(cgrp, &opt);
+				if (err)
+					goto out;
+			}
+		}
+	}
+
+out:
+	return err;
+}
+
+/* Run test_loop() once for each basic verdict: pass, redirect, drop and
+ * redirect with the ingress flag.
+ *
+ * @cgrp: cgroup file descriptor passed through to test_loop()
+ *
+ * txmsg_start/txmsg_end are left untouched so callers can combine this with
+ * the start/end settings. Returns 0 on success or the first error.
+ */
+static int test_txmsg(int cgrp)
+{
+	int err;
+
+	/* Start from a clean slate. txmsg_redir is cleared as well: an
+	 * earlier test may have left it set, and it would otherwise leak
+	 * into the "pass" run below.
+	 */
+	txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+	txmsg_redir = 0;
+	txmsg_apply = txmsg_cork = 0;
+	txmsg_ingress = txmsg_skb = 0;
+
+	txmsg_pass = 1;
+	err = test_loop(cgrp);
+	txmsg_pass = 0;
+	if (err)
+		goto out;
+
+	txmsg_redir = 1;
+	err = test_loop(cgrp);
+	txmsg_redir = 0;
+	if (err)
+		goto out;
+
+	txmsg_drop = 1;
+	err = test_loop(cgrp);
+	txmsg_drop = 0;
+	if (err)
+		goto out;
+
+	txmsg_redir = 1;
+	txmsg_ingress = 1;
+	err = test_loop(cgrp);
+	txmsg_redir = 0;
+	txmsg_ingress = 0;
+	if (err)
+		goto out;
+out:
+	txmsg_pass = 0;
+	txmsg_redir = 0;
+	txmsg_drop = 0;
+	return err;
+}
+
+/* Run test_exec() over a fixed list of (iov_length, iov_count, rate)
+ * combinations, stopping at the first failure.
+ *
+ * @opt:  options structure; its iov_length, iov_count and rate fields are
+ *        overwritten for every combination
+ * @cgrp: cgroup file descriptor passed through to test_exec()
+ *
+ * Returns 0 when all combinations pass, otherwise the first error.
+ */
+static int test_send(struct sockmap_options *opt, int cgrp)
+{
+	static const struct {
+		int iov_length;
+		int iov_count;
+		int rate;
+	} combos[] = {
+		{    1,    1,    1 },
+		{    1, 1024,    1 },
+		{ 1024,    1,    1 },
+		{    1,    1, 1024 },
+		{  256, 1024,   10 },
+		{    5,    1,  100 },
+	};
+	unsigned int n;
+	int err = 0;
+
+	for (n = 0; n < sizeof(combos) / sizeof(combos[0]); n++) {
+		opt->iov_length = combos[n].iov_length;
+		opt->iov_count = combos[n].iov_count;
+		opt->rate = combos[n].rate;
+		err = test_exec(cgrp, opt);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/* Exercise combinations of the pass/redirect verdicts with the apply and
+ * cork byte counts, running test_send() for every combination.
+ *
+ * @cgrp: cgroup file descriptor passed through to test_send()
+ *
+ * The txmsg_* globals are left at the values of the last combination that
+ * was run. Returns 0 on success or the first error.
+ */
+static int test_mixed(int cgrp)
+{
+	/* Small and large apply/cork values, first with pass then with redir */
+	static const struct {
+		int pass;
+		int redir;
+		int apply;
+		int cork;
+	} mix[] = {
+		{ 1, 0,    1,    0 },
+		{ 1, 0,    0,    1 },
+		{ 1, 0,    1,    1 },
+		{ 1, 0, 1024,    0 },
+		{ 1, 0,    0, 1024 },
+		{ 1, 0, 1024, 1024 },
+		{ 1, 0, 4096, 4096 },
+		{ 0, 1,    1,    0 },
+		{ 0, 1,    0,    1 },
+		{ 0, 1, 1024,    0 },
+		{ 0, 1,    0, 1024 },
+		{ 0, 1, 1024, 1024 },
+		{ 0, 1, 4096, 4096 },
+	};
+	struct sockmap_options opt = {0};
+	unsigned int n;
+	int err = 0;
+
+	txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+	txmsg_apply = txmsg_cork = 0;
+	txmsg_start = txmsg_end = 0;
+
+	for (n = 0; n < sizeof(mix) / sizeof(mix[0]); n++) {
+		txmsg_pass = mix[n].pass;
+		txmsg_redir = mix[n].redir;
+		txmsg_apply = mix[n].apply;
+		txmsg_cork = mix[n].cork;
+		err = test_send(&opt, cgrp);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/* Exercise the start/end (pull data) settings: first together with the
+ * basic verdict tests, then with a 1600 byte cork over two sweeps of end
+ * offsets, and finally over a handful of boundary cases.
+ *
+ * @cgrp: cgroup file descriptor passed through to the test helpers
+ *
+ * txmsg_start and txmsg_end are reset to 0 before returning. Returns 0 on
+ * success or the first error.
+ */
+static int test_start_end(int cgrp)
+{
+	/* { start, end } pairs run after the two sweeps, in this order */
+	static const int edge[][2] = {
+		{ 1500, 1600 },	/* cork pulling last sg entry */
+		{ 1111, 1112 },	/* pull of single byte in last page */
+		{ 1111,    0 },	/* end < start */
+		{    0, 1601 },	/* end > data */
+		{ 1601, 1600 },	/* start > data */
+	};
+	struct sockmap_options opt = {0};
+	unsigned int n;
+	int err, i;
+
+	/* Basic start/end with lots of iov_count and iov_length values */
+	txmsg_start = 1;
+	txmsg_end = 2;
+	err = test_txmsg(cgrp);
+	if (err)
+		goto out;
+
+	/* The remaining cases all run with cork enabled */
+	opt.rate = 16;
+	opt.iov_count = 1;
+	opt.iov_length = 100;
+	txmsg_cork = 1600;
+
+	/* Pull from offset 0 up to a growing end offset */
+	for (i = 99; i <= 1600; i += 100) {
+		txmsg_start = 0;
+		txmsg_end = i;
+		err = test_exec(cgrp, &opt);
+		if (err)
+			goto out;
+	}
+
+	/* Same sweep, but pulling data from the middle */
+	for (i = 199; i <= 1600; i += 100) {
+		txmsg_start = 100;
+		txmsg_end = i;
+		err = test_exec(cgrp, &opt);
+		if (err)
+			goto out;
+	}
+
+	for (n = 0; n < sizeof(edge) / sizeof(edge[0]); n++) {
+		txmsg_start = edge[n][0];
+		txmsg_end = edge[n][1];
+		err = test_exec(cgrp, &opt);
+		if (err)
+			goto out;
+	}
+
+out:
+	txmsg_start = 0;
+	txmsg_end = 0;
+	return err;
+}
+
+/* Names of the maps looked up in the BPF object by populate_progs(): entry
+ * i is resolved into maps[i] and map_fd[i].
+ */
+char *map_names[] = {
+ "sock_map",
+ "sock_map_txmsg",
+ "sock_map_redir",
+ "sock_apply_bytes",
+ "sock_cork_bytes",
+ "sock_pull_bytes",
+ "sock_redir_flags",
+ "sock_skb_opts",
+};
+
+/* Expected attach type set on the i-th program of the BPF object, in the
+ * order bpf_object__for_each_program() visits them in populate_progs().
+ */
+int prog_attach_type[] = {
+ BPF_SK_SKB_STREAM_PARSER,
+ BPF_SK_SKB_STREAM_VERDICT,
+ BPF_CGROUP_SOCK_OPS,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+};
+
+/* Program type set on the i-th program of the BPF object by
+ * populate_progs(); indexed in parallel with prog_attach_type[].
+ */
+int prog_type[] = {
+ BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_SOCK_OPS,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+};
+
+/* Open and load the BPF object BPF_FILENAME and publish its descriptors.
+ *
+ * Every program gets its type and expected attach type from prog_type[] and
+ * prog_attach_type[] (by position) before the object is loaded. Afterwards
+ * prog_fd[] is filled with the program descriptors and maps[] / map_fd[]
+ * with the maps named in map_names[].
+ *
+ * Returns 0 on success and -1 on any failure (a message is printed).
+ */
+static int populate_progs(void)
+{
+	const int nr_types = sizeof(prog_type) / sizeof(prog_type[0]);
+	const int nr_maps = sizeof(map_fd) / sizeof(map_fd[0]);
+	char *bpf_file = BPF_FILENAME;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	char err_buf[256];
+	int i = 0;
+	long err;
+
+	obj = bpf_object__open(bpf_file);
+	err = libbpf_get_error(obj);
+	if (err) {
+		libbpf_strerror(err, err_buf, sizeof(err_buf));
+		printf("Unable to load eBPF objects in file '%s' : %s\n",
+		       bpf_file, err_buf);
+		return -1;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		/* Never index past the end of the per-program tables */
+		if (i >= nr_types) {
+			fprintf(stderr,
+				"ERROR: '%s' has more than %i programs\n",
+				bpf_file, nr_types);
+			return -1;
+		}
+		bpf_program__set_type(prog, prog_type[i]);
+		bpf_program__set_expected_attach_type(prog,
+						      prog_attach_type[i]);
+		i++;
+	}
+
+	/* A failed load must not be ignored: the descriptors collected
+	 * below would all be invalid.
+	 */
+	err = bpf_object__load(obj);
+	if (err) {
+		libbpf_strerror(err, err_buf, sizeof(err_buf));
+		fprintf(stderr, "ERROR: loading '%s' failed: %s\n",
+			bpf_file, err_buf);
+		return -1;
+	}
+
+	i = 0;
+	bpf_object__for_each_program(prog, obj) {
+		prog_fd[i] = bpf_program__fd(prog);
+		i++;
+	}
+
+	for (i = 0; i < nr_maps; i++) {
+		maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
+		map_fd[i] = bpf_map__fd(maps[i]);
+		if (map_fd[i] < 0) {
+			/* Report the map and the returned code; errno is
+			 * not known to be meaningful at this point.
+			 */
+			fprintf(stderr, "load_bpf_file: map '%s' (%i)\n",
+				map_names[i], map_fd[i]);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/* Run the built-in test suite; main() calls this when no command line
+ * arguments are given.
+ *
+ * Loads the BPF programs, sets up the cgroup environment, creates the
+ * CG_PATH cgroup and then runs the basic, mixed and start/end test groups,
+ * stopping at the first failing group. A pass/fail summary is printed once
+ * the groups have run.
+ *
+ * Returns 0 when everything passed, otherwise a non-zero error.
+ */
+static int test_suite(void)
+{
+	int cg_fd, err;
+
+	err = populate_progs();
+	if (err < 0) {
+		fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
+		return err;
+	}
+
+	if (setup_cgroup_environment()) {
+		fprintf(stderr, "ERROR: cgroup env failed\n");
+		return -EINVAL;
+	}
+
+	cg_fd = create_and_get_cgroup(CG_PATH);
+	if (cg_fd < 0) {
+		/* Report the path that was actually used; no option parsing
+		 * has happened on this path, so optarg is not set here.
+		 */
+		fprintf(stderr,
+			"ERROR: (%i) open cg path failed: %s\n",
+			cg_fd, CG_PATH);
+		return cg_fd;
+	}
+
+	/* Tests basic commands and APIs with range of iov values */
+	txmsg_start = txmsg_end = 0;
+	err = test_txmsg(cg_fd);
+	if (err)
+		goto out;
+
+	/* Tests interesting combinations of APIs used together */
+	err = test_mixed(cg_fd);
+	if (err)
+		goto out;
+
+	/* Tests pull_data API using start/end API */
+	err = test_start_end(cg_fd);
+	if (err)
+		goto out;
+
+out:
+	printf("Summary: %i PASSED %i FAILED\n", passed, failed);
+	close(cg_fd);
+	return err;
+}
+
+/* Program entry point.
+ *
+ * Raises RLIMIT_MEMLOCK, then either runs the whole built-in test suite
+ * (no arguments) or parses the command line and runs the single test that
+ * was selected against the cgroup given with the cgroup option.
+ *
+ * Returns the result of the test run, 1 when the rlimit or program setup
+ * fails, and a negative value for usage errors.
+ */
+int main(int argc, char **argv)
+{
+	struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+	int iov_count = 1, length = 1024, rate = 1;
+	struct sockmap_options options = {0};
+	int opt, longindex, err, cg_fd = 0;
+	char *bpf_file = BPF_FILENAME;
+	int test = PING_PONG;
+
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK)");
+		return 1;
+	}
+
+	if (argc < 2)
+		return test_suite();
+
+	/* NOTE(review): 's', 'e', 'a' and 'k' are not in the short option
+	 * string; presumably they are only reachable through long_options
+	 * (defined elsewhere) - confirm.
+	 */
+	while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
+				  long_options, &longindex)) != -1) {
+		switch (opt) {
+		case 's':
+			txmsg_start = atoi(optarg);
+			break;
+		case 'e':
+			txmsg_end = atoi(optarg);
+			break;
+		case 'a':
+			txmsg_apply = atoi(optarg);
+			break;
+		case 'k':
+			txmsg_cork = atoi(optarg);
+			break;
+		case 'c':
+			/* Both values are open() flags and belong in the
+			 * second argument; the third one is the creation
+			 * mode.
+			 */
+			cg_fd = open(optarg, O_DIRECTORY | O_RDONLY);
+			if (cg_fd < 0) {
+				fprintf(stderr,
+					"ERROR: (%i) open cg path failed: %s\n",
+					cg_fd, optarg);
+				return cg_fd;
+			}
+			break;
+		case 'r':
+			rate = atoi(optarg);
+			break;
+		case 'v':
+			options.verbose = 1;
+			break;
+		case 'i':
+			iov_count = atoi(optarg);
+			break;
+		case 'l':
+			length = atoi(optarg);
+			break;
+		case 'd':
+			options.data_test = true;
+			break;
+		case 't':
+			if (strcmp(optarg, "ping") == 0) {
+				test = PING_PONG;
+			} else if (strcmp(optarg, "sendmsg") == 0) {
+				test = SENDMSG;
+			} else if (strcmp(optarg, "base") == 0) {
+				test = BASE;
+			} else if (strcmp(optarg, "base_sendpage") == 0) {
+				test = BASE_SENDPAGE;
+			} else if (strcmp(optarg, "sendpage") == 0) {
+				test = SENDPAGE;
+			} else {
+				usage(argv);
+				return -1;
+			}
+			break;
+		case 0:
+			break;
+		case 'h':
+		default:
+			usage(argv);
+			return -1;
+		}
+	}
+
+	/* cg_fd keeps its initial value 0 when no cgroup option was given */
+	if (!cg_fd) {
+		fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
+			argv[0]);
+		return -1;
+	}
+
+	err = populate_progs();
+	if (err) {
+		fprintf(stderr, "populate program: (%s) %s\n",
+			bpf_file, strerror(errno));
+		/* Do not leak the cgroup descriptor on the error path */
+		close(cg_fd);
+		return 1;
+	}
+	running = 1;
+
+	/* catch SIGINT */
+	signal(SIGINT, running_handler);
+
+	options.iov_count = iov_count;
+	options.iov_length = length;
+	options.rate = rate;
+
+	err = run_options(&options, cg_fd, test);
+	close(cg_fd);
+	return err;
+}
+
+/* Signal handler (main() installs it for SIGINT): clears the global
+ * `running` flag. The signal number is not used.
+ */
+void running_handler(int a)
+{
+	(void)a;
+	running = 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* Sockmap sample program connects a client and a backend together
+ * using cgroups.
+ *
+ * client:X <---> frontend:80 client:X <---> backend:80
+ *
+ * For simplicity we hard code values here and bind 1:1. The hard
+ * coded values are part of the setup in sockmap.sh script that
+ * is associated with this BPF program.
+ *
+ * The bpf_printk is verbose and prints information as connections
+ * are established and verdicts are decided.
+ */
+
+/* printf-style tracing helper: copies the format string into a local char
+ * array and passes it, together with its size and any extra arguments, to
+ * bpf_trace_printk().
+ */
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+/* Sockmap updated by the sockops program (bpf_sockmap) and used as the
+ * redirect target by the sk_skb2 program (bpf_prog2).
+ */
+struct bpf_map_def SEC("maps") sock_map = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+/* Sockmap that is not referenced by the programs in this file.
+ * NOTE(review): presumably the map user space attaches the sk_msg programs
+ * to - confirm against the loader.
+ */
+struct bpf_map_def SEC("maps") sock_map_txmsg = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+/* Sockmap used as the redirect target by the sk_msg3 and sk_msg4 programs */
+struct bpf_map_def SEC("maps") sock_map_redir = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+/* Single entry array: value passed to bpf_msg_apply_bytes() */
+struct bpf_map_def SEC("maps") sock_apply_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+/* Single entry array: value passed to bpf_msg_cork_bytes() */
+struct bpf_map_def SEC("maps") sock_cork_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+/* Two entry array: index 0 holds the start and index 1 the end argument
+ * passed to bpf_msg_pull_data().
+ */
+struct bpf_map_def SEC("maps") sock_pull_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 2
+};
+
+/* Single entry array: when non-zero, used as the flags argument of
+ * bpf_msg_redirect_map() (and switches the redirect key to 2).
+ */
+struct bpf_map_def SEC("maps") sock_redir_flags = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+/* Single entry array: when non-zero, used as the flags argument of
+ * bpf_sk_redirect_map() in bpf_prog2 (and switches the redirect key to 3).
+ */
+struct bpf_map_def SEC("maps") sock_skb_opts = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+/* sk_skb1: returns the length of the skb it is given. */
+SEC("sk_skb1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+	int len = skb->len;
+
+	return len;
+}
+
+/* sk_skb2: redirect the skb to a socket stored in sock_map.
+ *
+ * The map key is 10 when the local port is 10000 and 1 otherwise. If entry
+ * 0 of sock_skb_opts is non-zero, the key becomes 3 and that value is used
+ * as the redirect flags. Returns the result of bpf_sk_redirect_map().
+ */
+SEC("sk_skb2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+	__u32 lport = skb->local_port;
+	int len, *f, ret, zero = 0;
+	__u64 flags = 0;
+
+	if (lport == 10000)
+		ret = 10;
+	else
+		ret = 1;
+
+	/* len is only used for the trace message below */
+	len = (__u32)skb->data_end - (__u32)skb->data;
+	f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
+	if (f && *f) {
+		ret = 3;
+		flags = *f;
+	}
+
+	bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
+		   len, flags);
+	return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
+}
+
+/* sockops: add newly established sockets to sock_map.
+ *
+ * - passive establish with local port 10000: stored under key 1
+ * - active establish with remote port 10001 (after bpf_ntohl()): stored
+ *   under key 10
+ *
+ * Entries are added with BPF_NOEXIST and the outcome is only traced.
+ * Always returns 0.
+ */
+SEC("sockops")
+int bpf_sockmap(struct bpf_sock_ops *skops)
+{
+	__u32 lport, rport;
+	int op, err = 0, ret;
+
+	op = (int) skops->op;
+
+	switch (op) {
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		lport = skops->local_port;
+		rport = skops->remote_port;
+
+		if (lport == 10000) {
+			/* ret doubles as the sock_map key */
+			ret = 1;
+			err = bpf_sock_map_update(skops, &sock_map, &ret,
+						  BPF_NOEXIST);
+			bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
+				   lport, bpf_ntohl(rport), err);
+		}
+		break;
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+		lport = skops->local_port;
+		rport = skops->remote_port;
+
+		if (bpf_ntohl(rport) == 10001) {
+			ret = 10;
+			err = bpf_sock_map_update(skops, &sock_map, &ret,
+						  BPF_NOEXIST);
+			bpf_printk("active(%i -> %i) map ctx update err: %d\n",
+				   lport, bpf_ntohl(rport), err);
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/* sk_msg1: apply the configured apply/cork/pull settings to the message
+ * and let it pass. Helper return values are ignored.
+ */
+SEC("sk_msg1")
+int bpf_prog4(struct sk_msg_md *msg)
+{
+	int idx0 = 0, idx1 = 1;
+	int *apply, *cork, *pull_start, *pull_end;
+
+	apply = bpf_map_lookup_elem(&sock_apply_bytes, &idx0);
+	if (apply)
+		bpf_msg_apply_bytes(msg, *apply);
+
+	cork = bpf_map_lookup_elem(&sock_cork_bytes, &idx0);
+	if (cork)
+		bpf_msg_cork_bytes(msg, *cork);
+
+	/* sock_pull_bytes: entry 0 is the start, entry 1 the end */
+	pull_start = bpf_map_lookup_elem(&sock_pull_bytes, &idx0);
+	pull_end = bpf_map_lookup_elem(&sock_pull_bytes, &idx1);
+	if (pull_start && pull_end)
+		bpf_msg_pull_data(msg, *pull_start, *pull_end, 0);
+
+	return SK_PASS;
+}
+
+/* sk_msg2: same steps as sk_msg1 (apply, cork, pull, then SK_PASS) but
+ * with trace output: the pull range, a pull_data error if any, the data
+ * length before and after the pull, and the apply/cork helper results.
+ */
+SEC("sk_msg2")
+int bpf_prog5(struct sk_msg_md *msg)
+{
+ /* err1/err2 stay -1 when the corresponding map lookup returns NULL */
+ int err1 = -1, err2 = -1, zero = 0, one = 1;
+ int *bytes, *start, *end, len1, len2;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ err1 = bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ err2 = bpf_msg_cork_bytes(msg, *bytes);
+ /* Data length as seen before any pull */
+ len1 = (__u64)msg->data_end - (__u64)msg->data;
+ /* sock_pull_bytes: entry 0 is the start, entry 1 the end */
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end) {
+ int err;
+
+ bpf_printk("sk_msg2: pull(%i:%i)\n",
+ start ? *start : 0, end ? *end : 0);
+ err = bpf_msg_pull_data(msg, *start, *end, 0);
+ if (err)
+ bpf_printk("sk_msg2: pull_data err %i\n",
+ err);
+ /* Re-read the data pointers after the pull to log the new length */
+ len2 = (__u64)msg->data_end - (__u64)msg->data;
+ bpf_printk("sk_msg2: length update %i->%i\n",
+ len1, len2);
+ }
+ bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
+ len1, err1, err2);
+ return SK_PASS;
+}
+
+/* sk_msg3: apply the configured apply/cork/pull settings, then redirect
+ * the message through sock_map_redir.
+ *
+ * The redirect key is 0 with flags 0, unless entry 0 of sock_redir_flags is
+ * non-zero, in which case the key is 2 and that value is used as the flags.
+ * Returns the result of bpf_msg_redirect_map().
+ */
+SEC("sk_msg3")
+int bpf_prog6(struct sk_msg_md *msg)
+{
+	int idx0 = 0, idx1 = 1, key = 0;
+	int *apply, *cork, *pull_start, *pull_end, *redir;
+	__u64 flags = 0;
+
+	apply = bpf_map_lookup_elem(&sock_apply_bytes, &idx0);
+	if (apply)
+		bpf_msg_apply_bytes(msg, *apply);
+
+	cork = bpf_map_lookup_elem(&sock_cork_bytes, &idx0);
+	if (cork)
+		bpf_msg_cork_bytes(msg, *cork);
+
+	pull_start = bpf_map_lookup_elem(&sock_pull_bytes, &idx0);
+	pull_end = bpf_map_lookup_elem(&sock_pull_bytes, &idx1);
+	if (pull_start && pull_end)
+		bpf_msg_pull_data(msg, *pull_start, *pull_end, 0);
+
+	redir = bpf_map_lookup_elem(&sock_redir_flags, &idx0);
+	if (redir && *redir) {
+		key = 2;
+		flags = *redir;
+	}
+
+	return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+}
+
+/* sk_msg4: same steps as sk_msg3 (apply, cork, pull, then redirect through
+ * sock_map_redir) but with trace output for each step.
+ *
+ * The trace messages carry this program's own "sk_msg4:" prefix so its
+ * output can be told apart from the sk_msg2/sk_msg3 programs. Returns the
+ * result of bpf_msg_redirect_map().
+ */
+SEC("sk_msg4")
+int bpf_prog7(struct sk_msg_md *msg)
+{
+	int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
+	int *f, *bytes, *start, *end, len1, len2;
+	__u64 flags = 0;
+	int err;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		err1 = bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		err2 = bpf_msg_cork_bytes(msg, *bytes);
+	/* Data length as seen before any pull */
+	len1 = (__u64)msg->data_end - (__u64)msg->data;
+	/* sock_pull_bytes: entry 0 is the start, entry 1 the end */
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end) {
+		bpf_printk("sk_msg4: pull(%i:%i)\n",
+			   start ? *start : 0, end ? *end : 0);
+		err = bpf_msg_pull_data(msg, *start, *end, 0);
+		if (err)
+			bpf_printk("sk_msg4: pull_data err %i\n",
+				   err);
+		/* Re-read the data pointers after the pull */
+		len2 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg4: length update %i->%i\n",
+			   len1, len2);
+	}
+	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+	if (f && *f) {
+		key = 2;
+		flags = *f;
+	}
+	bpf_printk("sk_msg4: redirect(%iB) flags=%i err=%i\n",
+		   len1, flags, err1 ? err1 : err2);
+	err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+	bpf_printk("sk_msg4: err %i\n", err);
+	return err;
+}
+
+/* sk_msg5: pass the message only when an apply-bytes value is configured
+ * and bpf_msg_apply_bytes() accepts it.
+ *
+ * Returns SK_DROP when the sock_apply_bytes lookup fails or the helper
+ * returns non-zero, SK_PASS otherwise.
+ */
+SEC("sk_msg5")
+int bpf_prog8(struct sk_msg_md *msg)
+{
+	int ret = 0, *bytes, zero = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes) {
+		ret = bpf_msg_apply_bytes(msg, *bytes);
+		if (ret)
+			return SK_DROP;
+	} else {
+		return SK_DROP;
+	}
+	return SK_PASS;
+}
+
+/* sk_msg6: cork the message when it holds fewer bytes than the configured
+ * cork size.
+ *
+ * Returns SK_PASS when no cork value is found, when the data already
+ * covers it, or when bpf_msg_cork_bytes() succeeds; SK_DROP when the helper
+ * returns non-zero.
+ */
+SEC("sk_msg6")
+int bpf_prog9(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+	int *cork, idx0 = 0;
+
+	cork = bpf_map_lookup_elem(&sock_cork_bytes, &idx0);
+	if (!cork)
+		return SK_PASS;
+
+	if (((__u64)data_end - (__u64)data) >= *cork)
+		return SK_PASS;
+
+	if (bpf_msg_cork_bytes(msg, *cork))
+		return SK_DROP;
+
+	return SK_PASS;
+}
+
+/* sk_msg7: apply the configured apply/cork/pull settings and then drop the
+ * message. Helper return values are ignored.
+ */
+SEC("sk_msg7")
+int bpf_prog10(struct sk_msg_md *msg)
+{
+	int idx0 = 0, idx1 = 1;
+	int *apply, *cork, *pull_start, *pull_end;
+
+	apply = bpf_map_lookup_elem(&sock_apply_bytes, &idx0);
+	if (apply)
+		bpf_msg_apply_bytes(msg, *apply);
+
+	cork = bpf_map_lookup_elem(&sock_cork_bytes, &idx0);
+	if (cork)
+		bpf_msg_cork_bytes(msg, *cork);
+
+	/* sock_pull_bytes: entry 0 is the start, entry 1 the end */
+	pull_start = bpf_map_lookup_elem(&sock_pull_bytes, &idx0);
+	pull_end = bpf_map_lookup_elem(&sock_pull_bytes, &idx1);
+	if (pull_start && pull_end)
+		bpf_msg_pull_data(msg, *pull_start, *pull_end, 0);
+
+	return SK_DROP;
+}
+
+/* Values emitted into the object's "version" and "license" sections */
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";