Merge branch 'bpf-sockmap-selftests'
author Daniel Borkmann <daniel@iogearbox.net>
Tue, 24 Apr 2018 22:06:55 +0000 (00:06 +0200)
committer Daniel Borkmann <daniel@iogearbox.net>
Tue, 24 Apr 2018 22:07:05 +0000 (00:07 +0200)
John Fastabend says:

====================
This series moves ./samples/sockmap into BPF selftests. There are a
few good reasons to do this. First, by pushing this into selftests
the tests will be run automatically. Second, sockmap was not really
a sample of anything anymore, but rather a large set of tests.

Note: There are three recent fixes outstanding against bpf branch
that can be detected occasionally by the automated tests here.

https://patchwork.ozlabs.org/patch/903138/
https://patchwork.ozlabs.org/patch/903139/
https://patchwork.ozlabs.org/patch/903140/
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
samples/sockmap/Makefile [deleted file]
samples/sockmap/sockmap_kern.c [deleted file]
samples/sockmap/sockmap_test.sh [deleted file]
samples/sockmap/sockmap_user.c [deleted file]
tools/include/uapi/linux/bpf.h
tools/include/uapi/linux/if_link.h
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/test_sockmap.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_sockmap_kern.c [new file with mode: 0644]

diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile
deleted file mode 100644 (file)
index 9bf2881..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-# List of programs to build
-hostprogs-y := sockmap
-
-# Libbpf dependencies
-LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o
-
-HOSTCFLAGS += -I$(objtree)/usr/include
-HOSTCFLAGS += -I$(srctree)/tools/lib/
-HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
-HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
-HOSTCFLAGS += -I$(srctree)/tools/perf
-
-sockmap-objs := ../bpf/bpf_load.o $(LIBBPF) sockmap_user.o
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-always += sockmap_kern.o
-
-HOSTLOADLIBES_sockmap += -lelf -lpthread
-
-# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
-#  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
-LLC ?= llc
-CLANG ?= clang
-
-# Trick to allow make to be run from this directory
-all:
-       $(MAKE) -C ../../ $(CURDIR)/
-
-clean:
-       $(MAKE) -C ../../ M=$(CURDIR) clean
-       @rm -f *~
-
-$(obj)/syscall_nrs.s:  $(src)/syscall_nrs.c
-       $(call if_changed_dep,cc_s_c)
-
-$(obj)/syscall_nrs.h:  $(obj)/syscall_nrs.s FORCE
-       $(call filechk,offsets,__SYSCALL_NRS_H__)
-
-clean-files += syscall_nrs.h
-
-FORCE:
-
-
-# Verify LLVM compiler tools are available and bpf target is supported by llc
-.PHONY: verify_cmds verify_target_bpf $(CLANG) $(LLC)
-
-verify_cmds: $(CLANG) $(LLC)
-       @for TOOL in $^ ; do \
-               if ! (which -- "$${TOOL}" > /dev/null 2>&1); then \
-                       echo "*** ERROR: Cannot find LLVM tool $${TOOL}" ;\
-                       exit 1; \
-               else true; fi; \
-       done
-
-verify_target_bpf: verify_cmds
-       @if ! (${LLC} -march=bpf -mattr=help > /dev/null 2>&1); then \
-               echo "*** ERROR: LLVM (${LLC}) does not support 'bpf' target" ;\
-               echo "   NOTICE: LLVM version >= 3.7.1 required" ;\
-               exit 2; \
-       else true; fi
-
-$(src)/*.c: verify_target_bpf
-
-# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
-# But, there is no easy way to fix it, so just exclude it since it is
-# useless for BPF samples.
-$(obj)/%.o: $(src)/%.c
-       $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
-               -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
-               -Wno-compare-distinct-pointer-types \
-               -Wno-gnu-variable-sized-type-not-at-end \
-               -Wno-address-of-packed-member -Wno-tautological-compare \
-               -Wno-unknown-warning-option \
-               -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
deleted file mode 100644 (file)
index 9ff8bc5..0000000
+++ /dev/null
@@ -1,341 +0,0 @@
-/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#include <uapi/linux/bpf.h>
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/if_packet.h>
-#include <uapi/linux/ip.h>
-#include "../../tools/testing/selftests/bpf/bpf_helpers.h"
-#include "../../tools/testing/selftests/bpf/bpf_endian.h"
-
-/* Sockmap sample program connects a client and a backend together
- * using cgroups.
- *
- *    client:X <---> frontend:80 client:X <---> backend:80
- *
- * For simplicity we hard code values here and bind 1:1. The hard
- * coded values are part of the setup in sockmap.sh script that
- * is associated with this BPF program.
- *
- * The bpf_printk is verbose and prints information as connections
- * are established and verdicts are decided.
- */
-
-#define bpf_printk(fmt, ...)                                   \
-({                                                             \
-              char ____fmt[] = fmt;                            \
-              bpf_trace_printk(____fmt, sizeof(____fmt),       \
-                               ##__VA_ARGS__);                 \
-})
-
-struct bpf_map_def SEC("maps") sock_map = {
-       .type = BPF_MAP_TYPE_SOCKMAP,
-       .key_size = sizeof(int),
-       .value_size = sizeof(int),
-       .max_entries = 20,
-};
-
-struct bpf_map_def SEC("maps") sock_map_txmsg = {
-       .type = BPF_MAP_TYPE_SOCKMAP,
-       .key_size = sizeof(int),
-       .value_size = sizeof(int),
-       .max_entries = 20,
-};
-
-struct bpf_map_def SEC("maps") sock_map_redir = {
-       .type = BPF_MAP_TYPE_SOCKMAP,
-       .key_size = sizeof(int),
-       .value_size = sizeof(int),
-       .max_entries = 20,
-};
-
-struct bpf_map_def SEC("maps") sock_apply_bytes = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(int),
-       .value_size = sizeof(int),
-       .max_entries = 1
-};
-
-struct bpf_map_def SEC("maps") sock_cork_bytes = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(int),
-       .value_size = sizeof(int),
-       .max_entries = 1
-};
-
-struct bpf_map_def SEC("maps") sock_pull_bytes = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(int),
-       .value_size = sizeof(int),
-       .max_entries = 2
-};
-
-struct bpf_map_def SEC("maps") sock_redir_flags = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(int),
-       .value_size = sizeof(int),
-       .max_entries = 1
-};
-
-struct bpf_map_def SEC("maps") sock_skb_opts = {
-       .type = BPF_MAP_TYPE_ARRAY,
-       .key_size = sizeof(int),
-       .value_size = sizeof(int),
-       .max_entries = 1
-};
-
-SEC("sk_skb1")
-int bpf_prog1(struct __sk_buff *skb)
-{
-       return skb->len;
-}
-
-SEC("sk_skb2")
-int bpf_prog2(struct __sk_buff *skb)
-{
-       __u32 lport = skb->local_port;
-       __u32 rport = skb->remote_port;
-       int len, *f, ret, zero = 0;
-       __u64 flags = 0;
-
-       if (lport == 10000)
-               ret = 10;
-       else
-               ret = 1;
-
-       len = (__u32)skb->data_end - (__u32)skb->data;
-       f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
-       if (f && *f) {
-               ret = 3;
-               flags = *f;
-       }
-
-       bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
-                  len, flags);
-       return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
-}
-
-SEC("sockops")
-int bpf_sockmap(struct bpf_sock_ops *skops)
-{
-       __u32 lport, rport;
-       int op, err = 0, index, key, ret;
-
-
-       op = (int) skops->op;
-
-       switch (op) {
-       case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
-               lport = skops->local_port;
-               rport = skops->remote_port;
-
-               if (lport == 10000) {
-                       ret = 1;
-                       err = bpf_sock_map_update(skops, &sock_map, &ret,
-                                                 BPF_NOEXIST);
-                       bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
-                                  lport, bpf_ntohl(rport), err);
-               }
-               break;
-       case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
-               lport = skops->local_port;
-               rport = skops->remote_port;
-
-               if (bpf_ntohl(rport) == 10001) {
-                       ret = 10;
-                       err = bpf_sock_map_update(skops, &sock_map, &ret,
-                                                 BPF_NOEXIST);
-                       bpf_printk("active(%i -> %i) map ctx update err: %d\n",
-                                  lport, bpf_ntohl(rport), err);
-               }
-               break;
-       default:
-               break;
-       }
-
-       return 0;
-}
-
-SEC("sk_msg1")
-int bpf_prog4(struct sk_msg_md *msg)
-{
-       int *bytes, zero = 0, one = 1;
-       int *start, *end;
-
-       bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-       if (bytes)
-               bpf_msg_apply_bytes(msg, *bytes);
-       bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-       if (bytes)
-               bpf_msg_cork_bytes(msg, *bytes);
-       start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
-       end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
-       if (start && end)
-               bpf_msg_pull_data(msg, *start, *end, 0);
-       return SK_PASS;
-}
-
-SEC("sk_msg2")
-int bpf_prog5(struct sk_msg_md *msg)
-{
-       int err1 = -1, err2 = -1, zero = 0, one = 1;
-       int *bytes, *start, *end, len1, len2;
-
-       bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-       if (bytes)
-               err1 = bpf_msg_apply_bytes(msg, *bytes);
-       bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-       if (bytes)
-               err2 = bpf_msg_cork_bytes(msg, *bytes);
-       len1 = (__u64)msg->data_end - (__u64)msg->data;
-       start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
-       end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
-       if (start && end) {
-               int err;
-
-               bpf_printk("sk_msg2: pull(%i:%i)\n",
-                          start ? *start : 0, end ? *end : 0);
-               err = bpf_msg_pull_data(msg, *start, *end, 0);
-               if (err)
-                       bpf_printk("sk_msg2: pull_data err %i\n",
-                                  err);
-               len2 = (__u64)msg->data_end - (__u64)msg->data;
-               bpf_printk("sk_msg2: length update %i->%i\n",
-                          len1, len2);
-       }
-       bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
-                  len1, err1, err2);
-       return SK_PASS;
-}
-
-SEC("sk_msg3")
-int bpf_prog6(struct sk_msg_md *msg)
-{
-       int *bytes, zero = 0, one = 1, key = 0;
-       int *start, *end, *f;
-       __u64 flags = 0;
-
-       bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-       if (bytes)
-               bpf_msg_apply_bytes(msg, *bytes);
-       bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-       if (bytes)
-               bpf_msg_cork_bytes(msg, *bytes);
-       start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
-       end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
-       if (start && end)
-               bpf_msg_pull_data(msg, *start, *end, 0);
-       f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
-       if (f && *f) {
-               key = 2;
-               flags = *f;
-       }
-       return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
-}
-
-SEC("sk_msg4")
-int bpf_prog7(struct sk_msg_md *msg)
-{
-       int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
-       int *f, *bytes, *start, *end, len1, len2;
-       __u64 flags = 0;
-
-               int err;
-       bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-       if (bytes)
-               err1 = bpf_msg_apply_bytes(msg, *bytes);
-       bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-       if (bytes)
-               err2 = bpf_msg_cork_bytes(msg, *bytes);
-       len1 = (__u64)msg->data_end - (__u64)msg->data;
-       start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
-       end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
-       if (start && end) {
-
-               bpf_printk("sk_msg2: pull(%i:%i)\n",
-                          start ? *start : 0, end ? *end : 0);
-               err = bpf_msg_pull_data(msg, *start, *end, 0);
-               if (err)
-                       bpf_printk("sk_msg2: pull_data err %i\n",
-                                  err);
-               len2 = (__u64)msg->data_end - (__u64)msg->data;
-               bpf_printk("sk_msg2: length update %i->%i\n",
-                          len1, len2);
-       }
-       f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
-       if (f && *f) {
-               key = 2;
-               flags = *f;
-       }
-       bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
-                  len1, flags, err1 ? err1 : err2);
-       err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
-       bpf_printk("sk_msg3: err %i\n", err);
-       return err;
-}
-
-SEC("sk_msg5")
-int bpf_prog8(struct sk_msg_md *msg)
-{
-       void *data_end = (void *)(long) msg->data_end;
-       void *data = (void *)(long) msg->data;
-       int ret = 0, *bytes, zero = 0;
-
-       bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-       if (bytes) {
-               ret = bpf_msg_apply_bytes(msg, *bytes);
-               if (ret)
-                       return SK_DROP;
-       } else {
-               return SK_DROP;
-       }
-       return SK_PASS;
-}
-SEC("sk_msg6")
-int bpf_prog9(struct sk_msg_md *msg)
-{
-       void *data_end = (void *)(long) msg->data_end;
-       void *data = (void *)(long) msg->data;
-       int ret = 0, *bytes, zero = 0;
-
-       bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-       if (bytes) {
-               if (((__u64)data_end - (__u64)data) >= *bytes)
-                       return SK_PASS;
-               ret = bpf_msg_cork_bytes(msg, *bytes);
-               if (ret)
-                       return SK_DROP;
-       }
-       return SK_PASS;
-}
-
-SEC("sk_msg7")
-int bpf_prog10(struct sk_msg_md *msg)
-{
-       int *bytes, zero = 0, one = 1;
-       int *start, *end;
-
-       bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-       if (bytes)
-               bpf_msg_apply_bytes(msg, *bytes);
-       bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-       if (bytes)
-               bpf_msg_cork_bytes(msg, *bytes);
-       start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
-       end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
-       if (start && end)
-               bpf_msg_pull_data(msg, *start, *end, 0);
-
-       return SK_DROP;
-}
-
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh
deleted file mode 100755 (executable)
index ace75f0..0000000
+++ /dev/null
@@ -1,488 +0,0 @@
-#Test a bunch of positive cases to verify basic functionality
-for prog in  "--txmsg_redir --txmsg_skb" "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
-for t in "sendmsg" "sendpage"; do
-for r in 1 10 100; do
-       for i in 1 10 100; do
-               for l in 1 10 100; do
-                       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-                       echo $TEST
-                       $TEST
-                       sleep 2
-               done
-       done
-done
-done
-done
-
-#Test max iov
-t="sendmsg"
-r=1
-i=1024
-l=1
-prog="--txmsg"
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-sleep 2
-prog="--txmsg_redir"
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-
-# Test max iov with 1k send
-
-t="sendmsg"
-r=1
-i=1024
-l=1024
-prog="--txmsg"
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-sleep 2
-prog="--txmsg_redir"
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-echo $TEST
-$TEST
-sleep 2
-
-# Test apply with 1B
-r=1
-i=1024
-l=1024
-prog="--txmsg_apply 1"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test apply with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test apply with apply that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test apply and redirect with 1B
-r=1
-i=1024
-l=1024
-prog="--txmsg_redir --txmsg_apply 1"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 1 --txmsg_ingress"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 1 --txmsg_skb"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-
-# Test apply and redirect with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_redir --txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 2048 --txmsg_ingress"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-prog="--txmsg_redir --txmsg_apply 2048 --txmsg_skb"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-
-# Test apply and redirect with apply that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_apply 2048"
-
-for t in "sendmsg" "sendpage"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test cork with 1B not really useful but test it anyways
-r=1
-i=1024
-l=1024
-prog="--txmsg_cork 1"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test cork with a more reasonable 100B
-r=1
-i=1000
-l=1000
-prog="--txmsg_cork 100"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test cork with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test cork with cork that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-r=1
-i=1024
-l=1024
-prog="--txmsg_redir --txmsg_cork 1"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test cork with a more reasonable 100B
-r=1
-i=1000
-l=1000
-prog="--txmsg_redir --txmsg_cork 100"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test cork with larger value than send
-r=1
-i=8
-l=1024
-prog="--txmsg_redir --txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test cork with cork that never reaches limit
-r=1024
-i=1
-l=1
-prog="--txmsg_cork 2048"
-
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-
-# mix and match cork and apply not really useful but valid programs
-
-# Test apply < cork
-r=100
-i=1
-l=5
-prog="--txmsg_apply 10 --txmsg_cork 100"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Try again with larger sizes so we hit overflow case
-r=100
-i=1000
-l=2048
-prog="--txmsg_apply 4096 --txmsg_cork 8096"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test apply > cork
-r=100
-i=1
-l=5
-prog="--txmsg_apply 100 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Again with larger sizes so we hit overflow cases
-r=100
-i=1000
-l=2048
-prog="--txmsg_apply 8096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-
-# Test apply = cork
-r=100
-i=1
-l=5
-prog="--txmsg_apply 10 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-r=100
-i=1000
-l=2048
-prog="--txmsg_apply 4096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test apply < cork
-r=100
-i=1
-l=5
-prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 100"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Try again with larger sizes so we hit overflow case
-r=100
-i=1000
-l=2048
-prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 8096"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Test apply > cork
-r=100
-i=1
-l=5
-prog="--txmsg_redir --txmsg_apply 100 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Again with larger sizes so we hit overflow cases
-r=100
-i=1000
-l=2048
-prog="--txmsg_redir --txmsg_apply 8096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-
-# Test apply = cork
-r=100
-i=1
-l=5
-prog="--txmsg_redir --txmsg_apply 10 --txmsg_cork 10"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-r=100
-i=1000
-l=2048
-prog="--txmsg_redir --txmsg_apply 4096 --txmsg_cork 4096"
-for t in "sendpage" "sendmsg"; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-# Tests for bpf_msg_pull_data()
-for i in `seq 99 100 1600`; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
-               --txmsg --txmsg_start 0 --txmsg_end $i --txmsg_cork 1600"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-for i in `seq 199 100 1600`; do
-       TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
-               --txmsg --txmsg_start 100 --txmsg_end $i --txmsg_cork 1600"
-       echo $TEST
-       $TEST
-       sleep 2
-done
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
-       --txmsg --txmsg_start 1500 --txmsg_end 1600 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
-       --txmsg --txmsg_start 1111 --txmsg_end 1112 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
-       --txmsg --txmsg_start 1111 --txmsg_end 0 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
-       --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1600"
-echo $TEST
-$TEST
-sleep 2
-
-TEST="./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 16 -i 1 -l 100 \
-       --txmsg --txmsg_start 0 --txmsg_end 1601 --txmsg_cork 1602"
-echo $TEST
-$TEST
-sleep 2
-
-# Run through gamut again with start and end
-for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
-for t in "sendmsg" "sendpage"; do
-for r in 1 10 100; do
-       for i in 1 10 100; do
-               for l in 1 10 100; do
-                       TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog --txmsg_start 1 --txmsg_end 2"
-                       echo $TEST
-                       $TEST
-                       sleep 2
-               done
-       done
-done
-done
-done
-
-# Some specific tests to cover specific code paths
-./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
-       -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
-./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
-       -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 3
-./sockmap --cgroup /mnt/cgroup2/ -t sendpage \
-       -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
-./sockmap --cgroup /mnt/cgroup2/ -t sendmsg \
-       -r 5 -i 1 -l 1 --txmsg_redir --txmsg_cork 5 --txmsg_apply 5
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
deleted file mode 100644 (file)
index 6f23349..0000000
+++ /dev/null
@@ -1,894 +0,0 @@
-/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <sys/select.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <string.h>
-#include <errno.h>
-#include <sys/ioctl.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <fcntl.h>
-#include <sys/wait.h>
-#include <time.h>
-
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <sys/types.h>
-#include <sys/sendfile.h>
-
-#include <linux/netlink.h>
-#include <linux/socket.h>
-#include <linux/sock_diag.h>
-#include <linux/bpf.h>
-#include <linux/if_link.h>
-#include <assert.h>
-#include <libgen.h>
-
-#include <getopt.h>
-
-#include "../bpf/bpf_load.h"
-#include "../bpf/bpf_util.h"
-#include "../bpf/libbpf.h"
-
-int running;
-void running_handler(int a);
-
-/* randomly selected ports for testing on lo */
-#define S1_PORT 10000
-#define S2_PORT 10001
-
-/* global sockets */
-int s1, s2, c1, c2, p1, p2;
-
-int txmsg_pass;
-int txmsg_noisy;
-int txmsg_redir;
-int txmsg_redir_noisy;
-int txmsg_drop;
-int txmsg_apply;
-int txmsg_cork;
-int txmsg_start;
-int txmsg_end;
-int txmsg_ingress;
-int txmsg_skb;
-
-static const struct option long_options[] = {
-       {"help",        no_argument,            NULL, 'h' },
-       {"cgroup",      required_argument,      NULL, 'c' },
-       {"rate",        required_argument,      NULL, 'r' },
-       {"verbose",     no_argument,            NULL, 'v' },
-       {"iov_count",   required_argument,      NULL, 'i' },
-       {"length",      required_argument,      NULL, 'l' },
-       {"test",        required_argument,      NULL, 't' },
-       {"data_test",   no_argument,            NULL, 'd' },
-       {"txmsg",               no_argument,    &txmsg_pass,  1  },
-       {"txmsg_noisy",         no_argument,    &txmsg_noisy, 1  },
-       {"txmsg_redir",         no_argument,    &txmsg_redir, 1  },
-       {"txmsg_redir_noisy",   no_argument,    &txmsg_redir_noisy, 1},
-       {"txmsg_drop",          no_argument,    &txmsg_drop, 1 },
-       {"txmsg_apply", required_argument,      NULL, 'a'},
-       {"txmsg_cork",  required_argument,      NULL, 'k'},
-       {"txmsg_start", required_argument,      NULL, 's'},
-       {"txmsg_end",   required_argument,      NULL, 'e'},
-       {"txmsg_ingress", no_argument,          &txmsg_ingress, 1 },
-       {"txmsg_skb", no_argument,              &txmsg_skb, 1 },
-       {0, 0, NULL, 0 }
-};
-
-static void usage(char *argv[])
-{
-       int i;
-
-       printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
-       printf(" options:\n");
-       for (i = 0; long_options[i].name != 0; i++) {
-               printf(" --%-12s", long_options[i].name);
-               if (long_options[i].flag != NULL)
-                       printf(" flag (internal value:%d)\n",
-                               *long_options[i].flag);
-               else
-                       printf(" -%c\n", long_options[i].val);
-       }
-       printf("\n");
-}
-
-static int sockmap_init_sockets(void)
-{
-       int i, err, one = 1;
-       struct sockaddr_in addr;
-       int *fds[4] = {&s1, &s2, &c1, &c2};
-
-       s1 = s2 = p1 = p2 = c1 = c2 = 0;
-
-       /* Init sockets */
-       for (i = 0; i < 4; i++) {
-               *fds[i] = socket(AF_INET, SOCK_STREAM, 0);
-               if (*fds[i] < 0) {
-                       perror("socket s1 failed()");
-                       return errno;
-               }
-       }
-
-       /* Allow reuse */
-       for (i = 0; i < 2; i++) {
-               err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
-                                (char *)&one, sizeof(one));
-               if (err) {
-                       perror("setsockopt failed()");
-                       return errno;
-               }
-       }
-
-       /* Non-blocking sockets */
-       for (i = 0; i < 2; i++) {
-               err = ioctl(*fds[i], FIONBIO, (char *)&one);
-               if (err < 0) {
-                       perror("ioctl s1 failed()");
-                       return errno;
-               }
-       }
-
-       /* Bind server sockets */
-       memset(&addr, 0, sizeof(struct sockaddr_in));
-       addr.sin_family = AF_INET;
-       addr.sin_addr.s_addr = inet_addr("127.0.0.1");
-
-       addr.sin_port = htons(S1_PORT);
-       err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
-       if (err < 0) {
-               perror("bind s1 failed()\n");
-               return errno;
-       }
-
-       addr.sin_port = htons(S2_PORT);
-       err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
-       if (err < 0) {
-               perror("bind s2 failed()\n");
-               return errno;
-       }
-
-       /* Listen server sockets */
-       addr.sin_port = htons(S1_PORT);
-       err = listen(s1, 32);
-       if (err < 0) {
-               perror("listen s1 failed()\n");
-               return errno;
-       }
-
-       addr.sin_port = htons(S2_PORT);
-       err = listen(s2, 32);
-       if (err < 0) {
-               perror("listen s1 failed()\n");
-               return errno;
-       }
-
-       /* Initiate Connect */
-       addr.sin_port = htons(S1_PORT);
-       err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
-       if (err < 0 && errno != EINPROGRESS) {
-               perror("connect c1 failed()\n");
-               return errno;
-       }
-
-       addr.sin_port = htons(S2_PORT);
-       err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
-       if (err < 0 && errno != EINPROGRESS) {
-               perror("connect c2 failed()\n");
-               return errno;
-       } else if (err < 0) {
-               err = 0;
-       }
-
-       /* Accept Connecrtions */
-       p1 = accept(s1, NULL, NULL);
-       if (p1 < 0) {
-               perror("accept s1 failed()\n");
-               return errno;
-       }
-
-       p2 = accept(s2, NULL, NULL);
-       if (p2 < 0) {
-               perror("accept s1 failed()\n");
-               return errno;
-       }
-
-       printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
-       printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
-               c1, s1, c2, s2);
-       return 0;
-}
-
-struct msg_stats {
-       size_t bytes_sent;
-       size_t bytes_recvd;
-       struct timespec start;
-       struct timespec end;
-};
-
-struct sockmap_options {
-       int verbose;
-       bool base;
-       bool sendpage;
-       bool data_test;
-       bool drop_expected;
-};
-
-static int msg_loop_sendpage(int fd, int iov_length, int cnt,
-                            struct msg_stats *s,
-                            struct sockmap_options *opt)
-{
-       bool drop = opt->drop_expected;
-       unsigned char k = 0;
-       FILE *file;
-       int i, fp;
-
-       file = fopen(".sendpage_tst.tmp", "w+");
-       for (i = 0; i < iov_length * cnt; i++, k++)
-               fwrite(&k, sizeof(char), 1, file);
-       fflush(file);
-       fseek(file, 0, SEEK_SET);
-       fclose(file);
-
-       fp = open(".sendpage_tst.tmp", O_RDONLY);
-       clock_gettime(CLOCK_MONOTONIC, &s->start);
-       for (i = 0; i < cnt; i++) {
-               int sent = sendfile(fd, fp, NULL, iov_length);
-
-               if (!drop && sent < 0) {
-                       perror("send loop error:");
-                       close(fp);
-                       return sent;
-               } else if (drop && sent >= 0) {
-                       printf("sendpage loop error expected: %i\n", sent);
-                       close(fp);
-                       return -EIO;
-               }
-
-               if (sent > 0)
-                       s->bytes_sent += sent;
-       }
-       clock_gettime(CLOCK_MONOTONIC, &s->end);
-       close(fp);
-       return 0;
-}
-
-static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
-                   struct msg_stats *s, bool tx,
-                   struct sockmap_options *opt)
-{
-       struct msghdr msg = {0};
-       int err, i, flags = MSG_NOSIGNAL;
-       struct iovec *iov;
-       unsigned char k;
-       bool data_test = opt->data_test;
-       bool drop = opt->drop_expected;
-
-       iov = calloc(iov_count, sizeof(struct iovec));
-       if (!iov)
-               return errno;
-
-       k = 0;
-       for (i = 0; i < iov_count; i++) {
-               unsigned char *d = calloc(iov_length, sizeof(char));
-
-               if (!d) {
-                       fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
-                       goto out_errno;
-               }
-               iov[i].iov_base = d;
-               iov[i].iov_len = iov_length;
-
-               if (data_test && tx) {
-                       int j;
-
-                       for (j = 0; j < iov_length; j++)
-                               d[j] = k++;
-               }
-       }
-
-       msg.msg_iov = iov;
-       msg.msg_iovlen = iov_count;
-       k = 0;
-
-       if (tx) {
-               clock_gettime(CLOCK_MONOTONIC, &s->start);
-               for (i = 0; i < cnt; i++) {
-                       int sent = sendmsg(fd, &msg, flags);
-
-                       if (!drop && sent < 0) {
-                               perror("send loop error:");
-                               goto out_errno;
-                       } else if (drop && sent >= 0) {
-                               printf("send loop error expected: %i\n", sent);
-                               errno = -EIO;
-                               goto out_errno;
-                       }
-                       if (sent > 0)
-                               s->bytes_sent += sent;
-               }
-               clock_gettime(CLOCK_MONOTONIC, &s->end);
-       } else {
-               int slct, recv, max_fd = fd;
-               struct timeval timeout;
-               float total_bytes;
-               fd_set w;
-
-               total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
-               err = clock_gettime(CLOCK_MONOTONIC, &s->start);
-               if (err < 0)
-                       perror("recv start time: ");
-               while (s->bytes_recvd < total_bytes) {
-                       timeout.tv_sec = 1;
-                       timeout.tv_usec = 0;
-
-                       /* FD sets */
-                       FD_ZERO(&w);
-                       FD_SET(fd, &w);
-
-                       slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
-                       if (slct == -1) {
-                               perror("select()");
-                               clock_gettime(CLOCK_MONOTONIC, &s->end);
-                               goto out_errno;
-                       } else if (!slct) {
-                               fprintf(stderr, "unexpected timeout\n");
-                               errno = -EIO;
-                               clock_gettime(CLOCK_MONOTONIC, &s->end);
-                               goto out_errno;
-                       }
-
-                       recv = recvmsg(fd, &msg, flags);
-                       if (recv < 0) {
-                               if (errno != EWOULDBLOCK) {
-                                       clock_gettime(CLOCK_MONOTONIC, &s->end);
-                                       perror("recv failed()\n");
-                                       goto out_errno;
-                               }
-                       }
-
-                       s->bytes_recvd += recv;
-
-                       if (data_test) {
-                               int j;
-
-                               for (i = 0; i < msg.msg_iovlen; i++) {
-                                       unsigned char *d = iov[i].iov_base;
-
-                                       for (j = 0;
-                                            j < iov[i].iov_len && recv; j++) {
-                                               if (d[j] != k++) {
-                                                       errno = -EIO;
-                                                       fprintf(stderr,
-                                                               "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
-                                                               i, j, d[j], k - 1, d[j+1], k + 1);
-                                                       goto out_errno;
-                                               }
-                                               recv--;
-                                       }
-                               }
-                       }
-               }
-               clock_gettime(CLOCK_MONOTONIC, &s->end);
-       }
-
-       for (i = 0; i < iov_count; i++)
-               free(iov[i].iov_base);
-       free(iov);
-       return 0;
-out_errno:
-       for (i = 0; i < iov_count; i++)
-               free(iov[i].iov_base);
-       free(iov);
-       return errno;
-}
-
-static float giga = 1000000000;
-
-static inline float sentBps(struct msg_stats s)
-{
-       return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
-}
-
-static inline float recvdBps(struct msg_stats s)
-{
-       return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
-}
-
-static int sendmsg_test(int iov_count, int iov_buf, int cnt,
-                       struct sockmap_options *opt)
-{
-       float sent_Bps = 0, recvd_Bps = 0;
-       int rx_fd, txpid, rxpid, err = 0;
-       struct msg_stats s = {0};
-       int status;
-
-       errno = 0;
-
-       if (opt->base)
-               rx_fd = p1;
-       else
-               rx_fd = p2;
-
-       rxpid = fork();
-       if (rxpid == 0) {
-               if (opt->drop_expected)
-                       exit(1);
-
-               if (opt->sendpage)
-                       iov_count = 1;
-               err = msg_loop(rx_fd, iov_count, iov_buf,
-                              cnt, &s, false, opt);
-               if (err)
-                       fprintf(stderr,
-                               "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
-                               iov_count, iov_buf, cnt, err);
-               shutdown(p2, SHUT_RDWR);
-               shutdown(p1, SHUT_RDWR);
-               if (s.end.tv_sec - s.start.tv_sec) {
-                       sent_Bps = sentBps(s);
-                       recvd_Bps = recvdBps(s);
-               }
-               fprintf(stdout,
-                       "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
-                       s.bytes_sent, sent_Bps, sent_Bps/giga,
-                       s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
-               exit(1);
-       } else if (rxpid == -1) {
-               perror("msg_loop_rx: ");
-               return errno;
-       }
-
-       txpid = fork();
-       if (txpid == 0) {
-               if (opt->sendpage)
-                       err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
-               else
-                       err = msg_loop(c1, iov_count, iov_buf,
-                                      cnt, &s, true, opt);
-
-               if (err)
-                       fprintf(stderr,
-                               "msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
-                               iov_count, iov_buf, cnt, err);
-               shutdown(c1, SHUT_RDWR);
-               if (s.end.tv_sec - s.start.tv_sec) {
-                       sent_Bps = sentBps(s);
-                       recvd_Bps = recvdBps(s);
-               }
-               fprintf(stdout,
-                       "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
-                       s.bytes_sent, sent_Bps, sent_Bps/giga,
-                       s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
-               exit(1);
-       } else if (txpid == -1) {
-               perror("msg_loop_tx: ");
-               return errno;
-       }
-
-       assert(waitpid(rxpid, &status, 0) == rxpid);
-       assert(waitpid(txpid, &status, 0) == txpid);
-       return err;
-}
-
-static int forever_ping_pong(int rate, struct sockmap_options *opt)
-{
-       struct timeval timeout;
-       char buf[1024] = {0};
-       int sc;
-
-       timeout.tv_sec = 10;
-       timeout.tv_usec = 0;
-
-       /* Ping/Pong data from client to server */
-       sc = send(c1, buf, sizeof(buf), 0);
-       if (sc < 0) {
-               perror("send failed()\n");
-               return sc;
-       }
-
-       do {
-               int s, rc, i, max_fd = p2;
-               fd_set w;
-
-               /* FD sets */
-               FD_ZERO(&w);
-               FD_SET(c1, &w);
-               FD_SET(c2, &w);
-               FD_SET(p1, &w);
-               FD_SET(p2, &w);
-
-               s = select(max_fd + 1, &w, NULL, NULL, &timeout);
-               if (s == -1) {
-                       perror("select()");
-                       break;
-               } else if (!s) {
-                       fprintf(stderr, "unexpected timeout\n");
-                       break;
-               }
-
-               for (i = 0; i <= max_fd && s > 0; ++i) {
-                       if (!FD_ISSET(i, &w))
-                               continue;
-
-                       s--;
-
-                       rc = recv(i, buf, sizeof(buf), 0);
-                       if (rc < 0) {
-                               if (errno != EWOULDBLOCK) {
-                                       perror("recv failed()\n");
-                                       return rc;
-                               }
-                       }
-
-                       if (rc == 0) {
-                               close(i);
-                               break;
-                       }
-
-                       sc = send(i, buf, rc, 0);
-                       if (sc < 0) {
-                               perror("send failed()\n");
-                               return sc;
-                       }
-               }
-
-               if (rate)
-                       sleep(rate);
-
-               if (opt->verbose) {
-                       printf(".");
-                       fflush(stdout);
-
-               }
-       } while (running);
-
-       return 0;
-}
-
-enum {
-       PING_PONG,
-       SENDMSG,
-       BASE,
-       BASE_SENDPAGE,
-       SENDPAGE,
-};
-
-int main(int argc, char **argv)
-{
-       int iov_count = 1, length = 1024, rate = 1, tx_prog_fd;
-       struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
-       int opt, longindex, err, cg_fd = 0;
-       struct sockmap_options options = {0};
-       int test = PING_PONG;
-       char filename[256];
-
-       while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
-                                 long_options, &longindex)) != -1) {
-               switch (opt) {
-               case 's':
-                       txmsg_start = atoi(optarg);
-                       break;
-               case 'e':
-                       txmsg_end = atoi(optarg);
-                       break;
-               case 'a':
-                       txmsg_apply = atoi(optarg);
-                       break;
-               case 'k':
-                       txmsg_cork = atoi(optarg);
-                       break;
-               case 'c':
-                       cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
-                       if (cg_fd < 0) {
-                               fprintf(stderr,
-                                       "ERROR: (%i) open cg path failed: %s\n",
-                                       cg_fd, optarg);
-                               return cg_fd;
-                       }
-                       break;
-               case 'r':
-                       rate = atoi(optarg);
-                       break;
-               case 'v':
-                       options.verbose = 1;
-                       break;
-               case 'i':
-                       iov_count = atoi(optarg);
-                       break;
-               case 'l':
-                       length = atoi(optarg);
-                       break;
-               case 'd':
-                       options.data_test = true;
-                       break;
-               case 't':
-                       if (strcmp(optarg, "ping") == 0) {
-                               test = PING_PONG;
-                       } else if (strcmp(optarg, "sendmsg") == 0) {
-                               test = SENDMSG;
-                       } else if (strcmp(optarg, "base") == 0) {
-                               test = BASE;
-                       } else if (strcmp(optarg, "base_sendpage") == 0) {
-                               test = BASE_SENDPAGE;
-                       } else if (strcmp(optarg, "sendpage") == 0) {
-                               test = SENDPAGE;
-                       } else {
-                               usage(argv);
-                               return -1;
-                       }
-                       break;
-               case 0:
-                       break;
-               case 'h':
-               default:
-                       usage(argv);
-                       return -1;
-               }
-       }
-
-       if (!cg_fd) {
-               fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
-                       argv[0]);
-               return -1;
-       }
-
-       if (setrlimit(RLIMIT_MEMLOCK, &r)) {
-               perror("setrlimit(RLIMIT_MEMLOCK)");
-               return 1;
-       }
-
-       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
-       running = 1;
-
-       /* catch SIGINT */
-       signal(SIGINT, running_handler);
-
-       if (load_bpf_file(filename)) {
-               fprintf(stderr, "load_bpf_file: (%s) %s\n",
-                       filename, strerror(errno));
-               return 1;
-       }
-
-       /* If base test skip BPF setup */
-       if (test == BASE || test == BASE_SENDPAGE)
-               goto run;
-
-       /* Attach programs to sockmap */
-       err = bpf_prog_attach(prog_fd[0], map_fd[0],
-                               BPF_SK_SKB_STREAM_PARSER, 0);
-       if (err) {
-               fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
-                       err, strerror(errno));
-               return err;
-       }
-
-       err = bpf_prog_attach(prog_fd[1], map_fd[0],
-                               BPF_SK_SKB_STREAM_VERDICT, 0);
-       if (err) {
-               fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
-                       err, strerror(errno));
-               return err;
-       }
-
-       /* Attach to cgroups */
-       err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
-       if (err) {
-               fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
-                       err, strerror(errno));
-               return err;
-       }
-
-run:
-       err = sockmap_init_sockets();
-       if (err) {
-               fprintf(stderr, "ERROR: test socket failed: %d\n", err);
-               goto out;
-       }
-
-       /* Attach txmsg program to sockmap */
-       if (txmsg_pass)
-               tx_prog_fd = prog_fd[3];
-       else if (txmsg_noisy)
-               tx_prog_fd = prog_fd[4];
-       else if (txmsg_redir)
-               tx_prog_fd = prog_fd[5];
-       else if (txmsg_redir_noisy)
-               tx_prog_fd = prog_fd[6];
-       else if (txmsg_drop)
-               tx_prog_fd = prog_fd[9];
-       /* apply and cork must be last */
-       else if (txmsg_apply)
-               tx_prog_fd = prog_fd[7];
-       else if (txmsg_cork)
-               tx_prog_fd = prog_fd[8];
-       else
-               tx_prog_fd = 0;
-
-       if (tx_prog_fd) {
-               int redir_fd, i = 0;
-
-               err = bpf_prog_attach(tx_prog_fd,
-                                     map_fd[1], BPF_SK_MSG_VERDICT, 0);
-               if (err) {
-                       fprintf(stderr,
-                               "ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
-                               err, strerror(errno));
-                       return err;
-               }
-
-               err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
-               if (err) {
-                       fprintf(stderr,
-                               "ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
-                               err, strerror(errno));
-                       return err;
-               }
-
-               if (txmsg_redir || txmsg_redir_noisy)
-                       redir_fd = c2;
-               else
-                       redir_fd = c1;
-
-               err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
-               if (err) {
-                       fprintf(stderr,
-                               "ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
-                               err, strerror(errno));
-                       return err;
-               }
-
-               if (txmsg_apply) {
-                       err = bpf_map_update_elem(map_fd[3],
-                                                 &i, &txmsg_apply, BPF_ANY);
-                       if (err) {
-                               fprintf(stderr,
-                                       "ERROR: bpf_map_update_elem (apply_bytes):  %d (%s\n",
-                                       err, strerror(errno));
-                               return err;
-                       }
-               }
-
-               if (txmsg_cork) {
-                       err = bpf_map_update_elem(map_fd[4],
-                                                 &i, &txmsg_cork, BPF_ANY);
-                       if (err) {
-                               fprintf(stderr,
-                                       "ERROR: bpf_map_update_elem (cork_bytes):  %d (%s\n",
-                                       err, strerror(errno));
-                               return err;
-                       }
-               }
-
-               if (txmsg_start) {
-                       err = bpf_map_update_elem(map_fd[5],
-                                                 &i, &txmsg_start, BPF_ANY);
-                       if (err) {
-                               fprintf(stderr,
-                                       "ERROR: bpf_map_update_elem (txmsg_start):  %d (%s)\n",
-                                       err, strerror(errno));
-                               return err;
-                       }
-               }
-
-               if (txmsg_end) {
-                       i = 1;
-                       err = bpf_map_update_elem(map_fd[5],
-                                                 &i, &txmsg_end, BPF_ANY);
-                       if (err) {
-                               fprintf(stderr,
-                                       "ERROR: bpf_map_update_elem (txmsg_end):  %d (%s)\n",
-                                       err, strerror(errno));
-                               return err;
-                       }
-               }
-
-               if (txmsg_ingress) {
-                       int in = BPF_F_INGRESS;
-
-                       i = 0;
-                       err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
-                       if (err) {
-                               fprintf(stderr,
-                                       "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
-                                       err, strerror(errno));
-                       }
-                       i = 1;
-                       err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
-                       if (err) {
-                               fprintf(stderr,
-                                       "ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
-                                       err, strerror(errno));
-                       }
-                       err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
-                       if (err) {
-                               fprintf(stderr,
-                                       "ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
-                                       err, strerror(errno));
-                       }
-
-                       i = 2;
-                       err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
-                       if (err) {
-                               fprintf(stderr,
-                                       "ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
-                                       err, strerror(errno));
-                       }
-               }
-
-               if (txmsg_skb) {
-                       int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1;
-                       int ingress = BPF_F_INGRESS;
-
-                       i = 0;
-                       err = bpf_map_update_elem(map_fd[7], &i, &ingress, BPF_ANY);
-                       if (err) {
-                               fprintf(stderr,
-                                       "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
-                                       err, strerror(errno));
-                       }
-
-                       i = 3;
-                       err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
-                       if (err) {
-                               fprintf(stderr,
-                                       "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
-                                       err, strerror(errno));
-                       }
-               }
-       }
-
-       if (txmsg_drop)
-               options.drop_expected = true;
-
-       if (test == PING_PONG)
-               err = forever_ping_pong(rate, &options);
-       else if (test == SENDMSG) {
-               options.base = false;
-               options.sendpage = false;
-               err = sendmsg_test(iov_count, length, rate, &options);
-       } else if (test == SENDPAGE) {
-               options.base = false;
-               options.sendpage = true;
-               err = sendmsg_test(iov_count, length, rate, &options);
-       } else if (test == BASE) {
-               options.base = true;
-               options.sendpage = false;
-               err = sendmsg_test(iov_count, length, rate, &options);
-       } else if (test == BASE_SENDPAGE) {
-               options.base = true;
-               options.sendpage = true;
-               err = sendmsg_test(iov_count, length, rate, &options);
-       } else
-               fprintf(stderr, "unknown test\n");
-out:
-       bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
-       close(s1);
-       close(s2);
-       close(p1);
-       close(p2);
-       close(c1);
-       close(c2);
-       close(cg_fd);
-       return err;
-}
-
-void running_handler(int a)
-{
-       running = 0;
-}
index 5841ed41b30ca6d69729dbed5854b5f967fd520d..e6679393b6877801651640d66857c4899c41beea 100644 (file)
@@ -894,6 +894,7 @@ enum bpf_func_id {
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX             (1ULL << 1)
 #define BPF_F_DONT_FRAGMENT            (1ULL << 2)
+#define BPF_F_SEQ_NUMBER               (1ULL << 3)
 
 /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
  * BPF_FUNC_perf_event_read_value flags.
index 6d9447700e18c983804e1fecc4a6854e138d10f6..68699f654118592527096dc26336f57da6a01cdc 100644 (file)
@@ -941,4 +941,43 @@ enum {
        IFLA_EVENT_BONDING_OPTIONS,     /* change in bonding options */
 };
 
+/* tun section */
+
+enum {
+       IFLA_TUN_UNSPEC,
+       IFLA_TUN_OWNER,
+       IFLA_TUN_GROUP,
+       IFLA_TUN_TYPE,
+       IFLA_TUN_PI,
+       IFLA_TUN_VNET_HDR,
+       IFLA_TUN_PERSIST,
+       IFLA_TUN_MULTI_QUEUE,
+       IFLA_TUN_NUM_QUEUES,
+       IFLA_TUN_NUM_DISABLED_QUEUES,
+       __IFLA_TUN_MAX,
+};
+
+#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
+
+/* rmnet section */
+
+#define RMNET_FLAGS_INGRESS_DEAGGREGATION         (1U << 0)
+#define RMNET_FLAGS_INGRESS_MAP_COMMANDS          (1U << 1)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4           (1U << 2)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4            (1U << 3)
+
+enum {
+       IFLA_RMNET_UNSPEC,
+       IFLA_RMNET_MUX_ID,
+       IFLA_RMNET_FLAGS,
+       __IFLA_RMNET_MAX,
+};
+
+#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1)
+
+struct ifla_rmnet_flags {
+       __u32   flags;
+       __u32   mask;
+};
+
 #endif /* _UAPI_LINUX_IF_LINK_H */
index 6513e0b08795bec18bf9c21e6660e7320cd6e717..7bcdca13083a2251de4270ef6076ba61de97a16f 100644 (file)
@@ -1961,8 +1961,8 @@ BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
 
-static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
-                                                enum bpf_attach_type type)
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+                                          enum bpf_attach_type type)
 {
        prog->expected_attach_type = type;
 }
index d6ac4fa6f472632566118cfee45ab2cc04ae53a2..197f9ce2248c59eb6415042b702224d468fdad25 100644 (file)
@@ -193,6 +193,8 @@ int bpf_program__set_sched_act(struct bpf_program *prog);
 int bpf_program__set_xdp(struct bpf_program *prog);
 int bpf_program__set_perf_event(struct bpf_program *prog);
 void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+                                          enum bpf_attach_type type);
 
 bool bpf_program__is_socket_filter(struct bpf_program *prog);
 bool bpf_program__is_tracepoint(struct bpf_program *prog);
index 64037ee2eed0f8d0b91bafcbd0019c618e99231d..0c19d5e08f08023cbf07bede191ffbc4ac3dfb46 100644 (file)
@@ -24,7 +24,7 @@ urandom_read: urandom_read.c
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
        test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
-       test_sock test_btf
+       test_sock test_btf test_sockmap
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
        test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
@@ -32,7 +32,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
        test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
        sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
        sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
-       test_btf_haskv.o test_btf_nokv.o
+       test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -56,6 +56,7 @@ $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
 $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
 $(OUTPUT)/test_sock: cgroup_helpers.c
 $(OUTPUT)/test_sock_addr: cgroup_helpers.c
+$(OUTPUT)/test_sockmap: cgroup_helpers.c
 
 .PHONY: force
 
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
new file mode 100644 (file)
index 0000000..6d63a1c
--- /dev/null
@@ -0,0 +1,1465 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <sched.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/sendfile.h>
+
+#include <linux/netlink.h>
+#include <linux/socket.h>
+#include <linux/sock_diag.h>
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <libgen.h>
+
+#include <getopt.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+/* Loop flag: forever_ping_pong() keeps iterating while this is non-zero */
+int running;
+static void running_handler(int a);
+
+/* randomly selected ports for testing on lo */
+#define S1_PORT 10000
+#define S2_PORT 10001
+
+#define BPF_FILENAME "test_sockmap_kern.o"
+#define CG_PATH "/sockmap"
+
+/*
+ * global sockets, (re)created by sockmap_init_sockets():
+ * s1/s2 listen on S1_PORT/S2_PORT, c1/c2 connect to them and p1/p2 are
+ * the accepted peers, i.e. c1 <-> p1 and c2 <-> p2.
+ */
+int s1, s2, c1, c2, p1, p2;
+/* NOTE(review): presumably test result counters; their users are not in view */
+int test_cnt;
+int passed;
+int failed;
+/* map and program descriptors; run_options() indexes them by fixed slot */
+int map_fd[8];
+struct bpf_map *maps[8];
+int prog_fd[11];
+
+/*
+ * txmsg test knobs, set from the command line through long_options[] and
+ * consumed by run_options() to pick and configure the tx program.
+ */
+int txmsg_pass;
+int txmsg_noisy;
+int txmsg_redir;
+int txmsg_redir_noisy;
+int txmsg_drop;
+int txmsg_apply;
+int txmsg_cork;
+int txmsg_start;
+int txmsg_end;
+int txmsg_ingress;
+int txmsg_skb;
+
+/*
+ * getopt_long() option table.  Entries with a NULL flag pointer are reported
+ * through their short option character; the others store 1 directly into the
+ * matching txmsg_* global.  usage() walks this table to print the help text.
+ */
+static const struct option long_options[] = {
+       {"help",        no_argument,            NULL, 'h' },
+       {"cgroup",      required_argument,      NULL, 'c' },
+       {"rate",        required_argument,      NULL, 'r' },
+       {"verbose",     no_argument,            NULL, 'v' },
+       {"iov_count",   required_argument,      NULL, 'i' },
+       {"length",      required_argument,      NULL, 'l' },
+       {"test",        required_argument,      NULL, 't' },
+       {"data_test",   no_argument,            NULL, 'd' },
+       {"txmsg",               no_argument,    &txmsg_pass,  1  },
+       {"txmsg_noisy",         no_argument,    &txmsg_noisy, 1  },
+       {"txmsg_redir",         no_argument,    &txmsg_redir, 1  },
+       {"txmsg_redir_noisy",   no_argument,    &txmsg_redir_noisy, 1},
+       {"txmsg_drop",          no_argument,    &txmsg_drop, 1 },
+       {"txmsg_apply", required_argument,      NULL, 'a'},
+       {"txmsg_cork",  required_argument,      NULL, 'k'},
+       {"txmsg_start", required_argument,      NULL, 's'},
+       {"txmsg_end",   required_argument,      NULL, 'e'},
+       {"txmsg_ingress", no_argument,          &txmsg_ingress, 1 },
+       {"txmsg_skb", no_argument,              &txmsg_skb, 1 },
+       {0, 0, NULL, 0 }
+};
+
+/*
+ * Print the command synopsis followed by one line per long_options[] entry:
+ * flag-style options show the current value of their flag variable, the
+ * others show their short option character.
+ */
+static void usage(char *argv[])
+{
+       const struct option *opt;
+
+       printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
+       printf(" options:\n");
+       for (opt = long_options; opt->name; opt++) {
+               printf(" --%-12s", opt->name);
+               if (!opt->flag) {
+                       /* plain option, identified by its short character */
+                       printf(" -%c\n", opt->val);
+                       continue;
+               }
+               printf(" flag (internal value:%d)\n", *opt->flag);
+       }
+       printf("\n");
+}
+
+/* Report a failed socket call through perror() and hand back its errno. */
+static int sockmap_sock_error(const char *what)
+{
+       /* perror() itself may modify errno, so capture it first */
+       int saved_errno = errno;
+
+       perror(what);
+       return saved_errno;
+}
+
+/*
+ * Create the six global test sockets on loopback and connect them pairwise.
+ *
+ * s1/s2 become non-blocking listeners on S1_PORT/S2_PORT, c1/c2 connect to
+ * them and p1/p2 are the accepted peers, giving c1 <-> p1 and c2 <-> p2.
+ *
+ * @verbose: when non-zero, print the resulting descriptor numbers.
+ *
+ * Returns 0 on success or the errno of the call that failed.  On failure the
+ * descriptors opened so far stay in the globals and the remaining ones are
+ * -1, so closing all six afterwards never touches an unrelated descriptor.
+ */
+static int sockmap_init_sockets(int verbose)
+{
+       int i, err, one = 1;
+       struct sockaddr_in addr;
+       int *fds[4] = {&s1, &s2, &c1, &c2};
+
+       /* -1 rather than 0: closing an unopened slot must not close stdin */
+       s1 = s2 = p1 = p2 = c1 = c2 = -1;
+
+       /* Init sockets */
+       for (i = 0; i < 4; i++) {
+               *fds[i] = socket(AF_INET, SOCK_STREAM, 0);
+               if (*fds[i] < 0)
+                       return sockmap_sock_error("socket failed()");
+       }
+
+       /* Allow reuse (listening sockets only) */
+       for (i = 0; i < 2; i++) {
+               err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
+                                (char *)&one, sizeof(one));
+               if (err)
+                       return sockmap_sock_error("setsockopt failed()");
+       }
+
+       /* Non-blocking sockets (listening sockets only) */
+       for (i = 0; i < 2; i++) {
+               err = ioctl(*fds[i], FIONBIO, (char *)&one);
+               if (err < 0)
+                       return sockmap_sock_error("ioctl failed()");
+       }
+
+       /* Bind server sockets */
+       memset(&addr, 0, sizeof(struct sockaddr_in));
+       addr.sin_family = AF_INET;
+       addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+       addr.sin_port = htons(S1_PORT);
+       err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
+       if (err < 0)
+               return sockmap_sock_error("bind s1 failed()");
+
+       addr.sin_port = htons(S2_PORT);
+       err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
+       if (err < 0)
+               return sockmap_sock_error("bind s2 failed()");
+
+       /* Listen server sockets */
+       err = listen(s1, 32);
+       if (err < 0)
+               return sockmap_sock_error("listen s1 failed()");
+
+       err = listen(s2, 32);
+       if (err < 0)
+               return sockmap_sock_error("listen s2 failed()");
+
+       /* Initiate Connect */
+       addr.sin_port = htons(S1_PORT);
+       err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
+       if (err < 0 && errno != EINPROGRESS)
+               return sockmap_sock_error("connect c1 failed()");
+
+       addr.sin_port = htons(S2_PORT);
+       err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
+       if (err < 0 && errno != EINPROGRESS)
+               return sockmap_sock_error("connect c2 failed()");
+
+       /* Accept Connections */
+       p1 = accept(s1, NULL, NULL);
+       if (p1 < 0)
+               return sockmap_sock_error("accept s1 failed()");
+
+       p2 = accept(s2, NULL, NULL);
+       if (p2 < 0)
+               return sockmap_sock_error("accept s2 failed()");
+
+       if (verbose) {
+               printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
+               printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
+                       c1, s1, c2, s2);
+       }
+       return 0;
+}
+
+/* Byte counters and CLOCK_MONOTONIC timestamps collected by the msg loops */
+struct msg_stats {
+       size_t bytes_sent;      /* sum of successful send sizes */
+       size_t bytes_recvd;     /* sum of recvmsg() results */
+       struct timespec start;  /* taken right before the loop starts */
+       struct timespec end;    /* taken when the loop finishes or bails out */
+};
+
+/* Per-run test parameters handed from run_options() down to the msg loops */
+struct sockmap_options {
+       int verbose;            /* print progress and statistics */
+       bool base;              /* sendmsg_test() receives on p1 instead of p2 */
+       bool sendpage;          /* transmit with sendfile() instead of sendmsg() */
+       bool data_test;         /* fill tx buffers with a byte pattern and verify it on rx */
+       bool drop_expected;     /* sends are expected to fail; a successful send is an error */
+       int iov_count;          /* number of iovec entries per message */
+       int iov_length;         /* size in bytes of each iovec buffer */
+       int rate;               /* number of send iterations; also the ping-pong sleep in seconds */
+};
+
+/*
+ * Transmit side of the sendpage tests.
+ *
+ * Writes iov_length * cnt bytes of an incrementing byte pattern into
+ * ".sendpage_tst.tmp", then pushes the file to @fd with @cnt sendfile()
+ * calls of @iov_length bytes each, accounting the bytes in @s.
+ *
+ * When opt->drop_expected is set every sendfile() is expected to fail and a
+ * successful one is reported as -EIO.
+ *
+ * Returns 0 on success, non-zero on failure (1 if the scratch file cannot
+ * be prepared, the negative sendfile() result, or -EIO).
+ */
+static int msg_loop_sendpage(int fd, int iov_length, int cnt,
+                            struct msg_stats *s,
+                            struct sockmap_options *opt)
+{
+       bool drop = opt->drop_expected;
+       unsigned char k = 0;
+       FILE *file;
+       int i, fp;
+
+       file = fopen(".sendpage_tst.tmp", "w+");
+       if (!file) {
+               perror("create file for sendpage");
+               return 1;
+       }
+       for (i = 0; i < iov_length * cnt; i++, k++)
+               fwrite(&k, sizeof(char), 1, file);
+       /* fclose() flushes; a failure here means the pattern is incomplete */
+       if (fclose(file)) {
+               perror("write file for sendpage");
+               return 1;
+       }
+
+       fp = open(".sendpage_tst.tmp", O_RDONLY);
+       if (fp < 0) {
+               perror("reopen file for sendpage");
+               return 1;
+       }
+
+       clock_gettime(CLOCK_MONOTONIC, &s->start);
+       for (i = 0; i < cnt; i++) {
+               int sent = sendfile(fd, fp, NULL, iov_length);
+
+               if (!drop && sent < 0) {
+                       perror("send loop error:");
+                       close(fp);
+                       return sent;
+               } else if (drop && sent >= 0) {
+                       printf("sendpage loop error expected: %i\n", sent);
+                       close(fp);
+                       return -EIO;
+               }
+
+               if (sent > 0)
+                       s->bytes_sent += sent;
+       }
+       clock_gettime(CLOCK_MONOTONIC, &s->end);
+       close(fp);
+       return 0;
+}
+
+/*
+ * Send or receive @cnt messages of @iov_count buffers of @iov_length bytes.
+ *
+ * @tx selects the direction.  The sender calls sendmsg() @cnt times; the
+ * receiver select()s on @fd with a one second timeout and calls recvmsg()
+ * until iov_count * iov_length * cnt bytes have arrived.  With
+ * opt->data_test the sender fills its buffers with an incrementing byte
+ * pattern and the receiver checks every received byte against it.  With
+ * opt->drop_expected every sendmsg() is expected to fail.
+ *
+ * Byte counts and start/end timestamps are stored in @s.
+ *
+ * Returns 0 on success, the errno of a failing call, or -EIO for a receive
+ * timeout, corrupted data or a send that unexpectedly succeeded.
+ */
+static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
+                   struct msg_stats *s, bool tx,
+                   struct sockmap_options *opt)
+{
+       struct msghdr msg = {0};
+       int err = 0, i, flags = MSG_NOSIGNAL;
+       struct iovec *iov;
+       unsigned char k;
+       bool data_test = opt->data_test;
+       bool drop = opt->drop_expected;
+
+       iov = calloc(iov_count, sizeof(struct iovec));
+       if (!iov)
+               return errno;
+
+       k = 0;
+       for (i = 0; i < iov_count; i++) {
+               unsigned char *d = calloc(iov_length, sizeof(char));
+
+               if (!d) {
+                       /* latch the error before stdio/free can clobber errno */
+                       err = errno ? errno : ENOMEM;
+                       fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
+                       goto out;
+               }
+               iov[i].iov_base = d;
+               iov[i].iov_len = iov_length;
+
+               if (data_test && tx) {
+                       int j;
+
+                       for (j = 0; j < iov_length; j++)
+                               d[j] = k++;
+               }
+       }
+
+       msg.msg_iov = iov;
+       msg.msg_iovlen = iov_count;
+       k = 0;
+
+       if (tx) {
+               clock_gettime(CLOCK_MONOTONIC, &s->start);
+               for (i = 0; i < cnt; i++) {
+                       int sent = sendmsg(fd, &msg, flags);
+
+                       if (!drop && sent < 0) {
+                               err = errno;
+                               perror("send loop error:");
+                               goto out;
+                       } else if (drop && sent >= 0) {
+                               printf("send loop error expected: %i\n", sent);
+                               err = -EIO;
+                               goto out;
+                       }
+                       if (sent > 0)
+                               s->bytes_sent += sent;
+               }
+               clock_gettime(CLOCK_MONOTONIC, &s->end);
+       } else {
+               int slct, recv, max_fd = fd;
+               int fd_flags = O_NONBLOCK;
+               struct timeval timeout;
+               size_t total_bytes;
+               fd_set w;
+
+               /*
+                * NOTE(review): this passes O_NONBLOCK as the fcntl() command,
+                * so it presumably does not make fd non-blocking; kept as is
+                * because F_SETFL would change how recvmsg() below behaves.
+                */
+               fcntl(fd, fd_flags);
+               /* integer math: a float total loses precision on large runs */
+               total_bytes = (size_t)iov_count * (size_t)iov_length *
+                             (size_t)cnt;
+               if (clock_gettime(CLOCK_MONOTONIC, &s->start) < 0)
+                       perror("recv start time: ");
+               while (s->bytes_recvd < total_bytes) {
+                       timeout.tv_sec = 1;
+                       timeout.tv_usec = 0;
+
+                       /* FD sets */
+                       FD_ZERO(&w);
+                       FD_SET(fd, &w);
+
+                       slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
+                       if (slct == -1) {
+                               err = errno;
+                               perror("select()");
+                               clock_gettime(CLOCK_MONOTONIC, &s->end);
+                               goto out;
+                       } else if (!slct) {
+                               if (opt->verbose)
+                                       fprintf(stderr, "unexpected timeout\n");
+                               err = -EIO;
+                               clock_gettime(CLOCK_MONOTONIC, &s->end);
+                               goto out;
+                       }
+
+                       recv = recvmsg(fd, &msg, flags);
+                       if (recv < 0) {
+                               if (errno != EWOULDBLOCK) {
+                                       err = errno;
+                                       clock_gettime(CLOCK_MONOTONIC, &s->end);
+                                       perror("recv failed()\n");
+                                       goto out;
+                               }
+                               /*
+                                * Nothing was received: do not account -1
+                                * bytes or verify a stale buffer, just wait
+                                * for the next wakeup.
+                                */
+                               continue;
+                       }
+
+                       s->bytes_recvd += recv;
+
+                       if (data_test) {
+                               int j;
+
+                               for (i = 0; i < msg.msg_iovlen; i++) {
+                                       unsigned char *d = iov[i].iov_base;
+
+                                       for (j = 0;
+                                            j < iov[i].iov_len && recv; j++) {
+                                               if (d[j] != k++) {
+                                                       /* never peek past the end of the buffer */
+                                                       unsigned char next =
+                                                               j + 1 < iov[i].iov_len ?
+                                                               d[j + 1] : 0;
+
+                                                       err = -EIO;
+                                                       fprintf(stderr,
+                                                               "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
+                                                               i, j, d[j], k - 1, next, k + 1);
+                                                       goto out;
+                                               }
+                                               recv--;
+                                       }
+                               }
+                       }
+               }
+               clock_gettime(CLOCK_MONOTONIC, &s->end);
+       }
+
+out:
+       /* unallocated slots are NULL from calloc(), so this is safe on every path */
+       for (i = 0; i < iov_count; i++)
+               free(iov[i].iov_base);
+       free(iov);
+       return err;
+}
+
+/* Divisor used to scale bytes/second figures to GB/s in the reports */
+static float giga = 1000000000;
+
+/*
+ * Average transmit rate in bytes per second, at whole-second resolution.
+ * Returns 0 when less than a full second elapsed instead of dividing by zero.
+ */
+static inline float sentBps(struct msg_stats s)
+{
+       float secs = (float)(s.end.tv_sec - s.start.tv_sec);
+
+       if (secs <= 0)
+               return 0;
+       return (float)s.bytes_sent / secs;
+}
+
+/*
+ * Average receive rate in bytes per second, at whole-second resolution.
+ * Returns 0 when less than a full second elapsed instead of dividing by zero.
+ */
+static inline float recvdBps(struct msg_stats s)
+{
+       float secs = (float)(s.end.tv_sec - s.start.tv_sec);
+
+       if (secs <= 0)
+               return 0;
+       return (float)s.bytes_recvd / secs;
+}
+
+/*
+ * Run one transmit/receive test with a forked receiver and a forked sender.
+ *
+ * The receiver reads from p1 when opt->base is set and from p2 otherwise;
+ * the sender always writes to c1, using sendfile() when opt->sendpage is set.
+ * When opt->drop_expected is set nothing should arrive, so the receiver exits
+ * successfully right away.
+ *
+ * Each child exits with 0 on success and 1 on failure; the parent reaps both
+ * and turns a failed or abnormally terminated child into a non-zero return,
+ * so a broken data path is actually reported to the caller.
+ *
+ * Returns 0 if both children succeeded, non-zero otherwise (errno for a
+ * failed fork()/waitpid()).
+ */
+static int sendmsg_test(struct sockmap_options *opt)
+{
+       float sent_Bps = 0, recvd_Bps = 0;
+       int rx_fd, txpid, rxpid, err = 0;
+       struct msg_stats s = {0};
+       int iov_count = opt->iov_count;
+       int iov_buf = opt->iov_length;
+       int cnt = opt->rate;
+       int rx_status = 0, tx_status = 0;
+       bool rx_reaped, tx_reaped;
+
+       errno = 0;
+
+       if (opt->base)
+               rx_fd = p1;
+       else
+               rx_fd = p2;
+
+       rxpid = fork();
+       if (rxpid == 0) {
+               /* dropped traffic never arrives: nothing to wait for */
+               if (opt->drop_expected)
+                       exit(0);
+
+               if (opt->sendpage)
+                       iov_count = 1;
+               err = msg_loop(rx_fd, iov_count, iov_buf,
+                              cnt, &s, false, opt);
+               if (err && opt->verbose)
+                       fprintf(stderr,
+                               "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
+                               iov_count, iov_buf, cnt, err);
+               shutdown(p2, SHUT_RDWR);
+               shutdown(p1, SHUT_RDWR);
+               if (s.end.tv_sec - s.start.tv_sec) {
+                       sent_Bps = sentBps(s);
+                       recvd_Bps = recvdBps(s);
+               }
+               if (opt->verbose)
+                       fprintf(stdout,
+                               "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
+                               s.bytes_sent, sent_Bps, sent_Bps/giga,
+                               s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+               exit(err ? 1 : 0);
+       } else if (rxpid == -1) {
+               err = errno;
+               perror("msg_loop_rx: ");
+               return err;
+       }
+
+       txpid = fork();
+       if (txpid == 0) {
+               if (opt->sendpage)
+                       err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
+               else
+                       err = msg_loop(c1, iov_count, iov_buf,
+                                      cnt, &s, true, opt);
+
+               if (err)
+                       fprintf(stderr,
+                               "msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
+                               iov_count, iov_buf, cnt, err);
+               shutdown(c1, SHUT_RDWR);
+               if (s.end.tv_sec - s.start.tv_sec) {
+                       sent_Bps = sentBps(s);
+                       recvd_Bps = recvdBps(s);
+               }
+               if (opt->verbose)
+                       fprintf(stdout,
+                               "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
+                               s.bytes_sent, sent_Bps, sent_Bps/giga,
+                               s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+               exit(err ? 1 : 0);
+       } else if (txpid == -1) {
+               err = errno;
+               perror("msg_loop_tx: ");
+               return err;
+       }
+
+       /* reap both children before judging either; never wait inside assert() */
+       rx_reaped = waitpid(rxpid, &rx_status, 0) == rxpid;
+       tx_reaped = waitpid(txpid, &tx_status, 0) == txpid;
+       if (!rx_reaped || !tx_reaped) {
+               err = errno ? errno : -1;
+               perror("waitpid: ");
+               return err;
+       }
+
+       if (!WIFEXITED(rx_status) || WEXITSTATUS(rx_status)) {
+               fprintf(stderr, "rx process failed (wait status 0x%x)\n",
+                       rx_status);
+               err = WIFEXITED(rx_status) ? WEXITSTATUS(rx_status) : -1;
+       }
+       if (!WIFEXITED(tx_status) || WEXITSTATUS(tx_status)) {
+               fprintf(stderr, "tx process failed (wait status 0x%x)\n",
+                       tx_status);
+               if (!err)
+                       err = WIFEXITED(tx_status) ?
+                               WEXITSTATUS(tx_status) : -1;
+       }
+       return err;
+}
+
+/*
+ * Bounce a 1024 byte buffer between the test sockets until `running` is
+ * cleared, select() fails or times out (10 seconds), or a peer closes.
+ *
+ * One buffer is sent on c1 to start; afterwards whatever arrives on any of
+ * c1, c2, p1 or p2 is sent back on the same socket.
+ *
+ * @rate: seconds to sleep after each pass (0 disables the sleep).
+ * @opt:  only opt->verbose is used, to print a progress dot per pass.
+ *
+ * Returns 0 when the loop ends, or the negative send()/recv() result on an
+ * I/O error.
+ */
+static int forever_ping_pong(int rate, struct sockmap_options *opt)
+{
+       const int fds[4] = {c1, c2, p1, p2};
+       struct timeval timeout;
+       char buf[1024] = {0};
+       int n, sc, max_fd = -1;
+
+       /* do not assume which of the four descriptors is the highest */
+       for (n = 0; n < 4; n++) {
+               if (fds[n] > max_fd)
+                       max_fd = fds[n];
+       }
+
+       /* Ping/Pong data from client to server */
+       sc = send(c1, buf, sizeof(buf), 0);
+       if (sc < 0) {
+               perror("send failed()");
+               return sc;
+       }
+
+       do {
+               int s, rc, i;
+               fd_set w;
+
+               /* select() may modify the timeout, so re-arm it every pass */
+               timeout.tv_sec = 10;
+               timeout.tv_usec = 0;
+
+               /* FD sets */
+               FD_ZERO(&w);
+               for (n = 0; n < 4; n++)
+                       FD_SET(fds[n], &w);
+
+               s = select(max_fd + 1, &w, NULL, NULL, &timeout);
+               if (s == -1) {
+                       perror("select()");
+                       break;
+               } else if (!s) {
+                       fprintf(stderr, "unexpected timeout\n");
+                       break;
+               }
+
+               for (i = 0; i <= max_fd && s > 0; ++i) {
+                       if (!FD_ISSET(i, &w))
+                               continue;
+
+                       s--;
+
+                       rc = recv(i, buf, sizeof(buf), 0);
+                       if (rc < 0) {
+                               if (errno != EWOULDBLOCK) {
+                                       perror("recv failed()");
+                                       return rc;
+                               }
+                               /* nothing was read, so there is nothing to echo */
+                               continue;
+                       }
+
+                       /*
+                        * Peer closed the connection: stop here.  The socket
+                        * is left open for run_options() to close with the
+                        * others, rather than closing it twice.
+                        */
+                       if (rc == 0)
+                               return 0;
+
+                       sc = send(i, buf, rc, 0);
+                       if (sc < 0) {
+                               perror("send failed()");
+                               return sc;
+                       }
+               }
+
+               if (rate)
+                       sleep(rate);
+
+               if (opt->verbose) {
+                       printf(".");
+                       fflush(stdout);
+               }
+       } while (running);
+
+       return 0;
+}
+
+/* Test variants accepted by run_options() */
+enum {
+       PING_PONG,      /* forever_ping_pong() echo loop */
+       SENDMSG,        /* sendmsg() sender, receiver on p2 */
+       BASE,           /* sendmsg() sender, receiver on p1, BPF attach skipped */
+       BASE_SENDPAGE,  /* sendfile() sender, receiver on p1, BPF attach skipped */
+       SENDPAGE,       /* sendfile() sender, receiver on p2 */
+};
+
+/*
+ * Set up, run and tear down a single test variant.
+ *
+ * Unless @test is BASE or BASE_SENDPAGE, prog_fd[0] and prog_fd[1] are
+ * attached to map_fd[0] as stream parser/verdict programs and prog_fd[2] is
+ * attached to the cgroup @cg_fd as a sock_ops program.  The sockets are then
+ * created, a tx program is selected from the txmsg_* globals and attached to
+ * map_fd[1], and the option maps (map_fd[2..7]) are filled in.  Finally the
+ * test body selected by @test runs.
+ *
+ * On the way out the programs are detached, every map is zeroed and the six
+ * global sockets are closed.
+ *
+ * Returns 0 on success, non-zero on a setup or test failure (-EINVAL for an
+ * unknown @test).  NOTE: failures of the three initial attaches return
+ * immediately without the teardown.
+ */
+static int run_options(struct sockmap_options *options, int cg_fd,  int test)
+{
+       int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
+
+       /* If base test skip BPF setup */
+       if (test == BASE || test == BASE_SENDPAGE)
+               goto run;
+
+       /* Attach programs to sockmap */
+       err = bpf_prog_attach(prog_fd[0], map_fd[0],
+                               BPF_SK_SKB_STREAM_PARSER, 0);
+       if (err) {
+               fprintf(stderr,
+                       "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
+                       prog_fd[0], map_fd[0], err, strerror(errno));
+               return err;
+       }
+
+       err = bpf_prog_attach(prog_fd[1], map_fd[0],
+                               BPF_SK_SKB_STREAM_VERDICT, 0);
+       if (err) {
+               fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
+                       err, strerror(errno));
+               return err;
+       }
+
+       /* Attach to cgroups */
+       err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+       if (err) {
+               fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
+                       err, strerror(errno));
+               return err;
+       }
+
+run:
+       err = sockmap_init_sockets(options->verbose);
+       if (err) {
+               fprintf(stderr, "ERROR: test socket failed: %d\n", err);
+               goto out;
+       }
+
+       /*
+        * Attach txmsg program to sockmap.  tx_prog_fd stays -1 when no txmsg
+        * option is set, which is the one "nothing attached" sentinel used
+        * both here and by the detach at out:.
+        */
+       if (txmsg_pass)
+               tx_prog_fd = prog_fd[3];
+       else if (txmsg_noisy)
+               tx_prog_fd = prog_fd[4];
+       else if (txmsg_redir)
+               tx_prog_fd = prog_fd[5];
+       else if (txmsg_redir_noisy)
+               tx_prog_fd = prog_fd[6];
+       else if (txmsg_drop)
+               tx_prog_fd = prog_fd[9];
+       /* apply and cork must be last */
+       else if (txmsg_apply)
+               tx_prog_fd = prog_fd[7];
+       else if (txmsg_cork)
+               tx_prog_fd = prog_fd[8];
+
+       if (tx_prog_fd >= 0) {
+               int redir_fd;
+
+               i = 0;
+               err = bpf_prog_attach(tx_prog_fd,
+                                     map_fd[1], BPF_SK_MSG_VERDICT, 0);
+               if (err) {
+                       fprintf(stderr,
+                               "ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
+                               err, strerror(errno));
+                       goto out;
+               }
+
+               err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
+               if (err) {
+                       fprintf(stderr,
+                               "ERROR: bpf_map_update_elem (txmsg):  %d (%s)\n",
+                               err, strerror(errno));
+                       goto out;
+               }
+
+               if (txmsg_redir || txmsg_redir_noisy)
+                       redir_fd = c2;
+               else
+                       redir_fd = c1;
+
+               err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
+               if (err) {
+                       fprintf(stderr,
+                               "ERROR: bpf_map_update_elem (txmsg):  %d (%s)\n",
+                               err, strerror(errno));
+                       goto out;
+               }
+
+               if (txmsg_apply) {
+                       err = bpf_map_update_elem(map_fd[3],
+                                                 &i, &txmsg_apply, BPF_ANY);
+                       if (err) {
+                               fprintf(stderr,
+                                       "ERROR: bpf_map_update_elem (apply_bytes):  %d (%s)\n",
+                                       err, strerror(errno));
+                               goto out;
+                       }
+               }
+
+               if (txmsg_cork) {
+                       err = bpf_map_update_elem(map_fd[4],
+                                                 &i, &txmsg_cork, BPF_ANY);
+                       if (err) {
+                               fprintf(stderr,
+                                       "ERROR: bpf_map_update_elem (cork_bytes):  %d (%s)\n",
+                                       err, strerror(errno));
+                               goto out;
+                       }
+               }
+
+               if (txmsg_start) {
+                       err = bpf_map_update_elem(map_fd[5],
+                                                 &i, &txmsg_start, BPF_ANY);
+                       if (err) {
+                               fprintf(stderr,
+                                       "ERROR: bpf_map_update_elem (txmsg_start):  %d (%s)\n",
+                                       err, strerror(errno));
+                               goto out;
+                       }
+               }
+
+               if (txmsg_end) {
+                       i = 1;
+                       err = bpf_map_update_elem(map_fd[5],
+                                                 &i, &txmsg_end, BPF_ANY);
+                       if (err) {
+                               fprintf(stderr,
+                                       "ERROR: bpf_map_update_elem (txmsg_end):  %d (%s)\n",
+                                       err, strerror(errno));
+                               goto out;
+                       }
+               }
+
+               /* the updates below are best effort: failures are only logged */
+               if (txmsg_ingress) {
+                       int in = BPF_F_INGRESS;
+
+                       i = 0;
+                       err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
+                       if (err) {
+                               fprintf(stderr,
+                                       "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+                                       err, strerror(errno));
+                       }
+                       i = 1;
+                       err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
+                       if (err) {
+                               fprintf(stderr,
+                                       "ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
+                                       err, strerror(errno));
+                       }
+                       err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
+                       if (err) {
+                               fprintf(stderr,
+                                       "ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
+                                       err, strerror(errno));
+                       }
+
+                       i = 2;
+                       err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
+                       if (err) {
+                               fprintf(stderr,
+                                       "ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
+                                       err, strerror(errno));
+                       }
+               }
+
+               if (txmsg_skb) {
+                       int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
+                                       p2 : p1;
+                       int ingress = BPF_F_INGRESS;
+
+                       i = 0;
+                       err = bpf_map_update_elem(map_fd[7],
+                                                 &i, &ingress, BPF_ANY);
+                       if (err) {
+                               fprintf(stderr,
+                                       "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+                                       err, strerror(errno));
+                       }
+
+                       i = 3;
+                       err = bpf_map_update_elem(map_fd[0],
+                                                 &i, &skb_fd, BPF_ANY);
+                       if (err) {
+                               fprintf(stderr,
+                                       "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+                                       err, strerror(errno));
+                       }
+               }
+       }
+
+       if (txmsg_drop)
+               options->drop_expected = true;
+
+       if (test == PING_PONG)
+               err = forever_ping_pong(options->rate, options);
+       else if (test == SENDMSG) {
+               options->base = false;
+               options->sendpage = false;
+               err = sendmsg_test(options);
+       } else if (test == SENDPAGE) {
+               options->base = false;
+               options->sendpage = true;
+               err = sendmsg_test(options);
+       } else if (test == BASE) {
+               options->base = true;
+               options->sendpage = false;
+               err = sendmsg_test(options);
+       } else if (test == BASE_SENDPAGE) {
+               options->base = true;
+               options->sendpage = true;
+               err = sendmsg_test(options);
+       } else {
+               fprintf(stderr, "unknown test\n");
+               /* do not report a test that never ran as a success */
+               err = -EINVAL;
+       }
+out:
+       /* Detach and zero all the maps */
+       bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+       bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
+       bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+       if (tx_prog_fd >= 0)
+               bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
+
+       for (i = 0; i < 8; i++) {
+               key = next_key = 0;
+               bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+               /*
+                * Zero the key that was just discovered; zeroing the current
+                * key again would leave the last key of every map untouched.
+                */
+               while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
+                       bpf_map_update_elem(map_fd[i], &next_key, &zero,
+                                           BPF_ANY);
+                       key = next_key;
+               }
+       }
+
+       close(s1);
+       close(s2);
+       close(p1);
+       close(p2);
+       close(c1);
+       close(c2);
+       return err;
+}
+
+/* Translate a test id into the label shown in the per-test result line. */
+static char *test_to_str(int test)
+{
+       if (test == SENDMSG)
+               return "sendmsg";
+       if (test == SENDPAGE)
+               return "sendpage";
+       return "unknown";
+}
+
+#define OPTSTRING 60
+/*
+ * Build a comma-terminated list of the txmsg_* knobs that are currently
+ * enabled, for display in the per-test result line.
+ *
+ * @options: caller-provided buffer of at least OPTSTRING bytes. It is
+ *           cleared first and is always left NUL-terminated.
+ *
+ * With every knob set the labels add up to more than OPTSTRING bytes, so
+ * labels that no longer fit are truncated instead of being written past
+ * the end of the buffer.
+ */
+static void test_options(char *options)
+{
+       const struct {
+               int enabled;
+               const char *label;
+       } knobs[] = {
+               { txmsg_pass != 0,        "pass," },
+               { txmsg_noisy != 0,       "pass_noisy," },
+               { txmsg_redir != 0,       "redir," },
+               { txmsg_redir_noisy != 0, "redir_noisy," },
+               { txmsg_drop != 0,        "drop," },
+               { txmsg_apply != 0,       "apply," },
+               { txmsg_cork != 0,        "cork," },
+               { txmsg_start != 0,       "start," },
+               { txmsg_end != 0,         "end," },
+               { txmsg_ingress != 0,     "ingress," },
+               { txmsg_skb != 0,         "skb," },
+       };
+       size_t i;
+
+       memset(options, 0, OPTSTRING);
+
+       for (i = 0; i < sizeof(knobs) / sizeof(knobs[0]); i++) {
+               if (!knobs[i].enabled)
+                       continue;
+               /* strncat()'s bound is the number of bytes to append, not
+                * the size of the destination, so pass the space left
+                * (minus the terminating NUL).
+                */
+               strncat(options, knobs[i].label,
+                       OPTSTRING - strlen(options) - 1);
+       }
+}
+
+/*
+ * Run a single test case and print its result line.
+ *
+ * @cgrp: cgroup fd passed through to run_options()
+ * @test: test id; SENDPAGE enables opt->sendpage, anything else clears it
+ * @opt:  per-test options; sendpage and drop_expected are overwritten here
+ *
+ * Bumps test_cnt and either passed or failed, and returns whatever
+ * run_options() returned.
+ */
+static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
+{
+       /* Scratch buffer for the option list; a fixed-size stack buffer
+        * avoids an allocation whose failure would have to be handled.
+        */
+       char options[OPTSTRING];
+       int err;
+
+       opt->sendpage = (test == SENDPAGE);
+       opt->drop_expected = txmsg_drop ? true : false;
+
+       test_options(options);
+
+       fprintf(stdout,
+               "[TEST %i]: (%i, %i, %i, %s, %s): ",
+               test_cnt, opt->rate, opt->iov_count, opt->iov_length,
+               test_to_str(test), options);
+       /* Make the test header visible before the (possibly long) run. */
+       fflush(stdout);
+       err = run_options(opt, cgrp, test);
+       fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
+       test_cnt++;
+       if (err)
+               failed++;
+       else
+               passed++;
+       return err;
+}
+
+/*
+ * Run the current configuration through the SENDMSG test and then the
+ * SENDPAGE test, yielding the CPU after each run. Stops at the first
+ * failure and returns its error code; otherwise returns the result of the
+ * last run.
+ */
+static int test_exec(int cgrp, struct sockmap_options *opt)
+{
+       const int modes[] = { SENDMSG, SENDPAGE };
+       int rc = 0;
+       size_t m;
+
+       for (m = 0; m < sizeof(modes) / sizeof(modes[0]); m++) {
+               rc = __test_exec(cgrp, modes[m], opt);
+               sched_yield();
+               if (rc)
+                       break;
+       }
+       return rc;
+}
+
+/*
+ * Sweep rate, iov_count and iov_length over the values 1, 34 and 67 and
+ * run every combination through test_exec().
+ *
+ * Returns 0 when all combinations pass, otherwise the first error.
+ */
+static int test_loop(int cgrp)
+{
+       /* Zero the whole struct so no member is left indeterminate. */
+       struct sockmap_options opt = {0};
+       int err = 0;
+       int i, l, r;
+
+       for (r = 1; r < 100; r += 33) {
+               for (i = 1; i < 100; i += 33) {
+                       for (l = 1; l < 100; l += 33) {
+                               opt.rate = r;
+                               opt.iov_count = i;
+                               opt.iov_length = l;
+                               err = test_exec(cgrp, &opt);
+                               if (err)
+                                       return err;
+                       }
+               }
+       }
+
+       return err;
+}
+
+/*
+ * Run the test_loop() sweep once for each basic verdict configuration:
+ * pass, redirect, drop, and redirect combined with the ingress flag.
+ *
+ * The verdict and option knobs used here are cleared on entry so that a
+ * previous test cannot leak state into the sweeps; txmsg_start and
+ * txmsg_end are left as the caller set them.
+ *
+ * Returns 0 on success or the first error reported by test_loop().
+ */
+static int test_txmsg(int cgrp)
+{
+       int err;
+
+       txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+       /* test_mixed() returns with txmsg_redir still set; clear it as well
+        * so the "pass" sweep really runs with only txmsg_pass enabled.
+        */
+       txmsg_redir = 0;
+       txmsg_apply = txmsg_cork = 0;
+       txmsg_ingress = txmsg_skb = 0;
+
+       txmsg_pass = 1;
+       err = test_loop(cgrp);
+       txmsg_pass = 0;
+       if (err)
+               goto out;
+
+       txmsg_redir = 1;
+       err = test_loop(cgrp);
+       txmsg_redir = 0;
+       if (err)
+               goto out;
+
+       txmsg_drop = 1;
+       err = test_loop(cgrp);
+       txmsg_drop = 0;
+       if (err)
+               goto out;
+
+       txmsg_redir = 1;
+       txmsg_ingress = 1;
+       err = test_loop(cgrp);
+       txmsg_redir = 0;
+       txmsg_ingress = 0;
+out:
+       /* Leave the verdict knobs cleared whichever path got us here. */
+       txmsg_pass = 0;
+       txmsg_redir = 0;
+       txmsg_drop = 0;
+       return err;
+}
+
+/*
+ * Run test_exec() for a fixed list of (rate, iov_count, iov_length)
+ * shapes using the caller's option struct, which is overwritten for each
+ * shape. Stops at the first failure.
+ *
+ * Returns 0 when every shape passes, otherwise the first error.
+ */
+static int test_send(struct sockmap_options *opt, int cgrp)
+{
+       static const struct {
+               int rate;
+               int iov_count;
+               int iov_length;
+       } shapes[] = {
+               {    1,    1,    1 },
+               {    1, 1024,    1 },
+               {    1,    1, 1024 },
+               { 1024,    1,    1 },
+               {   10, 1024,  256 },
+               {  100,    1,    5 },
+       };
+       int err = 0;
+       size_t s;
+
+       for (s = 0; s < sizeof(shapes) / sizeof(shapes[0]); s++) {
+               opt->iov_length = shapes[s].iov_length;
+               opt->iov_count = shapes[s].iov_count;
+               opt->rate = shapes[s].rate;
+               err = test_exec(cgrp, opt);
+               if (err)
+                       break;
+       }
+       return err;
+}
+
+/*
+ * Exercise combinations of the pass/redirect verdicts with small and
+ * large apply and cork byte counts. Each combination is pushed through
+ * test_send(); the first failure ends the run.
+ *
+ * The txmsg_* knobs are left at the values of the last combination run.
+ *
+ * Returns 0 when every combination passes, otherwise the first error.
+ */
+static int test_mixed(int cgrp)
+{
+       static const struct {
+               int pass;
+               int redir;
+               int apply;
+               int cork;
+       } combos[] = {
+               /* pass verdict */
+               { 1, 0,    1,    0 },
+               { 1, 0,    0,    1 },
+               { 1, 0,    1,    1 },
+               { 1, 0, 1024,    0 },
+               { 1, 0,    0, 1024 },
+               { 1, 0, 1024, 1024 },
+               { 1, 0, 4096, 4096 },
+               /* redirect verdict */
+               { 0, 1,    1,    0 },
+               { 0, 1,    0,    1 },
+               { 0, 1, 1024,    0 },
+               { 0, 1,    0, 1024 },
+               { 0, 1, 1024, 1024 },
+               { 0, 1, 4096, 4096 },
+       };
+       struct sockmap_options opt = {0};
+       int err = 0;
+       size_t c;
+
+       txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+       txmsg_apply = txmsg_cork = 0;
+       txmsg_start = txmsg_end = 0;
+
+       for (c = 0; c < sizeof(combos) / sizeof(combos[0]); c++) {
+               txmsg_pass = combos[c].pass;
+               txmsg_redir = combos[c].redir;
+               txmsg_apply = combos[c].apply;
+               txmsg_cork = combos[c].cork;
+               err = test_send(&opt, cgrp);
+               if (err)
+                       break;
+       }
+       return err;
+}
+
+/*
+ * Exercise the start/end (pull data) knobs.
+ *
+ * First the basic test_txmsg() sweep is run with start=1/end=2. Then,
+ * with a cork of 1600 bytes and 16 sends of one 100-byte iov, a series of
+ * start/end windows is tried, including windows that are inverted or lie
+ * beyond the data.
+ *
+ * txmsg_start and txmsg_end are reset to 0 before returning.
+ * Returns 0 on success, otherwise the first error.
+ */
+static int test_start_end(int cgrp)
+{
+       static const struct {
+               int start;
+               int end;
+       } edges[] = {
+               { 1500, 1600 }, /* cork pulling last sg entry */
+               { 1111, 1112 }, /* single byte in last page */
+               { 1111,    0 }, /* end < start */
+               {    0, 1601 }, /* end > data */
+               { 1601, 1600 }, /* start > data */
+       };
+       struct sockmap_options opt = {0};
+       int err, i, first;
+       size_t e;
+
+       /* Test basic start/end with lots of iov_count and iov_lengths */
+       txmsg_start = 1;
+       txmsg_end = 2;
+       err = test_txmsg(cgrp);
+       if (err)
+               goto out;
+
+       /* Test start/end with cork */
+       opt.rate = 16;
+       opt.iov_count = 1;
+       opt.iov_length = 100;
+       txmsg_cork = 1600;
+
+       /* First pass pulls from offset 0, second pass pulls from the
+        * middle (offset 100); the end grows in steps of 100 bytes.
+        */
+       for (first = 0; first <= 100; first += 100) {
+               for (i = first + 99; i <= 1600; i += 100) {
+                       txmsg_start = first;
+                       txmsg_end = i;
+                       err = test_exec(cgrp, &opt);
+                       if (err)
+                               goto out;
+               }
+       }
+
+       for (e = 0; e < sizeof(edges) / sizeof(edges[0]); e++) {
+               txmsg_start = edges[e].start;
+               txmsg_end = edges[e].end;
+               err = test_exec(cgrp, &opt);
+               if (err)
+                       goto out;
+       }
+
+out:
+       txmsg_start = 0;
+       txmsg_end = 0;
+       return err;
+}
+
+/*
+ * Names of the maps looked up in the BPF object by populate_progs().
+ * The position in this table is the index used for maps[] and map_fd[].
+ */
+char *map_names[] = {
+       "sock_map",
+       "sock_map_txmsg",
+       "sock_map_redir",
+       "sock_apply_bytes",
+       "sock_cork_bytes",
+       "sock_pull_bytes",
+       "sock_redir_flags",
+       "sock_skb_opts",
+};
+
+/*
+ * Expected attach type applied by populate_progs() to each program, by
+ * position: entry i is used for the i-th program iterated in the object.
+ */
+int prog_attach_type[] = {
+       BPF_SK_SKB_STREAM_PARSER,
+       BPF_SK_SKB_STREAM_VERDICT,
+       BPF_CGROUP_SOCK_OPS,
+       BPF_SK_MSG_VERDICT,
+       BPF_SK_MSG_VERDICT,
+       BPF_SK_MSG_VERDICT,
+       BPF_SK_MSG_VERDICT,
+       BPF_SK_MSG_VERDICT,
+       BPF_SK_MSG_VERDICT,
+       BPF_SK_MSG_VERDICT,
+};
+
+/*
+ * Program type applied by populate_progs() to each program, by position:
+ * entry i is used for the i-th program iterated in the object. Kept in
+ * step with prog_attach_type[], which is indexed the same way.
+ */
+int prog_type[] = {
+       BPF_PROG_TYPE_SK_SKB,
+       BPF_PROG_TYPE_SK_SKB,
+       BPF_PROG_TYPE_SOCK_OPS,
+       BPF_PROG_TYPE_SK_MSG,
+       BPF_PROG_TYPE_SK_MSG,
+       BPF_PROG_TYPE_SK_MSG,
+       BPF_PROG_TYPE_SK_MSG,
+       BPF_PROG_TYPE_SK_MSG,
+       BPF_PROG_TYPE_SK_MSG,
+       BPF_PROG_TYPE_SK_MSG,
+};
+
+/*
+ * Open and load the BPF object BPF_FILENAME and record its program and
+ * map file descriptors in the global prog_fd[] and map_fd[] arrays.
+ *
+ * Before loading, each program gets its type and expected attach type
+ * from prog_type[] and prog_attach_type[] by position. Maps are looked up
+ * by the names listed in map_names[].
+ *
+ * Returns 0 on success, or -1 if the object cannot be opened or loaded or
+ * if a map fd cannot be obtained.
+ */
+static int populate_progs(void)
+{
+       char *bpf_file = BPF_FILENAME;
+       struct bpf_program *prog;
+       struct bpf_object *obj;
+       char err_buf[256];
+       int i = 0;
+       long err;
+
+       obj = bpf_object__open(bpf_file);
+       err = libbpf_get_error(obj);
+       if (err) {
+               libbpf_strerror(err, err_buf, sizeof(err_buf));
+               printf("Unable to load eBPF objects in file '%s' : %s\n",
+                      bpf_file, err_buf);
+               return -1;
+       }
+
+       bpf_object__for_each_program(prog, obj) {
+               bpf_program__set_type(prog, prog_type[i]);
+               bpf_program__set_expected_attach_type(prog,
+                                                     prog_attach_type[i]);
+               i++;
+       }
+
+       /* Stop here if loading fails: the fds collected below would not
+        * refer to loaded programs or maps.
+        */
+       err = bpf_object__load(obj);
+       if (err) {
+               libbpf_strerror(err, err_buf, sizeof(err_buf));
+               fprintf(stderr,
+                       "ERROR: loading eBPF object '%s' failed: %s\n",
+                       bpf_file, err_buf);
+               return -1;
+       }
+
+       i = 0;
+       bpf_object__for_each_program(prog, obj) {
+               prog_fd[i] = bpf_program__fd(prog);
+               i++;
+       }
+
+       for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) {
+               maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
+               map_fd[i] = bpf_map__fd(maps[i]);
+               if (map_fd[i] < 0) {
+                       fprintf(stderr, "load_bpf_file: (%i) %s\n",
+                               map_fd[i], strerror(errno));
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Run the built-in test suite: load the BPF programs, set up the cgroup
+ * environment and cgroup CG_PATH, then run the txmsg, mixed and start/end
+ * test groups in that order, stopping at the first failing group.
+ *
+ * A pass/fail summary is printed once the groups have run.
+ *
+ * Returns 0 on success, a negative value if setup fails, or the error of
+ * the first failing test group.
+ */
+static int test_suite(void)
+{
+       int cg_fd, err;
+
+       err = populate_progs();
+       if (err < 0) {
+               fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
+               return err;
+       }
+
+       if (setup_cgroup_environment()) {
+               fprintf(stderr, "ERROR: cgroup env failed\n");
+               return -EINVAL;
+       }
+
+       cg_fd = create_and_get_cgroup(CG_PATH);
+       if (cg_fd < 0) {
+               /* Report the path actually used; optarg is never set on
+                * this path because getopt has not been called.
+                */
+               fprintf(stderr,
+                       "ERROR: (%i) open cg path failed: %s\n",
+                       cg_fd, CG_PATH);
+               return cg_fd;
+       }
+
+       /* Tests basic commands and APIs with range of iov values */
+       txmsg_start = txmsg_end = 0;
+       err = test_txmsg(cg_fd);
+       if (err)
+               goto out;
+
+       /* Tests interesting combinations of APIs used together */
+       err = test_mixed(cg_fd);
+       if (err)
+               goto out;
+
+       /* Tests pull_data API using start/end API */
+       err = test_start_end(cg_fd);
+
+out:
+       printf("Summary: %i PASSED %i FAILED\n", passed, failed);
+       close(cg_fd);
+       return err;
+}
+
+/*
+ * Entry point.
+ *
+ * Raises RLIMIT_MEMLOCK first. With no arguments the built-in suite is run
+ * via test_suite(). Otherwise the command line selects one test and its
+ * parameters, and that test is run once against the cgroup directory
+ * given with the 'c' option, which is required in this mode.
+ *
+ * Returns the result of the selected test (or of the suite), and a
+ * non-zero value on setup or usage errors.
+ */
+int main(int argc, char **argv)
+{
+       struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+       int iov_count = 1, length = 1024, rate = 1;
+       struct sockmap_options options = {0};
+       int opt, longindex, err, cg_fd = 0;
+       char *bpf_file = BPF_FILENAME;
+       int test = PING_PONG;
+
+       if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+               perror("setrlimit(RLIMIT_MEMLOCK)");
+               return 1;
+       }
+
+       if (argc < 2)
+               return test_suite();
+
+       while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
+                                 long_options, &longindex)) != -1) {
+               switch (opt) {
+               case 's':
+                       txmsg_start = atoi(optarg);
+                       break;
+               case 'e':
+                       txmsg_end = atoi(optarg);
+                       break;
+               case 'a':
+                       txmsg_apply = atoi(optarg);
+                       break;
+               case 'k':
+                       txmsg_cork = atoi(optarg);
+                       break;
+               case 'c':
+                       /* Access mode belongs in the flags argument; the
+                        * third argument of open() is the creation mode.
+                        */
+                       cg_fd = open(optarg, O_DIRECTORY | O_RDONLY);
+                       if (cg_fd < 0) {
+                               fprintf(stderr,
+                                       "ERROR: (%i) open cg path failed: %s\n",
+                                       cg_fd, optarg);
+                               return cg_fd;
+                       }
+                       break;
+               case 'r':
+                       rate = atoi(optarg);
+                       break;
+               case 'v':
+                       options.verbose = 1;
+                       break;
+               case 'i':
+                       iov_count = atoi(optarg);
+                       break;
+               case 'l':
+                       length = atoi(optarg);
+                       break;
+               case 'd':
+                       options.data_test = true;
+                       break;
+               case 't':
+                       if (strcmp(optarg, "ping") == 0) {
+                               test = PING_PONG;
+                       } else if (strcmp(optarg, "sendmsg") == 0) {
+                               test = SENDMSG;
+                       } else if (strcmp(optarg, "base") == 0) {
+                               test = BASE;
+                       } else if (strcmp(optarg, "base_sendpage") == 0) {
+                               test = BASE_SENDPAGE;
+                       } else if (strcmp(optarg, "sendpage") == 0) {
+                               test = SENDPAGE;
+                       } else {
+                               usage(argv);
+                               return -1;
+                       }
+                       break;
+               case 0:
+                       break;
+               case 'h':
+               default:
+                       usage(argv);
+                       return -1;
+               }
+       }
+
+       /* cg_fd still holds its initial 0 when no cgroup was given. */
+       if (!cg_fd) {
+               fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
+                       argv[0]);
+               return -1;
+       }
+
+       err = populate_progs();
+       if (err) {
+               fprintf(stderr, "populate program: (%s) %s\n",
+                       bpf_file, strerror(errno));
+               /* Do not leak the cgroup fd on the error path. */
+               close(cg_fd);
+               return 1;
+       }
+       running = 1;
+
+       /* catch SIGINT */
+       signal(SIGINT, running_handler);
+
+       options.iov_count = iov_count;
+       options.iov_length = length;
+       options.rate = rate;
+
+       err = run_options(&options, cg_fd, test);
+       close(cg_fd);
+       return err;
+}
+
+/* Signal handler installed for SIGINT by main(): clears the run flag. */
+void running_handler(int a)
+{
+       (void)a;
+       running = 0;
+}
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.c b/tools/testing/selftests/bpf/test_sockmap_kern.c
new file mode 100644 (file)
index 0000000..33de97e
--- /dev/null
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* Sockmap sample program connects a client and a backend together
+ * using cgroups.
+ *
+ *    client:X <---> frontend:80 client:X <---> backend:80
+ *
+ * For simplicity we hard code values here and bind 1:1. The hard
+ * coded values are part of the setup in sockmap.sh script that
+ * is associated with this BPF program.
+ *
+ * The bpf_printk is verbose and prints information as connections
+ * are established and verdicts are decided.
+ */
+
+/*
+ * printf-style trace helper: copies the format string into a local char
+ * array and hands that array, its size and the remaining arguments to
+ * bpf_trace_printk(). fmt must be a string literal, since it is used as an
+ * array initializer.
+ */
+#define bpf_printk(fmt, ...)                                   \
+({                                                             \
+              char ____fmt[] = fmt;                            \
+              bpf_trace_printk(____fmt, sizeof(____fmt),       \
+                               ##__VA_ARGS__);                 \
+})
+
+/*
+ * Sockmap filled by the sockops program (bpf_sockmap) and used as the
+ * redirect target map by the sk_skb2 program (bpf_prog2).
+ */
+struct bpf_map_def SEC("maps") sock_map = {
+       .type = BPF_MAP_TYPE_SOCKMAP,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 20,
+};
+
+/*
+ * Sockmap that is not referenced by any program in this file.
+ * NOTE(review): presumably the map user space attaches the sk_msg verdict
+ * program to - confirm against the loader.
+ */
+struct bpf_map_def SEC("maps") sock_map_txmsg = {
+       .type = BPF_MAP_TYPE_SOCKMAP,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 20,
+};
+
+/*
+ * Sockmap used as the redirect target map by the sk_msg redirect programs
+ * (bpf_prog6 and bpf_prog7), which pick key 0 or key 2.
+ */
+struct bpf_map_def SEC("maps") sock_map_redir = {
+       .type = BPF_MAP_TYPE_SOCKMAP,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 20,
+};
+
+/*
+ * Single-entry array; the value at key 0 is passed to
+ * bpf_msg_apply_bytes() by the sk_msg programs.
+ */
+struct bpf_map_def SEC("maps") sock_apply_bytes = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 1
+};
+
+/*
+ * Single-entry array; the value at key 0 is passed to
+ * bpf_msg_cork_bytes() by the sk_msg programs.
+ */
+struct bpf_map_def SEC("maps") sock_cork_bytes = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 1
+};
+
+/*
+ * Two-entry array: key 0 holds the start and key 1 the end value that the
+ * sk_msg programs pass to bpf_msg_pull_data().
+ */
+struct bpf_map_def SEC("maps") sock_pull_bytes = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 2
+};
+
+/*
+ * Single-entry array; a non-zero value at key 0 is used as the flags
+ * argument of bpf_msg_redirect_map() and switches the redirect key to 2
+ * (see bpf_prog6 and bpf_prog7).
+ */
+struct bpf_map_def SEC("maps") sock_redir_flags = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 1
+};
+
+/*
+ * Single-entry array; a non-zero value at key 0 is used as the flags
+ * argument of bpf_sk_redirect_map() and switches the redirect key to 3
+ * (see bpf_prog2).
+ */
+struct bpf_map_def SEC("maps") sock_skb_opts = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(int),
+       .value_size = sizeof(int),
+       .max_entries = 1
+};
+
+/*
+ * sk_skb1: returns the full length of the skb.
+ * NOTE(review): presumably used as the stream parser, i.e. the whole skb
+ * is treated as one message - confirm against the loader.
+ */
+SEC("sk_skb1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+       return skb->len;
+}
+
+/*
+ * sk_skb2: redirects the skb to a socket in sock_map.
+ *
+ * The key is 10 when the local port is 10000 and 1 otherwise. If
+ * sock_skb_opts[0] holds a non-zero value, key 3 is used instead and that
+ * value is passed as the redirect flags. The decision is traced with
+ * bpf_printk. Returns the result of bpf_sk_redirect_map().
+ */
+SEC("sk_skb2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+       __u32 lport = skb->local_port;
+       /* NOTE(review): rport is read but never used below. */
+       __u32 rport = skb->remote_port;
+       int len, *f, ret, zero = 0;
+       __u64 flags = 0;
+
+       if (lport == 10000)
+               ret = 10;
+       else
+               ret = 1;
+
+       /* len is only used for the trace message */
+       len = (__u32)skb->data_end - (__u32)skb->data;
+       f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
+       if (f && *f) {
+               ret = 3;
+               flags = *f;
+       }
+
+       bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
+                  len, flags);
+       return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
+}
+
+/*
+ * sockops: adds newly established sockets to sock_map.
+ *
+ * - passive establish with local port 10000: stored at key 1
+ * - active establish with remote port 10001: stored at key 10
+ *
+ * Entries are added with BPF_NOEXIST, so an existing entry is not
+ * replaced. The update result is only traced; the program always
+ * returns 0.
+ */
+SEC("sockops")
+int bpf_sockmap(struct bpf_sock_ops *skops)
+{
+       __u32 lport, rport;
+       /* NOTE(review): index and key are declared but never used. */
+       int op, err = 0, index, key, ret;
+
+
+       op = (int) skops->op;
+
+       switch (op) {
+       case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+               lport = skops->local_port;
+               rport = skops->remote_port;
+
+               if (lport == 10000) {
+                       ret = 1;
+                       err = bpf_sock_map_update(skops, &sock_map, &ret,
+                                                 BPF_NOEXIST);
+                       bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
+                                  lport, bpf_ntohl(rport), err);
+               }
+               break;
+       case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+               lport = skops->local_port;
+               rport = skops->remote_port;
+
+               /* remote_port is converted with bpf_ntohl() before use */
+               if (bpf_ntohl(rport) == 10001) {
+                       ret = 10;
+                       err = bpf_sock_map_update(skops, &sock_map, &ret,
+                                                 BPF_NOEXIST);
+                       bpf_printk("active(%i -> %i) map ctx update err: %d\n",
+                                  lport, bpf_ntohl(rport), err);
+               }
+               break;
+       default:
+               break;
+       }
+
+       return 0;
+}
+
+/*
+ * sk_msg1: pass verdict.
+ *
+ * Applies the apply-bytes, cork-bytes and pull-data settings read from
+ * the option maps (helper return values are ignored) and then returns
+ * SK_PASS.
+ */
+SEC("sk_msg1")
+int bpf_prog4(struct sk_msg_md *msg)
+{
+       int *bytes, zero = 0, one = 1;
+       int *start, *end;
+
+       bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+       if (bytes)
+               bpf_msg_apply_bytes(msg, *bytes);
+       bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+       if (bytes)
+               bpf_msg_cork_bytes(msg, *bytes);
+       /* sock_pull_bytes: key 0 is the start, key 1 the end */
+       start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+       end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+       if (start && end)
+               bpf_msg_pull_data(msg, *start, *end, 0);
+       return SK_PASS;
+}
+
+/*
+ * sk_msg2: pass verdict with tracing.
+ *
+ * Same steps as bpf_prog4, but the helper results and the message length
+ * before and after bpf_msg_pull_data() are traced with bpf_printk.
+ * Always returns SK_PASS.
+ */
+SEC("sk_msg2")
+int bpf_prog5(struct sk_msg_md *msg)
+{
+       /* err1/err2 stay at -1 when the corresponding lookup fails */
+       int err1 = -1, err2 = -1, zero = 0, one = 1;
+       int *bytes, *start, *end, len1, len2;
+
+       bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+       if (bytes)
+               err1 = bpf_msg_apply_bytes(msg, *bytes);
+       bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+       if (bytes)
+               err2 = bpf_msg_cork_bytes(msg, *bytes);
+       /* length visible through data/data_end before any pull */
+       len1 = (__u64)msg->data_end - (__u64)msg->data;
+       start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+       end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+       if (start && end) {
+               int err;
+
+               bpf_printk("sk_msg2: pull(%i:%i)\n",
+                          start ? *start : 0, end ? *end : 0);
+               err = bpf_msg_pull_data(msg, *start, *end, 0);
+               if (err)
+                       bpf_printk("sk_msg2: pull_data err %i\n",
+                                  err);
+               /* length again after the pull, for comparison */
+               len2 = (__u64)msg->data_end - (__u64)msg->data;
+               bpf_printk("sk_msg2: length update %i->%i\n",
+                          len1, len2);
+       }
+       bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
+                  len1, err1, err2);
+       return SK_PASS;
+}
+
+/*
+ * sk_msg3: redirect verdict.
+ *
+ * Applies the apply-bytes, cork-bytes and pull-data settings (helper
+ * return values are ignored) and redirects the message through
+ * sock_map_redir. Key 0 with flags 0 is used unless sock_redir_flags[0]
+ * is non-zero, in which case key 2 is used with that value as flags.
+ * Returns the result of bpf_msg_redirect_map().
+ */
+SEC("sk_msg3")
+int bpf_prog6(struct sk_msg_md *msg)
+{
+       int *bytes, zero = 0, one = 1, key = 0;
+       int *start, *end, *f;
+       __u64 flags = 0;
+
+       bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+       if (bytes)
+               bpf_msg_apply_bytes(msg, *bytes);
+       bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+       if (bytes)
+               bpf_msg_cork_bytes(msg, *bytes);
+       start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+       end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+       if (start && end)
+               bpf_msg_pull_data(msg, *start, *end, 0);
+       f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+       if (f && *f) {
+               key = 2;
+               flags = *f;
+       }
+       return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+}
+
+/*
+ * sk_msg4: redirect verdict with tracing.
+ *
+ * Same steps as bpf_prog6, but the helper results and message lengths are
+ * traced with bpf_printk. Returns the result of bpf_msg_redirect_map().
+ *
+ * NOTE(review): the trace messages below are prefixed "sk_msg2:" and
+ * "sk_msg3:" although this program lives in section "sk_msg4" - confirm
+ * whether that is intended.
+ */
+SEC("sk_msg4")
+int bpf_prog7(struct sk_msg_md *msg)
+{
+       int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
+       int *f, *bytes, *start, *end, len1, len2;
+       __u64 flags = 0;
+
+               int err;
+       bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+       if (bytes)
+               err1 = bpf_msg_apply_bytes(msg, *bytes);
+       bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+       if (bytes)
+               err2 = bpf_msg_cork_bytes(msg, *bytes);
+       /* length visible through data/data_end before any pull */
+       len1 = (__u64)msg->data_end - (__u64)msg->data;
+       start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+       end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+       if (start && end) {
+
+               bpf_printk("sk_msg2: pull(%i:%i)\n",
+                          start ? *start : 0, end ? *end : 0);
+               err = bpf_msg_pull_data(msg, *start, *end, 0);
+               if (err)
+                       bpf_printk("sk_msg2: pull_data err %i\n",
+                                  err);
+               len2 = (__u64)msg->data_end - (__u64)msg->data;
+               bpf_printk("sk_msg2: length update %i->%i\n",
+                          len1, len2);
+       }
+       /* non-zero flags select key 2 instead of key 0 */
+       f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+       if (f && *f) {
+               key = 2;
+               flags = *f;
+       }
+       bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
+                  len1, flags, err1 ? err1 : err2);
+       err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+       bpf_printk("sk_msg3: err %i\n", err);
+       return err;
+}
+
+/*
+ * sk_msg5: apply-bytes check.
+ *
+ * Returns SK_PASS only if sock_apply_bytes[0] can be looked up and
+ * bpf_msg_apply_bytes() succeeds with that value; otherwise SK_DROP.
+ */
+SEC("sk_msg5")
+int bpf_prog8(struct sk_msg_md *msg)
+{
+       /* NOTE(review): data_end and data are not used in this program. */
+       void *data_end = (void *)(long) msg->data_end;
+       void *data = (void *)(long) msg->data;
+       int ret = 0, *bytes, zero = 0;
+
+       bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+       if (bytes) {
+               ret = bpf_msg_apply_bytes(msg, *bytes);
+               if (ret)
+                       return SK_DROP;
+       } else {
+               return SK_DROP;
+       }
+       return SK_PASS;
+}
+/*
+ * sk_msg6: cork-bytes check.
+ *
+ * If sock_cork_bytes[0] can be looked up and the data visible between
+ * data and data_end is shorter than that value, bpf_msg_cork_bytes() is
+ * called and a failure results in SK_DROP. In every other case the
+ * message is passed.
+ */
+SEC("sk_msg6")
+int bpf_prog9(struct sk_msg_md *msg)
+{
+       void *data_end = (void *)(long) msg->data_end;
+       void *data = (void *)(long) msg->data;
+       int ret = 0, *bytes, zero = 0;
+
+       bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+       if (bytes) {
+               /* enough data already present: no need to cork */
+               if (((__u64)data_end - (__u64)data) >= *bytes)
+                       return SK_PASS;
+               ret = bpf_msg_cork_bytes(msg, *bytes);
+               if (ret)
+                       return SK_DROP;
+       }
+       return SK_PASS;
+}
+
+/*
+ * sk_msg7: drop verdict.
+ *
+ * Applies the apply-bytes, cork-bytes and pull-data settings read from
+ * the option maps (helper return values are ignored) and then returns
+ * SK_DROP.
+ */
+SEC("sk_msg7")
+int bpf_prog10(struct sk_msg_md *msg)
+{
+       int *bytes, zero = 0, one = 1;
+       int *start, *end;
+
+       bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+       if (bytes)
+               bpf_msg_apply_bytes(msg, *bytes);
+       bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+       if (bytes)
+               bpf_msg_cork_bytes(msg, *bytes);
+       /* sock_pull_bytes: key 0 is the start, key 1 the end */
+       start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+       end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+       if (start && end)
+               bpf_msg_pull_data(msg, *start, *end, 0);
+
+       return SK_DROP;
+}
+
+/*
+ * Object metadata placed in the "version" and "license" sections.
+ * NOTE(review): presumably read by the BPF loader when the object is
+ * loaded - confirm.
+ */
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";