xfrm: interface: Add unstable helpers for setting/getting XFRM metadata from TC-BPF
author: Eyal Birger <eyal.birger@gmail.com>
Sat, 3 Dec 2022 08:46:57 +0000 (10:46 +0200)
committer: Martin KaFai Lau <martin.lau@kernel.org>
Tue, 6 Dec 2022 05:58:27 +0000 (21:58 -0800)
This change adds xfrm metadata helpers using the unstable kfunc call
interface for the TC-BPF hooks. This allows steering traffic towards
different IPsec connections based on logic implemented in bpf programs.

This object is built based on the availability of BTF debug info.

When setting the xfrm metadata, percpu metadata dsts are used in order
to avoid allocating a metadata dst per packet.

In order to guarantee safe module unload, the percpu dsts are allocated
on first use and never freed. The percpu pointer is stored in
net/core/filter.c so that it can be reused on module reload.

The metadata percpu dsts take ownership of the original skb dsts so
that they may be used as part of the xfrm transmission logic - e.g.
for MTU calculations.

Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Link: https://lore.kernel.org/r/20221203084659.1837829-3-eyal.birger@gmail.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
include/net/dst_metadata.h
include/net/xfrm.h
net/core/dst.c
net/core/filter.c
net/xfrm/Makefile
net/xfrm/xfrm_interface_bpf.c [new file with mode: 0644]
net/xfrm/xfrm_interface_core.c

index a454cf4327feb419100da9284d46515680051a91..1b7fae4c6b240ba08ab2c689d6b2f63d38a0f42d 100644 (file)
@@ -26,6 +26,7 @@ struct macsec_info {
 struct xfrm_md_info {
        u32 if_id;
        int link;
+       struct dst_entry *dst_orig; /* dst the skb carried before the metadata dst */
 };
 
 struct metadata_dst {
index e0cc6791c001c293e43c1341fb9e0cc97da8a8fc..3707e6b34e6790a9095103340610819f1a9dadf9 100644 (file)
@@ -2086,4 +2086,21 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk)
        return false;
 }
 #endif
+
+#if (IS_BUILTIN(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+    (IS_MODULE(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+
+extern struct metadata_dst __percpu *xfrm_bpf_md_dst;
+
+int register_xfrm_interface_bpf(void);
+
+#else
+/* Stub for builds without the XFRM BPF helpers: nothing to register, so succeed */
+static inline int register_xfrm_interface_bpf(void)
+{
+       return 0;
+}
+
+#endif
+
 #endif /* _NET_XFRM_H */
index bc9c9be4e08018a0429fea6592db0b5560e18afb..bb14a03923886f35e05435c9af98bda7cf74a142 100644 (file)
@@ -316,6 +316,8 @@ void metadata_dst_free(struct metadata_dst *md_dst)
        if (md_dst->type == METADATA_IP_TUNNEL)
                dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
 #endif
+       if (md_dst->type == METADATA_XFRM)
+               dst_release(md_dst->u.xfrm_info.dst_orig);
        kfree(md_dst);
 }
 EXPORT_SYMBOL_GPL(metadata_dst_free);
@@ -340,16 +342,18 @@ EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
 
 void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst)
 {
-#ifdef CONFIG_DST_CACHE
        int cpu;
 
        for_each_possible_cpu(cpu) {
                struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu);
 
+#ifdef CONFIG_DST_CACHE
                if (one_md_dst->type == METADATA_IP_TUNNEL)
                        dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache);
-       }
 #endif
+               if (one_md_dst->type == METADATA_XFRM)
+                       dst_release(one_md_dst->u.xfrm_info.dst_orig);
+       }
        free_percpu(md_dst);
 }
 EXPORT_SYMBOL_GPL(metadata_dst_free_percpu);
index 8607136b6e2c4a721c879268ed1c5123bf84d04e..929358677183d5827b7cf2c54700cc34eb36544d 100644 (file)
@@ -5631,6 +5631,15 @@ static const struct bpf_func_proto bpf_bind_proto = {
 };
 
 #ifdef CONFIG_XFRM
+
+#if (IS_BUILTIN(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+    (IS_MODULE(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+/* Percpu metadata dsts for the XFRM kfuncs; lives here to survive module reload */
+struct metadata_dst __percpu *xfrm_bpf_md_dst;
+EXPORT_SYMBOL_GPL(xfrm_bpf_md_dst);
+
+#endif
+
 BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
           struct bpf_xfrm_state *, to, u32, size, u64, flags)
 {
index 08a2870fdd36f79db7f52045ee8c067a0a5d7955..cd47f88921f5975d71e258c58652ca0d8a46fe5f 100644 (file)
@@ -5,6 +5,12 @@
 
 xfrm_interface-$(CONFIG_XFRM_INTERFACE) += xfrm_interface_core.o
 
+ifeq ($(CONFIG_XFRM_INTERFACE),m)
+xfrm_interface-$(CONFIG_DEBUG_INFO_BTF_MODULES) += xfrm_interface_bpf.o
+else ifeq ($(CONFIG_XFRM_INTERFACE),y)
+xfrm_interface-$(CONFIG_DEBUG_INFO_BTF) += xfrm_interface_bpf.o
+endif
+
 obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
                      xfrm_input.o xfrm_output.o \
                      xfrm_sysctl.o xfrm_replay.o xfrm_device.o
diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c
new file mode 100644 (file)
index 0000000..1ef2162
--- /dev/null
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable XFRM Helpers for TC-BPF hook
+ *
+ * These are called from SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface they
+ * are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf_ids.h>
+
+#include <net/dst_metadata.h>
+#include <net/xfrm.h>
+
+/* bpf_xfrm_info - XFRM metadata passed to/from the XFRM kfuncs in this file
+ * (copied field-by-field to/from the same-named struct xfrm_md_info members)
+ * Members:
+ * @if_id      - XFRM if_id:
+ *                 Transmit: if_id to be used in policy and state lookups
+ *                 Receive: if_id of the state matched for the incoming packet
+ * @link       - Underlying device ifindex:
+ *                 Transmit: used as the underlying device in VRF routing
+ *                 Receive: the device on which the packet had been received
+ */
+struct bpf_xfrm_info {
+       u32 if_id;
+       int link;
+};
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+                 "Global functions as their definitions will be in xfrm_interface BTF");
+
+/* bpf_skb_get_xfrm_info - Get XFRM metadata
+ *
+ * Parameters:
+ * @skb_ctx    - Pointer to ctx (__sk_buff) in TC program; cannot be NULL
+ * @to         - Pointer to memory to which the metadata will be copied;
+ *                 cannot be NULL
+ * Return: 0 on success, -EINVAL if skb_xfrm_md_info() finds no metadata
+ */
+__used noinline
+int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to)
+{
+       struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+       struct xfrm_md_info *info;
+
+       info = skb_xfrm_md_info(skb);
+       if (!info)
+               return -EINVAL;
+
+       to->if_id = info->if_id;
+       to->link = info->link;
+       return 0;
+}
+
+/* bpf_skb_set_xfrm_info - Set XFRM metadata
+ *
+ * Parameters:
+ * @skb_ctx    - Pointer to ctx (__sk_buff) in TC program; cannot be NULL
+ * @from       - Pointer to memory from which the metadata will be copied;
+ *                 cannot be NULL
+ * Return: 0, -EINVAL if skb already has a metadata dst, -ENOMEM on alloc failure
+ */
+__used noinline
+int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx,
+                         const struct bpf_xfrm_info *from)
+{
+       struct sk_buff *skb = (struct sk_buff *)skb_ctx;
+       struct metadata_dst *md_dst;
+       struct xfrm_md_info *info;
+
+       if (unlikely(skb_metadata_dst(skb)))
+               return -EINVAL;
+       /* Allocate the shared percpu dsts on first use; a cmpxchg loser frees its copy */
+       if (!xfrm_bpf_md_dst) {
+               struct metadata_dst __percpu *tmp;
+
+               tmp = metadata_dst_alloc_percpu(0, METADATA_XFRM, GFP_ATOMIC);
+               if (!tmp)
+                       return -ENOMEM;
+               if (cmpxchg(&xfrm_bpf_md_dst, NULL, tmp))
+                       metadata_dst_free_percpu(tmp);
+       }
+       md_dst = this_cpu_ptr(xfrm_bpf_md_dst);
+
+       info = &md_dst->u.xfrm_info;
+       /* Copy the metadata and keep the skb's current dst in dst_orig for xmit */
+       info->if_id = from->if_id;
+       info->link = from->link;
+       skb_dst_force(skb);
+       info->dst_orig = skb_dst(skb);
+       /* The percpu metadata dst, with a reference held, becomes the skb's dst */
+       dst_hold((struct dst_entry *)md_dst);
+       skb_dst_set(skb, (struct dst_entry *)md_dst);
+       return 0;
+}
+
+__diag_pop()
+/* BTF ID set listing the two XFRM metadata kfuncs defined above */
+BTF_SET8_START(xfrm_ifc_kfunc_set)
+BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info)
+BTF_ID_FLAGS(func, bpf_skb_set_xfrm_info)
+BTF_SET8_END(xfrm_ifc_kfunc_set)
+/* Ties the ID set to this module; passed to register_btf_kfunc_id_set() */
+static const struct btf_kfunc_id_set xfrm_interface_kfunc_set = {
+       .owner = THIS_MODULE,
+       .set   = &xfrm_ifc_kfunc_set,
+};
+/* Register the XFRM kfuncs for BPF_PROG_TYPE_SCHED_CLS; returns the registration result */
+int __init register_xfrm_interface_bpf(void)
+{
+       return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
+                                        &xfrm_interface_kfunc_set);
+}
index 5a67b120c4dbd4754b2842bf8e845fa675823810..1f99dc46902719e6716b0cb93f95d1087d2c8bd8 100644 (file)
@@ -396,6 +396,14 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 
                if_id = md_info->if_id;
                fl->flowi_oif = md_info->link;
+               if (md_info->dst_orig) {
+                       struct dst_entry *tmp_dst = dst;
+
+                       dst = md_info->dst_orig;
+                       skb_dst_set(skb, dst);
+                       md_info->dst_orig = NULL;
+                       dst_release(tmp_dst);
+               }
        } else {
                if_id = xi->p.if_id;
        }
@@ -1162,12 +1170,18 @@ static int __init xfrmi_init(void)
        if (err < 0)
                goto rtnl_link_failed;
 
+       err = register_xfrm_interface_bpf();
+       if (err < 0)
+               goto kfunc_failed;
+
        lwtunnel_encap_add_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM);
 
        xfrm_if_register_cb(&xfrm_if_cb);
 
        return err;
 
+kfunc_failed:
+       rtnl_link_unregister(&xfrmi_link_ops);
 rtnl_link_failed:
        xfrmi6_fini();
 xfrmi6_failed: