From: Martin KaFai Lau Date: Tue, 6 May 2025 01:58:50 +0000 (-0700) Subject: bpf: Add bpf_rbtree_{root,left,right} kfunc X-Git-Tag: v6.16-rc1~131^2~41^2~5 X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=9e3e66c553f705de51707c7ddc7f35ce159a8ef1;p=linux-2.6-block.git bpf: Add bpf_rbtree_{root,left,right} kfunc In a bpf fq implementation that is much closer to the kernel fq, it will need to traverse the rbtree: https://lore.kernel.org/bpf/20250418224652.105998-13-martin.lau@linux.dev/ The much simplified logic that uses the bpf_rbtree_{root,left,right} to traverse the rbtree is like: struct fq_flow { struct bpf_rb_node fq_node; struct bpf_rb_node rate_node; struct bpf_refcount refcount; unsigned long sk_long; }; struct fq_flow_root { struct bpf_spin_lock lock; struct bpf_rb_root root __contains(fq_flow, fq_node); }; struct fq_flow *fq_classify(...) { struct bpf_rb_node *tofree[FQ_GC_MAX]; struct fq_flow_root *root; struct fq_flow *gc_f, *f; struct bpf_rb_node *p; int i, fcnt = 0; /* ... */ f = NULL; bpf_spin_lock(&root->lock); p = bpf_rbtree_root(&root->root); while (can_loop) { if (!p) break; gc_f = bpf_rb_entry(p, struct fq_flow, fq_node); if (gc_f->sk_long == sk_long) { f = bpf_refcount_acquire(gc_f); break; } /* To be removed from the rbtree */ if (fcnt < FQ_GC_MAX && fq_gc_candidate(gc_f, jiffies_now)) tofree[fcnt++] = p; if (gc_f->sk_long > sk_long) p = bpf_rbtree_left(&root->root, p); else p = bpf_rbtree_right(&root->root, p); } /* remove from the rbtree */ for (i = 0; i < fcnt; i++) { p = tofree[i]; tofree[i] = bpf_rbtree_remove(&root->root, p); } bpf_spin_unlock(&root->lock); /* bpf_obj_drop the fq_flow(s) that have just been removed * from the rbtree. */ for (i = 0; i < fcnt; i++) { p = tofree[i]; if (p) { gc_f = bpf_rb_entry(p, struct fq_flow, fq_node); bpf_obj_drop(gc_f); } } return f; } The above simplified code needs to traverse the rbtree for two purposes, 1) find the flow with the desired sk_long value 2) while searching for the sk_long, collect flows that are the fq_gc_candidate. They will be removed from the rbtree. This patch adds the bpf_rbtree_{root,left,right} kfunc to enable the rbtree traversal. The returned bpf_rb_node pointer will be a non-owning reference which is the same as the returned pointer of the exisiting bpf_rbtree_first kfunc. Acked-by: Kumar Kartikeya Dwivedi Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20250506015857.817950-4-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index e3a2662f4e33..36150d340c16 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2366,6 +2366,33 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) return (struct bpf_rb_node *)rb_first_cached(r); } +__bpf_kfunc struct bpf_rb_node *bpf_rbtree_root(struct bpf_rb_root *root) +{ + struct rb_root_cached *r = (struct rb_root_cached *)root; + + return (struct bpf_rb_node *)r->rb_root.rb_node; +} + +__bpf_kfunc struct bpf_rb_node *bpf_rbtree_left(struct bpf_rb_root *root, struct bpf_rb_node *node) +{ + struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node; + + if (READ_ONCE(node_internal->owner) != root) + return NULL; + + return (struct bpf_rb_node *)node_internal->rb_node.rb_left; +} + +__bpf_kfunc struct bpf_rb_node *bpf_rbtree_right(struct bpf_rb_root *root, struct bpf_rb_node *node) +{ + struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node; + + if (READ_ONCE(node_internal->owner) != root) + return NULL; + + return (struct bpf_rb_node *)node_internal->rb_node.rb_right; +} + /** * bpf_task_acquire - Acquire a reference to a task. A task acquired by this * kfunc which is not stored in a map as a kptr, must be released by calling @@ -3214,6 +3241,9 @@ BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_rbtree_add_impl) BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_rbtree_root, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_rbtree_left, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_rbtree_right, KF_RET_NULL) #ifdef CONFIG_CGROUPS BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bf14da00f09a..51a17e64a0a9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12081,6 +12081,9 @@ enum special_kfunc_type { KF_bpf_rbtree_remove, KF_bpf_rbtree_add_impl, KF_bpf_rbtree_first, + KF_bpf_rbtree_root, + KF_bpf_rbtree_left, + KF_bpf_rbtree_right, KF_bpf_dynptr_from_skb, KF_bpf_dynptr_from_xdp, KF_bpf_dynptr_slice, @@ -12121,6 +12124,9 @@ BTF_ID(func, bpf_rdonly_cast) BTF_ID(func, bpf_rbtree_remove) BTF_ID(func, bpf_rbtree_add_impl) BTF_ID(func, bpf_rbtree_first) +BTF_ID(func, bpf_rbtree_root) +BTF_ID(func, bpf_rbtree_left) +BTF_ID(func, bpf_rbtree_right) #ifdef CONFIG_NET BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_xdp) @@ -12156,6 +12162,9 @@ BTF_ID(func, bpf_rcu_read_unlock) BTF_ID(func, bpf_rbtree_remove) BTF_ID(func, bpf_rbtree_add_impl) BTF_ID(func, bpf_rbtree_first) +BTF_ID(func, bpf_rbtree_root) +BTF_ID(func, bpf_rbtree_left) +BTF_ID(func, bpf_rbtree_right) #ifdef CONFIG_NET BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_xdp) @@ -12591,7 +12600,10 @@ static bool is_bpf_rbtree_api_kfunc(u32 btf_id) { return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] || btf_id == special_kfunc_list[KF_bpf_rbtree_remove] || - btf_id == special_kfunc_list[KF_bpf_rbtree_first]; + btf_id == special_kfunc_list[KF_bpf_rbtree_first] || + btf_id == special_kfunc_list[KF_bpf_rbtree_root] || + btf_id == special_kfunc_list[KF_bpf_rbtree_left] || + btf_id == special_kfunc_list[KF_bpf_rbtree_right]; } static bool is_bpf_iter_num_api_kfunc(u32 btf_id) @@ -12691,7 +12703,9 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env, break; case BPF_RB_NODE: ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] || - kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]); + kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] || + kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] || + kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]); break; default: verbose(env, "verifier internal error: unexpected graph node argument type %s\n", @@ -13216,11 +13230,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } } else { if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) { - verbose(env, "rbtree_remove node input must be non-owning ref\n"); + verbose(env, "%s node input must be non-owning ref\n", func_name); return -EINVAL; } if (in_rbtree_lock_required_cb(env)) { - verbose(env, "rbtree_remove not allowed in rbtree cb\n"); + verbose(env, "%s not allowed in rbtree cb\n", func_name); return -EINVAL; } }