From: Martin KaFai Lau <martin.lau@linux.dev>
To: bpf@vger.kernel.org
Cc: 'Alexei Starovoitov ' <ast@kernel.org>,
'Andrii Nakryiko ' <andrii@kernel.org>,
'Daniel Borkmann ' <daniel@iogearbox.net>,
netdev@vger.kernel.org, kernel-team@meta.com,
'Amery Hung ' <ameryhung@gmail.com>
Subject: [RFC PATCH bpf-next 03/12] bpf: Add bpf_rbtree_{root,left,right} kfunc
Date: Fri, 18 Apr 2025 15:46:41 -0700 [thread overview]
Message-ID: <20250418224652.105998-4-martin.lau@linux.dev> (raw)
In-Reply-To: <20250418224652.105998-1-martin.lau@linux.dev>
From: Martin KaFai Lau <martin.lau@kernel.org>
The kernel fq qdisc implementation needs to traverse an rbtree
that stores the networking "flows".
In the bpf selftests prog added later in this series, the much simplified
logic that uses bpf_rbtree_{root,left,right} to traverse the tree looks like:
struct fq_flow {
struct bpf_rb_node fq_node;
struct bpf_rb_node rate_node;
struct bpf_refcount refcount;
unsigned long sk_long;
};
struct fq_flow_root {
struct bpf_spin_lock lock;
struct bpf_rb_root root __contains(fq_flow, fq_node);
};
struct fq_flow *fq_classify(...)
{
struct bpf_rb_node *tofree[FQ_GC_MAX];
struct fq_flow_root *root;
struct fq_flow *gc_f, *f;
struct bpf_rb_node *p;
int i, fcnt = 0;
/* ... */
f = NULL;
bpf_spin_lock(&root->lock);
p = bpf_rbtree_root(&root->root);
while (can_loop) {
if (!p)
break;
gc_f = bpf_rb_entry(p, struct fq_flow, fq_node);
if (gc_f->sk_long == sk_long) {
f = bpf_refcount_acquire(gc_f);
break;
}
/* To be removed from the rbtree */
if (fcnt < FQ_GC_MAX && fq_gc_candidate(gc_f, jiffies_now))
tofree[fcnt++] = p;
if (gc_f->sk_long > sk_long)
p = bpf_rbtree_left(&root->root, p);
else
p = bpf_rbtree_right(&root->root, p);
}
/* remove from the rbtree */
for (i = 0; i < fcnt; i++) {
p = tofree[i];
tofree[i] = bpf_rbtree_remove(&root->root, p);
}
bpf_spin_unlock(&root->lock);
/* bpf_obj_drop the fq_flow(s) that have just been removed
* from the rbtree.
*/
for (i = 0; i < fcnt; i++) {
p = tofree[i];
if (p) {
gc_f = bpf_rb_entry(p, struct fq_flow, fq_node);
bpf_obj_drop(gc_f);
}
}
return f;
}
The above simplified code needs to traverse the rbtree for two purposes:
1) find the flow with the desired sk_long value
2) while searching for that sk_long, collect the flows for which
fq_gc_candidate() is true. They will be removed from the rbtree.
This patch adds the bpf_rbtree_{root,left,right} kfuncs to enable
rbtree traversal. The returned bpf_rb_node pointer is a
non-owning reference, the same as the pointer returned
by the existing bpf_rbtree_first kfunc.
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
kernel/bpf/helpers.c | 30 ++++++++++++++++++++++++++++++
kernel/bpf/verifier.c | 23 ++++++++++++++++++-----
2 files changed, 48 insertions(+), 5 deletions(-)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index e3a2662f4e33..36150d340c16 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2366,6 +2366,33 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
return (struct bpf_rb_node *)rb_first_cached(r);
}
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_root(struct bpf_rb_root *root)
+{
+ struct rb_root_cached *r = (struct rb_root_cached *)root;
+
+ return (struct bpf_rb_node *)r->rb_root.rb_node;
+}
+
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_left(struct bpf_rb_root *root, struct bpf_rb_node *node)
+{
+ struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
+
+ if (READ_ONCE(node_internal->owner) != root)
+ return NULL;
+
+ return (struct bpf_rb_node *)node_internal->rb_node.rb_left;
+}
+
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_right(struct bpf_rb_root *root, struct bpf_rb_node *node)
+{
+ struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
+
+ if (READ_ONCE(node_internal->owner) != root)
+ return NULL;
+
+ return (struct bpf_rb_node *)node_internal->rb_node.rb_right;
+}
+
/**
* bpf_task_acquire - Acquire a reference to a task. A task acquired by this
* kfunc which is not stored in a map as a kptr, must be released by calling
@@ -3214,6 +3241,9 @@ BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_add_impl)
BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_rbtree_root, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_rbtree_left, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_rbtree_right, KF_RET_NULL)
#ifdef CONFIG_CGROUPS
BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bf14da00f09a..3624de1c6925 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12081,6 +12081,9 @@ enum special_kfunc_type {
KF_bpf_rbtree_remove,
KF_bpf_rbtree_add_impl,
KF_bpf_rbtree_first,
+ KF_bpf_rbtree_root,
+ KF_bpf_rbtree_left,
+ KF_bpf_rbtree_right,
KF_bpf_dynptr_from_skb,
KF_bpf_dynptr_from_xdp,
KF_bpf_dynptr_slice,
@@ -12121,6 +12124,9 @@ BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_first)
+BTF_ID(func, bpf_rbtree_root)
+BTF_ID(func, bpf_rbtree_left)
+BTF_ID(func, bpf_rbtree_right)
#ifdef CONFIG_NET
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
@@ -12156,6 +12162,9 @@ BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_first)
+BTF_ID(func, bpf_rbtree_root)
+BTF_ID(func, bpf_rbtree_left)
+BTF_ID(func, bpf_rbtree_right)
#ifdef CONFIG_NET
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
@@ -12591,7 +12600,10 @@ static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
{
return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
- btf_id == special_kfunc_list[KF_bpf_rbtree_first];
+ btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
+ btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
+ btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
+ btf_id == special_kfunc_list[KF_bpf_rbtree_right];
}
static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
@@ -12691,7 +12703,9 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
break;
case BPF_RB_NODE:
ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
- kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
+ kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
+ kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
+ kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
break;
default:
verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
@@ -13216,15 +13230,14 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
}
} else {
if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
- verbose(env, "rbtree_remove node input must be non-owning ref\n");
+ verbose(env, "%s can only take non-owning bpf_rb_node pointer\n", func_name);
return -EINVAL;
}
if (in_rbtree_lock_required_cb(env)) {
- verbose(env, "rbtree_remove not allowed in rbtree cb\n");
+ verbose(env, "%s not allowed in rbtree cb\n", func_name);
return -EINVAL;
}
}
-
ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
if (ret < 0)
return ret;
--
2.47.1
next prev parent reply other threads:[~2025-04-18 22:47 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-18 22:46 [RFC PATCH bpf-next 00/12] bpf: A fq example similar to the kernel sch_fq.c implementation Martin KaFai Lau
2025-04-18 22:46 ` [RFC PATCH bpf-next 01/12] bpf: Check KF_bpf_rbtree_add_impl for the "case KF_ARG_PTR_TO_RB_NODE" Martin KaFai Lau
2025-04-22 1:05 ` Kumar Kartikeya Dwivedi
2025-04-18 22:46 ` [RFC PATCH bpf-next 02/12] bpf: Simplify reg0 marking for the rbtree kfuncs that return a bpf_rb_node pointer Martin KaFai Lau
2025-04-22 1:14 ` Kumar Kartikeya Dwivedi
2025-04-18 22:46 ` Martin KaFai Lau [this message]
2025-04-22 1:43 ` [RFC PATCH bpf-next 03/12] bpf: Add bpf_rbtree_{root,left,right} kfunc Kumar Kartikeya Dwivedi
2025-04-18 22:46 ` [RFC PATCH bpf-next 04/12] selftests/bpf: Adjust failure message in the rbtree_fail test Martin KaFai Lau
2025-04-22 1:44 ` Kumar Kartikeya Dwivedi
2025-04-18 22:46 ` [RFC PATCH bpf-next 05/12] bpf: Allow refcounted bpf_rb_node used in bpf_rbtree_{remove,left,right} Martin KaFai Lau
2025-04-22 2:32 ` Kumar Kartikeya Dwivedi
2025-04-18 22:46 ` [RFC PATCH bpf-next 06/12] selftests/bpf: Adjust test that does not allow refcounted node in rbtree_remove Martin KaFai Lau
2025-04-22 2:36 ` Kumar Kartikeya Dwivedi
2025-04-22 2:48 ` Kumar Kartikeya Dwivedi
2025-04-18 22:46 ` [RFC PATCH bpf-next 07/12] selftests/bpf: Add rbtree_search test Martin KaFai Lau
2025-04-22 3:03 ` Kumar Kartikeya Dwivedi
2025-04-18 22:46 ` [RFC PATCH bpf-next 08/12] bpf: Simplify reg0 marking for the list kfuncs that return a bpf_list_node pointer Martin KaFai Lau
2025-04-22 3:05 ` Kumar Kartikeya Dwivedi
2025-04-18 22:46 ` [RFC PATCH bpf-next 09/12] bpf: Add bpf_list_{front,back} kfunc Martin KaFai Lau
2025-04-22 3:07 ` Kumar Kartikeya Dwivedi
2025-04-18 22:46 ` [RFC PATCH bpf-next 10/12] selftests/bpf: Add test for bpf_list_{front,back} Martin KaFai Lau
2025-04-22 3:08 ` Kumar Kartikeya Dwivedi
2025-04-25 23:28 ` Martin KaFai Lau
2025-04-18 22:46 ` [RFC PATCH bpf-next 11/12] bpf: net: Add a qdisc kfunc to set sk_pacing_status Martin KaFai Lau
2025-04-18 22:46 ` [RFC PATCH bpf-next 12/12] selftests/bpf: A bpf fq implementation similar to the kernel sch_fq Martin KaFai Lau
2025-04-25 0:13 ` Alexei Starovoitov
2025-04-25 23:50 ` Martin KaFai Lau
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250418224652.105998-4-martin.lau@linux.dev \
--to=martin.lau@linux.dev \
--cc=ameryhung@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=kernel-team@meta.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).