From: Yonghong Song <yonghong.song@linux.dev>
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
kernel-team@fb.com, Martin KaFai Lau <martin.lau@kernel.org>,
Tejun Heo <tj@kernel.org>
Subject: [PATCH bpf-next v4 07/10] bpf: Support calling non-tailcall bpf prog
Date: Thu, 10 Oct 2024 10:56:28 -0700 [thread overview]
Message-ID: <20241010175628.1898648-1-yonghong.song@linux.dev> (raw)
In-Reply-To: <20241010175552.1895980-1-yonghong.song@linux.dev>
A kfunc bpf_prog_call() is introduced such that it can call another bpf
prog within a bpf prog. It has the same parameters as bpf_tail_call()
but acts like a normal function call.
But bpf_prog_call() could recurse into the caller prog itself. So if a bpf
prog calls bpf_prog_call(), that bpf prog will use private stacks with a
maximum recursion level of 4. The 4-level recursion should work for most
cases.
bpf_prog_call() cannot be used if tail_call exists in the same prog
since tail_call does not use private stack. If both prog_call and
tail_call are present in the same prog, verification will fail.
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
include/linux/bpf.h | 2 ++
kernel/bpf/core.c | 7 +++++--
kernel/bpf/helpers.c | 20 ++++++++++++++++++++
kernel/bpf/verifier.c | 30 ++++++++++++++++++++++++++----
4 files changed, 53 insertions(+), 6 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f22ddb423fd0..952cb398eb30 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1493,6 +1493,7 @@ struct bpf_prog_aux {
bool exception_cb;
bool exception_boundary;
bool priv_stack_eligible;
+ bool has_prog_call;
struct bpf_arena *arena;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
@@ -1929,6 +1930,7 @@ struct bpf_array {
#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 33
+#define BPF_MAX_PRIV_STACK_NEST_LEVEL 4
/* Maximum number of loops for bpf_loop and bpf_iter_num.
* It's enum to expose it (and thus make it discoverable) through BTF.
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f79d951a061f..0d2c97f63ecf 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2426,10 +2426,13 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
fp->aux->priv_stack_mode = NO_PRIV_STACK;
} else {
void __percpu *priv_stack_ptr;
+ int nest_level = 1;
+ if (fp->aux->has_prog_call)
+ nest_level = BPF_MAX_PRIV_STACK_NEST_LEVEL;
fp->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
- priv_stack_ptr =
- __alloc_percpu_gfp(fp->aux->stack_depth, 8, GFP_KERNEL);
+ priv_stack_ptr = __alloc_percpu_gfp(
+ fp->aux->stack_depth * nest_level, 8, GFP_KERNEL);
if (!priv_stack_ptr) {
*err = -ENOMEM;
return fp;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 4053f279ed4c..9cc880dc213e 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2749,6 +2749,25 @@ __bpf_kfunc void bpf_rcu_read_unlock(void)
rcu_read_unlock();
}
+__bpf_kfunc int bpf_prog_call(void *ctx, struct bpf_map *p__map, u32 index)
+{
+ struct bpf_array *array;
+ struct bpf_prog *prog;
+
+ if (p__map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+ return -EINVAL;
+
+ array = container_of(p__map, struct bpf_array, map);
+ if (unlikely(index >= array->map.max_entries))
+ return -E2BIG;
+
+ prog = READ_ONCE(array->ptrs[index]);
+ if (!prog)
+ return -ENOENT;
+
+ return bpf_prog_run(prog, ctx);
+}
+
struct bpf_throw_ctx {
struct bpf_prog_aux *aux;
u64 sp;
@@ -3035,6 +3054,7 @@ BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
#endif
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_throw)
+BTF_ID_FLAGS(func, bpf_prog_call)
BTF_KFUNCS_END(generic_btf_ids)
static const struct btf_kfunc_id_set generic_kfunc_set = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 46b0c277c6a8..e3d9820618a1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5986,6 +5986,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
static bool bpf_enable_private_stack(struct bpf_prog *prog)
{
+ if (prog->aux->has_prog_call)
+ return true;
+
if (!bpf_jit_supports_private_stack())
return false;
@@ -6092,7 +6095,9 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
return -EACCES;
}
- if (!priv_stack_eligible && depth >= BPF_PRIV_STACK_MIN_SUBTREE_SIZE) {
+ if (!priv_stack_eligible &&
+ (depth >= BPF_PRIV_STACK_MIN_SUBTREE_SIZE ||
+ env->prog->aux->has_prog_call)) {
subprog[orig_idx].priv_stack_eligible = true;
env->prog->aux->priv_stack_eligible = priv_stack_eligible = true;
}
@@ -6181,8 +6186,13 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
}
subprog[ret_prog[j]].tail_call_reachable = true;
}
- if (!check_priv_stack && subprog[0].tail_call_reachable)
+ if (!check_priv_stack && subprog[0].tail_call_reachable) {
+ if (env->prog->aux->has_prog_call) {
+ verbose(env, "cannot do prog call and tail call in the same prog\n");
+ return -EINVAL;
+ }
env->prog->aux->tail_call_reachable = true;
+ }
/* end of for() loop means the last insn of the 'subprog'
* was reached. Doesn't matter whether it was JA or EXIT
@@ -11322,6 +11332,7 @@ enum special_kfunc_type {
KF_bpf_preempt_enable,
KF_bpf_iter_css_task_new,
KF_bpf_session_cookie,
+ KF_bpf_prog_call,
};
BTF_SET_START(special_kfunc_set)
@@ -11387,6 +11398,7 @@ BTF_ID(func, bpf_session_cookie)
#else
BTF_ID_UNUSED
#endif
+BTF_ID(func, bpf_prog_call)
static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -11433,6 +11445,11 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
return KF_ARG_PTR_TO_CTX;
+ if (meta->func_id == special_kfunc_list[KF_bpf_prog_call] && argno == 0) {
+ env->prog->aux->has_prog_call = true;
+ return KF_ARG_PTR_TO_CTX;
+ }
+
/* In this function, we verify the kfunc's BTF as per the argument type,
* leaving the rest of the verification with respect to the register
* type to our caller. When a set of conditions hold in the BTF type of
@@ -20009,6 +20026,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
struct bpf_insn *insn;
void *old_bpf_func;
int err, num_exentries;
+ int nest_level = 1;
if (env->subprog_cnt <= 1)
return 0;
@@ -20099,9 +20117,13 @@ static int jit_subprogs(struct bpf_verifier_env *env)
} else if (!subtree_stack_depth) {
func[i]->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
} else {
+ if (env->prog->aux->has_prog_call) {
+ func[i]->aux->has_prog_call = true;
+ nest_level = BPF_MAX_PRIV_STACK_NEST_LEVEL;
+ }
func[i]->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
- priv_stack_ptr =
- __alloc_percpu_gfp(subtree_stack_depth, 8, GFP_KERNEL);
+ priv_stack_ptr = __alloc_percpu_gfp(
+ subtree_stack_depth * nest_level, 8, GFP_KERNEL);
if (!priv_stack_ptr) {
err = -ENOMEM;
goto out_free;
--
2.43.5
next prev parent reply other threads:[~2024-10-10 17:56 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-10 17:55 [PATCH bpf-next v4 00/10] bpf: Support private stack for bpf progs Yonghong Song
2024-10-10 17:55 ` [PATCH bpf-next v4 01/10] bpf: Allow each subprog having stack size of 512 bytes Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 02/10] bpf: Mark each subprog with proper private stack modes Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 03/10] bpf, x86: Refactor func emit_prologue Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 04/10] bpf, x86: Create a helper for certain "reg <op>= imm" operations Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 05/10] bpf, x86: Add jit support for private stack Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 06/10] selftests/bpf: Add private stack tests Yonghong Song
2024-10-10 17:56 ` Yonghong Song [this message]
2024-10-10 20:28 ` [PATCH bpf-next v4 07/10] bpf: Support calling non-tailcall bpf prog Alexei Starovoitov
2024-10-11 4:12 ` Yonghong Song
2024-10-15 21:18 ` Tejun Heo
2024-10-15 21:35 ` Alexei Starovoitov
2024-10-10 17:56 ` [PATCH bpf-next v4 08/10] bpf, x86: Create two helpers for some arith operations Yonghong Song
2024-10-10 20:21 ` Alexei Starovoitov
2024-10-11 4:16 ` Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 09/10] bpf, x86: Jit support for nested bpf_prog_call Yonghong Song
2024-10-10 20:53 ` Alexei Starovoitov
2024-10-11 4:20 ` Yonghong Song
2024-10-11 4:29 ` Alexei Starovoitov
2024-10-11 15:38 ` Yonghong Song
2024-10-11 15:40 ` Alexei Starovoitov
2024-10-11 16:14 ` Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 10/10] selftests/bpf: Add tests for bpf_prog_call() Yonghong Song
2024-10-15 21:28 ` [PATCH bpf-next v4 00/10] bpf: Support private stack for bpf progs Tejun Heo
2024-10-15 21:39 ` Alexei Starovoitov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241010175628.1898648-1-yonghong.song@linux.dev \
--to=yonghong.song@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=kernel-team@fb.com \
--cc=martin.lau@kernel.org \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox