From: Yonghong Song <yonghong.song@linux.dev>
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
kernel-team@fb.com, Martin KaFai Lau <martin.lau@kernel.org>,
Tejun Heo <tj@kernel.org>
Subject: [PATCH bpf-next v4 07/10] bpf: Support calling non-tailcall bpf prog
Date: Thu, 10 Oct 2024 10:56:28 -0700 [thread overview]
Message-ID: <20241010175628.1898648-1-yonghong.song@linux.dev> (raw)
In-Reply-To: <20241010175552.1895980-1-yonghong.song@linux.dev>
A kfunc bpf_prog_call() is introduced so that a bpf prog can call another
bpf prog. It takes the same parameters as bpf_tail_call() but acts like a
normal function call.
However, bpf_prog_call() could recurse into the caller prog itself. So if
a bpf prog calls bpf_prog_call(), that bpf prog will use private stacks
with a maximum recursion level of 4. A recursion depth of 4 should be
sufficient for most cases.
bpf_prog_call() cannot be used if a tail_call exists in the same prog,
since tail_call does not use the private stack. If both prog_call and
tail_call are used in the same prog, verification will fail.
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
include/linux/bpf.h | 2 ++
kernel/bpf/core.c | 7 +++++--
kernel/bpf/helpers.c | 20 ++++++++++++++++++++
kernel/bpf/verifier.c | 30 ++++++++++++++++++++++++++----
4 files changed, 53 insertions(+), 6 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f22ddb423fd0..952cb398eb30 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1493,6 +1493,7 @@ struct bpf_prog_aux {
bool exception_cb;
bool exception_boundary;
bool priv_stack_eligible;
+ bool has_prog_call;
struct bpf_arena *arena;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
@@ -1929,6 +1930,7 @@ struct bpf_array {
#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 33
+#define BPF_MAX_PRIV_STACK_NEST_LEVEL 4
/* Maximum number of loops for bpf_loop and bpf_iter_num.
* It's enum to expose it (and thus make it discoverable) through BTF.
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f79d951a061f..0d2c97f63ecf 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2426,10 +2426,13 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
fp->aux->priv_stack_mode = NO_PRIV_STACK;
} else {
void __percpu *priv_stack_ptr;
+ int nest_level = 1;
+ if (fp->aux->has_prog_call)
+ nest_level = BPF_MAX_PRIV_STACK_NEST_LEVEL;
fp->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
- priv_stack_ptr =
- __alloc_percpu_gfp(fp->aux->stack_depth, 8, GFP_KERNEL);
+ priv_stack_ptr = __alloc_percpu_gfp(
+ fp->aux->stack_depth * nest_level, 8, GFP_KERNEL);
if (!priv_stack_ptr) {
*err = -ENOMEM;
return fp;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 4053f279ed4c..9cc880dc213e 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2749,6 +2749,25 @@ __bpf_kfunc void bpf_rcu_read_unlock(void)
rcu_read_unlock();
}
+__bpf_kfunc int bpf_prog_call(void *ctx, struct bpf_map *p__map, u32 index)
+{
+ struct bpf_array *array;
+ struct bpf_prog *prog;
+
+ if (p__map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+ return -EINVAL;
+
+ array = container_of(p__map, struct bpf_array, map);
+ if (unlikely(index >= array->map.max_entries))
+ return -E2BIG;
+
+ prog = READ_ONCE(array->ptrs[index]);
+ if (!prog)
+ return -ENOENT;
+
+ return bpf_prog_run(prog, ctx);
+}
+
struct bpf_throw_ctx {
struct bpf_prog_aux *aux;
u64 sp;
@@ -3035,6 +3054,7 @@ BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
#endif
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_throw)
+BTF_ID_FLAGS(func, bpf_prog_call)
BTF_KFUNCS_END(generic_btf_ids)
static const struct btf_kfunc_id_set generic_kfunc_set = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 46b0c277c6a8..e3d9820618a1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5986,6 +5986,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
static bool bpf_enable_private_stack(struct bpf_prog *prog)
{
+ if (prog->aux->has_prog_call)
+ return true;
+
if (!bpf_jit_supports_private_stack())
return false;
@@ -6092,7 +6095,9 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
return -EACCES;
}
- if (!priv_stack_eligible && depth >= BPF_PRIV_STACK_MIN_SUBTREE_SIZE) {
+ if (!priv_stack_eligible &&
+ (depth >= BPF_PRIV_STACK_MIN_SUBTREE_SIZE ||
+ env->prog->aux->has_prog_call)) {
subprog[orig_idx].priv_stack_eligible = true;
env->prog->aux->priv_stack_eligible = priv_stack_eligible = true;
}
@@ -6181,8 +6186,13 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
}
subprog[ret_prog[j]].tail_call_reachable = true;
}
- if (!check_priv_stack && subprog[0].tail_call_reachable)
+ if (!check_priv_stack && subprog[0].tail_call_reachable) {
+ if (env->prog->aux->has_prog_call) {
+ verbose(env, "cannot do prog call and tail call in the same prog\n");
+ return -EINVAL;
+ }
env->prog->aux->tail_call_reachable = true;
+ }
/* end of for() loop means the last insn of the 'subprog'
* was reached. Doesn't matter whether it was JA or EXIT
@@ -11322,6 +11332,7 @@ enum special_kfunc_type {
KF_bpf_preempt_enable,
KF_bpf_iter_css_task_new,
KF_bpf_session_cookie,
+ KF_bpf_prog_call,
};
BTF_SET_START(special_kfunc_set)
@@ -11387,6 +11398,7 @@ BTF_ID(func, bpf_session_cookie)
#else
BTF_ID_UNUSED
#endif
+BTF_ID(func, bpf_prog_call)
static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -11433,6 +11445,11 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
return KF_ARG_PTR_TO_CTX;
+ if (meta->func_id == special_kfunc_list[KF_bpf_prog_call] && argno == 0) {
+ env->prog->aux->has_prog_call = true;
+ return KF_ARG_PTR_TO_CTX;
+ }
+
/* In this function, we verify the kfunc's BTF as per the argument type,
* leaving the rest of the verification with respect to the register
* type to our caller. When a set of conditions hold in the BTF type of
@@ -20009,6 +20026,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
struct bpf_insn *insn;
void *old_bpf_func;
int err, num_exentries;
+ int nest_level = 1;
if (env->subprog_cnt <= 1)
return 0;
@@ -20099,9 +20117,13 @@ static int jit_subprogs(struct bpf_verifier_env *env)
} else if (!subtree_stack_depth) {
func[i]->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
} else {
+ if (env->prog->aux->has_prog_call) {
+ func[i]->aux->has_prog_call = true;
+ nest_level = BPF_MAX_PRIV_STACK_NEST_LEVEL;
+ }
func[i]->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
- priv_stack_ptr =
- __alloc_percpu_gfp(subtree_stack_depth, 8, GFP_KERNEL);
+ priv_stack_ptr = __alloc_percpu_gfp(
+ subtree_stack_depth * nest_level, 8, GFP_KERNEL);
if (!priv_stack_ptr) {
err = -ENOMEM;
goto out_free;
--
2.43.5
next prev parent reply other threads:[~2024-10-10 17:56 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-10 17:55 [PATCH bpf-next v4 00/10] bpf: Support private stack for bpf progs Yonghong Song
2024-10-10 17:55 ` [PATCH bpf-next v4 01/10] bpf: Allow each subprog having stack size of 512 bytes Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 02/10] bpf: Mark each subprog with proper private stack modes Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 03/10] bpf, x86: Refactor func emit_prologue Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 04/10] bpf, x86: Create a helper for certain "reg <op>= imm" operations Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 05/10] bpf, x86: Add jit support for private stack Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 06/10] selftests/bpf: Add private stack tests Yonghong Song
2024-10-10 17:56 ` Yonghong Song [this message]
2024-10-10 20:28 ` [PATCH bpf-next v4 07/10] bpf: Support calling non-tailcall bpf prog Alexei Starovoitov
2024-10-11 4:12 ` Yonghong Song
2024-10-15 21:18 ` Tejun Heo
2024-10-15 21:35 ` Alexei Starovoitov
2024-10-10 17:56 ` [PATCH bpf-next v4 08/10] bpf, x86: Create two helpers for some arith operations Yonghong Song
2024-10-10 20:21 ` Alexei Starovoitov
2024-10-11 4:16 ` Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 09/10] bpf, x86: Jit support for nested bpf_prog_call Yonghong Song
2024-10-10 20:53 ` Alexei Starovoitov
2024-10-11 4:20 ` Yonghong Song
2024-10-11 4:29 ` Alexei Starovoitov
2024-10-11 15:38 ` Yonghong Song
2024-10-11 15:40 ` Alexei Starovoitov
2024-10-11 16:14 ` Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 10/10] selftests/bpf: Add tests for bpf_prog_call() Yonghong Song
2024-10-15 21:28 ` [PATCH bpf-next v4 00/10] bpf: Support private stack for bpf progs Tejun Heo
2024-10-15 21:39 ` Alexei Starovoitov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241010175628.1898648-1-yonghong.song@linux.dev \
--to=yonghong.song@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=kernel-team@fb.com \
--cc=martin.lau@kernel.org \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.