BPF List
 help / color / mirror / Atom feed
From: Yonghong Song <yonghong.song@linux.dev>
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	kernel-team@fb.com, Martin KaFai Lau <martin.lau@kernel.org>,
	Tejun Heo <tj@kernel.org>
Subject: [PATCH bpf-next v4 07/10] bpf: Support calling non-tailcall bpf prog
Date: Thu, 10 Oct 2024 10:56:28 -0700	[thread overview]
Message-ID: <20241010175628.1898648-1-yonghong.song@linux.dev> (raw)
In-Reply-To: <20241010175552.1895980-1-yonghong.song@linux.dev>

A kfunc bpf_prog_call() is introduced such that it can call another bpf
prog within a bpf prog. It has the same parameters as bpf_tail_call()
but acts like a normal function call.

But bpf_prog_call() could recurse to the caller prog itself. So if a bpf
prog calls bpf_prog_call(), that bpf prog will use private stacks with
maximum recursion level 4. The 4 level recursion should work for most
cases.

bpf_prog_call() cannot be used if tail_call exists in the same prog
since tail_call does not use private stack. If both prog_call and
tail_call are used in the same prog, verification will fail.

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 include/linux/bpf.h   |  2 ++
 kernel/bpf/core.c     |  7 +++++--
 kernel/bpf/helpers.c  | 20 ++++++++++++++++++++
 kernel/bpf/verifier.c | 30 ++++++++++++++++++++++++++----
 4 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f22ddb423fd0..952cb398eb30 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1493,6 +1493,7 @@ struct bpf_prog_aux {
 	bool exception_cb;
 	bool exception_boundary;
 	bool priv_stack_eligible;
+	bool has_prog_call;
 	struct bpf_arena *arena;
 	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
 	const struct btf_type *attach_func_proto;
@@ -1929,6 +1930,7 @@ struct bpf_array {
 
 #define BPF_COMPLEXITY_LIMIT_INSNS      1000000 /* yes. 1M insns */
 #define MAX_TAIL_CALL_CNT 33
+#define BPF_MAX_PRIV_STACK_NEST_LEVEL	4
 
 /* Maximum number of loops for bpf_loop and bpf_iter_num.
  * It's enum to expose it (and thus make it discoverable) through BTF.
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f79d951a061f..0d2c97f63ecf 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2426,10 +2426,13 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 				fp->aux->priv_stack_mode = NO_PRIV_STACK;
 			} else {
 				void __percpu *priv_stack_ptr;
+				int nest_level = 1;
 
+				if (fp->aux->has_prog_call)
+					nest_level = BPF_MAX_PRIV_STACK_NEST_LEVEL;
 				fp->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
-				priv_stack_ptr =
-					__alloc_percpu_gfp(fp->aux->stack_depth, 8, GFP_KERNEL);
+				priv_stack_ptr = __alloc_percpu_gfp(
+					fp->aux->stack_depth * nest_level, 8, GFP_KERNEL);
 				if (!priv_stack_ptr) {
 					*err = -ENOMEM;
 					return fp;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 4053f279ed4c..9cc880dc213e 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2749,6 +2749,25 @@ __bpf_kfunc void bpf_rcu_read_unlock(void)
 	rcu_read_unlock();
 }
 
+__bpf_kfunc int bpf_prog_call(void *ctx, struct bpf_map *p__map, u32 index)
+{
+	struct bpf_array *array;
+	struct bpf_prog *prog;
+
+	if (p__map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+		return -EINVAL;
+
+	array = container_of(p__map, struct bpf_array, map);
+	if (unlikely(index >= array->map.max_entries))
+		return -E2BIG;
+
+	prog = READ_ONCE(array->ptrs[index]);
+	if (!prog)
+		return -ENOENT;
+
+	return bpf_prog_run(prog, ctx);
+}
+
 struct bpf_throw_ctx {
 	struct bpf_prog_aux *aux;
 	u64 sp;
@@ -3035,6 +3054,7 @@ BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
 #endif
 BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_throw)
+BTF_ID_FLAGS(func, bpf_prog_call)
 BTF_KFUNCS_END(generic_btf_ids)
 
 static const struct btf_kfunc_id_set generic_kfunc_set = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 46b0c277c6a8..e3d9820618a1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5986,6 +5986,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 
 static bool bpf_enable_private_stack(struct bpf_prog *prog)
 {
+	if (prog->aux->has_prog_call)
+		return true;
+
 	if (!bpf_jit_supports_private_stack())
 		return false;
 
@@ -6092,7 +6095,9 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
 			return -EACCES;
 		}
 
-		if (!priv_stack_eligible && depth >= BPF_PRIV_STACK_MIN_SUBTREE_SIZE) {
+		if (!priv_stack_eligible &&
+		    (depth >= BPF_PRIV_STACK_MIN_SUBTREE_SIZE ||
+		     env->prog->aux->has_prog_call)) {
 			subprog[orig_idx].priv_stack_eligible = true;
 			env->prog->aux->priv_stack_eligible = priv_stack_eligible = true;
 		}
@@ -6181,8 +6186,13 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
 			}
 			subprog[ret_prog[j]].tail_call_reachable = true;
 		}
-	if (!check_priv_stack && subprog[0].tail_call_reachable)
+	if (!check_priv_stack && subprog[0].tail_call_reachable) {
+		if (env->prog->aux->has_prog_call) {
+			verbose(env, "cannot do prog call and tail call in the same prog\n");
+			return -EINVAL;
+		}
 		env->prog->aux->tail_call_reachable = true;
+	}
 
 	/* end of for() loop means the last insn of the 'subprog'
 	 * was reached. Doesn't matter whether it was JA or EXIT
@@ -11322,6 +11332,7 @@ enum special_kfunc_type {
 	KF_bpf_preempt_enable,
 	KF_bpf_iter_css_task_new,
 	KF_bpf_session_cookie,
+	KF_bpf_prog_call,
 };
 
 BTF_SET_START(special_kfunc_set)
@@ -11387,6 +11398,7 @@ BTF_ID(func, bpf_session_cookie)
 #else
 BTF_ID_UNUSED
 #endif
+BTF_ID(func, bpf_prog_call)
 
 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
 {
@@ -11433,6 +11445,11 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
 	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
 		return KF_ARG_PTR_TO_CTX;
 
+	if (meta->func_id == special_kfunc_list[KF_bpf_prog_call] && argno == 0) {
+		env->prog->aux->has_prog_call = true;
+		return KF_ARG_PTR_TO_CTX;
+	}
+
 	/* In this function, we verify the kfunc's BTF as per the argument type,
 	 * leaving the rest of the verification with respect to the register
 	 * type to our caller. When a set of conditions hold in the BTF type of
@@ -20009,6 +20026,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	struct bpf_insn *insn;
 	void *old_bpf_func;
 	int err, num_exentries;
+	int nest_level = 1;
 
 	if (env->subprog_cnt <= 1)
 		return 0;
@@ -20099,9 +20117,13 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 			} else if (!subtree_stack_depth) {
 				func[i]->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
 			} else {
+				if (env->prog->aux->has_prog_call) {
+					func[i]->aux->has_prog_call = true;
+					nest_level = BPF_MAX_PRIV_STACK_NEST_LEVEL;
+				}
 				func[i]->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
-				priv_stack_ptr =
-					__alloc_percpu_gfp(subtree_stack_depth, 8, GFP_KERNEL);
+				priv_stack_ptr = __alloc_percpu_gfp(
+					subtree_stack_depth * nest_level, 8, GFP_KERNEL);
 				if (!priv_stack_ptr) {
 					err = -ENOMEM;
 					goto out_free;
-- 
2.43.5


  parent reply	other threads:[~2024-10-10 17:56 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-10 17:55 [PATCH bpf-next v4 00/10] bpf: Support private stack for bpf progs Yonghong Song
2024-10-10 17:55 ` [PATCH bpf-next v4 01/10] bpf: Allow each subprog having stack size of 512 bytes Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 02/10] bpf: Mark each subprog with proper private stack modes Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 03/10] bpf, x86: Refactor func emit_prologue Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 04/10] bpf, x86: Create a helper for certain "reg <op>= imm" operations Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 05/10] bpf, x86: Add jit support for private stack Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 06/10] selftests/bpf: Add private stack tests Yonghong Song
2024-10-10 17:56 ` Yonghong Song [this message]
2024-10-10 20:28   ` [PATCH bpf-next v4 07/10] bpf: Support calling non-tailcall bpf prog Alexei Starovoitov
2024-10-11  4:12     ` Yonghong Song
2024-10-15 21:18       ` Tejun Heo
2024-10-15 21:35         ` Alexei Starovoitov
2024-10-10 17:56 ` [PATCH bpf-next v4 08/10] bpf, x86: Create two helpers for some arith operations Yonghong Song
2024-10-10 20:21   ` Alexei Starovoitov
2024-10-11  4:16     ` Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 09/10] bpf, x86: Jit support for nested bpf_prog_call Yonghong Song
2024-10-10 20:53   ` Alexei Starovoitov
2024-10-11  4:20     ` Yonghong Song
2024-10-11  4:29       ` Alexei Starovoitov
2024-10-11 15:38         ` Yonghong Song
2024-10-11 15:40           ` Alexei Starovoitov
2024-10-11 16:14             ` Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 10/10] selftests/bpf: Add tests for bpf_prog_call() Yonghong Song
2024-10-15 21:28 ` [PATCH bpf-next v4 00/10] bpf: Support private stack for bpf progs Tejun Heo
2024-10-15 21:39   ` Alexei Starovoitov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241010175628.1898648-1-yonghong.song@linux.dev \
    --to=yonghong.song@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kernel-team@fb.com \
    --cc=martin.lau@kernel.org \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox