BPF List
 help / color / mirror / Atom feed
From: Yonghong Song <yonghong.song@linux.dev>
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	kernel-team@fb.com, Martin KaFai Lau <martin.lau@kernel.org>,
	Tejun Heo <tj@kernel.org>
Subject: [PATCH bpf-next v4 01/10] bpf: Allow each subprog having stack size of 512 bytes
Date: Thu, 10 Oct 2024 10:55:57 -0700	[thread overview]
Message-ID: <20241010175557.1896301-1-yonghong.song@linux.dev> (raw)
In-Reply-To: <20241010175552.1895980-1-yonghong.song@linux.dev>

With private stack support, each subprog can have stack with up to 512
bytes. The limit of 512 bytes per subprog is kept to avoid increasing
verifier complexity since greater than 512 bytes will cause big verifier
change and increase memory consumption and verification time.

If private stack is supported, for a bpf prog, esp. when it has
subprogs, private stack will be allocated for the main prog
and for each callback subprog. For example,
  main_prog
    subprog1
      calling helper
        subprog10 (callback func)
          subprog11
    subprog2
      calling helper
        subprog10 (callback func)
          subprog11

Separate private allocations for main_prog and callback_fn subprog10
will make things easier since the helper function uses the kernel stack.

Additional subprog info is also collected for later to
allocate private stack for main prog and each callback functions.

Note that if any tail_call is called in the prog (including all subprogs),
then private stack is not used.

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 include/linux/bpf.h          |  1 +
 include/linux/bpf_verifier.h |  3 ++
 include/linux/filter.h       |  1 +
 kernel/bpf/core.c            |  5 ++
 kernel/bpf/verifier.c        | 94 +++++++++++++++++++++++++++++++-----
 5 files changed, 91 insertions(+), 13 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 19d8ca8ac960..9ef9133e0470 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1483,6 +1483,7 @@ struct bpf_prog_aux {
 	bool xdp_has_frags;
 	bool exception_cb;
 	bool exception_boundary;
+	bool priv_stack_eligible;
 	struct bpf_arena *arena;
 	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
 	const struct btf_type *attach_func_proto;
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 4513372c5bc8..bcfe868e3801 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -659,6 +659,8 @@ struct bpf_subprog_info {
 	 * are used for bpf_fastcall spills and fills.
 	 */
 	s16 fastcall_stack_off;
+	u16 subtree_stack_depth;
+	u16 subtree_top_idx;
 	bool has_tail_call: 1;
 	bool tail_call_reachable: 1;
 	bool has_ld_abs: 1;
@@ -668,6 +670,7 @@ struct bpf_subprog_info {
 	bool args_cached: 1;
 	/* true if bpf_fastcall stack region is used by functions that can't be inlined */
 	bool keep_fastcall_stack: 1;
+	bool priv_stack_eligible: 1;
 
 	u8 arg_cnt;
 	struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS];
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7d7578a8eac1..3a21947f2fd4 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1119,6 +1119,7 @@ bool bpf_jit_supports_exceptions(void);
 bool bpf_jit_supports_ptr_xchg(void);
 bool bpf_jit_supports_arena(void);
 bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena);
+bool bpf_jit_supports_private_stack(void);
 u64 bpf_arch_uaddress_limit(void);
 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
 bool bpf_helper_changes_pkt_data(void *func);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 5e77c58e0601..ba088b58746f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -3044,6 +3044,11 @@ bool __weak bpf_jit_supports_exceptions(void)
 	return false;
 }
 
+bool __weak bpf_jit_supports_private_stack(void)
+{
+	return false;
+}
+
 void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
 {
 }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7d9b38ffd220..3972606f97d2 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -194,6 +194,8 @@ struct bpf_verifier_stack_elem {
 
 #define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  512
 
+#define BPF_PRIV_STACK_MIN_SUBTREE_SIZE	128
+
 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
 static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
@@ -5982,6 +5984,41 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 					   strict);
 }
 
+static bool bpf_enable_private_stack(struct bpf_prog *prog)
+{
+	if (!bpf_jit_supports_private_stack())
+		return false;
+
+	switch (prog->aux->prog->type) {
+	case BPF_PROG_TYPE_KPROBE:
+	case BPF_PROG_TYPE_TRACEPOINT:
+	case BPF_PROG_TYPE_PERF_EVENT:
+	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+		return true;
+	case BPF_PROG_TYPE_TRACING:
+		if (prog->expected_attach_type != BPF_TRACE_ITER)
+			return true;
+		fallthrough;
+	default:
+		return false;
+	}
+}
+
+static bool is_priv_stack_supported(struct bpf_verifier_env *env)
+{
+	struct bpf_subprog_info *si = env->subprog_info;
+	bool has_tail_call = false;
+
+	for (int i = 0; i < env->subprog_cnt; i++) {
+		if (si[i].has_tail_call) {
+			has_tail_call = true;
+			break;
+		}
+	}
+
+	return !has_tail_call && bpf_enable_private_stack(env->prog);
+}
+
 static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
 {
 	if (env->prog->jit_requested)
@@ -5999,16 +6036,21 @@ static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
  * Since recursion is prevented by check_cfg() this algorithm
  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
  */
-static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
+static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
+					 bool check_priv_stack, bool priv_stack_supported)
 {
 	struct bpf_subprog_info *subprog = env->subprog_info;
 	struct bpf_insn *insn = env->prog->insnsi;
 	int depth = 0, frame = 0, i, subprog_end;
 	bool tail_call_reachable = false;
+	bool priv_stack_eligible = false;
 	int ret_insn[MAX_CALL_FRAMES];
 	int ret_prog[MAX_CALL_FRAMES];
-	int j;
+	int j, subprog_stack_depth;
+	int orig_idx = idx;
 
+	if (check_priv_stack)
+		subprog[idx].subtree_top_idx = idx;
 	i = subprog[idx].start;
 process_func:
 	/* protect against potential stack overflow that might happen when
@@ -6030,18 +6072,33 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
 	 * tailcall will unwind the current stack frame but it will not get rid
 	 * of caller's stack as shown on the example above.
 	 */
-	if (idx && subprog[idx].has_tail_call && depth >= 256) {
+	if (!check_priv_stack && idx && subprog[idx].has_tail_call && depth >= 256) {
 		verbose(env,
 			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
 			depth);
 		return -EACCES;
 	}
-	depth += round_up_stack_depth(env, subprog[idx].stack_depth);
-	if (depth > MAX_BPF_STACK) {
+	subprog_stack_depth = round_up_stack_depth(env, subprog[idx].stack_depth);
+	depth += subprog_stack_depth;
+	if (!check_priv_stack && !priv_stack_supported && depth > MAX_BPF_STACK) {
 		verbose(env, "combined stack size of %d calls is %d. Too large\n",
 			frame + 1, depth);
 		return -EACCES;
 	}
+	if (check_priv_stack) {
+		if (subprog_stack_depth > MAX_BPF_STACK) {
+			verbose(env, "stack size of subprog %d is %d. Too large\n",
+				idx, subprog_stack_depth);
+			return -EACCES;
+		}
+
+		if (!priv_stack_eligible && depth >= BPF_PRIV_STACK_MIN_SUBTREE_SIZE) {
+			subprog[orig_idx].priv_stack_eligible = true;
+			env->prog->aux->priv_stack_eligible = priv_stack_eligible = true;
+		}
+		subprog[orig_idx].subtree_stack_depth =
+			max_t(u16, subprog[orig_idx].subtree_stack_depth, depth);
+	}
 continue_func:
 	subprog_end = subprog[idx + 1].start;
 	for (; i < subprog_end; i++) {
@@ -6097,8 +6154,10 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
 		}
 		i = next_insn;
 		idx = sidx;
+		if (check_priv_stack)
+			subprog[idx].subtree_top_idx = orig_idx;
 
-		if (subprog[idx].has_tail_call)
+		if (!check_priv_stack && subprog[idx].has_tail_call)
 			tail_call_reachable = true;
 
 		frame++;
@@ -6122,7 +6181,7 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
 			}
 			subprog[ret_prog[j]].tail_call_reachable = true;
 		}
-	if (subprog[0].tail_call_reachable)
+	if (!check_priv_stack && subprog[0].tail_call_reachable)
 		env->prog->aux->tail_call_reachable = true;
 
 	/* end of for() loop means the last insn of the 'subprog'
@@ -6137,14 +6196,18 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
 	goto continue_func;
 }
 
-static int check_max_stack_depth(struct bpf_verifier_env *env)
+static int check_max_stack_depth(struct bpf_verifier_env *env, bool check_priv_stack,
+				 bool priv_stack_supported)
 {
 	struct bpf_subprog_info *si = env->subprog_info;
+	bool check_subprog;
 	int ret;
 
 	for (int i = 0; i < env->subprog_cnt; i++) {
-		if (!i || si[i].is_async_cb) {
-			ret = check_max_stack_depth_subprog(env, i);
+		check_subprog = !i || (check_priv_stack ? si[i].is_cb : si[i].is_async_cb);
+		if (check_subprog) {
+			ret = check_max_stack_depth_subprog(env, i, check_priv_stack,
+							    priv_stack_supported);
 			if (ret < 0)
 				return ret;
 		}
@@ -22298,7 +22361,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	struct bpf_verifier_env *env;
 	int i, len, ret = -EINVAL, err;
 	u32 log_true_size;
-	bool is_priv;
+	bool is_priv, priv_stack_supported = false;
 
 	/* no program is valid */
 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
@@ -22425,8 +22488,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	if (ret == 0)
 		ret = remove_fastcall_spills_fills(env);
 
-	if (ret == 0)
-		ret = check_max_stack_depth(env);
+	if (ret == 0) {
+		priv_stack_supported = is_priv_stack_supported(env);
+		ret = check_max_stack_depth(env, false, priv_stack_supported);
+	}
 
 	/* instruction rewrites happen after this point */
 	if (ret == 0)
@@ -22460,6 +22525,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 								     : false;
 	}
 
+	if (ret == 0 && priv_stack_supported)
+		ret = check_max_stack_depth(env, true, true);
+
 	if (ret == 0)
 		ret = fixup_call_args(env);
 
-- 
2.43.5


  reply	other threads:[~2024-10-10 17:56 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-10 17:55 [PATCH bpf-next v4 00/10] bpf: Support private stack for bpf progs Yonghong Song
2024-10-10 17:55 ` Yonghong Song [this message]
2024-10-10 17:56 ` [PATCH bpf-next v4 02/10] bpf: Mark each subprog with proper private stack modes Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 03/10] bpf, x86: Refactor func emit_prologue Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 04/10] bpf, x86: Create a helper for certain "reg <op>= imm" operations Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 05/10] bpf, x86: Add jit support for private stack Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 06/10] selftests/bpf: Add private stack tests Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 07/10] bpf: Support calling non-tailcall bpf prog Yonghong Song
2024-10-10 20:28   ` Alexei Starovoitov
2024-10-11  4:12     ` Yonghong Song
2024-10-15 21:18       ` Tejun Heo
2024-10-15 21:35         ` Alexei Starovoitov
2024-10-10 17:56 ` [PATCH bpf-next v4 08/10] bpf, x86: Create two helpers for some arith operations Yonghong Song
2024-10-10 20:21   ` Alexei Starovoitov
2024-10-11  4:16     ` Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 09/10] bpf, x86: Jit support for nested bpf_prog_call Yonghong Song
2024-10-10 20:53   ` Alexei Starovoitov
2024-10-11  4:20     ` Yonghong Song
2024-10-11  4:29       ` Alexei Starovoitov
2024-10-11 15:38         ` Yonghong Song
2024-10-11 15:40           ` Alexei Starovoitov
2024-10-11 16:14             ` Yonghong Song
2024-10-10 17:56 ` [PATCH bpf-next v4 10/10] selftests/bpf: Add tests for bpf_prog_call() Yonghong Song
2024-10-15 21:28 ` [PATCH bpf-next v4 00/10] bpf: Support private stack for bpf progs Tejun Heo
2024-10-15 21:39   ` Alexei Starovoitov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241010175557.1896301-1-yonghong.song@linux.dev \
    --to=yonghong.song@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kernel-team@fb.com \
    --cc=martin.lau@kernel.org \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox