From: Yonghong Song <yonghong.song@linux.dev>
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
kernel-team@fb.com, Martin KaFai Lau <martin.lau@kernel.org>
Subject: [PATCH bpf-next v3 2/5] bpf: Collect stack depth information
Date: Thu, 26 Sep 2024 16:45:16 -0700 [thread overview]
Message-ID: <20240926234516.1770154-1-yonghong.song@linux.dev> (raw)
In-Reply-To: <20240926234506.1769256-1-yonghong.song@linux.dev>
Private stack memory allocation is based on call subtrees. For example,
  main_prog      // stack size 50
    subprog1     // stack size 50
      subprog2   // stack size 50
    subprog3     // stack size 50
Overall allocation size should be 150 bytes (stacks from main_prog,
subprog1 and subprog2).
To simplify the JIT, the root of a subtree is either the main prog
or a callback func. For example,
  main_prog
    subprog1     // callback subprog10
    ...
  subprog10
    subprog11
In this case, two subtrees exist. One root is main_prog and the other
root is subprog10.
The private stack is used only if
- the subtree stack size is at least 128 bytes and
smaller than or equal to U16_MAX, and
- the prog type is kprobe, tracepoint, perf_event, raw_tracepoint
or tracing, and
- jit supports private stack, and
- there is no tail call in the main prog or in any subprog.
The restriction of no tail call is due to the following two reasons:
- to avoid potentially large memory consumption. Currently the maximum
tail call count is MAX_TAIL_CALL_CNT=33. Since private stack memory is
allocated per CPU, supporting the current MAX_TAIL_CALL_CNT would
consume a very large amount of memory.
- if the tail call is in a callback function, it is not easy to pass
the tail call count to the callback function, and the tail call count
is needed to find the proper offset into the private stack.
So to avoid complexity, private stack does not support tail call
for now.
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
include/linux/bpf.h | 3 +-
include/linux/bpf_verifier.h | 3 ++
kernel/bpf/verifier.c | 81 ++++++++++++++++++++++++++++++++++++
3 files changed, 86 insertions(+), 1 deletion(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 62909fbe9e48..156b9516d9f6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1566,7 +1566,8 @@ struct bpf_prog {
call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
call_get_func_ip:1, /* Do we call get_func_ip() */
tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */
- sleepable:1; /* BPF program is sleepable */
+ sleepable:1, /* BPF program is sleepable */
+ pstack_eligible:1; /* Candidate for private stacks */
enum bpf_prog_type type; /* Type of BPF program */
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 4513372c5bc8..63df10f4129e 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -659,6 +659,8 @@ struct bpf_subprog_info {
* are used for bpf_fastcall spills and fills.
*/
s16 fastcall_stack_off;
+ u16 subtree_stack_depth;
+ u16 subtree_top_idx;
bool has_tail_call: 1;
bool tail_call_reachable: 1;
bool has_ld_abs: 1;
@@ -668,6 +670,7 @@ struct bpf_subprog_info {
bool args_cached: 1;
/* true if bpf_fastcall stack region is used by functions that can't be inlined */
bool keep_fastcall_stack: 1;
+ bool pstack_eligible:1;
u8 arg_cnt;
struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS];
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 97700e32e085..69e17cb22037 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -194,6 +194,8 @@ struct bpf_verifier_stack_elem {
#define BPF_GLOBAL_PERCPU_MA_MAX_SIZE 512
+#define BPF_PSTACK_MIN_SUBTREE_SIZE 128
+
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
@@ -6192,6 +6194,82 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
return 0;
}
+static int calc_private_stack_alloc_subprog(struct bpf_verifier_env *env, int idx)
+{
+ struct bpf_subprog_info *subprog = env->subprog_info;
+ struct bpf_insn *insn = env->prog->insnsi;
+ int depth = 0, frame = 0, i, subprog_end;
+ int ret_insn[MAX_CALL_FRAMES];
+ int ret_prog[MAX_CALL_FRAMES];
+ int ps_eligible = 0;
+ int orig_idx = idx;
+ /*
+ * Walk the call subtree rooted at subprog @idx without recursion:
+ * ret_insn[]/ret_prog[] hold the instruction and subprog to resume
+ * in for each call frame currently being followed.
+ *
+ * Every subprog entered gets subtree_top_idx set to the root, and the
+ * root's subtree_stack_depth is raised to the largest accumulated
+ * (rounded-up) stack depth seen along any call chain.
+ *
+ * Returns -EACCES if an accumulated depth exceeds U16_MAX, otherwise
+ * non-zero if some call chain reached BPF_PSTACK_MIN_SUBTREE_SIZE
+ * (the root is then marked pstack_eligible) and 0 if none did.
+ */
+ subprog[idx].subtree_top_idx = idx;
+ i = subprog[idx].start;
+ /* entered a subprog (the root or a callee): add its stack to the chain */
+process_func:
+ depth += round_up_stack_depth(env, subprog[idx].stack_depth);
+ if (depth > U16_MAX)
+ return -EACCES;
+ /* the threshold test is inclusive and latches on the first hit */
+ if (!ps_eligible && depth >= BPF_PSTACK_MIN_SUBTREE_SIZE) {
+ subprog[orig_idx].pstack_eligible = true;
+ ps_eligible = true;
+ }
+ subprog[orig_idx].subtree_stack_depth =
+ max_t(u16, subprog[orig_idx].subtree_stack_depth, depth);
+ /*
+ * Scan the rest of the current subprog for calls. When the scan runs
+ * off the end, the walk is done if no frame is pending; otherwise this
+ * subprog's depth is dropped and scanning resumes in the caller.
+ * NOTE(review): subprog[idx + 1] is read even for the last subprog -
+ * presumably subprog_info has a terminating entry; confirm.
+ */
+continue_func:
+ subprog_end = subprog[idx + 1].start;
+ for (; i < subprog_end; i++) {
+ int next_insn, sidx;
+
+ if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
+ continue;
+ /* remember insn and function to return to
+ * NOTE(review): frame is not checked against MAX_CALL_FRAMES
+ * here - presumably an earlier pass bounds call depth; confirm.
+ */
+ ret_insn[frame] = i + 1;
+ ret_prog[frame] = idx;
+
+ /* find the callee; a callback is skipped when this insn only
+ * references it (not a pseudo call), but is followed when it is
+ * called directly
+ */
+ next_insn = i + insn[i].imm + 1;
+ sidx = find_subprog(env, next_insn);
+ if (subprog[sidx].is_cb) {
+ if (!bpf_pseudo_call(insn + i))
+ continue;
+ }
+ i = next_insn;
+ idx = sidx;
+ subprog[idx].subtree_top_idx = orig_idx;
+ /* push the frame saved above and descend into the callee */
+ frame++;
+ goto process_func;
+ }
+ if (frame == 0)
+ return ps_eligible;
+ depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
+ frame--;
+ i = ret_insn[frame];
+ idx = ret_prog[frame];
+ goto continue_func;
+}
+
+static int calc_private_stack_alloc_size(struct bpf_verifier_env *env)
+{
+ struct bpf_subprog_info *si = env->subprog_info;
+ int ret;
+ /*
+ * Run the subtree walk once per subtree root: subprog 0 and every
+ * subprog flagged is_cb. A negative result from any walk is returned
+ * as is; a non-zero result marks the whole program pstack_eligible.
+ * Returns 0 when all walks complete.
+ */
+ for (int i = 0; i < env->subprog_cnt; i++) {
+ if (!i || si[i].is_cb) {
+ ret = calc_private_stack_alloc_subprog(env, i);
+ if (ret < 0)
+ return ret;
+ if (ret)
+ env->prog->pstack_eligible = true;
+ }
+ }
+ return 0;
+}
+
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
static int get_callee_stack_depth(struct bpf_verifier_env *env,
const struct bpf_insn *insn, int idx)
@@ -22502,6 +22580,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
: false;
}
+ if (ret == 0 && env->prog->aux->pstack_enabled)
+ ret = calc_private_stack_alloc_size(env);
+
if (ret == 0)
ret = fixup_call_args(env);
--
2.43.5
next prev parent reply other threads:[~2024-09-26 23:45 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-26 23:45 [PATCH bpf-next v3 0/5] bpf: Support private stack for bpf progs Yonghong Song
2024-09-26 23:45 ` [PATCH bpf-next v3 1/5] bpf: Allow each subprog having stack size of 512 bytes Yonghong Song
2024-09-26 23:45 ` Yonghong Song [this message]
2024-09-30 14:42 ` [PATCH bpf-next v3 2/5] bpf: Collect stack depth information Alexei Starovoitov
2024-09-30 16:23 ` Yonghong Song
2024-09-26 23:45 ` [PATCH bpf-next v3 3/5] bpf: Mark each subprog with proper pstack states Yonghong Song
2024-09-30 14:49 ` Alexei Starovoitov
2024-09-30 16:26 ` Yonghong Song
2024-09-26 23:45 ` [PATCH bpf-next v3 4/5] bpf, x86: Add jit support for private stack Yonghong Song
2024-09-27 4:58 ` Leon Hwang
2024-09-27 15:24 ` Yonghong Song
2024-09-29 8:31 ` kernel test robot
2024-09-30 16:29 ` Yonghong Song
2024-09-29 13:02 ` kernel test robot
2024-09-30 16:31 ` Yonghong Song
2024-09-29 13:34 ` kernel test robot
2024-09-30 15:03 ` Alexei Starovoitov
2024-09-30 16:33 ` Yonghong Song
2024-10-01 4:31 ` Kumar Kartikeya Dwivedi
2024-10-01 4:37 ` Kumar Kartikeya Dwivedi
2024-10-01 18:49 ` Alexei Starovoitov
2024-10-01 19:53 ` yet another approach Was: " Alexei Starovoitov
2024-10-01 20:50 ` Kumar Kartikeya Dwivedi
2024-10-01 21:28 ` Alexei Starovoitov
2024-10-02 0:22 ` Kumar Kartikeya Dwivedi
2024-10-02 1:26 ` Alexei Starovoitov
2024-10-02 2:16 ` Kumar Kartikeya Dwivedi
2024-10-02 6:28 ` Yonghong Song
2024-10-02 6:48 ` Yonghong Song
2024-10-03 6:17 ` Yonghong Song
2024-10-03 13:39 ` Kumar Kartikeya Dwivedi
2024-10-03 17:35 ` Alexei Starovoitov
2024-10-03 18:53 ` Yonghong Song
2024-10-03 20:44 ` Yonghong Song
2024-10-03 20:47 ` Kumar Kartikeya Dwivedi
2024-10-03 20:54 ` Yonghong Song
2024-10-03 22:32 ` Alexei Starovoitov
2024-10-04 5:22 ` Yonghong Song
2024-10-04 19:27 ` Yonghong Song
2024-10-04 19:52 ` Alexei Starovoitov
2024-10-05 2:03 ` Yonghong Song
2024-10-08 22:10 ` Alexei Starovoitov
2024-10-09 2:06 ` Alexei Starovoitov
2024-10-09 6:31 ` Yonghong Song
2024-10-09 14:56 ` Alexei Starovoitov
2024-10-09 15:56 ` Yonghong Song
2024-10-09 16:36 ` Kumar Kartikeya Dwivedi
2024-10-09 16:38 ` Kumar Kartikeya Dwivedi
2024-10-09 17:37 ` Kumar Kartikeya Dwivedi
2024-10-09 6:12 ` Yonghong Song
2024-09-26 23:45 ` [PATCH bpf-next v3 5/5] selftests/bpf: Add private stack tests Yonghong Song
2024-09-30 13:40 ` Jiri Olsa
2024-09-30 15:05 ` Alexei Starovoitov
2024-09-30 16:35 ` Yonghong Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240926234516.1770154-1-yonghong.song@linux.dev \
--to=yonghong.song@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=kernel-team@fb.com \
--cc=martin.lau@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox