From: Leon Hwang <hffilwlqm@gmail.com>
To: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: bpf <bpf@vger.kernel.org>, Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Andrii Nakryiko <andrii@kernel.org>,
"Fijalkowski, Maciej" <maciej.fijalkowski@intel.com>,
Jakub Sitnicki <jakub@cloudflare.com>,
Ilya Leoshkevich <iii@linux.ibm.com>,
Hengqi Chen <hengqi.chen@gmail.com>,
kernel-patches-bot@fb.com
Subject: Re: [PATCH bpf-next 2/4] bpf, x64: Fix tailcall hierarchy
Date: Wed, 14 Feb 2024 13:47:45 +0800 [thread overview]
Message-ID: <7af3f9c6-d25a-4ca5-9e15-c1699adcf7ab@gmail.com> (raw)
In-Reply-To: <CAADnVQJ1szry9P00wweVDu4d0AQoM_49qT-_ueirvggAiCZrpw@mail.gmail.com>
On 2024/1/5 12:15, Alexei Starovoitov wrote:
> On Thu, Jan 4, 2024 at 6:23 AM Leon Hwang <hffilwlqm@gmail.com> wrote:
>>
>>
>
> Other alternatives?
I've finished a POC of an alternative, which passes all tailcall
selftests, including the tailcall hierarchy ones.
In this alternative, I use a new bpf_prog_run_ctx to wrap the original
ctx and the tcc_ptr, then get the tcc_ptr and recover the original ctx
in JIT.
Then, to avoid breaking tailcalls at runtime on other architectures, I add
an arch-specific check, bpf_jit_supports_tail_call_cnt_ptr(), to determine
whether to use bpf_prog_run_ctx.
Here's the diff:
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4065bdcc5b2a4..56cea2676863e 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -259,7 +259,7 @@ struct jit_context {
/* Number of bytes emit_patch() needs to generate instructions */
#define X86_PATCH_SIZE 5
/* Number of bytes that will be skipped on tailcall */
-#define X86_TAIL_CALL_OFFSET (22 + ENDBR_INSN_SIZE)
+#define X86_TAIL_CALL_OFFSET (16 + ENDBR_INSN_SIZE)
static void push_r12(u8 **pprog)
{
@@ -407,21 +407,19 @@ static void emit_prologue(u8 **pprog, u32
stack_depth, bool ebpf_from_cbpf,
emit_nops(&prog, X86_PATCH_SIZE);
if (!ebpf_from_cbpf) {
if (tail_call_reachable && !is_subprog) {
- /* When it's the entry of the whole tailcall context,
- * zeroing rax means initialising tail_call_cnt.
- */
- EMIT2(0x31, 0xC0); /* xor eax, eax */
- EMIT1(0x50); /* push rax */
- /* Make rax as ptr that points to tail_call_cnt. */
- EMIT3(0x48, 0x89, 0xE0); /* mov rax, rsp */
- EMIT1_off32(0xE8, 2); /* call main prog */
- EMIT1(0x59); /* pop rcx, get rid of tail_call_cnt */
- EMIT1(0xC3); /* ret */
+ /* Make rax as tcc_ptr. */
+ EMIT4(0x48, 0x8B, 0x47, 0x08); /* mov rax, qword ptr [rdi + 8] */
} else {
- /* Keep the same instruction size. */
- emit_nops(&prog, 13);
+ /* Keep the same instruction layout. */
+ emit_nops(&prog, 4);
}
}
+ if (!is_subprog)
+ /* Recover the original ctx. */
+ EMIT3(0x48, 0x8B, 0x3F); /* mov rdi, qword ptr [rdi] */
+ else
+ /* Keep the same instruction layout. */
+ emit_nops(&prog, 3);
/* Exception callback receives FP as third parameter */
if (is_exception_cb) {
EMIT3(0x48, 0x89, 0xF4); /* mov rsp, rsi */
@@ -3152,6 +3150,12 @@ bool bpf_jit_supports_subprog_tailcalls(void)
return true;
}
+/* Indicate the JIT backend supports tail call count pointer in
tailcall context. */
+bool bpf_jit_supports_tail_call_cnt_ptr(void)
+{
+ return true;
+}
+
void bpf_jit_free(struct bpf_prog *prog)
{
if (prog->jited) {
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7671530d6e4e0..fea4326c27d31 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1919,6 +1919,11 @@ int bpf_prog_array_copy(struct bpf_prog_array
*old_array,
u64 bpf_cookie,
struct bpf_prog_array **new_array);
+struct bpf_prog_run_ctx {
+ const void *ctx;
+ u32 *tail_call_cnt;
+};
+
struct bpf_run_ctx {};
struct bpf_cg_run_ctx {
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 68fb6c8142fec..c1c035c44b4ab 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -629,6 +629,10 @@ typedef unsigned int (*bpf_dispatcher_fn)(const
void *ctx,
unsigned int (*bpf_func)(const void *,
const struct bpf_insn *));
+static __always_inline u32 __bpf_prog_run_dfunc(const struct bpf_prog
*prog,
+ const void *ctx,
+ bpf_dispatcher_fn dfunc);
+
static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
const void *ctx,
bpf_dispatcher_fn dfunc)
@@ -641,14 +645,14 @@ static __always_inline u32 __bpf_prog_run(const
struct bpf_prog *prog,
u64 start = sched_clock();
unsigned long flags;
- ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+ ret = __bpf_prog_run_dfunc(prog, ctx, dfunc);
stats = this_cpu_ptr(prog->stats);
flags = u64_stats_update_begin_irqsave(&stats->syncp);
u64_stats_inc(&stats->cnt);
u64_stats_add(&stats->nsecs, sched_clock() - start);
u64_stats_update_end_irqrestore(&stats->syncp, flags);
} else {
- ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+ ret = __bpf_prog_run_dfunc(prog, ctx, dfunc);
}
return ret;
}
@@ -952,12 +956,31 @@ struct bpf_prog *bpf_int_jit_compile(struct
bpf_prog *prog);
void bpf_jit_compile(struct bpf_prog *prog);
bool bpf_jit_needs_zext(void);
bool bpf_jit_supports_subprog_tailcalls(void);
+bool bpf_jit_supports_tail_call_cnt_ptr(void);
bool bpf_jit_supports_kfunc_call(void);
bool bpf_jit_supports_far_kfunc_call(void);
bool bpf_jit_supports_exceptions(void);
void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64
sp, u64 bp), void *cookie);
bool bpf_helper_changes_pkt_data(void *func);
+static __always_inline u32 __bpf_prog_run_dfunc(const struct bpf_prog
*prog,
+ const void *ctx,
+ bpf_dispatcher_fn dfunc)
+{
+ struct bpf_prog_run_ctx run_ctx = {};
+ u32 ret, tcc = 0;
+
+ run_ctx.ctx = ctx;
+ run_ctx.tail_call_cnt = &tcc;
+
+ if (bpf_jit_supports_tail_call_cnt_ptr() && prog->jited)
+ ret = dfunc(&run_ctx, prog->insnsi, prog->bpf_func);
+ else
+ ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+
+ return ret;
+}
+
static inline bool bpf_dump_raw_ok(const struct cred *cred)
{
/* Reconstruction of call-sites is dependent on kallsyms,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ea6843be2616c..80b20e99456f0 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2915,6 +2915,15 @@ bool __weak bpf_jit_supports_subprog_tailcalls(void)
return false;
}
+/* Return TRUE if the JIT backend supports tail call count pointer in
tailcall
+ * context.
+ */
+bool __weak bpf_jit_supports_tail_call_cnt_ptr(void)
+{
+ return false;
+}
+EXPORT_SYMBOL(bpf_jit_supports_tail_call_cnt_ptr);
+
bool __weak bpf_jit_supports_kfunc_call(void)
{
return false;
Why use EXPORT_SYMBOL here?
It's to avoid the following build errors:
ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr"
[net/sched/act_bpf.ko] undefined!
ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr"
[net/sched/cls_bpf.ko] undefined!
ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr"
[net/netfilter/xt_bpf.ko] undefined!
ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr" [net/ipv6/ipv6.ko]
undefined!
I'm not familiar with this build error. Is it OK to use EXPORT_SYMBOL
here?
Thanks,
Leon
next prev parent reply other threads:[~2024-02-14 5:47 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-04 14:22 [PATCH bpf-next 0/4] bpf, x64: Fix tailcall hierarchy Leon Hwang
2024-01-04 14:22 ` [PATCH bpf-next 1/4] bpf, x64: Use emit_nops() to replace memcpy()'ing x86_nops[5] Leon Hwang
2024-01-04 14:22 ` [PATCH bpf-next 2/4] bpf, x64: Fix tailcall hierarchy Leon Hwang
2024-01-05 4:15 ` Alexei Starovoitov
2024-01-05 6:15 ` Leon Hwang
2024-01-05 17:43 ` Alexei Starovoitov
2024-01-06 2:38 ` Leon Hwang
2024-01-05 10:33 ` Leon Hwang
2024-01-05 17:47 ` Alexei Starovoitov
2024-01-06 2:33 ` Leon Hwang
2024-01-06 3:34 ` Alexei Starovoitov
2024-01-05 12:40 ` Jiri Olsa
2024-01-06 0:18 ` John Fastabend
2024-01-06 3:46 ` Alexei Starovoitov
2024-02-14 5:47 ` Leon Hwang [this message]
2024-02-14 11:25 ` Maciej Fijalkowski
2024-02-14 16:31 ` Leon Hwang
2024-02-14 23:16 ` Alexei Starovoitov
2024-02-15 13:16 ` Leon Hwang
2024-02-16 2:18 ` Alexei Starovoitov
2024-02-17 13:43 ` Leon Hwang
2024-02-20 5:13 ` Leon Hwang
2024-02-20 17:34 ` Alexei Starovoitov
2024-02-20 17:33 ` Alexei Starovoitov
2024-02-21 14:42 ` Leon Hwang
2024-01-04 14:22 ` [PATCH bpf-next 3/4] bpf, x64: Rename RESTORE_TAIL_CALL_CNT() to LOAD_TAIL_CALL_CNT_PTR() Leon Hwang
2024-01-04 14:22 ` [PATCH bpf-next 4/4] selftests/bpf: Add testcases for tailcall hierarchy fixing Leon Hwang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7af3f9c6-d25a-4ca5-9e15-c1699adcf7ab@gmail.com \
--to=hffilwlqm@gmail.com \
--cc=alexei.starovoitov@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=hengqi.chen@gmail.com \
--cc=iii@linux.ibm.com \
--cc=jakub@cloudflare.com \
--cc=kernel-patches-bot@fb.com \
--cc=maciej.fijalkowski@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox