From: Leon Hwang <hffilwlqm@gmail.com>
To: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: bpf <bpf@vger.kernel.org>, Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Andrii Nakryiko <andrii@kernel.org>,
"Fijalkowski, Maciej" <maciej.fijalkowski@intel.com>,
Jakub Sitnicki <jakub@cloudflare.com>,
Ilya Leoshkevich <iii@linux.ibm.com>,
Hengqi Chen <hengqi.chen@gmail.com>,
kernel-patches-bot@fb.com
Subject: Re: [PATCH bpf-next 2/4] bpf, x64: Fix tailcall hierarchy
Date: Thu, 15 Feb 2024 21:16:37 +0800 [thread overview]
Message-ID: <81607ab3-a7f5-4ad1-98c2-771c73bfb55c@gmail.com> (raw)
In-Reply-To: <CAADnVQLOswL3BY1s0B28wRZH1PU675S6_2=XknjZKNgyJ=yDxw@mail.gmail.com>
On 2024/2/15 07:16, Alexei Starovoitov wrote:
> On Tue, Feb 13, 2024 at 9:47 PM Leon Hwang <hffilwlqm@gmail.com> wrote:
>>
>>
>>
>> On 2024/1/5 12:15, Alexei Starovoitov wrote:
>>> On Thu, Jan 4, 2024 at 6:23 AM Leon Hwang <hffilwlqm@gmail.com> wrote:
>>>>
>>>>
>>>
>>> Other alternatives?
>>
>> I've finished the POC of an alternative, which passed all tailcall
>> selftests including these tailcall hierarchy ones.
>>
>> In this alternative, I use a new bpf_prog_run_ctx to wrap the original
>> ctx and the tcc_ptr, then get the tcc_ptr and recover the original ctx
>> in JIT.
>>
>> Then, to avoid breaking runtime with tailcall on other arch, I add an
>> arch-related check bpf_jit_supports_tail_call_cnt_ptr() to determine
>> whether to use bpf_prog_run_ctx.
>>
[SNIP]
>> +
>> + if (bpf_jit_supports_tail_call_cnt_ptr() && prog->jited)
>> + ret = dfunc(&run_ctx, prog->insnsi, prog->bpf_func);
>> + else
>> + ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
>
> This is no good either.
> We cannot introduce two extra run-time checks before calling every bpf prog.
> The solution must be overhead free for common cases.
>
> Can we switch to percpu tail_call_cnt instead of on stack and %rax tricks ?
>
Good idea to use percpu tail_call_cnt.
I did another POC to use percpu tail_call_cnt, which passed all tailcall
selftests too.
In this POC, to prepare tcc_ptr in the prologue of the x86 JIT, the
prologue calls bpf_tail_call_cnt_prepare() to get a pointer to the
percpu tail_call_cnt; the returned pointer is left in %rax.
Here's the diff:
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4065bdcc5b2a4..fc1df6a7d87c9 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -241,6 +241,8 @@ int bpf_arch_text_invalidate(void *dst, size_t len)
}
struct jit_context {
+ int prologue_tail_call_offset;
+
int cleanup_addr; /* Epilogue code offset */
/*
@@ -250,6 +252,8 @@ struct jit_context {
*/
int tail_call_direct_label;
int tail_call_indirect_label;
+
+ bool tail_call_reachable;
};
/* Maximum number of bytes emitted while JITing one eBPF insn */
@@ -259,7 +263,7 @@ struct jit_context {
/* Number of bytes emit_patch() needs to generate instructions */
#define X86_PATCH_SIZE 5
/* Number of bytes that will be skipped on tailcall */
-#define X86_TAIL_CALL_OFFSET (22 + ENDBR_INSN_SIZE)
+#define X86_TAIL_CALL_OFFSET (14 + ENDBR_INSN_SIZE)
static void push_r12(u8 **pprog)
{
@@ -389,6 +393,19 @@ static void emit_cfi(u8 **pprog, u32 hash)
*pprog = prog;
}
+DEFINE_PER_CPU(u32, bpf_tail_call_cnt);
+
+__attribute__((used))
+static u32 *bpf_tail_call_cnt_prepare(void)
+{
+ u32 *tcc_ptr = this_cpu_ptr(&bpf_tail_call_cnt);
+
+ /* Initialise tail_call_cnt. */
+ *tcc_ptr = 0;
+
+ return tcc_ptr;
+}
+
/*
* Emit x86-64 prologue code for BPF program.
* bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
@@ -396,7 +413,7 @@ static void emit_cfi(u8 **pprog, u32 hash)
*/
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
bool tail_call_reachable, bool is_subprog,
- bool is_exception_cb)
+ bool is_exception_cb, struct jit_context *ctx)
{
u8 *prog = *pprog;
@@ -406,21 +423,15 @@ static void emit_prologue(u8 **pprog, u32
stack_depth, bool ebpf_from_cbpf,
*/
emit_nops(&prog, X86_PATCH_SIZE);
if (!ebpf_from_cbpf) {
- if (tail_call_reachable && !is_subprog) {
- /* When it's the entry of the whole tailcall context,
- * zeroing rax means initialising tail_call_cnt.
- */
- EMIT2(0x31, 0xC0); /* xor eax, eax */
- EMIT1(0x50); /* push rax */
- /* Make rax as ptr that points to tail_call_cnt. */
- EMIT3(0x48, 0x89, 0xE0); /* mov rax, rsp */
- EMIT1_off32(0xE8, 2); /* call main prog */
- EMIT1(0x59); /* pop rcx, get rid of tail_call_cnt */
- EMIT1(0xC3); /* ret */
- } else {
- /* Keep the same instruction size. */
- emit_nops(&prog, 13);
- }
+ /* These 5 bytes of nops are reserved so that a call to
+ * bpf_tail_call_cnt_prepare can be patched in later.
+ *
+ * After calling bpf_tail_call_cnt_prepare, %rax will be
+ * the tail_call_cnt pointer that points to an initialised
+ * PER-CPU tail_call_cnt.
+ */
+ ctx->prologue_tail_call_offset = prog - *pprog;
+ emit_nops(&prog, X86_PATCH_SIZE);
}
/* Exception callback receives FP as third parameter */
if (is_exception_cb) {
@@ -583,6 +594,17 @@ static void emit_return(u8 **pprog, u8 *ip)
*pprog = prog;
}
+static void bpf_tail_call_prologue_fixup(u8 *image, struct bpf_prog *prog,
+ struct jit_context *ctx)
+{
+ bool ebpf_from_cbpf = bpf_prog_was_classic(prog);
+ u8 *ip = image + ctx->prologue_tail_call_offset;
+
+ if (!ebpf_from_cbpf && ctx->tail_call_reachable && !bpf_is_subprog(prog))
+ __bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL,
+ bpf_tail_call_cnt_prepare);
+}
+
/*
* Generate the following code:
*
@@ -1165,10 +1187,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int
*addrs, u8 *image, u8 *rw_image
/* tail call's presence in current prog implies it is reachable */
tail_call_reachable |= tail_call_seen;
+ ctx->tail_call_reachable = tail_call_reachable;
emit_prologue(&prog, bpf_prog->aux->stack_depth,
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
- bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
+ bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb,
+ ctx);
/* Exception callback will clobber callee regs for its own use, and
* restore the original callee regs from main prog's stack frame.
*/
@@ -3097,6 +3121,7 @@ struct bpf_prog *bpf_int_jit_compile(struct
bpf_prog *prog)
}
bpf_tail_call_direct_fixup(prog);
+ bpf_tail_call_prologue_fixup(image, prog, &ctx);
} else {
jit_data->addrs = addrs;
jit_data->ctx = ctx;
Thanks,
Leon
next prev parent reply other threads:[~2024-02-15 13:16 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-04 14:22 [PATCH bpf-next 0/4] bpf, x64: Fix tailcall hierarchy Leon Hwang
2024-01-04 14:22 ` [PATCH bpf-next 1/4] bpf, x64: Use emit_nops() to replace memcpy()'ing x86_nops[5] Leon Hwang
2024-01-04 14:22 ` [PATCH bpf-next 2/4] bpf, x64: Fix tailcall hierarchy Leon Hwang
2024-01-05 4:15 ` Alexei Starovoitov
2024-01-05 6:15 ` Leon Hwang
2024-01-05 17:43 ` Alexei Starovoitov
2024-01-06 2:38 ` Leon Hwang
2024-01-05 10:33 ` Leon Hwang
2024-01-05 17:47 ` Alexei Starovoitov
2024-01-06 2:33 ` Leon Hwang
2024-01-06 3:34 ` Alexei Starovoitov
2024-01-05 12:40 ` Jiri Olsa
2024-01-06 0:18 ` John Fastabend
2024-01-06 3:46 ` Alexei Starovoitov
2024-02-14 5:47 ` Leon Hwang
2024-02-14 11:25 ` Maciej Fijalkowski
2024-02-14 16:31 ` Leon Hwang
2024-02-14 23:16 ` Alexei Starovoitov
2024-02-15 13:16 ` Leon Hwang [this message]
2024-02-16 2:18 ` Alexei Starovoitov
2024-02-17 13:43 ` Leon Hwang
2024-02-20 5:13 ` Leon Hwang
2024-02-20 17:34 ` Alexei Starovoitov
2024-02-20 17:33 ` Alexei Starovoitov
2024-02-21 14:42 ` Leon Hwang
2024-01-04 14:22 ` [PATCH bpf-next 3/4] bpf, x64: Rename RESTORE_TAIL_CALL_CNT() to LOAD_TAIL_CALL_CNT_PTR() Leon Hwang
2024-01-04 14:22 ` [PATCH bpf-next 4/4] selftests/bpf: Add testcases for tailcall hierarchy fixing Leon Hwang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=81607ab3-a7f5-4ad1-98c2-771c73bfb55c@gmail.com \
--to=hffilwlqm@gmail.com \
--cc=alexei.starovoitov@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=hengqi.chen@gmail.com \
--cc=iii@linux.ibm.com \
--cc=jakub@cloudflare.com \
--cc=kernel-patches-bot@fb.com \
--cc=maciej.fijalkowski@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox