Re: [PATCH bpf-next 2/4] bpf, x64: Fix tailcall hierarchy

BPF List
 help / color / mirror / Atom feed

From: Leon Hwang <hffilwlqm@gmail.com>
To: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: bpf <bpf@vger.kernel.org>, Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Andrii Nakryiko <andrii@kernel.org>,
	"Fijalkowski, Maciej" <maciej.fijalkowski@intel.com>,
	Jakub Sitnicki <jakub@cloudflare.com>,
	Ilya Leoshkevich <iii@linux.ibm.com>,
	Hengqi Chen <hengqi.chen@gmail.com>,
	kernel-patches-bot@fb.com
Subject: Re: [PATCH bpf-next 2/4] bpf, x64: Fix tailcall hierarchy
Date: Wed, 14 Feb 2024 13:47:45 +0800	[thread overview]
Message-ID: <7af3f9c6-d25a-4ca5-9e15-c1699adcf7ab@gmail.com> (raw)
In-Reply-To: <CAADnVQJ1szry9P00wweVDu4d0AQoM_49qT-_ueirvggAiCZrpw@mail.gmail.com>



On 2024/1/5 12:15, Alexei Starovoitov wrote:
> On Thu, Jan 4, 2024 at 6:23 AM Leon Hwang <hffilwlqm@gmail.com> wrote:
>>
>>
> 
> Other alternatives?

I've finished a POC of an alternative, which passes all tailcall
selftests, including the tailcall hierarchy ones.

In this alternative, I use a new struct bpf_prog_run_ctx to wrap the
original ctx and the tcc_ptr. The JIT-emitted prologue then loads the
tcc_ptr from it and recovers the original ctx.

Then, to avoid breaking tail calls at runtime on other architectures,
I add an arch-specific check, bpf_jit_supports_tail_call_cnt_ptr(), to
determine whether to use bpf_prog_run_ctx.

Here's the diff:

 diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4065bdcc5b2a4..56cea2676863e 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -259,7 +259,7 @@ struct jit_context {
 /* Number of bytes emit_patch() needs to generate instructions */
 #define X86_PATCH_SIZE		5
 /* Number of bytes that will be skipped on tailcall */
-#define X86_TAIL_CALL_OFFSET	(22 + ENDBR_INSN_SIZE)
+#define X86_TAIL_CALL_OFFSET	(16 + ENDBR_INSN_SIZE)

 static void push_r12(u8 **pprog)
 {
@@ -407,21 +407,19 @@ static void emit_prologue(u8 **pprog, u32
stack_depth, bool ebpf_from_cbpf,
 	emit_nops(&prog, X86_PATCH_SIZE);
 	if (!ebpf_from_cbpf) {
 		if (tail_call_reachable && !is_subprog) {
-			/* When it's the entry of the whole tailcall context,
-			 * zeroing rax means initialising tail_call_cnt.
-			 */
-			EMIT2(0x31, 0xC0);       /* xor eax, eax */
-			EMIT1(0x50);             /* push rax */
-			/* Make rax as ptr that points to tail_call_cnt. */
-			EMIT3(0x48, 0x89, 0xE0); /* mov rax, rsp */
-			EMIT1_off32(0xE8, 2);    /* call main prog */
-			EMIT1(0x59);             /* pop rcx, get rid of tail_call_cnt */
-			EMIT1(0xC3);             /* ret */
+			/* Make rax as tcc_ptr. */
+			EMIT4(0x48, 0x8B, 0x47, 0x08); /* mov rax, qword ptr [rdi + 8] */
 		} else {
-			/* Keep the same instruction size. */
-			emit_nops(&prog, 13);
+			/* Keep the same instruction layout. */
+			emit_nops(&prog, 4);
 		}
 	}
+	if (!is_subprog)
+		/* Recover the original ctx. */
+		EMIT3(0x48, 0x8B, 0x3F); /* mov rdi, qword ptr [rdi] */
+	else
+		/* Keep the same instruction layout. */
+		emit_nops(&prog, 3);
 	/* Exception callback receives FP as third parameter */
 	if (is_exception_cb) {
 		EMIT3(0x48, 0x89, 0xF4); /* mov rsp, rsi */
@@ -3152,6 +3150,12 @@ bool bpf_jit_supports_subprog_tailcalls(void)
 	return true;
 }

+/* Indicate the JIT backend supports tail call count pointer in
tailcall context. */
+bool bpf_jit_supports_tail_call_cnt_ptr(void)
+{
+	return true;
+}
+
 void bpf_jit_free(struct bpf_prog *prog)
 {
 	if (prog->jited) {
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7671530d6e4e0..fea4326c27d31 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1919,6 +1919,11 @@ int bpf_prog_array_copy(struct bpf_prog_array
*old_array,
 			u64 bpf_cookie,
 			struct bpf_prog_array **new_array);

+struct bpf_prog_run_ctx {
+	const void *ctx;
+	u32 *tail_call_cnt;
+};
+
 struct bpf_run_ctx {};

 struct bpf_cg_run_ctx {
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 68fb6c8142fec..c1c035c44b4ab 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -629,6 +629,10 @@ typedef unsigned int (*bpf_dispatcher_fn)(const
void *ctx,
 					  unsigned int (*bpf_func)(const void *,
 								   const struct bpf_insn *));

+static __always_inline u32 __bpf_prog_run_dfunc(const struct bpf_prog
*prog,
+						const void *ctx,
+						bpf_dispatcher_fn dfunc);
+
 static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
 					  const void *ctx,
 					  bpf_dispatcher_fn dfunc)
@@ -641,14 +645,14 @@ static __always_inline u32 __bpf_prog_run(const
struct bpf_prog *prog,
 		u64 start = sched_clock();
 		unsigned long flags;

-		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+		ret = __bpf_prog_run_dfunc(prog, ctx, dfunc);
 		stats = this_cpu_ptr(prog->stats);
 		flags = u64_stats_update_begin_irqsave(&stats->syncp);
 		u64_stats_inc(&stats->cnt);
 		u64_stats_add(&stats->nsecs, sched_clock() - start);
 		u64_stats_update_end_irqrestore(&stats->syncp, flags);
 	} else {
-		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+		ret = __bpf_prog_run_dfunc(prog, ctx, dfunc);
 	}
 	return ret;
 }
@@ -952,12 +956,31 @@ struct bpf_prog *bpf_int_jit_compile(struct
bpf_prog *prog);
 void bpf_jit_compile(struct bpf_prog *prog);
 bool bpf_jit_needs_zext(void);
 bool bpf_jit_supports_subprog_tailcalls(void);
+bool bpf_jit_supports_tail_call_cnt_ptr(void);
 bool bpf_jit_supports_kfunc_call(void);
 bool bpf_jit_supports_far_kfunc_call(void);
 bool bpf_jit_supports_exceptions(void);
 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64
sp, u64 bp), void *cookie);
 bool bpf_helper_changes_pkt_data(void *func);

+static __always_inline u32 __bpf_prog_run_dfunc(const struct bpf_prog
*prog,
+						const void *ctx,
+						bpf_dispatcher_fn dfunc)
+{
+	struct bpf_prog_run_ctx run_ctx = {};
+	u32 ret, tcc = 0;
+
+	run_ctx.ctx = ctx;
+	run_ctx.tail_call_cnt = &tcc;
+
+	if (bpf_jit_supports_tail_call_cnt_ptr() && prog->jited)
+		ret = dfunc(&run_ctx, prog->insnsi, prog->bpf_func);
+	else
+		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+
+	return ret;
+}
+
 static inline bool bpf_dump_raw_ok(const struct cred *cred)
 {
 	/* Reconstruction of call-sites is dependent on kallsyms,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ea6843be2616c..80b20e99456f0 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2915,6 +2915,15 @@ bool __weak bpf_jit_supports_subprog_tailcalls(void)
 	return false;
 }

+/* Return TRUE if the JIT backend supports tail call count pointer in
tailcall
+ * context.
+ */
+bool __weak bpf_jit_supports_tail_call_cnt_ptr(void)
+{
+	return false;
+}
+EXPORT_SYMBOL(bpf_jit_supports_tail_call_cnt_ptr);
+
 bool __weak bpf_jit_supports_kfunc_call(void)
 {
 	return false;

Why use EXPORT_SYMBOL here?

It's to avoid the following build errors:

ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr"
[net/sched/act_bpf.ko] undefined!
ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr"
[net/sched/cls_bpf.ko] undefined!
ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr"
[net/netfilter/xt_bpf.ko] undefined!
ERROR: modpost: "bpf_jit_supports_tail_call_cnt_ptr" [net/ipv6/ipv6.ko]
undefined!

I'm not familiar with this build error. Is it OK to use EXPORT_SYMBOL
here?

Thanks,
Leon

next prev parent reply	other threads:[~2024-02-14  5:47 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-04 14:22 [PATCH bpf-next 0/4] bpf, x64: Fix tailcall hierarchy Leon Hwang
2024-01-04 14:22 ` [PATCH bpf-next 1/4] bpf, x64: Use emit_nops() to replace memcpy()'ing x86_nops[5] Leon Hwang
2024-01-04 14:22 ` [PATCH bpf-next 2/4] bpf, x64: Fix tailcall hierarchy Leon Hwang
2024-01-05  4:15   ` Alexei Starovoitov
2024-01-05  6:15     ` Leon Hwang
2024-01-05 17:43       ` Alexei Starovoitov
2024-01-06  2:38         ` Leon Hwang
2024-01-05 10:33     ` Leon Hwang
2024-01-05 17:47       ` Alexei Starovoitov
2024-01-06  2:33         ` Leon Hwang
2024-01-06  3:34           ` Alexei Starovoitov
2024-01-05 12:40     ` Jiri Olsa
2024-01-06  0:18       ` John Fastabend
2024-01-06  3:46         ` Alexei Starovoitov
2024-02-14  5:47     ` Leon Hwang [this message]
2024-02-14 11:25       ` Maciej Fijalkowski
2024-02-14 16:31         ` Leon Hwang
2024-02-14 23:16       ` Alexei Starovoitov
2024-02-15 13:16         ` Leon Hwang
2024-02-16  2:18           ` Alexei Starovoitov
2024-02-17 13:43             ` Leon Hwang
2024-02-20  5:13               ` Leon Hwang
2024-02-20 17:34                 ` Alexei Starovoitov
2024-02-20 17:33               ` Alexei Starovoitov
2024-02-21 14:42                 ` Leon Hwang
2024-01-04 14:22 ` [PATCH bpf-next 3/4] bpf, x64: Rename RESTORE_TAIL_CALL_CNT() to LOAD_TAIL_CALL_CNT_PTR() Leon Hwang
2024-01-04 14:22 ` [PATCH bpf-next 4/4] selftests/bpf: Add testcases for tailcall hierarchy fixing Leon Hwang

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:4065bdcc5b2a dfblob:56cea2676863 dfblob:7671530d6e4e
dfblob:fea4326c27d3 dfblob:68fb6c8142fe dfblob:c1c035c44b4a
dfblob:ea6843be2616 dfblob:80b20e99456f )
 OR (
bs:"Re: [PATCH bpf-next 2/4] bpf, x64: Fix tailcall hierarchy" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7af3f9c6-d25a-4ca5-9e15-c1699adcf7ab@gmail.com \
    --to=hffilwlqm@gmail.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=hengqi.chen@gmail.com \
    --cc=iii@linux.ibm.com \
    --cc=jakub@cloudflare.com \
    --cc=kernel-patches-bot@fb.com \
    --cc=maciej.fijalkowski@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox