* [PATCH 0/2] Fix two tailcall-related issues
@ 2025-07-01 7:41 Haoran Jiang
2025-07-01 7:41 ` [PATCH 1/2] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function Haoran Jiang
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Haoran Jiang @ 2025-07-01 7:41 UTC (permalink / raw)
To: loongarch
Cc: bpf, kernel, chenhuacai, hengqi.chen, yangtiezhu, jolsa, haoluo,
sdf, kpsingh, john.fastabend, yonghong.song, song, eddyz87,
martin.lau, andrii, daniel, ast
1. Fix the jmp_offset calculation error in the emit_bpf_tail_call function.
2. Fix the issue that the MAX_TAIL_CALL_CNT limit is bypassed when tailcalls are mixed with BPF-to-BPF calls.
After applying this patch, testing results are as follows:
./test_progs --allow=tailcalls/tailcall_bpf2bpf_hierarchy_1
413/18 tailcalls/tailcall_bpf2bpf_hierarchy_1:OK
413 tailcalls:OK
Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED
./test_progs --allow=tailcalls/tailcall_bpf2bpf_hierarchy_2
413/23 tailcalls/tailcall_bpf2bpf_hierarchy_2:OK
413 tailcalls:OK
Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED
./test_progs --allow=tailcalls/tailcall_bpf2bpf_hierarchy_3
413/24 tailcalls/tailcall_bpf2bpf_hierarchy_3:OK
413 tailcalls:OK
Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED
Haoran Jiang (2):
LoongArch: BPF: Optimize the calculation method of jmp_offset in the
emit_bpf_tail_call function
LoongArch: BPF: Fix tailcall hierarchy
arch/loongarch/net/bpf_jit.c | 140 ++++++++++++++++++++---------------
1 file changed, 80 insertions(+), 60 deletions(-)
--
2.43.0
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 1/2] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function
2025-07-01 7:41 [PATCH 0/2] Fix two tailcall-related issues Haoran Jiang
@ 2025-07-01 7:41 ` Haoran Jiang
2025-07-07 12:29 ` Hengqi Chen
2025-07-01 7:41 ` [PATCH 2/2] LoongArch: BPF: Fix tailcall hierarchy Haoran Jiang
2025-07-03 12:31 ` [PATCH 0/2] Fix two tailcall-related issues Hengqi Chen
2 siblings, 1 reply; 8+ messages in thread
From: Haoran Jiang @ 2025-07-01 7:41 UTC (permalink / raw)
To: loongarch
Cc: bpf, kernel, chenhuacai, hengqi.chen, yangtiezhu, jolsa, haoluo,
sdf, kpsingh, john.fastabend, yonghong.song, song, eddyz87,
martin.lau, andrii, daniel, ast
For an eBPF subprog JIT, the last call to the bpf_int_jit_compile function will
directly enter the skip_init_ctx process. At this point, out_offset = -1,
so the jmp_offset in emit_bpf_tail_call, calculated by
#define jmp_offset (out_offset - (cur_offset)), is a negative number,
which does not meet expectations. The final generated assembly is as follows.
54: bgeu $a2, $t1, -8 # 0x0000004c
58: addi.d $a6, $s5, -1
5c: bltz $a6, -16 # 0x0000004c
60: alsl.d $t2, $a2, $a1, 0x3
64: ld.d $t2, $t2, 264
68: beq $t2, $zero, -28 # 0x0000004c
Before applying this patch, the following test case will reveal a soft lockup issue.
cd tools/testing/selftests/bpf/
./test_progs --allow=tailcalls/tailcall_bpf2bpf_1
dmesg:
watchdog: BUG: soft lockup - CPU#2 stuck for 26s! [test_progs:25056]
Signed-off-by: Haoran Jiang <jianghaoran@kylinos.cn>
---
arch/loongarch/net/bpf_jit.c | 28 +++++++++-------------------
1 file changed, 9 insertions(+), 19 deletions(-)
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index fa1500d4aa3e..d85490e7de89 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -208,9 +208,7 @@ bool bpf_jit_supports_far_kfunc_call(void)
return true;
}
-/* initialized on the first pass of build_body() */
-static int out_offset = -1;
-static int emit_bpf_tail_call(struct jit_ctx *ctx)
+static int emit_bpf_tail_call(int insn, struct jit_ctx *ctx)
{
int off;
u8 tcc = tail_call_reg(ctx);
@@ -220,9 +218,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
u8 t2 = LOONGARCH_GPR_T2;
u8 t3 = LOONGARCH_GPR_T3;
const int idx0 = ctx->idx;
-
-#define cur_offset (ctx->idx - idx0)
-#define jmp_offset (out_offset - (cur_offset))
+ int tc_ninsn = 0;
+ int jmp_offset = 0;
/*
* a0: &ctx
@@ -232,8 +229,11 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
* if (index >= array->map.max_entries)
* goto out;
*/
+ tc_ninsn = insn ? ctx->offset[insn+1] - ctx->offset[insn] :
+ ctx->offset[0];
off = offsetof(struct bpf_array, map.max_entries);
emit_insn(ctx, ldwu, t1, a1, off);
+ jmp_offset = tc_ninsn - (ctx->idx - idx0);
/* bgeu $a2, $t1, jmp_offset */
if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
goto toofar;
@@ -243,6 +243,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
* goto out;
*/
emit_insn(ctx, addid, REG_TCC, tcc, -1);
+ jmp_offset = tc_ninsn - (ctx->idx - idx0);
if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
goto toofar;
@@ -254,6 +255,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit_insn(ctx, alsld, t2, a2, a1, 2);
off = offsetof(struct bpf_array, ptrs);
emit_insn(ctx, ldd, t2, t2, off);
+ jmp_offset = tc_ninsn - (ctx->idx - idx0);
/* beq $t2, $zero, jmp_offset */
if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
goto toofar;
@@ -263,22 +265,11 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit_insn(ctx, ldd, t3, t2, off);
__build_epilogue(ctx, true);
- /* out: */
- if (out_offset == -1)
- out_offset = cur_offset;
- if (cur_offset != out_offset) {
- pr_err_once("tail_call out_offset = %d, expected %d!\n",
- cur_offset, out_offset);
- return -1;
- }
-
return 0;
toofar:
pr_info_once("tail_call: jump too far\n");
return -1;
-#undef cur_offset
-#undef jmp_offset
}
static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
@@ -916,7 +907,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
/* tail call */
case BPF_JMP | BPF_TAIL_CALL:
mark_tail_call(ctx);
- if (emit_bpf_tail_call(ctx) < 0)
+ if (emit_bpf_tail_call(i, ctx) < 0)
return -EINVAL;
break;
@@ -1342,7 +1333,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
if (tmp_blinded)
bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);
- out_offset = -1;
return prog;
--
2.43.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/2] LoongArch: BPF: Fix tailcall hierarchy
2025-07-01 7:41 [PATCH 0/2] Fix two tailcall-related issues Haoran Jiang
2025-07-01 7:41 ` [PATCH 1/2] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function Haoran Jiang
@ 2025-07-01 7:41 ` Haoran Jiang
2025-07-03 12:31 ` [PATCH 0/2] Fix two tailcall-related issues Hengqi Chen
2 siblings, 0 replies; 8+ messages in thread
From: Haoran Jiang @ 2025-07-01 7:41 UTC (permalink / raw)
To: loongarch
Cc: bpf, kernel, chenhuacai, hengqi.chen, yangtiezhu, jolsa, haoluo,
sdf, kpsingh, john.fastabend, yonghong.song, song, eddyz87,
martin.lau, andrii, daniel, ast
In specific use cases combining tailcalls and BPF-to-BPF calls,
MAX_TAIL_CALL_CNT won't work because of missing tail_call_cnt
back-propagation from callee to caller. This patch fixes this
tailcall issue caused by abusing the tailcall in bpf2bpf feature
on LoongArch, in the same way as "bpf, x64: Fix tailcall hierarchy".
Push tail_call_cnt_ptr and tail_call_cnt onto the stack;
tail_call_cnt_ptr is passed between tailcall and bpf2bpf calls,
and is used to increment tail_call_cnt.
Signed-off-by: Haoran Jiang <jianghaoran@kylinos.cn>
---
arch/loongarch/net/bpf_jit.c | 112 ++++++++++++++++++++++-------------
1 file changed, 71 insertions(+), 41 deletions(-)
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index d85490e7de89..ba409e598b94 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -12,6 +12,9 @@
#define SAVE_RA BIT(0)
#define SAVE_TCC BIT(1)
+#define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack) (round_up(stack, 16) - 80)
+
+
static const int regmap[] = {
/* return value from in-kernel function, and exit value for eBPF program */
[BPF_REG_0] = LOONGARCH_GPR_A5,
@@ -32,32 +35,37 @@ static const int regmap[] = {
[BPF_REG_AX] = LOONGARCH_GPR_T0,
};
-static void mark_call(struct jit_ctx *ctx)
+static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx, int *store_offset)
{
- ctx->flags |= SAVE_RA;
-}
+ const struct bpf_prog *prog = ctx->prog;
+ const bool is_main_prog = !bpf_is_subprog(prog);
-static void mark_tail_call(struct jit_ctx *ctx)
-{
- ctx->flags |= SAVE_TCC;
-}
+ if (is_main_prog) {
+ emit_insn(ctx, addid, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
+ *store_offset -= sizeof(long);
-static bool seen_call(struct jit_ctx *ctx)
-{
- return (ctx->flags & SAVE_RA);
-}
+ emit_tailcall_jmp(ctx, BPF_JGT, REG_TCC, LOONGARCH_GPR_T3, 4);
-static bool seen_tail_call(struct jit_ctx *ctx)
-{
- return (ctx->flags & SAVE_TCC);
-}
+ /* If REG_TCC < MAX_TAIL_CALL_CNT, push REG_TCC into stack */
+ emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
-static u8 tail_call_reg(struct jit_ctx *ctx)
-{
- if (seen_call(ctx))
- return TCC_SAVED;
+ /* Calculate the pointer to REG_TCC in the stack and assign it to REG_TCC */
+ emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
+
+ emit_uncond_jmp(ctx, 2);
+
+ emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
- return REG_TCC;
+ *store_offset -= sizeof(long);
+ emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
+
+ } else {
+ *store_offset -= sizeof(long);
+ emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
+
+ *store_offset -= sizeof(long);
+ emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
+ }
}
/*
@@ -80,6 +88,10 @@ static u8 tail_call_reg(struct jit_ctx *ctx)
* | $s4 |
* +-------------------------+
* | $s5 |
+ * +-------------------------+
+ * | reg_tcc |
+ * +-------------------------+
+ * | reg_tcc_ptr |
* +-------------------------+ <--BPF_REG_FP
* | prog->aux->stack_depth |
* | (optional) |
@@ -89,21 +101,24 @@ static u8 tail_call_reg(struct jit_ctx *ctx)
static void build_prologue(struct jit_ctx *ctx)
{
int stack_adjust = 0, store_offset, bpf_stack_adjust;
+ const struct bpf_prog *prog = ctx->prog;
+ const bool is_main_prog = !bpf_is_subprog(prog);
bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
- /* To store ra, fp, s0, s1, s2, s3, s4 and s5. */
- stack_adjust += sizeof(long) * 8;
+ /* To store ra, fp, s0, s1, s2, s3, s4, s5, reg_tcc and reg_tcc_ptr */
+ stack_adjust += sizeof(long) * 10;
stack_adjust = round_up(stack_adjust, 16);
stack_adjust += bpf_stack_adjust;
/*
- * First instruction initializes the tail call count (TCC).
- * On tail call we skip this instruction, and the TCC is
+ * First instruction initializes the tail call count (TCC) register
+ * to zero. On tail call we skip this instruction, and the TCC is
* passed in REG_TCC from the caller.
*/
- emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
+ if (is_main_prog)
+ emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, 0);
emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);
@@ -131,20 +146,13 @@ static void build_prologue(struct jit_ctx *ctx)
store_offset -= sizeof(long);
emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);
+ prepare_bpf_tail_call_cnt(ctx, &store_offset);
+
emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
if (bpf_stack_adjust)
emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
- /*
- * Program contains calls and tail calls, so REG_TCC need
- * to be saved across calls.
- */
- if (seen_tail_call(ctx) && seen_call(ctx))
- move_reg(ctx, TCC_SAVED, REG_TCC);
- else
- emit_insn(ctx, nop);
-
ctx->stack_size = stack_adjust;
}
@@ -177,6 +185,17 @@ static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
load_offset -= sizeof(long);
emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);
+ /*
+ * When push into the stack, follow the order of tcc then tcc_ptr.
+ * When pop from the stack, first pop tcc_ptr followed by tcc
+ */
+ load_offset -= 2*sizeof(long);
+ emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset);
+
+ /* pop tcc_ptr to REG_TCC */
+ load_offset += sizeof(long);
+ emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset);
+
emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);
if (!is_tail_call) {
@@ -211,7 +230,8 @@ bool bpf_jit_supports_far_kfunc_call(void)
static int emit_bpf_tail_call(int insn, struct jit_ctx *ctx)
{
int off;
- u8 tcc = tail_call_reg(ctx);
+ int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size);
+
u8 a1 = LOONGARCH_GPR_A1;
u8 a2 = LOONGARCH_GPR_A2;
u8 t1 = LOONGARCH_GPR_T1;
@@ -239,12 +259,17 @@ static int emit_bpf_tail_call(int insn, struct jit_ctx *ctx)
goto toofar;
/*
- * if (--TCC < 0)
- * goto out;
+ * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
+ * goto out;
*/
- emit_insn(ctx, addid, REG_TCC, tcc, -1);
+ emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off);
+ emit_insn(ctx, ldd, t3, REG_TCC, 0);
+ emit_insn(ctx, addid, t3, t3, 1);
+ emit_insn(ctx, std, t3, REG_TCC, 0);
+ emit_insn(ctx, addid, t2, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
+
jmp_offset = tc_ninsn - (ctx->idx - idx0);
- if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
+ if (emit_tailcall_jmp(ctx, BPF_JSGT, t3, t2, jmp_offset) < 0)
goto toofar;
/*
@@ -464,6 +489,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
const s16 off = insn->off;
const s32 imm = insn->imm;
const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;
+ int tcc_ptr_off;
switch (code) {
/* dst = src */
@@ -890,12 +916,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
/* function call */
case BPF_JMP | BPF_CALL:
- mark_call(ctx);
ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
&func_addr, &func_addr_fixed);
if (ret < 0)
return ret;
+ if (insn->src_reg == BPF_PSEUDO_CALL) {
+ tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size);
+ emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off);
+ }
+
+
move_addr(ctx, t1, func_addr);
emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0);
@@ -906,7 +937,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
/* tail call */
case BPF_JMP | BPF_TAIL_CALL:
- mark_tail_call(ctx);
if (emit_bpf_tail_call(i, ctx) < 0)
return -EINVAL;
break;
--
2.43.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 0/2] Fix two tailcall-related issues
2025-07-01 7:41 [PATCH 0/2] Fix two tailcall-related issues Haoran Jiang
2025-07-01 7:41 ` [PATCH 1/2] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function Haoran Jiang
2025-07-01 7:41 ` [PATCH 2/2] LoongArch: BPF: Fix tailcall hierarchy Haoran Jiang
@ 2025-07-03 12:31 ` Hengqi Chen
2025-07-04 5:52 ` 回复:[PATCH " jianghaoran
2 siblings, 1 reply; 8+ messages in thread
From: Hengqi Chen @ 2025-07-03 12:31 UTC (permalink / raw)
To: Haoran Jiang
Cc: loongarch, bpf, kernel, chenhuacai, yangtiezhu, jolsa, haoluo,
sdf, kpsingh, john.fastabend, yonghong.song, song, eddyz87,
martin.lau, andrii, daniel, ast
On Tue, Jul 1, 2025 at 3:41 PM Haoran Jiang <jianghaoran@kylinos.cn> wrote:
>
> 1,Fix the jmp_offset calculation error in the emit_bpf_tail_call function.
> 2,Fix the issue that MAX_TAIL_CALL_CNT limit bypass in hybrid tailcall and BPF-to-BPF call
>
> After applying this patch, testing results are as follows:
>
> ./test_progs --allow=tailcalls/tailcall_bpf2bpf_hierarchy_1
> 413/18 tailcalls/tailcall_bpf2bpf_hierarchy_1:OK
> 413 tailcalls:OK
> Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED
>
> ./test_progs --allow=tailcalls/tailcall_bpf2bpf_hierarchy_2
> 413/23 tailcalls/tailcall_bpf2bpf_hierarchy_2:OK
> 413 tailcalls:OK
> Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED
>
> ./test_progs --allow=tailcalls/tailcall_bpf2bpf_hierarchy_3
> 413/24 tailcalls/tailcall_bpf2bpf_hierarchy_3:OK
> 413 tailcalls:OK
> Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED
>
Thanks for the fixes. Will review this series soon.
BTW, do you test other tailcall test cases ?
Cheers,
---
Hengqi
> Haoran Jiang (2):
> LoongArch: BPF: Optimize the calculation method of jmp_offset in the
> emit_bpf_tail_call function
> LoongArch: BPF: Fix tailcall hierarchy
>
> arch/loongarch/net/bpf_jit.c | 140 ++++++++++++++++++++---------------
> 1 file changed, 80 insertions(+), 60 deletions(-)
>
> --
> 2.43.0
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* 回复:[PATCH 0/2] Fix two tailcall-related issues
2025-07-03 12:31 ` [PATCH 0/2] Fix two tailcall-related issues Hengqi Chen
@ 2025-07-04 5:52 ` jianghaoran
2025-07-08 1:36 ` [PATCH " Hengqi Chen
0 siblings, 1 reply; 8+ messages in thread
From: jianghaoran @ 2025-07-04 5:52 UTC (permalink / raw)
To: Hengqi Chen
Cc: loongarch, bpf, kernel, chenhuacai, yangtiezhu, jolsa, haoluo,
sdf, kpsingh, john.fastabend, yonghong.song, song, eddyz87,
martin.lau, andrii, daniel, ast
在 2025-07-03星期四的 20:31 +0800,Hengqi Chen写道:
> On Tue, Jul 1, 2025 at 3:41 PM Haoran Jiang <
> jianghaoran@kylinos.cn
> > wrote:
> > 1,Fix the jmp_offset calculation error in the
> > emit_bpf_tail_call function.
> > 2,Fix the issue that MAX_TAIL_CALL_CNT limit bypass in hybrid
> > tailcall and BPF-to-BPF call
> >
> > After applying this patch, testing results are as follows:
> >
> > ./test_progs --allow=tailcalls/tailcall_bpf2bpf_hierarchy_1
> > 413/18 tailcalls/tailcall_bpf2bpf_hierarchy_1:OK
> > 413 tailcalls:OK
> > Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED
> >
> > ./test_progs --allow=tailcalls/tailcall_bpf2bpf_hierarchy_2
> > 413/23 tailcalls/tailcall_bpf2bpf_hierarchy_2:OK
> > 413 tailcalls:OK
> > Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED
> >
> > ./test_progs --allow=tailcalls/tailcall_bpf2bpf_hierarchy_3
> > 413/24 tailcalls/tailcall_bpf2bpf_hierarchy_3:OK
> > 413 tailcalls:OK
> > Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED
> >
>
> Thanks for the fixes. Will review this series soon.
> BTW, do you test other tailcall test cases ?
>
> Cheers,
> ---
> Hengqi
>
tailcall_1/tailcall_2/tailcall_3/tailcall_4/tailcall_5/tailcall_6/t
ailcall_bpf2bpf_1/tailcall_bpf2bpf_2/tailcall_bpf2bpf_3/tailcall_bp
f2bpf_4/tailcall_bpf2bpf_5/tailcall_bpf2bpf_6
/tailcall_bpf2bpf_hierarchy_1/tailcall_bpf2bpf_hierarchy_2/tailcall
_bpf2bpf_hierarchy_3/tailcall_failure
These test cases passed
tailcall_bpf2bpf_fentry/tailcall_bpf2bpf_fexit/tailcall_bpf2bpf_fen
try_fexit/tailcall_bpf2bpf_fentry_entry/tailcall_bpf2bpf_hierarchy_
fentry/tailcall_bpf2bpf_hierarchy_fexit
/tailcall_bpf2bpf_hierarchy_fentry_fexit/tailcall_bpf2bpf_hierarchy
_fentry_entry/tailcall_freplace/tailcall_bpf2bpf_freplace
These test cases depend on the trampoline capability, which is
currently under review in the Linux kernel.
These two patches are relatively independent. Could we prioritize
reviewing the fixes above first?
Trampoline-dependent changes will be implemented after
trampoline is merged.
thanks
>
> > Haoran Jiang (2):
> > LoongArch: BPF: Optimize the calculation method of jmp_offset in the
> > emit_bpf_tail_call function
> > LoongArch: BPF: Fix tailcall hierarchy
> >
> > arch/loongarch/net/bpf_jit.c | 140 ++++++++++++++++++++---------------
> > 1 file changed, 80 insertions(+), 60 deletions(-)
> >
> > --
> > 2.43.0
> >
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function
2025-07-01 7:41 ` [PATCH 1/2] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function Haoran Jiang
@ 2025-07-07 12:29 ` Hengqi Chen
2025-07-08 2:09 ` 回复:[PATCH " jianghaoran
0 siblings, 1 reply; 8+ messages in thread
From: Hengqi Chen @ 2025-07-07 12:29 UTC (permalink / raw)
To: Haoran Jiang
Cc: loongarch, bpf, kernel, chenhuacai, yangtiezhu, jolsa, haoluo,
sdf, kpsingh, john.fastabend, yonghong.song, song, eddyz87,
martin.lau, andrii, daniel, ast
Hi Haoran,
On Tue, Jul 1, 2025 at 3:41 PM Haoran Jiang <jianghaoran@kylinos.cn> wrote:
>
> For a ebpf subprog JIT,the last call bpf_int_jit_compile function will
> directly enter the skip_init_ctx process. At this point, out_offset = -1,
> the jmp_offset in emit_bpf_tail_call is calculated
> by #define jmp_offset (out_offset - (cur_offset)) is a negative number,
> which does not meet expectations.The final generated assembly as follow.
>
OK, so this can be rephrased as:
The extra pass of bpf_int_jit_compile() skips JIT context initialization which
essentially skips offset calculation leaving out_offset = -1 ...
> 54: bgeu $a2, $t1, -8 # 0x0000004c
> 58: addi.d $a6, $s5, -1
> 5c: bltz $a6, -16 # 0x0000004c
> 60: alsl.d $t2, $a2, $a1, 0x3
> 64: ld.d $t2, $t2, 264
> 68: beq $t2, $zero, -28 # 0x0000004c
>
> Before apply this patch, the follow test case will reveal soft lock issues.
>
> cd tools/testing/selftests/bpf/
> ./test_progs --allow=tailcalls/tailcall_bpf2bpf_1
>
> dmesg:
> watchdog: BUG: soft lockup - CPU#2 stuck for 26s! [test_progs:25056]
>
Add a Fixes tag.
> Signed-off-by: Haoran Jiang <jianghaoran@kylinos.cn>
> ---
> arch/loongarch/net/bpf_jit.c | 28 +++++++++-------------------
> 1 file changed, 9 insertions(+), 19 deletions(-)
>
> diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
> index fa1500d4aa3e..d85490e7de89 100644
> --- a/arch/loongarch/net/bpf_jit.c
> +++ b/arch/loongarch/net/bpf_jit.c
> @@ -208,9 +208,7 @@ bool bpf_jit_supports_far_kfunc_call(void)
> return true;
> }
>
> -/* initialized on the first pass of build_body() */
> -static int out_offset = -1;
> -static int emit_bpf_tail_call(struct jit_ctx *ctx)
> +static int emit_bpf_tail_call(int insn, struct jit_ctx *ctx)
> {
Make ctx the first argument ?
> int off;
> u8 tcc = tail_call_reg(ctx);
> @@ -220,9 +218,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
> u8 t2 = LOONGARCH_GPR_T2;
> u8 t3 = LOONGARCH_GPR_T3;
> const int idx0 = ctx->idx;
> -
> -#define cur_offset (ctx->idx - idx0)
> -#define jmp_offset (out_offset - (cur_offset))
Reuse this jmp_offset macro, so that you don't have to repeat it 3
times below, WDYT ?
> + int tc_ninsn = 0;
> + int jmp_offset = 0;
>
> /*
> * a0: &ctx
> @@ -232,8 +229,11 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
> * if (index >= array->map.max_entries)
> * goto out;
> */
> + tc_ninsn = insn ? ctx->offset[insn+1] - ctx->offset[insn] :
> + ctx->offset[0];
> off = offsetof(struct bpf_array, map.max_entries);
> emit_insn(ctx, ldwu, t1, a1, off);
> + jmp_offset = tc_ninsn - (ctx->idx - idx0);
> /* bgeu $a2, $t1, jmp_offset */
> if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
> goto toofar;
> @@ -243,6 +243,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
> * goto out;
> */
> emit_insn(ctx, addid, REG_TCC, tcc, -1);
> + jmp_offset = tc_ninsn - (ctx->idx - idx0);
> if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
> goto toofar;
>
> @@ -254,6 +255,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
> emit_insn(ctx, alsld, t2, a2, a1, 2);
> off = offsetof(struct bpf_array, ptrs);
> emit_insn(ctx, ldd, t2, t2, off);
> + jmp_offset = tc_ninsn - (ctx->idx - idx0);
> /* beq $t2, $zero, jmp_offset */
> if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
> goto toofar;
> @@ -263,22 +265,11 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
> emit_insn(ctx, ldd, t3, t2, off);
> __build_epilogue(ctx, true);
>
> - /* out: */
> - if (out_offset == -1)
> - out_offset = cur_offset;
> - if (cur_offset != out_offset) {
> - pr_err_once("tail_call out_offset = %d, expected %d!\n",
> - cur_offset, out_offset);
> - return -1;
> - }
> -
> return 0;
>
> toofar:
> pr_info_once("tail_call: jump too far\n");
> return -1;
> -#undef cur_offset
> -#undef jmp_offset
> }
>
> static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
> @@ -916,7 +907,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
> /* tail call */
> case BPF_JMP | BPF_TAIL_CALL:
> mark_tail_call(ctx);
> - if (emit_bpf_tail_call(ctx) < 0)
> + if (emit_bpf_tail_call(i, ctx) < 0)
> return -EINVAL;
> break;
>
> @@ -1342,7 +1333,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
> if (tmp_blinded)
> bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);
>
> - out_offset = -1;
>
> return prog;
>
> --
> 2.43.0
>
Cheers,
---
Hengqi
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 0/2] Fix two tailcall-related issues
2025-07-04 5:52 ` 回复:[PATCH " jianghaoran
@ 2025-07-08 1:36 ` Hengqi Chen
0 siblings, 0 replies; 8+ messages in thread
From: Hengqi Chen @ 2025-07-08 1:36 UTC (permalink / raw)
To: jianghaoran
Cc: loongarch, bpf, kernel, chenhuacai, yangtiezhu, jolsa, haoluo,
sdf, kpsingh, john.fastabend, yonghong.song, song, eddyz87,
martin.lau, andrii, daniel, ast
On Fri, Jul 4, 2025 at 1:53 PM jianghaoran <jianghaoran@kylinos.cn> wrote:
>
>
>
>
>
> 在 2025-07-03星期四的 20:31 +0800,Hengqi Chen写道:
> > On Tue, Jul 1, 2025 at 3:41 PM Haoran Jiang <
> > jianghaoran@kylinos.cn
> > > wrote:
> > > 1,Fix the jmp_offset calculation error in the
> > > emit_bpf_tail_call function.
> > > 2,Fix the issue that MAX_TAIL_CALL_CNT limit bypass in hybrid
> > > tailcall and BPF-to-BPF call
> > >
> > > After applying this patch, testing results are as follows:
> > >
> > > ./test_progs --allow=tailcalls/tailcall_bpf2bpf_hierarchy_1
> > > 413/18 tailcalls/tailcall_bpf2bpf_hierarchy_1:OK
> > > 413 tailcalls:OK
> > > Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED
> > >
> > > ./test_progs --allow=tailcalls/tailcall_bpf2bpf_hierarchy_2
> > > 413/23 tailcalls/tailcall_bpf2bpf_hierarchy_2:OK
> > > 413 tailcalls:OK
> > > Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED
> > >
> > > ./test_progs --allow=tailcalls/tailcall_bpf2bpf_hierarchy_3
> > > 413/24 tailcalls/tailcall_bpf2bpf_hierarchy_3:OK
> > > 413 tailcalls:OK
> > > Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED
> > >
> >
> > Thanks for the fixes. Will review this series soon.
> > BTW, do you test other tailcall test cases ?
> >
> > Cheers,
> > ---
> > Hengqi
> >
>
> tailcall_1/tailcall_2/tailcall_3/tailcall_4/tailcall_5/tailcall_6/t
> ailcall_bpf2bpf_1/tailcall_bpf2bpf_2/tailcall_bpf2bpf_3/tailcall_bp
> f2bpf_4/tailcall_bpf2bpf_5/tailcall_bpf2bpf_6
> /tailcall_bpf2bpf_hierarchy_1/tailcall_bpf2bpf_hierarchy_2/tailcall
> _bpf2bpf_hierarchy_3/tailcall_failure
> These test cases passed
>
> tailcall_bpf2bpf_fentry/tailcall_bpf2bpf_fexit/tailcall_bpf2bpf_fen
> try_fexit/tailcall_bpf2bpf_fentry_entry/tailcall_bpf2bpf_hierarchy_
> fentry/tailcall_bpf2bpf_hierarchy_fexit
> /tailcall_bpf2bpf_hierarchy_fentry_fexit/tailcall_bpf2bpf_hierarchy
> _fentry_entry/tailcall_freplace/tailcall_bpf2bpf_freplace
> These test cases depend on the trampoline capability, which is
> currently under review in the Linux kernel.
>
Please post the full test result of the tailcall test cases for future
references.
We can safely ignore those ENOTSUPP cases. Thanks.
> These two patches are relatively independent. Could we prioritize
> reviewing the fixes above first?
> Trampoline-dependent changes will be implemented after
> trampoline is merged.
>
> thanks
>
> >
> > > Haoran Jiang (2):
> > > LoongArch: BPF: Optimize the calculation method of jmp_offset in the
> > > emit_bpf_tail_call function
> > > LoongArch: BPF: Fix tailcall hierarchy
> > >
> > > arch/loongarch/net/bpf_jit.c | 140 ++++++++++++++++++++---------------
> > > 1 file changed, 80 insertions(+), 60 deletions(-)
> > >
> > > --
> > > 2.43.0
> > >
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* 回复:[PATCH 1/2] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function
2025-07-07 12:29 ` Hengqi Chen
@ 2025-07-08 2:09 ` jianghaoran
0 siblings, 0 replies; 8+ messages in thread
From: jianghaoran @ 2025-07-08 2:09 UTC (permalink / raw)
To: Hengqi Chen
Cc: loongarch, bpf, kernel, chenhuacai, yangtiezhu, jolsa, haoluo,
sdf, kpsingh, john.fastabend, yonghong.song, song, eddyz87,
martin.lau, andrii, daniel, ast
在 2025-07-07星期一的 20:29 +0800,Hengqi Chen写道:
> Hi Haoran,
>
> On Tue, Jul 1, 2025 at 3:41 PM Haoran Jiang <
> jianghaoran@kylinos.cn
> > wrote:
> > For a ebpf subprog JIT,the last call bpf_int_jit_compile
> > function will
> > directly enter the skip_init_ctx process. At this point,
> > out_offset = -1,
> > the jmp_offset in emit_bpf_tail_call is calculated
> > by #define jmp_offset (out_offset - (cur_offset)) is a negative
> > number,
> > which does not meet expectations.The final generated assembly
> > as follow.
> >
>
> OK, so this can be rephrased as:
> The extra pass of bpf_int_jit_compile() skips JIT context
> initialization which
> essentially skips offset calculation leaving out_offset = -1 ...
>
> > 54: bgeu $a2, $t1, -8 # 0x0000004c
> > 58: addi.d $a6, $s5, -1
> > 5c: bltz $a6, -16 # 0x0000004c
> > 60: alsl.d $t2, $a2, $a1, 0x3
> > 64: ld.d $t2, $t2, 264
> > 68: beq $t2, $zero, -28 # 0x0000004c
> >
> > Before apply this patch, the follow test case will reveal soft
> > lock issues.
> >
> > cd tools/testing/selftests/bpf/
> > ./test_progs --allow=tailcalls/tailcall_bpf2bpf_1
> >
> > dmesg:
> > watchdog: BUG: soft lockup - CPU#2 stuck for 26s!
> > [test_progs:25056]
> >
>
> Add a Fixes tag.
>
> > Signed-off-by: Haoran Jiang <
> > jianghaoran@kylinos.cn
> > >
> > ---
> > arch/loongarch/net/bpf_jit.c | 28 +++++++++-------------------
> > 1 file changed, 9 insertions(+), 19 deletions(-)
> >
> > diff --git a/arch/loongarch/net/bpf_jit.c
> > b/arch/loongarch/net/bpf_jit.c
> > index fa1500d4aa3e..d85490e7de89 100644
> > --- a/arch/loongarch/net/bpf_jit.c
> > +++ b/arch/loongarch/net/bpf_jit.c
> > @@ -208,9 +208,7 @@ bool bpf_jit_supports_far_kfunc_call(void)
> > return true;
> > }
> >
> > -/* initialized on the first pass of build_body() */
> > -static int out_offset = -1;
> > -static int emit_bpf_tail_call(struct jit_ctx *ctx)
> > +static int emit_bpf_tail_call(int insn, struct jit_ctx *ctx)
> > {
>
> Make ctx the first argument ?
>
> > int off;
> > u8 tcc = tail_call_reg(ctx);
> > @@ -220,9 +218,8 @@ static int emit_bpf_tail_call(struct
> > jit_ctx *ctx)
> > u8 t2 = LOONGARCH_GPR_T2;
> > u8 t3 = LOONGARCH_GPR_T3;
> > const int idx0 = ctx->idx;
> > -
> > -#define cur_offset (ctx->idx - idx0)
> > -#define jmp_offset (out_offset - (cur_offset))
>
> Reuse this jmp_offset macro, so that you don't have to repeat it
> 3
> times below, WDYT ?
>
> > + int tc_ninsn = 0;
> > + int jmp_offset = 0;
> >
> > /*
> > * a0: &ctx
> > @@ -232,8 +229,11 @@ static int emit_bpf_tail_call(struct
> > jit_ctx *ctx)
> > * if (index >= array->map.max_entries)
> > * goto out;
> > */
> > + tc_ninsn = insn ? ctx->offset[insn+1] - ctx-
> > >offset[insn] :
> > + ctx->offset[0];
> > off = offsetof(struct bpf_array, map.max_entries);
> > emit_insn(ctx, ldwu, t1, a1, off);
> > + jmp_offset = tc_ninsn - (ctx->idx - idx0);
> > /* bgeu $a2, $t1, jmp_offset */
> > if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset)
> > < 0)
> > goto toofar;
> > @@ -243,6 +243,7 @@ static int emit_bpf_tail_call(struct
> > jit_ctx *ctx)
> > * goto out;
> > */
> > emit_insn(ctx, addid, REG_TCC, tcc, -1);
> > + jmp_offset = tc_ninsn - (ctx->idx - idx0);
> > if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC,
> > LOONGARCH_GPR_ZERO, jmp_offset) < 0)
> > goto toofar;
> >
> > @@ -254,6 +255,7 @@ static int emit_bpf_tail_call(struct
> > jit_ctx *ctx)
> > emit_insn(ctx, alsld, t2, a2, a1, 2);
> > off = offsetof(struct bpf_array, ptrs);
> > emit_insn(ctx, ldd, t2, t2, off);
> > + jmp_offset = tc_ninsn - (ctx->idx - idx0);
> > /* beq $t2, $zero, jmp_offset */
> > if (emit_tailcall_jmp(ctx, BPF_JEQ, t2,
> > LOONGARCH_GPR_ZERO, jmp_offset) < 0)
> > goto toofar;
> > @@ -263,22 +265,11 @@ static int emit_bpf_tail_call(struct
> > jit_ctx *ctx)
> > emit_insn(ctx, ldd, t3, t2, off);
> > __build_epilogue(ctx, true);
> >
> > - /* out: */
> > - if (out_offset == -1)
> > - out_offset = cur_offset;
> > - if (cur_offset != out_offset) {
> > - pr_err_once("tail_call out_offset = %d,
> > expected %d!\n",
> > - cur_offset, out_offset);
> > - return -1;
> > - }
> > -
> > return 0;
> >
> > toofar:
> > pr_info_once("tail_call: jump too far\n");
> > return -1;
> > -#undef cur_offset
> > -#undef jmp_offset
> > }
> >
> > static void emit_atomic(const struct bpf_insn *insn, struct
> > jit_ctx *ctx)
> > @@ -916,7 +907,7 @@ static int build_insn(const struct bpf_insn
> > *insn, struct jit_ctx *ctx, bool ext
> > /* tail call */
> > case BPF_JMP | BPF_TAIL_CALL:
> > mark_tail_call(ctx);
> > - if (emit_bpf_tail_call(ctx) < 0)
> > + if (emit_bpf_tail_call(i, ctx) < 0)
> > return -EINVAL;
> > break;
> >
> > @@ -1342,7 +1333,6 @@ struct bpf_prog
> > *bpf_int_jit_compile(struct bpf_prog *prog)
> > if (tmp_blinded)
> > bpf_jit_prog_release_other(prog, prog ==
> > orig_prog ? tmp : orig_prog);
> >
> > - out_offset = -1;
> >
> > return prog;
> >
> > --
> > 2.43.0
> >
>
> Cheers,
> ---
> Hengqi
Hi Hengqi,
Thank you for the review. I will make revisions according to your
comments.
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2025-07-08 2:10 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-01 7:41 [PATCH 0/2] Fix two tailcall-related issues Haoran Jiang
2025-07-01 7:41 ` [PATCH 1/2] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function Haoran Jiang
2025-07-07 12:29 ` Hengqi Chen
2025-07-08 2:09 ` 回复:[PATCH " jianghaoran
2025-07-01 7:41 ` [PATCH 2/2] LoongArch: BPF: Fix tailcall hierarchy Haoran Jiang
2025-07-03 12:31 ` [PATCH 0/2] Fix two tailcall-related issues Hengqi Chen
2025-07-04 5:52 ` 回复:[PATCH " jianghaoran
2025-07-08 1:36 ` [PATCH " Hengqi Chen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).