bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/2] Fix two tailcall-related issues
@ 2025-07-08  7:18 Haoran Jiang
  2025-07-08  7:18 ` [PATCH v2 1/2] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function Haoran Jiang
  2025-07-08  7:18 ` [PATCH v2 2/2] LoongArch: BPF: Fix tailcall hierarchy Haoran Jiang
  0 siblings, 2 replies; 7+ messages in thread
From: Haoran Jiang @ 2025-07-08  7:18 UTC (permalink / raw)
  To: loongarch
  Cc: bpf, kernel, chenhuacai, hengqi.chen, yangtiezhu, jolsa, haoluo,
	sdf, kpsingh, john.fastabend, yonghong.song, song, eddyz87,
	martin.lau, andrii, daniel, ast

v2:
1. Add a Fixes tag.
2. Make ctx the first parameter of emit_bpf_tail_call.
3. Define jmp_offset as a macro in emit_bpf_tail_call.

After merging these two patches, the test results are as follows:

./test_progs --allow=tailcalls
tester_init:PASS:tester_log_buf 0 nsec
process_subtest:PASS:obj_open_mem 0 nsec
process_subtest:PASS:specs_alloc 0 nsec
#413/1   tailcalls/tailcall_1:OK
#413/2   tailcalls/tailcall_2:OK
#413/3   tailcalls/tailcall_3:OK
#413/4   tailcalls/tailcall_4:OK
#413/5   tailcalls/tailcall_5:OK
#413/6   tailcalls/tailcall_6:OK
#413/7   tailcalls/tailcall_bpf2bpf_1:OK
#413/8   tailcalls/tailcall_bpf2bpf_2:OK
#413/9   tailcalls/tailcall_bpf2bpf_3:OK
#413/10  tailcalls/tailcall_bpf2bpf_4:OK
#413/11  tailcalls/tailcall_bpf2bpf_5:OK
#413/12  tailcalls/tailcall_bpf2bpf_6:OK
test_tailcall_count:PASS:open fentry_obj file 0 nsec
test_tailcall_count:PASS:find fentry prog 0 nsec
test_tailcall_count:PASS:set_attach_target subprog_tail 0 nsec
test_tailcall_count:PASS:load fentry_obj 0 nsec
libbpf: prog 'fentry': failed to attach: -ENOTSUPP
test_tailcall_count:FAIL:attach_trace unexpected error: -524
#413/13  tailcalls/tailcall_bpf2bpf_fentry:FAIL
test_tailcall_count:PASS:open fexit_obj file 0 nsec
test_tailcall_count:PASS:find fexit prog 0 nsec
test_tailcall_count:PASS:set_attach_target subprog_tail 0 nsec
test_tailcall_count:PASS:load fexit_obj 0 nsec
libbpf: prog 'fexit': failed to attach: -ENOTSUPP
test_tailcall_count:FAIL:attach_trace unexpected error: -524
#413/14  tailcalls/tailcall_bpf2bpf_fexit:FAIL
test_tailcall_count:PASS:open fentry_obj file 0 nsec
test_tailcall_count:PASS:find fentry prog 0 nsec
test_tailcall_count:PASS:set_attach_target subprog_tail 0 nsec
test_tailcall_count:PASS:load fentry_obj 0 nsec
libbpf: prog 'fentry': failed to attach: -ENOTSUPP
test_tailcall_count:FAIL:attach_trace unexpected error: -524
#413/15  tailcalls/tailcall_bpf2bpf_fentry_fexit:FAIL
test_tailcall_bpf2bpf_fentry_entry:PASS:load tgt_obj 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:find jmp_table map 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:find jmp_table map fd 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:find classifier_0 prog 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:find classifier_0 prog fd 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:update jmp_table 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:open fentry_obj file 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:find fentry prog 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:set_attach_target classifier_0 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:load fentry_obj 0 nsec
libbpf: prog 'fentry': failed to attach: -ENOTSUPP
test_tailcall_bpf2bpf_fentry_entry:FAIL:attach_trace unexpected error: -524
#413/16  tailcalls/tailcall_bpf2bpf_fentry_entry:FAIL
#413/17  tailcalls/tailcall_poke:OK
#413/18  tailcalls/tailcall_bpf2bpf_hierarchy_1:OK
test_tailcall_hierarchy_count:PASS:load obj 0 nsec
test_tailcall_hierarchy_count:PASS:find entry prog 0 nsec
test_tailcall_hierarchy_count:PASS:prog_fd 0 nsec
test_tailcall_hierarchy_count:PASS:find jmp_table 0 nsec
test_tailcall_hierarchy_count:PASS:map_fd 0 nsec
test_tailcall_hierarchy_count:PASS:update jmp_table 0 nsec
test_tailcall_hierarchy_count:PASS:find data_map 0 nsec
test_tailcall_hierarchy_count:PASS:open fentry_obj file 0 nsec
test_tailcall_hierarchy_count:PASS:find fentry prog 0 nsec
test_tailcall_hierarchy_count:PASS:set_attach_target subprog_tail 0 nsec
test_tailcall_hierarchy_count:PASS:load fentry_obj 0 nsec
libbpf: prog 'fentry': failed to attach: -ENOTSUPP
test_tailcall_hierarchy_count:FAIL:attach_trace unexpected error: -524
#413/19  tailcalls/tailcall_bpf2bpf_hierarchy_fentry:FAIL
test_tailcall_hierarchy_count:PASS:load obj 0 nsec
test_tailcall_hierarchy_count:PASS:find entry prog 0 nsec
test_tailcall_hierarchy_count:PASS:prog_fd 0 nsec
test_tailcall_hierarchy_count:PASS:find jmp_table 0 nsec
test_tailcall_hierarchy_count:PASS:map_fd 0 nsec
test_tailcall_hierarchy_count:PASS:update jmp_table 0 nsec
test_tailcall_hierarchy_count:PASS:find data_map 0 nsec
test_tailcall_hierarchy_count:PASS:open fexit_obj file 0 nsec
test_tailcall_hierarchy_count:PASS:find fexit prog 0 nsec
test_tailcall_hierarchy_count:PASS:set_attach_target subprog_tail 0 nsec
test_tailcall_hierarchy_count:PASS:load fexit_obj 0 nsec
libbpf: prog 'fexit': failed to attach: -ENOTSUPP
test_tailcall_hierarchy_count:FAIL:attach_trace unexpected error: -524
#413/20  tailcalls/tailcall_bpf2bpf_hierarchy_fexit:FAIL
test_tailcall_hierarchy_count:PASS:load obj 0 nsec
test_tailcall_hierarchy_count:PASS:find entry prog 0 nsec
test_tailcall_hierarchy_count:PASS:prog_fd 0 nsec
test_tailcall_hierarchy_count:PASS:find jmp_table 0 nsec
test_tailcall_hierarchy_count:PASS:map_fd 0 nsec
test_tailcall_hierarchy_count:PASS:update jmp_table 0 nsec
test_tailcall_hierarchy_count:PASS:find data_map 0 nsec
test_tailcall_hierarchy_count:PASS:open fentry_obj file 0 nsec
test_tailcall_hierarchy_count:PASS:find fentry prog 0 nsec
test_tailcall_hierarchy_count:PASS:set_attach_target subprog_tail 0 nsec
test_tailcall_hierarchy_count:PASS:load fentry_obj 0 nsec
libbpf: prog 'fentry': failed to attach: -ENOTSUPP
test_tailcall_hierarchy_count:FAIL:attach_trace unexpected error: -524
#413/21  tailcalls/tailcall_bpf2bpf_hierarchy_fentry_fexit:FAIL
test_tailcall_hierarchy_count:PASS:load obj 0 nsec
test_tailcall_hierarchy_count:PASS:find entry prog 0 nsec
test_tailcall_hierarchy_count:PASS:prog_fd 0 nsec
test_tailcall_hierarchy_count:PASS:open fentry_obj file 0 nsec
test_tailcall_hierarchy_count:PASS:find fentry prog 0 nsec
test_tailcall_hierarchy_count:PASS:set_attach_target entry 0 nsec
test_tailcall_hierarchy_count:PASS:load fentry_obj 0 nsec
libbpf: prog 'fentry': failed to attach: -ENOTSUPP
test_tailcall_hierarchy_count:FAIL:attach_trace unexpected error: -524
tester_init:PASS:tester_log_buf 0 nsec
process_subtest:PASS:obj_open_mem 0 nsec
process_subtest:PASS:specs_alloc 0 nsec
#413/22  tailcalls/tailcall_bpf2bpf_hierarchy_fentry_entry:FAIL
#413/23  tailcalls/tailcall_bpf2bpf_hierarchy_2:OK
#413/24  tailcalls/tailcall_bpf2bpf_hierarchy_3:OK
test_tailcall_freplace:PASS:tailcall_freplace__open 0 nsec
test_tailcall_freplace:PASS:tc_bpf2bpf__open_and_load 0 nsec
test_tailcall_freplace:PASS:set_attach_target 0 nsec
test_tailcall_freplace:PASS:tailcall_freplace__load 0 nsec
test_tailcall_freplace:PASS:update jmp_table failure 0 nsec
libbpf: prog 'entry_freplace': failed to attach to freplace: -ENOTSUPP
test_tailcall_freplace:FAIL:attach_freplace unexpected error: -524
#413/25  tailcalls/tailcall_freplace:FAIL
test_tailcall_bpf2bpf_freplace:PASS:tc_bpf2bpf__open_and_load 0 nsec
test_tailcall_bpf2bpf_freplace:PASS:tailcall_freplace__open 0 nsec
test_tailcall_bpf2bpf_freplace:PASS:set_attach_target 0 nsec
test_tailcall_bpf2bpf_freplace:PASS:tailcall_freplace__load 0 nsec
libbpf: prog 'entry_freplace': failed to attach to freplace: -ENOTSUPP
test_tailcall_bpf2bpf_freplace:FAIL:attach_freplace unexpected error: -524
#413/26  tailcalls/tailcall_bpf2bpf_freplace:FAIL
#413/27  tailcalls/tailcall_failure:OK
#413/28  tailcalls/reject_tail_call_spin_lock:OK
#413/29  tailcalls/reject_tail_call_rcu_lock:OK
#413/30  tailcalls/reject_tail_call_preempt_lock:OK
#413/31  tailcalls/reject_tail_call_ref:OK
#413     tailcalls:FAIL

All error logs:
tester_init:PASS:tester_log_buf 0 nsec
process_subtest:PASS:obj_open_mem 0 nsec
process_subtest:PASS:specs_alloc 0 nsec
test_tailcall_count:PASS:open fentry_obj file 0 nsec
test_tailcall_count:PASS:find fentry prog 0 nsec
test_tailcall_count:PASS:set_attach_target subprog_tail 0 nsec
test_tailcall_count:PASS:load fentry_obj 0 nsec
libbpf: prog 'fentry': failed to attach: -ENOTSUPP
test_tailcall_count:FAIL:attach_trace unexpected error: -524
#413/13  tailcalls/tailcall_bpf2bpf_fentry:FAIL
test_tailcall_count:PASS:open fexit_obj file 0 nsec
test_tailcall_count:PASS:find fexit prog 0 nsec
test_tailcall_count:PASS:set_attach_target subprog_tail 0 nsec
test_tailcall_count:PASS:load fexit_obj 0 nsec
libbpf: prog 'fexit': failed to attach: -ENOTSUPP
test_tailcall_count:FAIL:attach_trace unexpected error: -524
#413/14  tailcalls/tailcall_bpf2bpf_fexit:FAIL
test_tailcall_count:PASS:open fentry_obj file 0 nsec
test_tailcall_count:PASS:find fentry prog 0 nsec
test_tailcall_count:PASS:set_attach_target subprog_tail 0 nsec
test_tailcall_count:PASS:load fentry_obj 0 nsec
libbpf: prog 'fentry': failed to attach: -ENOTSUPP
test_tailcall_count:FAIL:attach_trace unexpected error: -524
#413/15  tailcalls/tailcall_bpf2bpf_fentry_fexit:FAIL
test_tailcall_bpf2bpf_fentry_entry:PASS:load tgt_obj 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:find jmp_table map 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:find jmp_table map fd 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:find classifier_0 prog 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:find classifier_0 prog fd 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:update jmp_table 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:open fentry_obj file 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:find fentry prog 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:set_attach_target classifier_0 0 nsec
test_tailcall_bpf2bpf_fentry_entry:PASS:load fentry_obj 0 nsec
libbpf: prog 'fentry': failed to attach: -ENOTSUPP
test_tailcall_bpf2bpf_fentry_entry:FAIL:attach_trace unexpected error: -524
#413/16  tailcalls/tailcall_bpf2bpf_fentry_entry:FAIL
test_tailcall_hierarchy_count:PASS:load obj 0 nsec
test_tailcall_hierarchy_count:PASS:find entry prog 0 nsec
test_tailcall_hierarchy_count:PASS:prog_fd 0 nsec
test_tailcall_hierarchy_count:PASS:find jmp_table 0 nsec
test_tailcall_hierarchy_count:PASS:map_fd 0 nsec
test_tailcall_hierarchy_count:PASS:update jmp_table 0 nsec
test_tailcall_hierarchy_count:PASS:find data_map 0 nsec
test_tailcall_hierarchy_count:PASS:open fentry_obj file 0 nsec
test_tailcall_hierarchy_count:PASS:find fentry prog 0 nsec
test_tailcall_hierarchy_count:PASS:set_attach_target subprog_tail 0 nsec
test_tailcall_hierarchy_count:PASS:load fentry_obj 0 nsec
libbpf: prog 'fentry': failed to attach: -ENOTSUPP
test_tailcall_hierarchy_count:FAIL:attach_trace unexpected error: -524
#413/19  tailcalls/tailcall_bpf2bpf_hierarchy_fentry:FAIL
test_tailcall_hierarchy_count:PASS:load obj 0 nsec
test_tailcall_hierarchy_count:PASS:find entry prog 0 nsec
test_tailcall_hierarchy_count:PASS:prog_fd 0 nsec
test_tailcall_hierarchy_count:PASS:find jmp_table 0 nsec
test_tailcall_hierarchy_count:PASS:map_fd 0 nsec
test_tailcall_hierarchy_count:PASS:update jmp_table 0 nsec
test_tailcall_hierarchy_count:PASS:find data_map 0 nsec
test_tailcall_hierarchy_count:PASS:open fexit_obj file 0 nsec
test_tailcall_hierarchy_count:PASS:find fexit prog 0 nsec
test_tailcall_hierarchy_count:PASS:set_attach_target subprog_tail 0 nsec
test_tailcall_hierarchy_count:PASS:load fexit_obj 0 nsec
libbpf: prog 'fexit': failed to attach: -ENOTSUPP
test_tailcall_hierarchy_count:FAIL:attach_trace unexpected error: -524
#413/20  tailcalls/tailcall_bpf2bpf_hierarchy_fexit:FAIL
test_tailcall_hierarchy_count:PASS:load obj 0 nsec
test_tailcall_hierarchy_count:PASS:find entry prog 0 nsec
test_tailcall_hierarchy_count:PASS:prog_fd 0 nsec
test_tailcall_hierarchy_count:PASS:find jmp_table 0 nsec
test_tailcall_hierarchy_count:PASS:map_fd 0 nsec
test_tailcall_hierarchy_count:PASS:update jmp_table 0 nsec
test_tailcall_hierarchy_count:PASS:find data_map 0 nsec
test_tailcall_hierarchy_count:PASS:open fentry_obj file 0 nsec
test_tailcall_hierarchy_count:PASS:find fentry prog 0 nsec
test_tailcall_hierarchy_count:PASS:set_attach_target subprog_tail 0 nsec
test_tailcall_hierarchy_count:PASS:load fentry_obj 0 nsec
libbpf: prog 'fentry': failed to attach: -ENOTSUPP
test_tailcall_hierarchy_count:FAIL:attach_trace unexpected error: -524
#413/21  tailcalls/tailcall_bpf2bpf_hierarchy_fentry_fexit:FAIL
test_tailcall_hierarchy_count:PASS:load obj 0 nsec
test_tailcall_hierarchy_count:PASS:find entry prog 0 nsec
test_tailcall_hierarchy_count:PASS:prog_fd 0 nsec
test_tailcall_hierarchy_count:PASS:open fentry_obj file 0 nsec
test_tailcall_hierarchy_count:PASS:find fentry prog 0 nsec
test_tailcall_hierarchy_count:PASS:set_attach_target entry 0 nsec
test_tailcall_hierarchy_count:PASS:load fentry_obj 0 nsec
libbpf: prog 'fentry': failed to attach: -ENOTSUPP
test_tailcall_hierarchy_count:FAIL:attach_trace unexpected error: -524
tester_init:PASS:tester_log_buf 0 nsec
process_subtest:PASS:obj_open_mem 0 nsec
process_subtest:PASS:specs_alloc 0 nsec
#413/22  tailcalls/tailcall_bpf2bpf_hierarchy_fentry_entry:FAIL
test_tailcall_freplace:PASS:tailcall_freplace__open 0 nsec
test_tailcall_freplace:PASS:tc_bpf2bpf__open_and_load 0 nsec
test_tailcall_freplace:PASS:set_attach_target 0 nsec
test_tailcall_freplace:PASS:tailcall_freplace__load 0 nsec
test_tailcall_freplace:PASS:update jmp_table failure 0 nsec
libbpf: prog 'entry_freplace': failed to attach to freplace: -ENOTSUPP
test_tailcall_freplace:FAIL:attach_freplace unexpected error: -524
#413/25  tailcalls/tailcall_freplace:FAIL
test_tailcall_bpf2bpf_freplace:PASS:tc_bpf2bpf__open_and_load 0 nsec
test_tailcall_bpf2bpf_freplace:PASS:tailcall_freplace__open 0 nsec
test_tailcall_bpf2bpf_freplace:PASS:set_attach_target 0 nsec
test_tailcall_bpf2bpf_freplace:PASS:tailcall_freplace__load 0 nsec
libbpf: prog 'entry_freplace': failed to attach to freplace: -ENOTSUPP
test_tailcall_bpf2bpf_freplace:FAIL:attach_freplace unexpected error: -524
#413/26  tailcalls/tailcall_bpf2bpf_freplace:FAIL
#413     tailcalls:FAIL
Summary: 0/21 PASSED, 0 SKIPPED, 1 FAILED
----------------------------------------------------------------------

v1:
1. Fix the jmp_offset calculation error in the emit_bpf_tail_call function.
2. Fix the issue that the MAX_TAIL_CALL_CNT limit can be bypassed when mixing tailcalls and BPF-to-BPF calls.

After applying this patch, testing results are as follows:

./test_progs --allow=tailcalls/tailcall_bpf2bpf_hierarchy_1
413/18  tailcalls/tailcall_bpf2bpf_hierarchy_1:OK
413     tailcalls:OK
Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED

./test_progs --allow=tailcalls/tailcall_bpf2bpf_hierarchy_2
413/23  tailcalls/tailcall_bpf2bpf_hierarchy_2:OK
413     tailcalls:OK
Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED

./test_progs --allow=tailcalls/tailcall_bpf2bpf_hierarchy_3
413/24  tailcalls/tailcall_bpf2bpf_hierarchy_3:OK
413     tailcalls:OK
Summary: 1/1 PASSED, 0 SKIPPED, 0 FAILED

Haoran Jiang (2):
  LoongArch: BPF: Optimize the calculation method of jmp_offset in the
    emit_bpf_tail_call function
  LoongArch: BPF: Fix tailcall hierarchy

 arch/loongarch/net/bpf_jit.c | 133 +++++++++++++++++++----------------
 1 file changed, 74 insertions(+), 59 deletions(-)

-- 
2.43.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2 1/2] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function
  2025-07-08  7:18 [PATCH v2 0/2] Fix two tailcall-related issues Haoran Jiang
@ 2025-07-08  7:18 ` Haoran Jiang
  2025-07-16  1:31   ` Hengqi Chen
  2025-07-08  7:18 ` [PATCH v2 2/2] LoongArch: BPF: Fix tailcall hierarchy Haoran Jiang
  1 sibling, 1 reply; 7+ messages in thread
From: Haoran Jiang @ 2025-07-08  7:18 UTC (permalink / raw)
  To: loongarch
  Cc: bpf, kernel, chenhuacai, hengqi.chen, yangtiezhu, jolsa, haoluo,
	sdf, kpsingh, john.fastabend, yonghong.song, song, eddyz87,
	martin.lau, andrii, daniel, ast

The extra pass of bpf_int_jit_compile() skips JIT context initialization,
which essentially skips the offset calculation, leaving out_offset = -1.
As a result, the jmp_offset in emit_bpf_tail_call, computed by
#define jmp_offset (out_offset - (cur_offset)), is a negative number,
which does not meet expectations. The final generated assembly is as follows.

54:	bgeu        	$a2, $t1, -8	    # 0x0000004c
58:	addi.d      	$a6, $s5, -1
5c:	bltz        	$a6, -16	    # 0x0000004c
60:	alsl.d      	$t2, $a2, $a1, 0x3
64:	ld.d        	$t2, $t2, 264
68:	beq         	$t2, $zero, -28	    # 0x0000004c

Before applying this patch, the following test case reveals a soft lockup issue.

cd tools/testing/selftests/bpf/
./test_progs --allow=tailcalls/tailcall_bpf2bpf_1

dmesg:
watchdog: BUG: soft lockup - CPU#2 stuck for 26s! [test_progs:25056]

Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support")
Signed-off-by: Haoran Jiang <jianghaoran@kylinos.cn>
---
 arch/loongarch/net/bpf_jit.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index fa1500d4aa3e..5061bfc978f2 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -208,9 +208,7 @@ bool bpf_jit_supports_far_kfunc_call(void)
 	return true;
 }
 
-/* initialized on the first pass of build_body() */
-static int out_offset = -1;
-static int emit_bpf_tail_call(struct jit_ctx *ctx)
+static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn)
 {
 	int off;
 	u8 tcc = tail_call_reg(ctx);
@@ -220,9 +218,10 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	u8 t2 = LOONGARCH_GPR_T2;
 	u8 t3 = LOONGARCH_GPR_T3;
 	const int idx0 = ctx->idx;
+	int tc_ninsn = 0;
 
 #define cur_offset (ctx->idx - idx0)
-#define jmp_offset (out_offset - (cur_offset))
+#define jmp_offset (tc_ninsn - (cur_offset))
 
 	/*
 	 * a0: &ctx
@@ -232,6 +231,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	 * if (index >= array->map.max_entries)
 	 *	 goto out;
 	 */
+	tc_ninsn = insn ? ctx->offset[insn+1] - ctx->offset[insn] :
+		ctx->offset[0];
 	off = offsetof(struct bpf_array, map.max_entries);
 	emit_insn(ctx, ldwu, t1, a1, off);
 	/* bgeu $a2, $t1, jmp_offset */
@@ -263,15 +264,6 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	emit_insn(ctx, ldd, t3, t2, off);
 	__build_epilogue(ctx, true);
 
-	/* out: */
-	if (out_offset == -1)
-		out_offset = cur_offset;
-	if (cur_offset != out_offset) {
-		pr_err_once("tail_call out_offset = %d, expected %d!\n",
-			    cur_offset, out_offset);
-		return -1;
-	}
-
 	return 0;
 
 toofar:
@@ -916,7 +908,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 	/* tail call */
 	case BPF_JMP | BPF_TAIL_CALL:
 		mark_tail_call(ctx);
-		if (emit_bpf_tail_call(ctx) < 0)
+		if (emit_bpf_tail_call(ctx, i) < 0)
 			return -EINVAL;
 		break;
 
@@ -1342,7 +1334,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	if (tmp_blinded)
 		bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);
 
-	out_offset = -1;
 
 	return prog;
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 2/2] LoongArch: BPF: Fix tailcall hierarchy
  2025-07-08  7:18 [PATCH v2 0/2] Fix two tailcall-related issues Haoran Jiang
  2025-07-08  7:18 ` [PATCH v2 1/2] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function Haoran Jiang
@ 2025-07-08  7:18 ` Haoran Jiang
  2025-07-16  2:28   ` Hengqi Chen
  1 sibling, 1 reply; 7+ messages in thread
From: Haoran Jiang @ 2025-07-08  7:18 UTC (permalink / raw)
  To: loongarch
  Cc: bpf, kernel, chenhuacai, hengqi.chen, yangtiezhu, jolsa, haoluo,
	sdf, kpsingh, john.fastabend, yonghong.song, song, eddyz87,
	martin.lau, andrii, daniel, ast

In specific use cases combining tailcalls and BPF-to-BPF calls,
MAX_TAIL_CALL_CNT won't work because of missing tail_call_cnt
back-propagation from callee to caller. This patch fixes this
tailcall issue caused by abusing the tailcall-in-bpf2bpf feature
on LoongArch, in the same way as "bpf, x64: Fix tailcall hierarchy".

Push tail_call_cnt_ptr and tail_call_cnt onto the stack.
tail_call_cnt_ptr is passed between tailcall and bpf2bpf calls,
and is used to increment tail_call_cnt.

Fixes: bb035ef0cc91 ("LoongArch: BPF: Support mixing bpf2bpf and tailcalls")
Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support")
Signed-off-by: Haoran Jiang <jianghaoran@kylinos.cn>
---
 arch/loongarch/net/bpf_jit.c | 112 +++++++++++++++++++++--------------
 1 file changed, 68 insertions(+), 44 deletions(-)

diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 5061bfc978f2..45f804b7c556 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -7,10 +7,9 @@
 #include "bpf_jit.h"
 
 #define REG_TCC		LOONGARCH_GPR_A6
-#define TCC_SAVED	LOONGARCH_GPR_S5
 
-#define SAVE_RA		BIT(0)
-#define SAVE_TCC	BIT(1)
+#define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack) (round_up(stack, 16) - 80)
+
 
 static const int regmap[] = {
 	/* return value from in-kernel function, and exit value for eBPF program */
@@ -32,32 +31,37 @@ static const int regmap[] = {
 	[BPF_REG_AX] = LOONGARCH_GPR_T0,
 };
 
-static void mark_call(struct jit_ctx *ctx)
+static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx, int *store_offset)
 {
-	ctx->flags |= SAVE_RA;
-}
+	const struct bpf_prog *prog = ctx->prog;
+	const bool is_main_prog = !bpf_is_subprog(prog);
 
-static void mark_tail_call(struct jit_ctx *ctx)
-{
-	ctx->flags |= SAVE_TCC;
-}
+	if (is_main_prog) {
+		emit_insn(ctx, addid, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
+		*store_offset -= sizeof(long);
 
-static bool seen_call(struct jit_ctx *ctx)
-{
-	return (ctx->flags & SAVE_RA);
-}
+		emit_tailcall_jmp(ctx, BPF_JGT, REG_TCC, LOONGARCH_GPR_T3, 4);
 
-static bool seen_tail_call(struct jit_ctx *ctx)
-{
-	return (ctx->flags & SAVE_TCC);
-}
+		/* If REG_TCC < MAX_TAIL_CALL_CNT, push REG_TCC into stack */
+		emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
 
-static u8 tail_call_reg(struct jit_ctx *ctx)
-{
-	if (seen_call(ctx))
-		return TCC_SAVED;
+		/* Calculate the pointer to REG_TCC in the stack and assign it to REG_TCC */
+		emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
+
+		emit_uncond_jmp(ctx, 2);
+
+		emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
 
-	return REG_TCC;
+		*store_offset -= sizeof(long);
+		emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
+
+	} else {
+		*store_offset -= sizeof(long);
+		emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
+
+		*store_offset -= sizeof(long);
+		emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
+	}
 }
 
 /*
@@ -80,6 +84,10 @@ static u8 tail_call_reg(struct jit_ctx *ctx)
  *                            |           $s4           |
  *                            +-------------------------+
  *                            |           $s5           |
+ *                            +-------------------------+
+ *                            |          reg_tcc        |
+ *                            +-------------------------+
+ *                            |          reg_tcc_ptr    |
  *                            +-------------------------+ <--BPF_REG_FP
  *                            |  prog->aux->stack_depth |
  *                            |        (optional)       |
@@ -89,21 +97,24 @@ static u8 tail_call_reg(struct jit_ctx *ctx)
 static void build_prologue(struct jit_ctx *ctx)
 {
 	int stack_adjust = 0, store_offset, bpf_stack_adjust;
+	const struct bpf_prog *prog = ctx->prog;
+	const bool is_main_prog = !bpf_is_subprog(prog);
 
 	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
 
-	/* To store ra, fp, s0, s1, s2, s3, s4 and s5. */
-	stack_adjust += sizeof(long) * 8;
+	/* To store ra, fp, s0, s1, s2, s3, s4, s5, reg_tcc and reg_tcc_ptr */
+	stack_adjust += sizeof(long) * 10;
 
 	stack_adjust = round_up(stack_adjust, 16);
 	stack_adjust += bpf_stack_adjust;
 
 	/*
-	 * First instruction initializes the tail call count (TCC).
-	 * On tail call we skip this instruction, and the TCC is
+	 * First instruction initializes the tail call count (TCC) register
+	 * to zero. On tail call we skip this instruction, and the TCC is
 	 * passed in REG_TCC from the caller.
 	 */
-	emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
+	if (is_main_prog)
+		emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, 0);
 
 	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);
 
@@ -131,20 +142,13 @@ static void build_prologue(struct jit_ctx *ctx)
 	store_offset -= sizeof(long);
 	emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);
 
+	prepare_bpf_tail_call_cnt(ctx, &store_offset);
+
 	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
 
 	if (bpf_stack_adjust)
 		emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
 
-	/*
-	 * Program contains calls and tail calls, so REG_TCC need
-	 * to be saved across calls.
-	 */
-	if (seen_tail_call(ctx) && seen_call(ctx))
-		move_reg(ctx, TCC_SAVED, REG_TCC);
-	else
-		emit_insn(ctx, nop);
-
 	ctx->stack_size = stack_adjust;
 }
 
@@ -177,6 +181,17 @@ static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
 	load_offset -= sizeof(long);
 	emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);
 
+	/*
+	 *  When push into the stack, follow the order of tcc then tcc_ptr.
+	 *  When pop from the stack, first pop tcc_ptr followed by tcc
+	 */
+	load_offset -= 2*sizeof(long);
+	emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset);
+
+	/* pop tcc_ptr to REG_TCC */
+	load_offset += sizeof(long);
+	emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset);
+
 	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);
 
 	if (!is_tail_call) {
@@ -211,7 +226,7 @@ bool bpf_jit_supports_far_kfunc_call(void)
 static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn)
 {
 	int off;
-	u8 tcc = tail_call_reg(ctx);
+	int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size);
 	u8 a1 = LOONGARCH_GPR_A1;
 	u8 a2 = LOONGARCH_GPR_A2;
 	u8 t1 = LOONGARCH_GPR_T1;
@@ -240,11 +255,15 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn)
 		goto toofar;
 
 	/*
-	 * if (--TCC < 0)
-	 *	 goto out;
+	 * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
+	 *      goto out;
 	 */
-	emit_insn(ctx, addid, REG_TCC, tcc, -1);
-	if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
+	emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off);
+	emit_insn(ctx, ldd, t3, REG_TCC, 0);
+	emit_insn(ctx, addid, t3, t3, 1);
+	emit_insn(ctx, std, t3, REG_TCC, 0);
+	emit_insn(ctx, addid, t2, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
+	if (emit_tailcall_jmp(ctx, BPF_JSGT, t3, t2, jmp_offset) < 0)
 		goto toofar;
 
 	/*
@@ -465,6 +484,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 	const s16 off = insn->off;
 	const s32 imm = insn->imm;
 	const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;
+	int tcc_ptr_off;
 
 	switch (code) {
 	/* dst = src */
@@ -891,12 +911,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 
 	/* function call */
 	case BPF_JMP | BPF_CALL:
-		mark_call(ctx);
 		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
 					    &func_addr, &func_addr_fixed);
 		if (ret < 0)
 			return ret;
 
+		if (insn->src_reg == BPF_PSEUDO_CALL) {
+			tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size);
+			emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off);
+		}
+
+
 		move_addr(ctx, t1, func_addr);
 		emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0);
 
@@ -907,7 +932,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 
 	/* tail call */
 	case BPF_JMP | BPF_TAIL_CALL:
-		mark_tail_call(ctx);
 		if (emit_bpf_tail_call(ctx, i) < 0)
 			return -EINVAL;
 		break;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 1/2] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function
  2025-07-08  7:18 ` [PATCH v2 1/2] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function Haoran Jiang
@ 2025-07-16  1:31   ` Hengqi Chen
  2025-07-22  9:34     ` re:[PATCH " jianghaoran
  0 siblings, 1 reply; 7+ messages in thread
From: Hengqi Chen @ 2025-07-16  1:31 UTC (permalink / raw)
  To: Haoran Jiang
  Cc: loongarch, bpf, kernel, chenhuacai, yangtiezhu, jolsa, haoluo,
	sdf, kpsingh, john.fastabend, yonghong.song, song, eddyz87,
	martin.lau, andrii, daniel, ast

Hi Haoran,

On Tue, Jul 8, 2025 at 3:19 PM Haoran Jiang <jianghaoran@kylinos.cn> wrote:
>
> The extra pass of bpf_int_jit_compile() skips JIT context initialization
> which essentially skips offset calculation leaving out_offset = -1,
> the jmp_offset in emit_bpf_tail_call is calculated
> by #define jmp_offset (out_offset - (cur_offset)) is a negative number,
> which does not meet expectations.The final generated assembly as follow.
>

"does not meet expectations" ? Simply "is wrong" ?

The subject line should be something like:
  Fix jump offset calculation in tailcall

It's a fix, not optimization.

Other than that, feel free to add:
Reviewed-by: Hengqi Chen <hengqi.chen@gmail.com>

> 54:     bgeu            $a2, $t1, -8        # 0x0000004c
> 58:     addi.d          $a6, $s5, -1
> 5c:     bltz            $a6, -16            # 0x0000004c
> 60:     alsl.d          $t2, $a2, $a1, 0x3
> 64:     ld.d            $t2, $t2, 264
> 68:     beq             $t2, $zero, -28     # 0x0000004c
>
> Before apply this patch, the follow test case will reveal soft lock issues.
>
> cd tools/testing/selftests/bpf/
> ./test_progs --allow=tailcalls/tailcall_bpf2bpf_1
>
> dmesg:
> watchdog: BUG: soft lockup - CPU#2 stuck for 26s! [test_progs:25056]
>
> Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support")
> Signed-off-by: Haoran Jiang <jianghaoran@kylinos.cn>
> ---
>  arch/loongarch/net/bpf_jit.c | 21 ++++++---------------
>  1 file changed, 6 insertions(+), 15 deletions(-)
>
> diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
> index fa1500d4aa3e..5061bfc978f2 100644
> --- a/arch/loongarch/net/bpf_jit.c
> +++ b/arch/loongarch/net/bpf_jit.c
> @@ -208,9 +208,7 @@ bool bpf_jit_supports_far_kfunc_call(void)
>         return true;
>  }
>
> -/* initialized on the first pass of build_body() */
> -static int out_offset = -1;
> -static int emit_bpf_tail_call(struct jit_ctx *ctx)
> +static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn)
>  {
>         int off;
>         u8 tcc = tail_call_reg(ctx);
> @@ -220,9 +218,10 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
>         u8 t2 = LOONGARCH_GPR_T2;
>         u8 t3 = LOONGARCH_GPR_T3;
>         const int idx0 = ctx->idx;
> +       int tc_ninsn = 0;
>
>  #define cur_offset (ctx->idx - idx0)
> -#define jmp_offset (out_offset - (cur_offset))
> +#define jmp_offset (tc_ninsn - (cur_offset))
>
>         /*
>          * a0: &ctx
> @@ -232,6 +231,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
>          * if (index >= array->map.max_entries)
>          *       goto out;
>          */
> +       tc_ninsn = insn ? ctx->offset[insn+1] - ctx->offset[insn] :
> +               ctx->offset[0];
>         off = offsetof(struct bpf_array, map.max_entries);
>         emit_insn(ctx, ldwu, t1, a1, off);
>         /* bgeu $a2, $t1, jmp_offset */
> @@ -263,15 +264,6 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
>         emit_insn(ctx, ldd, t3, t2, off);
>         __build_epilogue(ctx, true);
>
> -       /* out: */
> -       if (out_offset == -1)
> -               out_offset = cur_offset;
> -       if (cur_offset != out_offset) {
> -               pr_err_once("tail_call out_offset = %d, expected %d!\n",
> -                           cur_offset, out_offset);
> -               return -1;
> -       }
> -
>         return 0;
>
>  toofar:
> @@ -916,7 +908,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
>         /* tail call */
>         case BPF_JMP | BPF_TAIL_CALL:
>                 mark_tail_call(ctx);
> -               if (emit_bpf_tail_call(ctx) < 0)
> +               if (emit_bpf_tail_call(ctx, i) < 0)
>                         return -EINVAL;
>                 break;
>
> @@ -1342,7 +1334,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
>         if (tmp_blinded)
>                 bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);
>
> -       out_offset = -1;
>
>         return prog;
>
> --
> 2.43.0
>
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2 2/2] LoongArch: BPF: Fix tailcall hierarchy
  2025-07-08  7:18 ` [PATCH v2 2/2] LoongArch: BPF: Fix tailcall hierarchy Haoran Jiang
@ 2025-07-16  2:28   ` Hengqi Chen
  2025-07-22  9:56     ` re:[PATCH " jianghaoran
  0 siblings, 1 reply; 7+ messages in thread
From: Hengqi Chen @ 2025-07-16  2:28 UTC (permalink / raw)
  To: Haoran Jiang
  Cc: loongarch, bpf, kernel, chenhuacai, yangtiezhu, jolsa, haoluo,
	sdf, kpsingh, john.fastabend, yonghong.song, song, eddyz87,
	martin.lau, andrii, daniel, ast

On Tue, Jul 8, 2025 at 3:19 PM Haoran Jiang <jianghaoran@kylinos.cn> wrote:
>
> In specific use cases combining tailcalls and BPF-to-BPF calls,
> MAX_TAIL_CALL_CNT won't work because of missing tail_call_cnt
> back-propagation from callee to caller. This patch fixes this
> tailcall issue caused by abusing the tailcall in bpf2bpf feature
> on LoongArch like the way of "bpf, x64: Fix tailcall hierarchy".
>
> Push tail_call_cnt_ptr and tail_call_cnt onto the stack;
> tail_call_cnt_ptr is passed between tailcall and bpf2bpf,
> and is used to increment tail_call_cnt.
>
> Fixes: bb035ef0cc91 ("LoongArch: BPF: Support mixing bpf2bpf and tailcalls")
> Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support")
> Signed-off-by: Haoran Jiang <jianghaoran@kylinos.cn>
> ---
>  arch/loongarch/net/bpf_jit.c | 112 +++++++++++++++++++++--------------
>  1 file changed, 68 insertions(+), 44 deletions(-)
>
> diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
> index 5061bfc978f2..45f804b7c556 100644
> --- a/arch/loongarch/net/bpf_jit.c
> +++ b/arch/loongarch/net/bpf_jit.c
> @@ -7,10 +7,9 @@
>  #include "bpf_jit.h"
>
>  #define REG_TCC                LOONGARCH_GPR_A6
> -#define TCC_SAVED      LOONGARCH_GPR_S5
>
> -#define SAVE_RA                BIT(0)
> -#define SAVE_TCC       BIT(1)
> +#define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack) (round_up(stack, 16) - 80)
> +
>
>  static const int regmap[] = {
>         /* return value from in-kernel function, and exit value for eBPF program */
> @@ -32,32 +31,37 @@ static const int regmap[] = {
>         [BPF_REG_AX] = LOONGARCH_GPR_T0,
>  };
>
> -static void mark_call(struct jit_ctx *ctx)
> +static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx, int *store_offset)

Consider adding more comments(e.g. pseudocode) for prepare_bpf_tail_call_cnt().
Assembly is hard to read. (At least for me :))

>  {
> -       ctx->flags |= SAVE_RA;
> -}
> +       const struct bpf_prog *prog = ctx->prog;
> +       const bool is_main_prog = !bpf_is_subprog(prog);
>
> -static void mark_tail_call(struct jit_ctx *ctx)
> -{
> -       ctx->flags |= SAVE_TCC;
> -}
> +       if (is_main_prog) {
> +               emit_insn(ctx, addid, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
> +               *store_offset -= sizeof(long);
>
> -static bool seen_call(struct jit_ctx *ctx)
> -{
> -       return (ctx->flags & SAVE_RA);
> -}
> +               emit_tailcall_jmp(ctx, BPF_JGT, REG_TCC, LOONGARCH_GPR_T3, 4);

Why emit_tailcall_jmp() here ? Shouldn't this be emit_cond_jmp() ?

>
> -static bool seen_tail_call(struct jit_ctx *ctx)
> -{
> -       return (ctx->flags & SAVE_TCC);
> -}
> +               /* If REG_TCC < MAX_TAIL_CALL_CNT, push REG_TCC into stack */
> +               emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
>
> -static u8 tail_call_reg(struct jit_ctx *ctx)
> -{
> -       if (seen_call(ctx))
> -               return TCC_SAVED;
> +               /* Calculate the pointer to REG_TCC in the stack and assign it to REG_TCC */
> +               emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
> +
> +               emit_uncond_jmp(ctx, 2);
> +
> +               emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
>
> -       return REG_TCC;
> +               *store_offset -= sizeof(long);
> +               emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
> +
> +       } else {
> +               *store_offset -= sizeof(long);
> +               emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
> +
> +               *store_offset -= sizeof(long);
> +               emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
> +       }
>  }
>
>  /*
> @@ -80,6 +84,10 @@ static u8 tail_call_reg(struct jit_ctx *ctx)
>   *                            |           $s4           |
>   *                            +-------------------------+
>   *                            |           $s5           |
> + *                            +-------------------------+
> + *                            |          reg_tcc        |
> + *                            +-------------------------+
> + *                            |          reg_tcc_ptr    |
>   *                            +-------------------------+ <--BPF_REG_FP
>   *                            |  prog->aux->stack_depth |
>   *                            |        (optional)       |
> @@ -89,21 +97,24 @@ static u8 tail_call_reg(struct jit_ctx *ctx)
>  static void build_prologue(struct jit_ctx *ctx)
>  {
>         int stack_adjust = 0, store_offset, bpf_stack_adjust;
> +       const struct bpf_prog *prog = ctx->prog;
> +       const bool is_main_prog = !bpf_is_subprog(prog);
>
>         bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
>
> -       /* To store ra, fp, s0, s1, s2, s3, s4 and s5. */
> -       stack_adjust += sizeof(long) * 8;
> +       /* To store ra, fp, s0, s1, s2, s3, s4, s5, reg_tcc and reg_tcc_ptr */
> +       stack_adjust += sizeof(long) * 10;
>
>         stack_adjust = round_up(stack_adjust, 16);
>         stack_adjust += bpf_stack_adjust;
>
>         /*
> -        * First instruction initializes the tail call count (TCC).
> -        * On tail call we skip this instruction, and the TCC is
> +        * First instruction initializes the tail call count (TCC) register
> +        * to zero. On tail call we skip this instruction, and the TCC is
>          * passed in REG_TCC from the caller.
>          */
> -       emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
> +       if (is_main_prog)
> +               emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, 0);
>
>         emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);
>
> @@ -131,20 +142,13 @@ static void build_prologue(struct jit_ctx *ctx)
>         store_offset -= sizeof(long);
>         emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);
>
> +       prepare_bpf_tail_call_cnt(ctx, &store_offset);
> +
>         emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
>
>         if (bpf_stack_adjust)
>                 emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
>
> -       /*
> -        * Program contains calls and tail calls, so REG_TCC need
> -        * to be saved across calls.
> -        */
> -       if (seen_tail_call(ctx) && seen_call(ctx))
> -               move_reg(ctx, TCC_SAVED, REG_TCC);
> -       else
> -               emit_insn(ctx, nop);
> -
>         ctx->stack_size = stack_adjust;
>  }
>
> @@ -177,6 +181,17 @@ static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
>         load_offset -= sizeof(long);
>         emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);
>
> +       /*
> +        *  When push into the stack, follow the order of tcc then tcc_ptr.
> +        *  When pop from the stack, first pop tcc_ptr followed by tcc
> +        */
> +       load_offset -= 2*sizeof(long);
> +       emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset);
> +
> +       /* pop tcc_ptr to REG_TCC */
> +       load_offset += sizeof(long);
> +       emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset);
> +
>         emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);
>
>         if (!is_tail_call) {
> @@ -211,7 +226,7 @@ bool bpf_jit_supports_far_kfunc_call(void)
>  static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn)
>  {
>         int off;
> -       u8 tcc = tail_call_reg(ctx);
> +       int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size);
>         u8 a1 = LOONGARCH_GPR_A1;
>         u8 a2 = LOONGARCH_GPR_A2;
>         u8 t1 = LOONGARCH_GPR_T1;
> @@ -240,11 +255,15 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn)
>                 goto toofar;
>
>         /*
> -        * if (--TCC < 0)
> -        *       goto out;
> +        * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
> +        *      goto out;
>          */
> -       emit_insn(ctx, addid, REG_TCC, tcc, -1);
> -       if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
> +       emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off);
> +       emit_insn(ctx, ldd, t3, REG_TCC, 0);
> +       emit_insn(ctx, addid, t3, t3, 1);
> +       emit_insn(ctx, std, t3, REG_TCC, 0);
> +       emit_insn(ctx, addid, t2, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
> +       if (emit_tailcall_jmp(ctx, BPF_JSGT, t3, t2, jmp_offset) < 0)
>                 goto toofar;
>
>         /*
> @@ -465,6 +484,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
>         const s16 off = insn->off;
>         const s32 imm = insn->imm;
>         const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;
> +       int tcc_ptr_off;
>
>         switch (code) {
>         /* dst = src */
> @@ -891,12 +911,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
>
>         /* function call */
>         case BPF_JMP | BPF_CALL:
> -               mark_call(ctx);
>                 ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
>                                             &func_addr, &func_addr_fixed);
>                 if (ret < 0)
>                         return ret;
>
> +               if (insn->src_reg == BPF_PSEUDO_CALL) {
> +                       tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size);
> +                       emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off);
> +               }
> +
> +
>                 move_addr(ctx, t1, func_addr);
>                 emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0);
>
> @@ -907,7 +932,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
>
>         /* tail call */
>         case BPF_JMP | BPF_TAIL_CALL:
> -               mark_tail_call(ctx);
>                 if (emit_bpf_tail_call(ctx, i) < 0)
>                         return -EINVAL;
>                 break;
> --
> 2.43.0
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* re:[PATCH v2 1/2] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function
  2025-07-16  1:31   ` Hengqi Chen
@ 2025-07-22  9:34     ` jianghaoran
  0 siblings, 0 replies; 7+ messages in thread
From: jianghaoran @ 2025-07-22  9:34 UTC (permalink / raw)
  To: Hengqi Chen
  Cc: loongarch, bpf, kernel, chenhuacai, yangtiezhu, jolsa, haoluo,
	sdf, kpsingh, john.fastabend, yonghong.song, song, eddyz87,
	martin.lau, andrii, daniel, ast





在 2025-07-16星期三的 09:31 +0800,Hengqi Chen写道:
> Hi Haoran,
> 
> On Tue, Jul 8, 2025 at 3:19 PM Haoran Jiang <jianghaoran@kylinos.cn> wrote:
> > 
> > The extra pass of bpf_int_jit_compile() skips JIT context initialization
> > which essentially skips offset calculation leaving out_offset = -1,
> > the jmp_offset in emit_bpf_tail_call is calculated
> > by #define jmp_offset (out_offset - (cur_offset)) is a negative number,
> > which does not meet expectations. The final generated assembly is as follows.
> > 
> 
> "does not meet expectations" ? Simply "is wrong" ?
> 
> The subject line should be something like:
>   Fix jump offset calculation in tailcall
> 
> It's a fix, not optimization.
> 
> Other than that, feel free to add:
> Reviewed-by: Hengqi Chen <hengqi.chen@gmail.com>
>
I will make the modification in the next version.

> > 54:     bgeu            $a2, $t1, -8        # 0x0000004c
> > 58:     addi.d          $a6, $s5, -1
> > 5c:     bltz            $a6, -16            # 0x0000004c
> > 60:     alsl.d          $t2, $a2, $a1, 0x3
> > 64:     ld.d            $t2, $t2, 264
> > 68:     beq             $t2, $zero, -28     # 0x0000004c
> > 
> > Before apply this patch, the follow test case will reveal soft
> > lock issues.
> > 
> > cd tools/testing/selftests/bpf/
> > ./test_progs --allow=tailcalls/tailcall_bpf2bpf_1
> > 
> > dmesg:
> > watchdog: BUG: soft lockup - CPU#2 stuck for 26s!
> > [test_progs:25056]
> > 
> > Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support")
> > Signed-off-by: Haoran Jiang <jianghaoran@kylinos.cn>
> > ---
> >  arch/loongarch/net/bpf_jit.c | 21 ++++++---------------
> >  1 file changed, 6 insertions(+), 15 deletions(-)
> > 
> > diff --git a/arch/loongarch/net/bpf_jit.c
> > b/arch/loongarch/net/bpf_jit.c
> > index fa1500d4aa3e..5061bfc978f2 100644
> > --- a/arch/loongarch/net/bpf_jit.c
> > +++ b/arch/loongarch/net/bpf_jit.c
> > @@ -208,9 +208,7 @@ bool bpf_jit_supports_far_kfunc_call(void)
> >         return true;
> >  }
> > 
> > -/* initialized on the first pass of build_body() */
> > -static int out_offset = -1;
> > -static int emit_bpf_tail_call(struct jit_ctx *ctx)
> > +static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn)
> >  {
> >         int off;
> >         u8 tcc = tail_call_reg(ctx);
> > @@ -220,9 +218,10 @@ static int emit_bpf_tail_call(struct
> > jit_ctx *ctx)
> >         u8 t2 = LOONGARCH_GPR_T2;
> >         u8 t3 = LOONGARCH_GPR_T3;
> >         const int idx0 = ctx->idx;
> > +       int tc_ninsn = 0;
> > 
> >  #define cur_offset (ctx->idx - idx0)
> > -#define jmp_offset (out_offset - (cur_offset))
> > +#define jmp_offset (tc_ninsn - (cur_offset))
> > 
> >         /*
> >          * a0: &ctx
> > @@ -232,6 +231,8 @@ static int emit_bpf_tail_call(struct
> > jit_ctx *ctx)
> >          * if (index >= array->map.max_entries)
> >          *       goto out;
> >          */
> > +       tc_ninsn = insn ? ctx->offset[insn+1] - ctx-
> > >offset[insn] :
> > +               ctx->offset[0];
> >         off = offsetof(struct bpf_array, map.max_entries);
> >         emit_insn(ctx, ldwu, t1, a1, off);
> >         /* bgeu $a2, $t1, jmp_offset */
> > @@ -263,15 +264,6 @@ static int emit_bpf_tail_call(struct
> > jit_ctx *ctx)
> >         emit_insn(ctx, ldd, t3, t2, off);
> >         __build_epilogue(ctx, true);
> > 
> > -       /* out: */
> > -       if (out_offset == -1)
> > -               out_offset = cur_offset;
> > -       if (cur_offset != out_offset) {
> > -               pr_err_once("tail_call out_offset = %d,
> > expected %d!\n",
> > -                           cur_offset, out_offset);
> > -               return -1;
> > -       }
> > -
> >         return 0;
> > 
> >  toofar:
> > @@ -916,7 +908,7 @@ static int build_insn(const struct bpf_insn
> > *insn, struct jit_ctx *ctx, bool ext
> >         /* tail call */
> >         case BPF_JMP | BPF_TAIL_CALL:
> >                 mark_tail_call(ctx);
> > -               if (emit_bpf_tail_call(ctx) < 0)
> > +               if (emit_bpf_tail_call(ctx, i) < 0)
> >                         return -EINVAL;
> >                 break;
> > 
> > @@ -1342,7 +1334,6 @@ struct bpf_prog
> > *bpf_int_jit_compile(struct bpf_prog *prog)
> >         if (tmp_blinded)
> >                 bpf_jit_prog_release_other(prog, prog ==
> > orig_prog ? tmp : orig_prog);
> > 
> > -       out_offset = -1;
> > 
> >         return prog;
> > 
> > --
> > 2.43.0
> > 
> > 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* re:[PATCH v2 2/2] LoongArch: BPF: Fix tailcall hierarchy
  2025-07-16  2:28   ` Hengqi Chen
@ 2025-07-22  9:56     ` jianghaoran
  0 siblings, 0 replies; 7+ messages in thread
From: jianghaoran @ 2025-07-22  9:56 UTC (permalink / raw)
  To: Hengqi Chen
  Cc: loongarch, bpf, kernel, chenhuacai, yangtiezhu, jolsa, haoluo,
	sdf, kpsingh, john.fastabend, yonghong.song, song, eddyz87,
	martin.lau, andrii, daniel, ast





在 2025-07-16星期三的 10:28 +0800,Hengqi Chen写道:
> On Tue, Jul 8, 2025 at 3:19 PM Haoran Jiang <jianghaoran@kylinos.cn> wrote:
> > In specific use cases combining tailcalls and BPF-to-BPF calls,
> > MAX_TAIL_CALL_CNT won't work because of missing tail_call_cnt
> > back-propagation from callee to caller. This patch fixes this
> > tailcall issue caused by abusing the tailcall in bpf2bpf
> > feature
> > on LoongArch like the way of "bpf, x64: Fix tailcall
> > hierarchy".
> > 
> > Push tail_call_cnt_ptr and tail_call_cnt onto the stack;
> > tail_call_cnt_ptr is passed between tailcall and bpf2bpf,
> > and is used to increment tail_call_cnt.
> > 
> > Fixes: bb035ef0cc91 ("LoongArch: BPF: Support mixing bpf2bpf
> > and tailcalls")
> > Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support")
> > Signed-off-by: Haoran Jiang <jianghaoran@kylinos.cn>
> > ---
> >  arch/loongarch/net/bpf_jit.c | 112 +++++++++++++++++++++----
> > ----------
> >  1 file changed, 68 insertions(+), 44 deletions(-)
> > 
> > diff --git a/arch/loongarch/net/bpf_jit.c
> > b/arch/loongarch/net/bpf_jit.c
> > index 5061bfc978f2..45f804b7c556 100644
> > --- a/arch/loongarch/net/bpf_jit.c
> > +++ b/arch/loongarch/net/bpf_jit.c
> > @@ -7,10 +7,9 @@
> >  #include "bpf_jit.h"
> > 
> >  #define REG_TCC                LOONGARCH_GPR_A6
> > -#define TCC_SAVED      LOONGARCH_GPR_S5
> > 
> > -#define SAVE_RA                BIT(0)
> > -#define SAVE_TCC       BIT(1)
> > +#define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack)
> > (round_up(stack, 16) - 80)
> > +
> > 
> >  static const int regmap[] = {
> >         /* return value from in-kernel function, and exit value
> > for eBPF program */
> > @@ -32,32 +31,37 @@ static const int regmap[] = {
> >         [BPF_REG_AX] = LOONGARCH_GPR_T0,
> >  };
> > 
> > -static void mark_call(struct jit_ctx *ctx)
> > +static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx, int
> > *store_offset)
> 
> Consider adding more comments(e.g. pseudocode) for
> prepare_bpf_tail_call_cnt().
> Assembly is hard to read. (At least for me :))
> 

I will improve the comment information in the next version.
> 
> >  {
> > -       ctx->flags |= SAVE_RA;
> > -}
> > +       const struct bpf_prog *prog = ctx->prog;
> > +       const bool is_main_prog = !bpf_is_subprog(prog);
> > 
> > -static void mark_tail_call(struct jit_ctx *ctx)
> > -{
> > -       ctx->flags |= SAVE_TCC;
> > -}
> > +       if (is_main_prog) {
> > +               emit_insn(ctx, addid, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
> > +               *store_offset -= sizeof(long);
> > 
> > -static bool seen_call(struct jit_ctx *ctx)
> > -{
> > -       return (ctx->flags & SAVE_RA);
> > -}
> > +               emit_tailcall_jmp(ctx, BPF_JGT, REG_TCC, LOONGARCH_GPR_T3, 4);
> 
> Why emit_tailcall_jmp() here ? Shouldn't this be emit_cond_jmp() ?

It's more appropriate to use emit_cond_jmp here.
> 
> > 
> > -static bool seen_tail_call(struct jit_ctx *ctx)
> > -{
> > -       return (ctx->flags & SAVE_TCC);
> > -}
> > +               /* If REG_TCC < MAX_TAIL_CALL_CNT, push REG_TCC into stack */
> > +               emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
> > 
> > -static u8 tail_call_reg(struct jit_ctx *ctx)
> > -{
> > -       if (seen_call(ctx))
> > -               return TCC_SAVED;
> > +               /* Calculate the pointer to REG_TCC in the stack and assign it to REG_TCC */
> > +               emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
> > +
> > +               emit_uncond_jmp(ctx, 2);
> > +
> > +               emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
> > 
> > -       return REG_TCC;
> > +               *store_offset -= sizeof(long);
> > +               emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
> > +
> > +       } else {
> > +               *store_offset -= sizeof(long);
> > +               emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
> > +
> > +               *store_offset -= sizeof(long);
> > +               emit_insn(ctx, std, REG_TCC, LOONGARCH_GPR_SP, *store_offset);
> > +       }
> >  }
> > 
> >  /*
> > @@ -80,6 +84,10 @@ static u8 tail_call_reg(struct jit_ctx *ctx)
> >   *                            |           $s4           |
> >   *                            +-------------------------+
> >   *                            |           $s5           |
> > + *                            +-------------------------+
> > + *                            |          reg_tcc        |
> > + *                            +-------------------------+
> > + *                            |          reg_tcc_ptr    |
> >   *                            +-------------------------+ <--BPF_REG_FP
> >   *                            |  prog->aux->stack_depth |
> >   *                            |        (optional)       |
> > @@ -89,21 +97,24 @@ static u8 tail_call_reg(struct jit_ctx *ctx)
> >  static void build_prologue(struct jit_ctx *ctx)
> >  {
> >         int stack_adjust = 0, store_offset, bpf_stack_adjust;
> > +       const struct bpf_prog *prog = ctx->prog;
> > +       const bool is_main_prog = !bpf_is_subprog(prog);
> > 
> >         bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
> > 
> > -       /* To store ra, fp, s0, s1, s2, s3, s4 and s5. */
> > -       stack_adjust += sizeof(long) * 8;
> > +       /* To store ra, fp, s0, s1, s2, s3, s4, s5, reg_tcc and reg_tcc_ptr */
> > +       stack_adjust += sizeof(long) * 10;
> > 
> >         stack_adjust = round_up(stack_adjust, 16);
> >         stack_adjust += bpf_stack_adjust;
> > 
> >         /*
> > -        * First instruction initializes the tail call count (TCC).
> > -        * On tail call we skip this instruction, and the TCC is
> > +        * First instruction initializes the tail call count (TCC) register
> > +        * to zero. On tail call we skip this instruction, and the TCC is
> >          * passed in REG_TCC from the caller.
> >          */
> > -       emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
> > +       if (is_main_prog)
> > +               emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, 0);
> > 
> >         emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);
> > 
> > @@ -131,20 +142,13 @@ static void build_prologue(struct jit_ctx *ctx)
> >         store_offset -= sizeof(long);
> >         emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);
> > 
> > +       prepare_bpf_tail_call_cnt(ctx, &store_offset);
> > +
> >         emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
> > 
> >         if (bpf_stack_adjust)
> >                 emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
> > 
> > -       /*
> > -        * Program contains calls and tail calls, so REG_TCC need
> > -        * to be saved across calls.
> > -        */
> > -       if (seen_tail_call(ctx) && seen_call(ctx))
> > -               move_reg(ctx, TCC_SAVED, REG_TCC);
> > -       else
> > -               emit_insn(ctx, nop);
> > -
> >         ctx->stack_size = stack_adjust;
> >  }
> > 
> > @@ -177,6 +181,17 @@ static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
> >         load_offset -= sizeof(long);
> >         emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);
> > 
> > +       /*
> > +        *  When push into the stack, follow the order of tcc then tcc_ptr.
> > +        *  When pop from the stack, first pop tcc_ptr followed by tcc
> > +        */
> > +       load_offset -= 2*sizeof(long);
> > +       emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset);
> > +
> > +       /* pop tcc_ptr to REG_TCC */
> > +       load_offset += sizeof(long);
> > +       emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, load_offset);
> > +
> >         emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);
> > 
> >         if (!is_tail_call) {
> > @@ -211,7 +226,7 @@ bool bpf_jit_supports_far_kfunc_call(void)
> >  static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn)
> >  {
> >         int off;
> > -       u8 tcc = tail_call_reg(ctx);
> > +       int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size);
> >         u8 a1 = LOONGARCH_GPR_A1;
> >         u8 a2 = LOONGARCH_GPR_A2;
> >         u8 t1 = LOONGARCH_GPR_T1;
> > @@ -240,11 +255,15 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn)
> >                 goto toofar;
> > 
> >         /*
> > -        * if (--TCC < 0)
> > -        *       goto out;
> > +        * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
> > +        *      goto out;
> >          */
> > -       emit_insn(ctx, addid, REG_TCC, tcc, -1);
> > -       if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
> > +       emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off);
> > +       emit_insn(ctx, ldd, t3, REG_TCC, 0);
> > +       emit_insn(ctx, addid, t3, t3, 1);
> > +       emit_insn(ctx, std, t3, REG_TCC, 0);
> > +       emit_insn(ctx, addid, t2, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
> > +       if (emit_tailcall_jmp(ctx, BPF_JSGT, t3, t2, jmp_offset) < 0)
> >                 goto toofar;
> > 
> >         /*
> > @@ -465,6 +484,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
> >         const s16 off = insn->off;
> >         const s32 imm = insn->imm;
> >         const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;
> > +       int tcc_ptr_off;
> > 
> >         switch (code) {
> >         /* dst = src */
> > @@ -891,12 +911,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
> > 
> >         /* function call */
> >         case BPF_JMP | BPF_CALL:
> > -               mark_call(ctx);
> >                 ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
> >                                             &func_addr, &func_addr_fixed);
> >                 if (ret < 0)
> >                         return ret;
> > 
> > +               if (insn->src_reg == BPF_PSEUDO_CALL) {
> > +                       tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(ctx->stack_size);
> > +                       emit_insn(ctx, ldd, REG_TCC, LOONGARCH_GPR_SP, tcc_ptr_off);
> > +               }
> > +
> > +
> >                 move_addr(ctx, t1, func_addr);
> >                 emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0);
> > 
> > @@ -907,7 +932,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
> > 
> >         /* tail call */
> >         case BPF_JMP | BPF_TAIL_CALL:
> > -               mark_tail_call(ctx);
> >                 if (emit_bpf_tail_call(ctx, i) < 0)
> >                         return -EINVAL;
> >                 break;
> > --
> > 2.43.0
> > 


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-07-22  9:56 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-08  7:18 [PATCH v2 0/2] Fix two tailcall-related issues Haoran Jiang
2025-07-08  7:18 ` [PATCH v2 1/2] LoongArch: BPF: Optimize the calculation method of jmp_offset in the emit_bpf_tail_call function Haoran Jiang
2025-07-16  1:31   ` Hengqi Chen
2025-07-22  9:34     ` re:[PATCH " jianghaoran
2025-07-08  7:18 ` [PATCH v2 2/2] LoongArch: BPF: Fix tailcall hierarchy Haoran Jiang
2025-07-16  2:28   ` Hengqi Chen
2025-07-22  9:56     ` re:[PATCH " jianghaoran

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).