* [PATCH bpf-next v4 1/5] bpf, verifier: Correct tail_call_reachable when no tailcall in subprog
2024-05-09 15:05 [PATCH bpf-next v4 0/5] bpf: Fix tailcall hierarchy Leon Hwang
@ 2024-05-09 15:05 ` Leon Hwang
2024-05-09 15:05 ` [PATCH bpf-next v4 2/5] bpf: Introduce bpf_jit_supports_tail_call_cnt_ptr() Leon Hwang
` (3 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: Leon Hwang @ 2024-05-09 15:05 UTC (permalink / raw)
To: bpf
Cc: ast, daniel, andrii, maciej.fijalkowski, jakub, pulehui,
hffilwlqm, kernel-patches-bot
When the main prog has a tailcall but none of its subprogs does,
prog->aux->tail_call_reachable ends up incorrect. In order to correct
it, subprog[0].has_tail_call has to be checked in addition to
subprog[0].tail_call_reachable in check_max_stack_depth_subprog().
This prepares for a later tailcall fix whose patch relies on
prog->aux->tail_call_reachable.
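For illustration only (not part of the patch, all names here are made
up), a minimal program shape that hits the case described above might
look like this: the tailcall sits in the main prog, while the only
subprog contains no tailcall at all.
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
        __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
        __uint(max_entries, 1);
        __uint(key_size, sizeof(__u32));
        __uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

/* A subprog without any tailcall. */
static __noinline int subprog_no_tail(struct __sk_buff *skb)
{
        return skb->len;
}

SEC("tc")
int entry(struct __sk_buff *skb)
{
        /* bpf2bpf call to a subprog that never tail calls ... */
        subprog_no_tail(skb);
        /* ... while the tailcall lives in the main prog itself. */
        bpf_tail_call_static(skb, &jmp_table, 0);
        return 0;
}

char __license[] SEC("license") = "GPL";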
Signed-off-by: Leon Hwang <hffilwlqm@gmail.com>
---
kernel/bpf/verifier.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9e3aba08984e8..f874ee4b24486 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6000,7 +6000,7 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
}
subprog[ret_prog[j]].tail_call_reachable = true;
}
- if (subprog[0].tail_call_reachable)
+ if (subprog[0].tail_call_reachable || subprog[0].has_tail_call)
env->prog->aux->tail_call_reachable = true;
/* end of for() loop means the last insn of the 'subprog'
--
2.44.0
* [PATCH bpf-next v4 2/5] bpf: Introduce bpf_jit_supports_tail_call_cnt_ptr()
2024-05-09 15:05 [PATCH bpf-next v4 0/5] bpf: Fix tailcall hierarchy Leon Hwang
2024-05-09 15:05 ` [PATCH bpf-next v4 1/5] bpf, verifier: Correct tail_call_reachable when no tailcall in subprog Leon Hwang
@ 2024-05-09 15:05 ` Leon Hwang
2024-05-09 15:05 ` [PATCH bpf-next v4 3/5] bpf, x64: Fix tailcall hierarchy Leon Hwang
` (2 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: Leon Hwang @ 2024-05-09 15:05 UTC (permalink / raw)
To: bpf
Cc: ast, daniel, andrii, maciej.fijalkowski, jakub, pulehui,
hffilwlqm, kernel-patches-bot
In order to store tail_call_cnt on the stack of the bpf prog's caller,
introduce bpf_tail_call_run_ctx as a new ctx for the bpf prog, which
wraps the original ctx and the tail_call_cnt pointer.
To avoid breaking the runtime, introduce use_tail_call_run_ctx in
prog->aux in order to determine whether to use bpf_tail_call_run_ctx
before calling the bpf prog. This flag is set at load time when
prog->aux->tail_call_reachable is true, the prog is jited, and the
arch supports bpf_jit_supports_tail_call_cnt_ptr(). Thereafter, the
prog's prologue has to cache tail_call_cnt_ptr and restore the
original ctx at the same time.
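As an illustration only (plain C, not the kernel code), the calling
convention added by this patch can be modeled roughly as follows for
the use_tail_call_run_ctx == true path; run_prog(), prog_body(),
cached_tcc_ptr and the local struct name are invented for the sketch.
#include <stdint.h>
#include <stdio.h>

struct tail_call_run_ctx {              /* models struct bpf_tail_call_run_ctx */
        const void *ctx;
        uint32_t *tail_call_cnt_ptr;
};

static uint32_t *cached_tcc_ptr;        /* models rax right after the prologue */

/* Models a jited prog whose prologue unwraps the run ctx. */
static uint32_t prog_body(const void *wrapped_ctx)
{
        const struct tail_call_run_ctx *run_ctx = wrapped_ctx;
        const void *orig_ctx;

        cached_tcc_ptr = run_ctx->tail_call_cnt_ptr;    /* cache tcc_ptr   */
        orig_ctx = run_ctx->ctx;                        /* restore the ctx */

        (*cached_tcc_ptr)++;            /* stand-in for one tailcall */
        return orig_ctx != NULL;
}

/* Models __bpf_prog_run(): tail_call_cnt lives on the caller's stack. */
static uint32_t run_prog(uint32_t (*func)(const void *), const void *ctx)
{
        uint32_t tail_call_cnt = 0;
        struct tail_call_run_ctx run_ctx = {
                .ctx = ctx,
                .tail_call_cnt_ptr = &tail_call_cnt,
        };
        uint32_t ret = func(&run_ctx);

        printf("tail_call_cnt = %u\n", tail_call_cnt);  /* prints 1 */
        return ret;
}

int main(void)
{
        char pkt[64] = {};

        return run_prog(prog_body, pkt) == 1 ? 0 : 1;
}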
Signed-off-by: Leon Hwang <hffilwlqm@gmail.com>
---
include/linux/bpf.h | 6 ++++++
include/linux/filter.h | 13 ++++++++++---
kernel/bpf/core.c | 19 +++++++++++++++++++
3 files changed, 35 insertions(+), 3 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 90094400cc63d..95888700966f7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1466,6 +1466,7 @@ struct bpf_prog_aux {
bool attach_tracing_prog; /* true if tracing another tracing program */
bool func_proto_unreliable;
bool tail_call_reachable;
+ bool use_tail_call_run_ctx;
bool xdp_has_frags;
bool exception_cb;
bool exception_boundary;
@@ -2047,6 +2048,11 @@ struct bpf_trace_run_ctx {
bool is_uprobe;
};
+struct bpf_tail_call_run_ctx {
+ const void *ctx;
+ u32 *tail_call_cnt_ptr;
+};
+
struct bpf_tramp_run_ctx {
struct bpf_run_ctx run_ctx;
u64 bpf_cookie;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7a27f19bf44d0..f8e9d5e3da11f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -671,7 +671,13 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
const void *ctx,
bpf_dispatcher_fn dfunc)
{
- u32 ret;
+ struct bpf_tail_call_run_ctx tail_call_run_ctx = {};
+ u32 ret, tail_call_cnt = 0;
+ const void *run_ctx;
+
+ tail_call_run_ctx.ctx = ctx;
+ tail_call_run_ctx.tail_call_cnt_ptr = &tail_call_cnt;
+ run_ctx = prog->aux->use_tail_call_run_ctx ? &tail_call_run_ctx : ctx;
cant_migrate();
if (static_branch_unlikely(&bpf_stats_enabled_key)) {
@@ -679,7 +685,7 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
u64 duration, start = sched_clock();
unsigned long flags;
- ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+ ret = dfunc(run_ctx, prog->insnsi, prog->bpf_func);
duration = sched_clock() - start;
stats = this_cpu_ptr(prog->stats);
@@ -688,7 +694,7 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
u64_stats_add(&stats->nsecs, duration);
u64_stats_update_end_irqrestore(&stats->syncp, flags);
} else {
- ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+ ret = dfunc(run_ctx, prog->insnsi, prog->bpf_func);
}
return ret;
}
@@ -994,6 +1000,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
void bpf_jit_compile(struct bpf_prog *prog);
bool bpf_jit_needs_zext(void);
bool bpf_jit_supports_subprog_tailcalls(void);
+bool bpf_jit_supports_tail_call_cnt_ptr(void);
bool bpf_jit_supports_percpu_insn(void);
bool bpf_jit_supports_kfunc_call(void);
bool bpf_jit_supports_far_kfunc_call(void);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 99b8b1c9a248c..3fad4d973b820 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2358,6 +2358,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
return ret;
}
+static void bpf_check_tail_call_run_ctx(struct bpf_prog *fp)
+{
+ if (fp->aux->tail_call_reachable && fp->jited &&
+ bpf_jit_supports_tail_call_cnt_ptr())
+ fp->aux->use_tail_call_run_ctx = true;
+}
+
static void bpf_prog_select_func(struct bpf_prog *fp)
{
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
@@ -2430,6 +2437,10 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
* all eBPF JITs might immediately support all features.
*/
*err = bpf_check_tail_call(fp);
+ if (*err)
+ return fp;
+
+ bpf_check_tail_call_run_ctx(fp);
return fp;
}
@@ -2941,6 +2952,14 @@ bool __weak bpf_jit_needs_zext(void)
return false;
}
+/* Return TRUE if the JIT backend supports tail call count pointer in tailcall
+ * context.
+ */
+bool __weak bpf_jit_supports_tail_call_cnt_ptr(void)
+{
+ return false;
+}
+
/* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */
bool __weak bpf_jit_supports_subprog_tailcalls(void)
{
--
2.44.0
* [PATCH bpf-next v4 3/5] bpf, x64: Fix tailcall hierarchy
2024-05-09 15:05 [PATCH bpf-next v4 0/5] bpf: Fix tailcall hierarchy Leon Hwang
2024-05-09 15:05 ` [PATCH bpf-next v4 1/5] bpf, verifier: Correct tail_call_reachable when no tailcall in subprog Leon Hwang
2024-05-09 15:05 ` [PATCH bpf-next v4 2/5] bpf: Introduce bpf_jit_supports_tail_call_cnt_ptr() Leon Hwang
@ 2024-05-09 15:05 ` Leon Hwang
2024-05-16 15:28 ` Leon Hwang
2024-05-09 15:05 ` [PATCH bpf-next v4 4/5] bpf, arm64: " Leon Hwang
2024-05-09 15:05 ` [PATCH bpf-next v4 5/5] selftests/bpf: Add testcases for tailcall hierarchy fixing Leon Hwang
4 siblings, 1 reply; 9+ messages in thread
From: Leon Hwang @ 2024-05-09 15:05 UTC (permalink / raw)
To: bpf
Cc: ast, daniel, andrii, maciej.fijalkowski, jakub, pulehui,
hffilwlqm, kernel-patches-bot
This patch fixes a tailcall issue caused by abusing the
tailcall-in-bpf2bpf feature.
As we know, tail_call_cnt is propagated through rax from caller to
callee when a subprog is called in a tailcall context. But, as the
following example shows, MAX_TAIL_CALL_CNT won't be enforced, because
tail_call_cnt is never propagated back from callee to caller.
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_legacy.h"
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
__uint(max_entries, 1);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
int count = 0;
static __noinline
int subprog_tail1(struct __sk_buff *skb)
{
bpf_tail_call_static(skb, &jmp_table, 0);
return 0;
}
static __noinline
int subprog_tail2(struct __sk_buff *skb)
{
bpf_tail_call_static(skb, &jmp_table, 0);
return 0;
}
SEC("tc")
int entry(struct __sk_buff *skb)
{
volatile int ret = 1;
count++;
subprog_tail1(skb);
subprog_tail2(skb);
return ret;
}
char __license[] SEC("license") = "GPL";
At run time, the tail_call_cnt in entry() is propagated to
subprog_tail1() and subprog_tail2(). But when the tail_call_cnt in
subprog_tail1() is updated by bpf_tail_call_static(), the tail_call_cnt
in entry() is not updated at the same time. As a result, when the
tail_call_cnt in entry() is still less than MAX_TAIL_CALL_CNT and
subprog_tail1() returns because of the MAX_TAIL_CALL_CNT limit,
bpf_tail_call_static() in subprog_tail2() is still able to run, because
the tail_call_cnt in subprog_tail2() propagated from entry() is less
than MAX_TAIL_CALL_CNT.
So, how many tailcalls are there in this case if no error happens?
Viewed top-down, the calls form a hierarchy, layer by layer. With this
view, there will be 2+4+8+...+2^33 = 2^34 - 2 = 17,179,869,182
tailcalls in this case.
What if there are N subprog_tail() calls in entry()? There will be
almost N^34 tailcalls.
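For illustration only (not part of the patch), the arithmetic above can
be checked with a few lines of plain C:
#include <stdio.h>

int main(void)
{
        unsigned long long total = 0, layer = 2;        /* 2 subprog_tail() calls */
        int depth;

        for (depth = 1; depth <= 33; depth++) {         /* MAX_TAIL_CALL_CNT == 33 */
                total += layer;                         /* 2 + 4 + ... + 2^33 */
                layer *= 2;
        }
        printf("%llu\n", total);                        /* 17179869182 == 2^34 - 2 */
        return 0;
}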
This patch resolves the case on x86_64.
Instead of propagating tail_call_cnt from caller to callee, it
propagates a pointer to it: tail_call_cnt_ptr, or tcc_ptr for short.
However, where is tail_call_cnt stored, then?
It is stored on the stack of the bpf prog's caller, using the mechanism
introduced by the previous patch "bpf: Introduce
bpf_jit_supports_tail_call_cnt_ptr()". Then, in the bpf prog's
prologue, tcc_ptr is loaded from bpf_tail_call_run_ctx, and the
original ctx is restored from bpf_tail_call_run_ctx at the same time.
Then, when a tailcall runs, the tail_call_cnt accessed through tcc_ptr
is compared with MAX_TAIL_CALL_CNT and then incremented through
tcc_ptr.
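The following plain-C model (an illustration of the intended behaviour,
not the JITed code; the function names only mirror the example above)
shows why sharing the counter through a pointer caps the whole
hierarchy at MAX_TAIL_CALL_CNT tailcalls:
#include <stdio.h>

#define MAX_TAIL_CALL_CNT 33

static unsigned long long total_tailcalls;

static void entry(unsigned int *tcc_ptr);

/* Every frame dereferences the same tcc_ptr, like the JITed tailcall does. */
static void subprog_tail(unsigned int *tcc_ptr)
{
        if (*tcc_ptr >= MAX_TAIL_CALL_CNT)      /* cmp dword ptr [rax], 33 */
                return;                         /* goto out                */
        (*tcc_ptr)++;                           /* add dword ptr [rax], 1  */
        total_tailcalls++;
        entry(tcc_ptr);                         /* the tailcall target     */
}

static void entry(unsigned int *tcc_ptr)
{
        subprog_tail(tcc_ptr);
        subprog_tail(tcc_ptr);
}

int main(void)
{
        unsigned int tail_call_cnt = 0;         /* owned by the prog's caller */

        entry(&tail_call_cnt);
        printf("%llu tailcalls\n", total_tailcalls);    /* prints 33 */
        return 0;
}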
Furthermore, when a trampoline is the caller of the bpf prog, it is
required to prepare tail_call_cnt and the tail call run ctx on the
stack of the trampoline.
Finally, enable bpf_jit_supports_tail_call_cnt_ptr() so that
__bpf_prog_run() uses bpf_tail_call_run_ctx.
Fixes: ebf7d1f508a7 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT")
Fixes: e411901c0b77 ("bpf: allow for tailcalls in BPF subprograms for x64 JIT")
Signed-off-by: Leon Hwang <hffilwlqm@gmail.com>
---
arch/x86/net/bpf_jit_comp.c | 101 ++++++++++++++++++++++++------------
include/linux/bpf.h | 2 +
2 files changed, 70 insertions(+), 33 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index ff217cc35ce92..43dc628e66222 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -273,7 +273,7 @@ struct jit_context {
/* Number of bytes emit_patch() needs to generate instructions */
#define X86_PATCH_SIZE 5
/* Number of bytes that will be skipped on tailcall */
-#define X86_TAIL_CALL_OFFSET (11 + ENDBR_INSN_SIZE)
+#define X86_TAIL_CALL_OFFSET (16 + ENDBR_INSN_SIZE)
static void push_r12(u8 **pprog)
{
@@ -420,14 +420,17 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
*/
emit_nops(&prog, X86_PATCH_SIZE);
if (!ebpf_from_cbpf) {
- if (tail_call_reachable && !is_subprog)
- /* When it's the entry of the whole tailcall context,
- * zeroing rax means initialising tail_call_cnt.
- */
- EMIT2(0x31, 0xC0); /* xor eax, eax */
- else
+ if (tail_call_reachable && !is_subprog) {
+ /* Store tcc_ptr to rax. */
+ /* mov rax, qword ptr [rdi + 8] */
+ EMIT4(0x48, 0x8B, 0x47, 0x08);
+ /* Restore the original ctx. */
+ /* mov rdi, qword ptr [rdi] */
+ EMIT3(0x48, 0x8B, 0x3F);
+ } else {
/* Keep the same instruction layout. */
- EMIT2(0x66, 0x90); /* nop2 */
+ emit_nops(&prog, 7);
+ }
}
/* Exception callback receives FP as third parameter */
if (is_exception_cb) {
@@ -453,6 +456,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
if (tail_call_reachable)
+ /* Here, rax is tail_call_cnt_ptr. */
EMIT1(0x50); /* push rax */
*pprog = prog;
}
@@ -589,13 +593,15 @@ static void emit_return(u8 **pprog, u8 *ip)
*pprog = prog;
}
+#define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack) (-8 - round_up(stack, 8))
+
/*
* Generate the following code:
*
* ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
* if (index >= array->map.max_entries)
* goto out;
- * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
+ * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
* goto out;
* prog = array->ptrs[index];
* if (prog == NULL)
@@ -608,7 +614,7 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
u32 stack_depth, u8 *ip,
struct jit_context *ctx)
{
- int tcc_off = -4 - round_up(stack_depth, 8);
+ int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth);
u8 *prog = *pprog, *start = *pprog;
int offset;
@@ -630,16 +636,15 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
EMIT2(X86_JBE, offset); /* jbe out */
/*
- * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
+ * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
* goto out;
*/
- EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
- EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
+ EMIT3_off32(0x48, 0x8B, 0x85, tcc_ptr_off); /* mov rax, qword ptr [rbp - tcc_ptr_off] */
+ EMIT3(0x83, 0x38, MAX_TAIL_CALL_CNT); /* cmp dword ptr [rax], MAX_TAIL_CALL_CNT */
offset = ctx->tail_call_indirect_label - (prog + 2 - start);
EMIT2(X86_JAE, offset); /* jae out */
- EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
- EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
+ EMIT3(0x83, 0x00, 0x01); /* add dword ptr [rax], 1 */
/* prog = array->ptrs[index]; */
EMIT4_off32(0x48, 0x8B, 0x8C, 0xD6, /* mov rcx, [rsi + rdx * 8 + offsetof(...)] */
@@ -663,6 +668,7 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
pop_r12(&prog);
}
+ /* pop tail_call_cnt_ptr */
EMIT1(0x58); /* pop rax */
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */
@@ -691,21 +697,20 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
bool *callee_regs_used, u32 stack_depth,
struct jit_context *ctx)
{
- int tcc_off = -4 - round_up(stack_depth, 8);
+ int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth);
u8 *prog = *pprog, *start = *pprog;
int offset;
/*
- * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
+ * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT)
* goto out;
*/
- EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
- EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
+ EMIT3_off32(0x48, 0x8B, 0x85, tcc_ptr_off); /* mov rax, qword ptr [rbp - tcc_ptr_off] */
+ EMIT3(0x83, 0x38, MAX_TAIL_CALL_CNT); /* cmp dword ptr [rax], MAX_TAIL_CALL_CNT */
offset = ctx->tail_call_direct_label - (prog + 2 - start);
EMIT2(X86_JAE, offset); /* jae out */
- EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
- EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
+ EMIT3(0x83, 0x00, 0x01); /* add dword ptr [rax], 1 */
poke->tailcall_bypass = ip + (prog - start);
poke->adj_off = X86_TAIL_CALL_OFFSET;
@@ -724,6 +729,7 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
pop_r12(&prog);
}
+ /* pop tail_call_cnt_ptr */
EMIT1(0x58); /* pop rax */
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
@@ -1314,8 +1320,8 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
-#define RESTORE_TAIL_CALL_CNT(stack) \
- EMIT3_off32(0x48, 0x8B, 0x85, -round_up(stack, 8) - 8)
+#define LOAD_TAIL_CALL_CNT_PTR(stack) \
+ EMIT3_off32(0x48, 0x8B, 0x85, BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack))
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
int oldproglen, struct jit_context *ctx, bool jmp_padding)
@@ -2045,7 +2051,7 @@ st: if (is_imm8(insn->off))
func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
- RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
+ LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth);
ip += 7;
}
if (!imm32)
@@ -2555,11 +2561,17 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
int run_ctx_off, bool save_ret,
void *image, void *rw_image)
{
- u8 *prog = *pprog;
- u8 *jmp_insn;
+ int ctx_tail_call_run_ctx_off = -run_ctx_off + offsetof(struct bpf_tramp_run_ctx,
+ tail_call_run_ctx);
+ int ctx_tcc_ptr_off = ctx_tail_call_run_ctx_off + offsetof(struct bpf_tail_call_run_ctx,
+ tail_call_cnt_ptr);
+ int ctx_tail_call_cnt_off = -run_ctx_off + offsetof(struct bpf_tramp_run_ctx,
+ tail_call_cnt);
int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
struct bpf_prog *p = l->link.prog;
u64 cookie = l->cookie;
+ u8 *prog = *pprog;
+ u8 *jmp_insn;
/* mov rdi, cookie */
emit_mov_imm64(&prog, BPF_REG_1, (long) cookie >> 32, (u32) (long) cookie);
@@ -2604,6 +2616,23 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
emit_mov_imm64(&prog, BPF_REG_2,
(long) p->insnsi >> 32,
(u32) (long) p->insnsi);
+ if (p->aux->use_tail_call_run_ctx) {
+ /* Cache the original ctx */
+ /* mov qword ptr [rbp - ctx_tail_call_run_ctx_off], rdi */
+ EMIT3_off32(0x48, 0x89, 0xBD, ctx_tail_call_run_ctx_off);
+ /* Make rdi as tcc_ptr */
+ /* lea rdi, [rbp - ctx_tail_call_cnt_off] */
+ EMIT3_off32(0x48, 0x8D, 0xBD, ctx_tail_call_cnt_off);
+ /* Clear tail_call_cnt */
+ /* mov dword ptr [rdi], 0 */
+ EMIT2_off32(0xC7, 0x07, 0x00);
+ /* Cache tcc_ptr */
+ /* mov qword ptr [rbp - ctx_tcc_ptr_off], rdi */
+ EMIT3_off32(0x48, 0x89, 0xBD, ctx_tcc_ptr_off);
+ /* Update rdi as tail call run ctx */
+ /* lea rdi, [rbp - ctx_tail_call_run_ctx_off] */
+ EMIT3_off32(0x48, 0x8D, 0xBD, ctx_tail_call_run_ctx_off);
+ }
/* call JITed bpf program or interpreter */
if (emit_rsb_call(&prog, p->bpf_func, image + (prog - (u8 *)rw_image)))
return -EINVAL;
@@ -2840,7 +2869,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* [ ... ]
* [ stack_arg2 ]
* RBP - arg_stack_off [ stack_arg1 ]
- * RSP [ tail_call_cnt ] BPF_TRAMP_F_TAIL_CALL_CTX
+ * RSP [ tail_call_cnt_ptr ] BPF_TRAMP_F_TAIL_CALL_CTX
*/
/* room for return value of orig_call or fentry prog */
@@ -2969,10 +2998,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
save_args(m, &prog, arg_stack_off, true);
if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
- /* Before calling the original function, restore the
- * tail_call_cnt from stack to rax.
+ /* Before calling the original function, load the
+ * tail_call_cnt_ptr from stack to rax.
*/
- RESTORE_TAIL_CALL_CNT(stack_size);
+ LOAD_TAIL_CALL_CNT_PTR(stack_size);
}
if (flags & BPF_TRAMP_F_ORIG_STACK) {
@@ -3031,10 +3060,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
goto cleanup;
}
} else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
- /* Before running the original function, restore the
- * tail_call_cnt from stack to rax.
+ /* Before running the original function, load the
+ * tail_call_cnt_ptr from stack to rax.
*/
- RESTORE_TAIL_CALL_CNT(stack_size);
+ LOAD_TAIL_CALL_CNT_PTR(stack_size);
}
/* restore return value of orig_call or fentry prog back into RAX */
@@ -3432,6 +3461,12 @@ bool bpf_jit_supports_subprog_tailcalls(void)
return true;
}
+/* Indicate the JIT backend supports tail call count pointer in tailcall context. */
+bool bpf_jit_supports_tail_call_cnt_ptr(void)
+{
+ return true;
+}
+
bool bpf_jit_supports_percpu_insn(void)
{
return true;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 95888700966f7..94f994204acea 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2057,6 +2057,8 @@ struct bpf_tramp_run_ctx {
struct bpf_run_ctx run_ctx;
u64 bpf_cookie;
struct bpf_run_ctx *saved_run_ctx;
+ struct bpf_tail_call_run_ctx tail_call_run_ctx;
+ u32 tail_call_cnt;
};
static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
--
2.44.0
* Re: [PATCH bpf-next v4 3/5] bpf, x64: Fix tailcall hierarchy
2024-05-09 15:05 ` [PATCH bpf-next v4 3/5] bpf, x64: Fix tailcall hierarchy Leon Hwang
@ 2024-05-16 15:28 ` Leon Hwang
2024-05-16 18:56 ` Zvi Effron
0 siblings, 1 reply; 9+ messages in thread
From: Leon Hwang @ 2024-05-16 15:28 UTC (permalink / raw)
To: bpf
Cc: ast, daniel, andrii, maciej.fijalkowski, jakub, pulehui,
kernel-patches-bot
On 2024/5/9 23:05, Leon Hwang wrote:
> This patch fixes a tailcall issue caused by abusing the tailcall in
> bpf2bpf feature.
>
> As we know, tail_call_cnt propagates by rax from caller to callee when
> to call subprog in tailcall context. But, like the following example,
> MAX_TAIL_CALL_CNT won't work because of missing tail_call_cnt
> back-propagation from callee to caller.
>
> #include <linux/bpf.h>
> #include <bpf/bpf_helpers.h>
> #include "bpf_legacy.h"
>
> struct {
> __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
> __uint(max_entries, 1);
> __uint(key_size, sizeof(__u32));
> __uint(value_size, sizeof(__u32));
> } jmp_table SEC(".maps");
>
> int count = 0;
>
> static __noinline
> int subprog_tail1(struct __sk_buff *skb)
> {
> bpf_tail_call_static(skb, &jmp_table, 0);
> return 0;
> }
>
> static __noinline
> int subprog_tail2(struct __sk_buff *skb)
> {
> bpf_tail_call_static(skb, &jmp_table, 0);
> return 0;
> }
>
> SEC("tc")
> int entry(struct __sk_buff *skb)
> {
> volatile int ret = 1;
>
> count++;
> subprog_tail1(skb);
> subprog_tail2(skb);
>
> return ret;
> }
>
> char __license[] SEC("license") = "GPL";
>
> At run time, the tail_call_cnt in entry() will be propagated to
> subprog_tail1() and subprog_tail2(). But, when the tail_call_cnt in
> subprog_tail1() updates when bpf_tail_call_static(), the tail_call_cnt
> in entry() won't be updated at the same time. As a result, in entry(),
> when tail_call_cnt in entry() is less than MAX_TAIL_CALL_CNT and
> subprog_tail1() returns because of MAX_TAIL_CALL_CNT limit,
> bpf_tail_call_static() in subprog_tail2() is able to run because the
> tail_call_cnt in subprog_tail2() propagated from entry() is less than
> MAX_TAIL_CALL_CNT.
>
> So, how many tailcalls are there for this case if no error happens?
>
> From top-down view, does it look like hierarchy layer and layer?
>
> With this view, there will be 2+4+8+...+2^33 = 2^34 - 2 = 17,179,869,182
> tailcalls for this case.
>
> How about there are N subprog_tail() in entry()? There will be almost
> N^34 tailcalls.
>
> Then, in this patch, it resolves this case on x86_64.
>
> Instead of propagating tail_call_cnt from caller to callee, it
> propagates its pointer, tail_call_cnt_ptr, tcc_ptr for short.
>
> However, where does it store tail_call_cnt?
>
> It stores tail_call_cnt on the stack of bpf prog's caller by the way in
> previous patch "bpf: Introduce bpf_jit_supports_tail_call_cnt_ptr()".
> Then, in bpf prog's prologue, it loads tcc_ptr from bpf_tail_call_run_ctx,
> and restores the original ctx from bpf_tail_call_run_ctx meanwhile.
>
> Then, when a tailcall runs, it compares tail_call_cnt accessed by
> tcc_ptr with MAX_TAIL_CALL_CNT and then increments tail_call_cnt at
> tcc_ptr.
>
> Furthermore, when trampoline is the caller of bpf prog, it is required
> to prepare tail_call_cnt and tail call run ctx on the stack of the
> trampoline.
>
Oh, I missed a case here.
This patch set is unable to provide tcc_ptr for freplace programs that
use tail calls in bpf2bpf.
How can this approach provide tcc_ptr for freplace programs?
Achieving this is not straightforward. However, it is simpler to disable
the use of tail calls in bpf2bpf for freplace programs, even though this
is a desired feature for my project.
Therefore, I will disable it in the v5 patch set.
Thanks,
Leon
* Re: [PATCH bpf-next v4 3/5] bpf, x64: Fix tailcall hierarchy
2024-05-16 15:28 ` Leon Hwang
@ 2024-05-16 18:56 ` Zvi Effron
2024-05-17 15:05 ` Leon Hwang
0 siblings, 1 reply; 9+ messages in thread
From: Zvi Effron @ 2024-05-16 18:56 UTC (permalink / raw)
To: Leon Hwang
Cc: bpf, ast, daniel, andrii, maciej.fijalkowski, jakub, pulehui,
kernel-patches-bot
On Thu, May 16, 2024 at 8:28 AM Leon Hwang <hffilwlqm@gmail.com> wrote:
>
>
>
> On 2024/5/9 23:05, Leon Hwang wrote:
> > This patch fixes a tailcall issue caused by abusing the tailcall in
> > bpf2bpf feature.
> >
> > As we know, tail_call_cnt propagates by rax from caller to callee when
> > to call subprog in tailcall context. But, like the following example,
> > MAX_TAIL_CALL_CNT won't work because of missing tail_call_cnt
> > back-propagation from callee to caller.
> >
> > #include <linux/bpf.h>
> > #include <bpf/bpf_helpers.h>
> > #include "bpf_legacy.h"
> >
> > struct {
> > __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
> > __uint(max_entries, 1);
> > __uint(key_size, sizeof(__u32));
> > __uint(value_size, sizeof(__u32));
> > } jmp_table SEC(".maps");
> >
> > int count = 0;
> >
> > static __noinline
> > int subprog_tail1(struct __sk_buff *skb)
> > {
> > bpf_tail_call_static(skb, &jmp_table, 0);
> > return 0;
> > }
> >
> > static __noinline
> > int subprog_tail2(struct __sk_buff *skb)
> > {
> > bpf_tail_call_static(skb, &jmp_table, 0);
> > return 0;
> > }
> >
> > SEC("tc")
> > int entry(struct __sk_buff *skb)
> > {
> > volatile int ret = 1;
> >
> > count++;
> > subprog_tail1(skb);
> > subprog_tail2(skb);
> >
> > return ret;
> > }
> >
> > char __license[] SEC("license") = "GPL";
> >
> > At run time, the tail_call_cnt in entry() will be propagated to
> > subprog_tail1() and subprog_tail2(). But, when the tail_call_cnt in
> > subprog_tail1() updates when bpf_tail_call_static(), the tail_call_cnt
> > in entry() won't be updated at the same time. As a result, in entry(),
> > when tail_call_cnt in entry() is less than MAX_TAIL_CALL_CNT and
> > subprog_tail1() returns because of MAX_TAIL_CALL_CNT limit,
> > bpf_tail_call_static() in subprog_tail2() is able to run because the
> > tail_call_cnt in subprog_tail2() propagated from entry() is less than
> > MAX_TAIL_CALL_CNT.
> >
> > So, how many tailcalls are there for this case if no error happens?
> >
> > From top-down view, does it look like hierarchy layer and layer?
> >
> > With this view, there will be 2+4+8+...+2^33 = 2^34 - 2 = 17,179,869,182
> > tailcalls for this case.
> >
> > How about there are N subprog_tail() in entry()? There will be almost
> > N^34 tailcalls.
> >
> > Then, in this patch, it resolves this case on x86_64.
> >
> > Instead of propagating tail_call_cnt from caller to callee, it
> > propagates its pointer, tail_call_cnt_ptr, tcc_ptr for short.
> >
> > However, where does it store tail_call_cnt?
> >
> > It stores tail_call_cnt on the stack of bpf prog's caller by the way in
> > previous patch "bpf: Introduce bpf_jit_supports_tail_call_cnt_ptr()".
> > Then, in bpf prog's prologue, it loads tcc_ptr from bpf_tail_call_run_ctx,
> > and restores the original ctx from bpf_tail_call_run_ctx meanwhile.
> >
> > Then, when a tailcall runs, it compares tail_call_cnt accessed by
> > tcc_ptr with MAX_TAIL_CALL_CNT and then increments tail_call_cnt at
> > tcc_ptr.
> >
> > Furthermore, when trampoline is the caller of bpf prog, it is required
> > to prepare tail_call_cnt and tail call run ctx on the stack of the
> > trampoline.
> >
>
> Oh, I missed a case here.
>
> This patch set is unable to provide tcc_ptr for freplace programs that
> use tail calls in bpf2bpf.
>
> How can this approach provide tcc_ptr for freplace programs?
>
> Achieving this is not straightforward. However, it is simpler to disable
> the use of tail calls in bpf2bpf for freplace programs, even though this
> is a desired feature for my project.
>
> Therefore, I will disable it in the v5 patch set.
>
Isn't this a breaking change such that it would effectively be a regression for
any users already using tail_calls in bpf2bpf for freplace programs? And,
correct me if I'm wrong, but aren't those pieces of eBPF essentially considered
UAPI stable (unlike kfuncs)?
I appreciate that this is an esoteric use of eBPF, but as you said, you have a
use case for it, as does my team (although we haven't had a chance to implement
it yet), and if the two of us have use cases for it, I imagine others may have
as well, and some of them might already have done their implementation.
> Thanks,
> Leon
>
* Re: [PATCH bpf-next v4 3/5] bpf, x64: Fix tailcall hierarchy
2024-05-16 18:56 ` Zvi Effron
@ 2024-05-17 15:05 ` Leon Hwang
0 siblings, 0 replies; 9+ messages in thread
From: Leon Hwang @ 2024-05-17 15:05 UTC (permalink / raw)
To: Zvi Effron
Cc: bpf, ast, daniel, andrii, maciej.fijalkowski, jakub, pulehui,
kernel-patches-bot
On 2024/5/17 02:56, Zvi Effron wrote:
> On Thu, May 16, 2024 at 8:28 AM Leon Hwang <hffilwlqm@gmail.com> wrote:
>>
>>
>>
>> On 2024/5/9 23:05, Leon Hwang wrote:
>>> This patch fixes a tailcall issue caused by abusing the tailcall in
>>> bpf2bpf feature.
>>>
[SNIP]
>>>
>>
>> Oh, I missed a case here.
>>
>> This patch set is unable to provide tcc_ptr for freplace programs that
>> use tail calls in bpf2bpf.
>>
>> How can this approach provide tcc_ptr for freplace programs?
>>
>> Achieving this is not straightforward. However, it is simpler to disable
>> the use of tail calls in bpf2bpf for freplace programs, even though this
>> is a desired feature for my project.
>>
>> Therefore, I will disable it in the v5 patch set.
>>
>
> Isn't this a breaking change such that it would effectively be a regression for
> any users already using tail_calls in bpf2bpf for freplace programs? And,
> correct me if I'm wrong, but aren't those pieces of eBPF essentially considered
> UAPI stable (unlike kfuncs)?
Yeah, this is a breaking change. However, I think it's acceptable, as
disabling tail_calls in subprogs has been considered before[0].
[0]
https://lore.kernel.org/bpf/CAADnVQLOswL3BY1s0B28wRZH1PU675S6_2=XknjZKNgyJ=yDxw@mail.gmail.com/
>
> I appreciate that this is an esoteric use of eBPF, but as you said, you have a
> use case for it, as does my team (although we haven't had a chance to implement
> it yet), and if the two of us have use cases for it, I imagine other may have
> as well, and some of them might already have done their implementation.
>
It seems it is a useful feature for us. I haven't used it either,
because of old kernel versions.
So, I figured out another approach to resolve this issue.
Here's the diff just for idea discussion:
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5159c7a229229..b0b6c84874e54 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -273,7 +273,7 @@ struct jit_context {
/* Number of bytes emit_patch() needs to generate instructions */
#define X86_PATCH_SIZE 5
/* Number of bytes that will be skipped on tailcall */
-#define X86_TAIL_CALL_OFFSET (11 + ENDBR_INSN_SIZE)
+#define X86_TAIL_CALL_OFFSET (22 + ENDBR_INSN_SIZE)
static void push_r12(u8 **pprog)
{
@@ -403,6 +403,22 @@ static void emit_cfi(u8 **pprog, u32 hash)
*pprog = prog;
}
+static notrace void bpf_prepare_tail_call_cnt_ptr()
+{
+ /* %rax stores the position to call the original prog. */
+
+ asm (
+ "pushq %r9\n\t" /* Push %r9. */
+ "movq %rax, %r9\n\t" /* Cache calling position. */
+ "xor %eax, %eax\n\t" /* Initialise tail_call_cnt. */
+ "pushq %rax\n\t" /* Push tail_call_cnt. */
+ "movq %rsp, %rax\n\t" /* Make %rax as tcc_ptr. */
+ "callq *%r9\n\t" /* Call the original prog. */
+ "popq %r9\n\t" /* Pop tail_call_cnt. */
+ "popq %r9\n\t" /* Pop %r9. */
+ );
+}
+
/*
* Emit x86-64 prologue code for BPF program.
* bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
@@ -410,9 +426,9 @@ static void emit_cfi(u8 **pprog, u32 hash)
*/
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
bool tail_call_reachable, bool is_subprog,
- bool is_exception_cb)
+ bool is_exception_cb, u8 *image)
{
- u8 *prog = *pprog;
+ u8 *prog = *pprog, *start = *pprog;
emit_cfi(&prog, is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash);
/* BPF trampoline can be made to work without these nops,
@@ -420,14 +436,16 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
*/
emit_nops(&prog, X86_PATCH_SIZE);
if (!ebpf_from_cbpf) {
- if (tail_call_reachable && !is_subprog)
- /* When it's the entry of the whole tailcall context,
- * zeroing rax means initialising tail_call_cnt.
- */
- EMIT2(0x31, 0xC0); /* xor eax, eax */
- else
+ if (tail_call_reachable && !is_subprog) {
+ /* mov rax, offset */
+ u32 offset = image + (prog - start) + 13;
+ EMIT4_off32(0x48, 0x8B, 0x04, 0x25, offset);
+ /* call bpf_prepare_tail_call_cnt_ptr */
+ emit_call(&prog, bpf_prepare_tail_call_cnt_ptr, offset-5);
+ } else {
/* Keep the same instruction layout. */
- EMIT2(0x66, 0x90); /* nop2 */
+ emit_nops(&prog, 13);
+ }
}
/* Exception callback receives FP as third parameter */
if (is_exception_cb) {
@@ -1344,7 +1362,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
emit_prologue(&prog, bpf_prog->aux->stack_depth,
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
- bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
+ bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb,
+ image);
/* Exception callback will clobber callee regs for its own use, and
* restore the original callee regs from main prog's stack frame.
*/
Unlike the current patch set, where the bpf prog's caller prepares
tcc_ptr, this approach prepares tcc_ptr by calling a function in the
prologue that reserves tail_call_cnt memory on the stack and then
calls the remaining part of the bpf prog. At the end of the prologue,
rax is tcc_ptr, too.
This is inspired by the original RFC patch[0]. It also avoids the
unwind-breaking issue, because it uses a real function call.
[0] https://lore.kernel.org/bpf/20240104142226.87869-3-hffilwlqm@gmail.com/
However, it introduces an indirect call in
bpf_prepare_tail_call_cnt_ptr(), which costs performance because of
retpoline. To improve performance here, the bpf dispatcher should be
considered, like XDP does.
Thanks,
Leon
* [PATCH bpf-next v4 4/5] bpf, arm64: Fix tailcall hierarchy
2024-05-09 15:05 [PATCH bpf-next v4 0/5] bpf: Fix tailcall hierarchy Leon Hwang
` (2 preceding siblings ...)
2024-05-09 15:05 ` [PATCH bpf-next v4 3/5] bpf, x64: Fix tailcall hierarchy Leon Hwang
@ 2024-05-09 15:05 ` Leon Hwang
2024-05-09 15:05 ` [PATCH bpf-next v4 5/5] selftests/bpf: Add testcases for tailcall hierarchy fixing Leon Hwang
4 siblings, 0 replies; 9+ messages in thread
From: Leon Hwang @ 2024-05-09 15:05 UTC (permalink / raw)
To: bpf
Cc: ast, daniel, andrii, maciej.fijalkowski, jakub, pulehui,
hffilwlqm, kernel-patches-bot
Like the way of "bpf, x64: Fix tailcall hierarchy", this patch fixes
this issue on arm64.
At prologue, it loads tail_call_cnt_ptr from bpf_tail_call_run_ctx to
TCCNT_PTR register, and restores the original ctx from
bpf_tail_call_run_ctx to X0 register meanwhile.
Then, when a tailcall runs:
1. load tail_call_cnt from tail_call_cnt_ptr
2. compare tail_call_cnt with MAX_TAIL_CALL_CNT
3. increment tail_call_cnt
4. store tail_call_cnt by tail_call_cnt_ptr
Furthermore, when trampoline is the caller of bpf prog, it is required
to prepare tail_call_cnt and tail call run ctx on the stack of the
trampoline.
Finally, enable bpf_jit_supports_tail_call_cnt_ptr() to use
bpf_tail_call_run_ctx in __bpf_prog_run().
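As a rough illustration (plain C, not the emitted A64 instructions;
tail_call_allowed() is an invented name), the four steps above amount
to the following, with tcc_ptr being what the prologue cached in the
TCCNT_PTR register:
#define MAX_TAIL_CALL_CNT 33

static int tail_call_allowed(unsigned int *tcc_ptr)
{
        unsigned int tcc = *tcc_ptr;            /* 1. load via tail_call_cnt_ptr */

        if (tcc >= MAX_TAIL_CALL_CNT)           /* 2. compare with the limit     */
                return 0;                       /*    i.e. goto out              */
        tcc++;                                  /* 3. increment                  */
        *tcc_ptr = tcc;                         /* 4. store back via the pointer */
        return 1;                               /* proceed with the tailcall     */
}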
Fixes: d4609a5d8c70 ("bpf, arm64: Keep tail call count across bpf2bpf calls")
Signed-off-by: Leon Hwang <hffilwlqm@gmail.com>
---
arch/arm64/net/bpf_jit_comp.c | 63 +++++++++++++++++++++++++++--------
1 file changed, 50 insertions(+), 13 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 53347d4217f4b..1160b3619f821 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -26,7 +26,7 @@
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
-#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
+#define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
#define FP_BOTTOM (MAX_BPF_JIT_REG + 4)
#define ARENA_VM_START (MAX_BPF_JIT_REG + 5)
@@ -63,8 +63,8 @@ static const int bpf2a64[] = {
[TMP_REG_1] = A64_R(10),
[TMP_REG_2] = A64_R(11),
[TMP_REG_3] = A64_R(12),
- /* tail_call_cnt */
- [TCALL_CNT] = A64_R(26),
+ /* tail_call_cnt_ptr */
+ [TCCNT_PTR] = A64_R(26),
/* temporary register for blinding constants */
[BPF_REG_AX] = A64_R(9),
[FP_BOTTOM] = A64_R(27),
@@ -296,19 +296,20 @@ static bool is_lsi_offset(int offset, int scale)
#define POKE_OFFSET (BTI_INSNS + 1)
/* Tail call offset to jump into */
-#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8)
+#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 9)
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
bool is_exception_cb, u64 arena_vm_start)
{
const struct bpf_prog *prog = ctx->prog;
const bool is_main_prog = !bpf_is_subprog(prog);
+ const u8 r1 = bpf2a64[BPF_REG_1];
const u8 r6 = bpf2a64[BPF_REG_6];
const u8 r7 = bpf2a64[BPF_REG_7];
const u8 r8 = bpf2a64[BPF_REG_8];
const u8 r9 = bpf2a64[BPF_REG_9];
const u8 fp = bpf2a64[BPF_REG_FP];
- const u8 tcc = bpf2a64[TCALL_CNT];
+ const u8 ptr = bpf2a64[TCCNT_PTR];
const u8 fpb = bpf2a64[FP_BOTTOM];
const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
const int idx0 = ctx->idx;
@@ -359,7 +360,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
/* Save callee-saved registers */
emit(A64_PUSH(r6, r7, A64_SP), ctx);
emit(A64_PUSH(r8, r9, A64_SP), ctx);
- emit(A64_PUSH(fp, tcc, A64_SP), ctx);
+ emit(A64_PUSH(fp, ptr, A64_SP), ctx);
emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);
} else {
/*
@@ -381,8 +382,15 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
emit(A64_MOV(1, fp, A64_SP), ctx);
if (!ebpf_from_cbpf && is_main_prog) {
- /* Initialize tail_call_cnt */
- emit(A64_MOVZ(1, tcc, 0, 0), ctx);
+ if (prog->aux->tail_call_reachable) {
+ /* Cache tcc_ptr. */
+ emit(A64_LDR64I(ptr, r1, 8), ctx);
+ /* Restore the original ctx. */
+ emit(A64_LDR64I(r1, r1, 0), ctx);
+ } else {
+ emit(A64_NOP, ctx);
+ emit(A64_NOP, ctx);
+ }
cur_offset = ctx->idx - idx0;
if (cur_offset != PROLOGUE_OFFSET) {
@@ -432,7 +440,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
const u8 tmp = bpf2a64[TMP_REG_1];
const u8 prg = bpf2a64[TMP_REG_2];
- const u8 tcc = bpf2a64[TCALL_CNT];
+ const u8 ptr = bpf2a64[TCCNT_PTR];
+ const u8 tcc = bpf2a64[TMP_REG_3];
const int idx0 = ctx->idx;
#define cur_offset (ctx->idx - idx0)
#define jmp_offset (out_offset - (cur_offset))
@@ -449,14 +458,16 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
/*
- * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
+ * if ((*tcc_ptr) >= MAX_TAIL_CALL_CNT)
* goto out;
- * tail_call_cnt++;
+ * (*tcc_ptr)++;
*/
emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
+ emit(A64_LDR32I(tcc, ptr, 0), ctx);
emit(A64_CMP(1, tcc, tmp), ctx);
emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
+ emit(A64_STR32I(tcc, ptr, 0), ctx);
/* prog = array->ptrs[index];
* if (prog == NULL)
@@ -1890,15 +1901,28 @@ bool bpf_jit_supports_subprog_tailcalls(void)
return true;
}
+/* Indicate the JIT backend supports tail call count pointer in tailcall context. */
+bool bpf_jit_supports_tail_call_cnt_ptr(void)
+{
+ return true;
+}
+
static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
int args_off, int retval_off, int run_ctx_off,
bool save_ret)
{
+ int tail_call_run_ctx_off = offsetof(struct bpf_tramp_run_ctx, tail_call_run_ctx);
+ int tcc_ptr_off = tail_call_run_ctx_off + offsetof(struct bpf_tail_call_run_ctx,
+ tail_call_cnt_ptr);
+ int tail_call_cnt_off = offsetof(struct bpf_tramp_run_ctx, tail_call_cnt);
+ int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
+ struct bpf_prog *p = l->link.prog;
+ const u8 tmp = bpf2a64[TMP_REG_1];
+ const u8 r1 = bpf2a64[BPF_REG_1];
+ const u8 sp = A64_SP;
__le32 *branch;
u64 enter_prog;
u64 exit_prog;
- struct bpf_prog *p = l->link.prog;
- int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
enter_prog = (u64)bpf_trampoline_enter(p);
exit_prog = (u64)bpf_trampoline_exit(p);
@@ -1936,6 +1960,19 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
if (!p->jited)
emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
+ if (p->aux->use_tail_call_run_ctx) {
+ /* Cache the original ctx. */
+ emit(A64_STR64I(r1, sp, run_ctx_off + tail_call_run_ctx_off), ctx);
+ /* Update r1 as tcc_ptr. */
+ emit(A64_ADD_I(1, r1, sp, run_ctx_off + tail_call_cnt_off), ctx);
+ /* Clear tail_call_cnt. */
+ emit_a64_mov_i(0, tmp, 0, ctx);
+ emit(A64_STR32I(tmp, r1, 0), ctx);
+ /* Cache tcc_ptr. */
+ emit(A64_STR64I(r1, sp, run_ctx_off + tcc_ptr_off), ctx);
+ /* Update r1 as tail call run ctx. */
+ emit(A64_ADD_I(1, r1, sp, run_ctx_off + tail_call_run_ctx_off), ctx);
+ }
emit_call((const u64)p->bpf_func, ctx);
--
2.44.0
* [PATCH bpf-next v4 5/5] selftests/bpf: Add testcases for tailcall hierarchy fixing
2024-05-09 15:05 [PATCH bpf-next v4 0/5] bpf: Fix tailcall hierarchy Leon Hwang
` (3 preceding siblings ...)
2024-05-09 15:05 ` [PATCH bpf-next v4 4/5] bpf, arm64: " Leon Hwang
@ 2024-05-09 15:05 ` Leon Hwang
4 siblings, 0 replies; 9+ messages in thread
From: Leon Hwang @ 2024-05-09 15:05 UTC (permalink / raw)
To: bpf
Cc: ast, daniel, andrii, maciej.fijalkowski, jakub, pulehui,
hffilwlqm, kernel-patches-bot
Add some test cases to confirm the tailcall hierarchy issue has been fixed.
On x64, the selftest results are:
cd tools/testing/selftests/bpf && ./test_progs -t tailcalls
319/18 tailcalls/tailcall_bpf2bpf_hierarchy_1:OK
319/19 tailcalls/tailcall_bpf2bpf_hierarchy_fentry:OK
319/20 tailcalls/tailcall_bpf2bpf_hierarchy_fexit:OK
319/21 tailcalls/tailcall_bpf2bpf_hierarchy_fentry_fexit:OK
319/22 tailcalls/tailcall_bpf2bpf_hierarchy_fentry_entry:OK
319/23 tailcalls/tailcall_bpf2bpf_hierarchy_2:OK
319/24 tailcalls/tailcall_bpf2bpf_hierarchy_3:OK
319 tailcalls:OK
Summary: 1/24 PASSED, 0 SKIPPED, 0 FAILED
On arm64, the selftest results are:
cd tools/testing/selftests/bpf && ./test_progs -t tailcalls
323/18 tailcalls/tailcall_bpf2bpf_hierarchy_1:OK
323/19 tailcalls/tailcall_bpf2bpf_hierarchy_fentry:OK
323/20 tailcalls/tailcall_bpf2bpf_hierarchy_fexit:OK
323/21 tailcalls/tailcall_bpf2bpf_hierarchy_fentry_fexit:OK
323/22 tailcalls/tailcall_bpf2bpf_hierarchy_fentry_entry:OK
323/23 tailcalls/tailcall_bpf2bpf_hierarchy_2:OK
323/24 tailcalls/tailcall_bpf2bpf_hierarchy_3:OK
323 tailcalls:OK
Summary: 1/24 PASSED, 0 SKIPPED, 0 FAILED
Signed-off-by: Leon Hwang <hffilwlqm@gmail.com>
---
.../selftests/bpf/prog_tests/tailcalls.c | 479 ++++++++++++++++++
.../bpf/progs/tailcall_bpf2bpf_hierarchy1.c | 34 ++
.../bpf/progs/tailcall_bpf2bpf_hierarchy2.c | 55 ++
.../bpf/progs/tailcall_bpf2bpf_hierarchy3.c | 46 ++
.../progs/tailcall_bpf2bpf_hierarchy_fentry.c | 35 ++
tools/testing/selftests/bpf/progs/tc_dummy.c | 12 +
6 files changed, 661 insertions(+)
create mode 100644 tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
create mode 100644 tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
create mode 100644 tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
create mode 100644 tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
create mode 100644 tools/testing/selftests/bpf/progs/tc_dummy.c
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 59993fc9c0d7e..d67ef079fc79e 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -1187,6 +1187,471 @@ static void test_tailcall_poke(void)
tailcall_poke__destroy(call);
}
+static void test_tailcall_hierarchy_count(const char *which, bool test_fentry,
+ bool test_fexit,
+ bool test_fentry_entry)
+{
+ int err, map_fd, prog_fd, main_data_fd, fentry_data_fd, fexit_data_fd, i, val;
+ struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL;
+ struct bpf_link *fentry_link = NULL, *fexit_link = NULL;
+ struct bpf_program *prog, *fentry_prog;
+ struct bpf_map *prog_array, *data_map;
+ int fentry_prog_fd;
+ char buff[128] = {};
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
+ if (!ASSERT_OK(err, "load obj"))
+ return;
+
+ prog = bpf_object__find_program_by_name(obj, "entry");
+ if (!ASSERT_OK_PTR(prog, "find entry prog"))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
+ goto out;
+
+ if (test_fentry_entry) {
+ fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_hierarchy_fentry.bpf.o",
+ NULL);
+ if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file"))
+ goto out;
+
+ fentry_prog = bpf_object__find_program_by_name(fentry_obj,
+ "fentry");
+ if (!ASSERT_OK_PTR(prog, "find fentry prog"))
+ goto out;
+
+ err = bpf_program__set_attach_target(fentry_prog, prog_fd,
+ "entry");
+ if (!ASSERT_OK(err, "set_attach_target entry"))
+ goto out;
+
+ err = bpf_object__load(fentry_obj);
+ if (!ASSERT_OK(err, "load fentry_obj"))
+ goto out;
+
+ fentry_link = bpf_program__attach_trace(fentry_prog);
+ if (!ASSERT_OK_PTR(fentry_link, "attach_trace"))
+ goto out;
+
+ fentry_prog_fd = bpf_program__fd(fentry_prog);
+ if (!ASSERT_GE(fentry_prog_fd, 0, "fentry_prog_fd"))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(fentry_obj, "jmp_table");
+ if (!ASSERT_OK_PTR(prog_array, "find jmp_table"))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (!ASSERT_GE(map_fd, 0, "map_fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &fentry_prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "update jmp_table"))
+ goto out;
+
+ data_map = bpf_object__find_map_by_name(fentry_obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find data_map"))
+ goto out;
+
+ } else {
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (!ASSERT_OK_PTR(prog_array, "find jmp_table"))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (!ASSERT_GE(map_fd, 0, "map_fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "update jmp_table"))
+ goto out;
+
+ data_map = bpf_object__find_map_by_name(obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find data_map"))
+ goto out;
+ }
+
+ if (test_fentry) {
+ fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_fentry.bpf.o",
+ NULL);
+ if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(fentry_obj, "fentry");
+ if (!ASSERT_OK_PTR(prog, "find fentry prog"))
+ goto out;
+
+ err = bpf_program__set_attach_target(prog, prog_fd,
+ "subprog_tail");
+ if (!ASSERT_OK(err, "set_attach_target subprog_tail"))
+ goto out;
+
+ err = bpf_object__load(fentry_obj);
+ if (!ASSERT_OK(err, "load fentry_obj"))
+ goto out;
+
+ fentry_link = bpf_program__attach_trace(prog);
+ if (!ASSERT_OK_PTR(fentry_link, "attach_trace"))
+ goto out;
+ }
+
+ if (test_fexit) {
+ fexit_obj = bpf_object__open_file("tailcall_bpf2bpf_fexit.bpf.o",
+ NULL);
+ if (!ASSERT_OK_PTR(fexit_obj, "open fexit_obj file"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(fexit_obj, "fexit");
+ if (!ASSERT_OK_PTR(prog, "find fexit prog"))
+ goto out;
+
+ err = bpf_program__set_attach_target(prog, prog_fd,
+ "subprog_tail");
+ if (!ASSERT_OK(err, "set_attach_target subprog_tail"))
+ goto out;
+
+ err = bpf_object__load(fexit_obj);
+ if (!ASSERT_OK(err, "load fexit_obj"))
+ goto out;
+
+ fexit_link = bpf_program__attach_trace(prog);
+ if (!ASSERT_OK_PTR(fexit_link, "attach_trace"))
+ goto out;
+ }
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
+
+ main_data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_GE(main_data_fd, 0, "main_data_fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(main_data_fd, &i, &val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val, 34, "tailcall count");
+
+ if (test_fentry) {
+ data_map = bpf_object__find_map_by_name(fentry_obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall_bpf2bpf_fentry.bss map"))
+ goto out;
+
+ fentry_data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_GE(fentry_data_fd, 0,
+ "find tailcall_bpf2bpf_fentry.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(fentry_data_fd, &i, &val);
+ ASSERT_OK(err, "fentry count");
+ ASSERT_EQ(val, 68, "fentry count");
+ }
+
+ if (test_fexit) {
+ data_map = bpf_object__find_map_by_name(fexit_obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall_bpf2bpf_fexit.bss map"))
+ goto out;
+
+ fexit_data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_GE(fexit_data_fd, 0,
+ "find tailcall_bpf2bpf_fexit.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(fexit_data_fd, &i, &val);
+ ASSERT_OK(err, "fexit count");
+ ASSERT_EQ(val, 68, "fexit count");
+ }
+
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (!ASSERT_OK(err, "delete_elem from jmp_table"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
+
+ i = 0;
+ err = bpf_map_lookup_elem(main_data_fd, &i, &val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val, 35, "tailcall count");
+
+ if (test_fentry) {
+ i = 0;
+ err = bpf_map_lookup_elem(fentry_data_fd, &i, &val);
+ ASSERT_OK(err, "fentry count");
+ ASSERT_EQ(val, 70, "fentry count");
+ }
+
+ if (test_fexit) {
+ i = 0;
+ err = bpf_map_lookup_elem(fexit_data_fd, &i, &val);
+ ASSERT_OK(err, "fexit count");
+ ASSERT_EQ(val, 70, "fexit count");
+ }
+
+out:
+ bpf_link__destroy(fentry_link);
+ bpf_link__destroy(fexit_link);
+ bpf_object__close(fentry_obj);
+ bpf_object__close(fexit_obj);
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_hierarchy_1 checks that the count value of the tail
+ * call limit enforcement matches with expectations when tailcalls are preceded
+ * with two bpf2bpf calls.
+ *
+ * subprog --tailcall-> entry
+ * entry <
+ * subprog --tailcall-> entry
+ */
+static void test_tailcall_bpf2bpf_hierarchy_1(void)
+{
+ test_tailcall_hierarchy_count("tailcall_bpf2bpf_hierarchy1.bpf.o",
+ false, false, false);
+}
+
+/* test_tailcall_bpf2bpf_hierarchy_fentry checks that the count value of the
+ * tail call limit enforcement matches with expectations when tailcalls are
+ * preceded with two bpf2bpf calls, and the two subprogs are traced by fentry.
+ */
+static void test_tailcall_bpf2bpf_hierarchy_fentry(void)
+{
+ test_tailcall_hierarchy_count("tailcall_bpf2bpf_hierarchy1.bpf.o",
+ true, false, false);
+}
+
+/* test_tailcall_bpf2bpf_hierarchy_fexit checks that the count value of the tail
+ * call limit enforcement matches with expectations when tailcalls are preceded
+ * with two bpf2bpf calls, and the two subprogs are traced by fexit.
+ */
+static void test_tailcall_bpf2bpf_hierarchy_fexit(void)
+{
+ test_tailcall_hierarchy_count("tailcall_bpf2bpf_hierarchy1.bpf.o",
+ false, true, false);
+}
+
+/* test_tailcall_bpf2bpf_hierarchy_fentry_fexit checks that the count value of
+ * the tail call limit enforcement matches with expectations when tailcalls are
+ * preceded with two bpf2bpf calls, and the two subprogs are traced by both
+ * fentry and fexit.
+ */
+static void test_tailcall_bpf2bpf_hierarchy_fentry_fexit(void)
+{
+ test_tailcall_hierarchy_count("tailcall_bpf2bpf_hierarchy1.bpf.o",
+ true, true, false);
+}
+
+/* test_tailcall_bpf2bpf_hierarchy_fentry_entry checks that the count value of
+ * the tail call limit enforcement matches with expectations when tailcalls are
+ * preceded with two bpf2bpf calls in fentry.
+ */
+static void test_tailcall_bpf2bpf_hierarchy_fentry_entry(void)
+{
+ test_tailcall_hierarchy_count("tc_dummy.bpf.o", false, false, true);
+}
+
+/* test_tailcall_bpf2bpf_hierarchy_2 checks that the count value of the tail
+ * call limit enforcement matches with expectations:
+ *
+ * subprog_tail0 --tailcall-> classifier_0 -> subprog_tail0
+ * entry <
+ * subprog_tail1 --tailcall-> classifier_1 -> subprog_tail1
+ */
+static void test_tailcall_bpf2bpf_hierarchy_2(void)
+{
+ int err, map_fd, prog_fd, data_fd, main_fd, i, val[2];
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_object *obj = NULL;
+ struct bpf_program *prog;
+ char buff[128] = {};
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall_bpf2bpf_hierarchy2.bpf.o",
+ BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (!ASSERT_OK(err, "load obj"))
+ return;
+
+ prog = bpf_object__find_program_by_name(obj, "entry");
+ if (!ASSERT_OK_PTR(prog, "find entry prog"))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (!ASSERT_GE(main_fd, 0, "main_fd"))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (!ASSERT_OK_PTR(prog_array, "find jmp_table map"))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (!ASSERT_GE(map_fd, 0, "find jmp_table map fd"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(obj, "classifier_0");
+ if (!ASSERT_OK_PTR(prog, "find classifier_0 prog"))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (!ASSERT_GE(prog_fd, 0, "find classifier_0 prog fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "update jmp_table"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(obj, "classifier_1");
+ if (!ASSERT_OK_PTR(prog, "find classifier_1 prog"))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (!ASSERT_GE(prog_fd, 0, "find classifier_1 prog fd"))
+ goto out;
+
+ i = 1;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "update jmp_table"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
+
+ data_map = bpf_object__find_map_by_name(obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find .bss map"))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_GE(data_fd, 0, "find .bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "tailcall counts");
+ ASSERT_EQ(val[0], 33, "tailcall count0");
+ ASSERT_EQ(val[1], 0, "tailcall count1");
+
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_hierarchy_3 checks that the count value of the tail
+ * call limit enforcement matches with expectations:
+ *
+ * subprog with jmp_table0 to classifier_0
+ * entry --tailcall-> classifier_0 <
+ * subprog with jmp_table1 to classifier_0
+ */
+static void test_tailcall_bpf2bpf_hierarchy_3(void)
+{
+ int err, map_fd, prog_fd, data_fd, main_fd, i, val;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_object *obj = NULL;
+ struct bpf_program *prog;
+ char buff[128] = {};
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall_bpf2bpf_hierarchy3.bpf.o",
+ BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (!ASSERT_OK(err, "load obj"))
+ return;
+
+ prog = bpf_object__find_program_by_name(obj, "entry");
+ if (!ASSERT_OK_PTR(prog, "find entry prog"))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (!ASSERT_GE(main_fd, 0, "main_fd"))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table0");
+ if (!ASSERT_OK_PTR(prog_array, "find jmp_table0 map"))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (!ASSERT_GE(map_fd, 0, "find jmp_table0 map fd"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(obj, "classifier_0");
+ if (!ASSERT_OK_PTR(prog, "find classifier_0 prog"))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (!ASSERT_GE(prog_fd, 0, "find classifier_0 prog fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "update jmp_table0"))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table1");
+ if (!ASSERT_OK_PTR(prog_array, "find jmp_table1 map"))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (!ASSERT_GE(map_fd, 0, "find jmp_table1 map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "update jmp_table1"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
+
+ data_map = bpf_object__find_map_by_name(obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find .bss map"))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_GE(data_fd, 0, "find .bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val, 33, "tailcall count");
+
+out:
+ bpf_object__close(obj);
+}
+
void test_tailcalls(void)
{
if (test__start_subtest("tailcall_1"))
@@ -1223,4 +1688,18 @@ void test_tailcalls(void)
test_tailcall_bpf2bpf_fentry_entry();
if (test__start_subtest("tailcall_poke"))
test_tailcall_poke();
+ if (test__start_subtest("tailcall_bpf2bpf_hierarchy_1"))
+ test_tailcall_bpf2bpf_hierarchy_1();
+ if (test__start_subtest("tailcall_bpf2bpf_hierarchy_fentry"))
+ test_tailcall_bpf2bpf_hierarchy_fentry();
+ if (test__start_subtest("tailcall_bpf2bpf_hierarchy_fexit"))
+ test_tailcall_bpf2bpf_hierarchy_fexit();
+ if (test__start_subtest("tailcall_bpf2bpf_hierarchy_fentry_fexit"))
+ test_tailcall_bpf2bpf_hierarchy_fentry_fexit();
+ if (test__start_subtest("tailcall_bpf2bpf_hierarchy_fentry_entry"))
+ test_tailcall_bpf2bpf_hierarchy_fentry_entry();
+ if (test__start_subtest("tailcall_bpf2bpf_hierarchy_2"))
+ test_tailcall_bpf2bpf_hierarchy_2();
+ if (test__start_subtest("tailcall_bpf2bpf_hierarchy_3"))
+ test_tailcall_bpf2bpf_hierarchy_3();
}
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
new file mode 100644
index 0000000000000..327ca395e8601
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int count = 0;
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ return 0;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+ int ret = 1;
+
+ count++;
+ subprog_tail(skb);
+ subprog_tail(skb);
+
+ return ret;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
new file mode 100644
index 0000000000000..b84541546082e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int count0 = 0;
+int count1 = 0;
+
+static __noinline
+int subprog_tail0(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ return 0;
+}
+
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
+{
+ count0++;
+ subprog_tail0(skb);
+ return 0;
+}
+
+static __noinline
+int subprog_tail1(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ return 0;
+}
+
+SEC("tc")
+int classifier_1(struct __sk_buff *skb)
+{
+ count1++;
+ subprog_tail1(skb);
+ return 0;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+ subprog_tail0(skb);
+ subprog_tail1(skb);
+
+ return 1;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
new file mode 100644
index 0000000000000..6398a1d277fc7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table0 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table1 SEC(".maps");
+
+int count = 0;
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb, void *jmp_table)
+{
+ bpf_tail_call_static(skb, jmp_table, 0);
+ return 0;
+}
+
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
+{
+ count++;
+ subprog_tail(skb, &jmp_table0);
+ subprog_tail(skb, &jmp_table1);
+ return 1;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table0, 0);
+
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
new file mode 100644
index 0000000000000..c87f9ca982d3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Leon Hwang */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int count = 0;
+
+static __noinline
+int subprog_tail(void *ctx)
+{
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ return 0;
+}
+
+SEC("fentry/dummy")
+int BPF_PROG(fentry, struct sk_buff *skb)
+{
+ count++;
+ subprog_tail(ctx);
+ subprog_tail(ctx);
+
+ return 0;
+}
+
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tc_dummy.c b/tools/testing/selftests/bpf/progs/tc_dummy.c
new file mode 100644
index 0000000000000..69a3d0dc87879
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tc_dummy.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+ return 1;
+}
+
+char __license[] SEC("license") = "GPL";
--
2.44.0