* [PATCHv3 bpf-next 04/24] bpf: Add struct bpf_tramp_node object
From: Jiri Olsa @ 2026-03-16 7:51 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: Hengqi Chen, bpf, linux-trace-kernel, Martin KaFai Lau,
Eduard Zingerman, Song Liu, Yonghong Song, Menglong Dong,
Steven Rostedt
In-Reply-To: <20260316075138.465430-1-jolsa@kernel.org>
Adding struct bpf_tramp_node to decouple the link from the trampoline
attachment info.
At the moment the object for attaching a bpf program to the trampoline is
'struct bpf_tramp_link':
struct bpf_tramp_link {
struct bpf_link link;
struct hlist_node tramp_hlist;
u64 cookie;
};
The link holds the bpf_prog pointer and forces a one link - one program
binding. In the following changes we want to attach a program to multiple
trampolines while keeping just one bpf_link object.
Splitting struct bpf_tramp_link into:
struct bpf_tramp_link {
struct bpf_link link;
struct bpf_tramp_node node;
};
struct bpf_tramp_node {
struct bpf_link *link;
struct hlist_node tramp_hlist;
u64 cookie;
};
The 'struct bpf_tramp_link' defines the standard single-trampoline link,
and 'struct bpf_tramp_node' is the trampoline attachment object with a
pointer to the bpf_link object.
This will allow us to define a link for multiple trampolines, like:
struct bpf_tracing_multi_link {
struct bpf_link link;
...
int nodes_cnt;
struct bpf_tracing_multi_node nodes[] __counted_by(nodes_cnt);
};
Cc: Hengqi Chen <hengqi.chen@gmail.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
arch/arm64/net/bpf_jit_comp.c | 58 +++++++++---------
arch/loongarch/net/bpf_jit.c | 44 ++++++-------
arch/powerpc/net/bpf_jit_comp.c | 46 +++++++-------
arch/riscv/net/bpf_jit_comp64.c | 52 ++++++++--------
arch/s390/net/bpf_jit_comp.c | 44 ++++++-------
arch/x86/net/bpf_jit_comp.c | 54 ++++++++--------
include/linux/bpf.h | 60 +++++++++++-------
kernel/bpf/bpf_struct_ops.c | 27 ++++----
kernel/bpf/syscall.c | 39 ++++++------
kernel/bpf/trampoline.c | 105 ++++++++++++++++----------------
net/bpf/bpf_dummy_struct_ops.c | 14 ++---
11 files changed, 281 insertions(+), 262 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index adf84962d579..6d08a6f08a0c 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2288,24 +2288,24 @@ bool bpf_jit_supports_subprog_tailcalls(void)
return true;
}
-static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
+static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_node *node,
int bargs_off, int retval_off, int run_ctx_off,
bool save_ret)
{
__le32 *branch;
u64 enter_prog;
u64 exit_prog;
- struct bpf_prog *p = l->link.prog;
+ struct bpf_prog *p = node->link->prog;
int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
enter_prog = (u64)bpf_trampoline_enter(p);
exit_prog = (u64)bpf_trampoline_exit(p);
- if (l->cookie == 0) {
+ if (node->cookie == 0) {
/* if cookie is zero, one instruction is enough to store it */
emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx);
} else {
- emit_a64_mov_i64(A64_R(10), l->cookie, ctx);
+ emit_a64_mov_i64(A64_R(10), node->cookie, ctx);
emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off),
ctx);
}
@@ -2355,7 +2355,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
emit_call(exit_prog, ctx);
}
-static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
+static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_nodes *tn,
int bargs_off, int retval_off, int run_ctx_off,
__le32 **branches)
{
@@ -2365,8 +2365,8 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
* Set this to 0 to avoid confusing the program.
*/
emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
- for (i = 0; i < tl->nr_links; i++) {
- invoke_bpf_prog(ctx, tl->links[i], bargs_off, retval_off,
+ for (i = 0; i < tn->nr_nodes; i++) {
+ invoke_bpf_prog(ctx, tn->nodes[i], bargs_off, retval_off,
run_ctx_off, true);
/* if (*(u64 *)(sp + retval_off) != 0)
* goto do_fexit;
@@ -2497,10 +2497,10 @@ static void restore_args(struct jit_ctx *ctx, int bargs_off, int nregs)
}
}
-static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links)
+static bool is_struct_ops_tramp(const struct bpf_tramp_nodes *fentry_nodes)
{
- return fentry_links->nr_links == 1 &&
- fentry_links->links[0]->link.type == BPF_LINK_TYPE_STRUCT_OPS;
+ return fentry_nodes->nr_nodes == 1 &&
+ fentry_nodes->nodes[0]->link->type == BPF_LINK_TYPE_STRUCT_OPS;
}
static void store_func_meta(struct jit_ctx *ctx, u64 func_meta, int func_meta_off)
@@ -2521,7 +2521,7 @@ static void store_func_meta(struct jit_ctx *ctx, u64 func_meta, int func_meta_of
*
*/
static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
- struct bpf_tramp_links *tlinks, void *func_addr,
+ struct bpf_tramp_nodes *tnodes, void *func_addr,
const struct btf_func_model *m,
const struct arg_aux *a,
u32 flags)
@@ -2537,14 +2537,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
int run_ctx_off;
int oargs_off;
int nfuncargs;
- struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
- struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
- struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN];
bool save_ret;
__le32 **branches = NULL;
bool is_struct_ops = is_struct_ops_tramp(fentry);
int cookie_off, cookie_cnt, cookie_bargs_off;
- int fsession_cnt = bpf_fsession_cnt(tlinks);
+ int fsession_cnt = bpf_fsession_cnt(tnodes);
u64 func_meta;
/* trampoline stack layout:
@@ -2590,7 +2590,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
cookie_off = stack_size;
/* room for session cookies */
- cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
+ cookie_cnt = bpf_fsession_cookie_cnt(tnodes);
stack_size += cookie_cnt * 8;
ip_off = stack_size;
@@ -2687,20 +2687,20 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
}
cookie_bargs_off = (bargs_off - cookie_off) / 8;
- for (i = 0; i < fentry->nr_links; i++) {
- if (bpf_prog_calls_session_cookie(fentry->links[i])) {
+ for (i = 0; i < fentry->nr_nodes; i++) {
+ if (bpf_prog_calls_session_cookie(fentry->nodes[i])) {
u64 meta = func_meta | (cookie_bargs_off << BPF_TRAMP_COOKIE_INDEX_SHIFT);
store_func_meta(ctx, meta, func_meta_off);
cookie_bargs_off--;
}
- invoke_bpf_prog(ctx, fentry->links[i], bargs_off,
+ invoke_bpf_prog(ctx, fentry->nodes[i], bargs_off,
retval_off, run_ctx_off,
flags & BPF_TRAMP_F_RET_FENTRY_RET);
}
- if (fmod_ret->nr_links) {
- branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *),
+ if (fmod_ret->nr_nodes) {
+ branches = kcalloc(fmod_ret->nr_nodes, sizeof(__le32 *),
GFP_KERNEL);
if (!branches)
return -ENOMEM;
@@ -2724,7 +2724,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
}
/* update the branches saved in invoke_bpf_mod_ret with cbnz */
- for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) {
+ for (i = 0; i < fmod_ret->nr_nodes && ctx->image != NULL; i++) {
int offset = &ctx->image[ctx->idx] - branches[i];
*branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset));
}
@@ -2735,14 +2735,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
store_func_meta(ctx, func_meta, func_meta_off);
cookie_bargs_off = (bargs_off - cookie_off) / 8;
- for (i = 0; i < fexit->nr_links; i++) {
- if (bpf_prog_calls_session_cookie(fexit->links[i])) {
+ for (i = 0; i < fexit->nr_nodes; i++) {
+ if (bpf_prog_calls_session_cookie(fexit->nodes[i])) {
u64 meta = func_meta | (cookie_bargs_off << BPF_TRAMP_COOKIE_INDEX_SHIFT);
store_func_meta(ctx, meta, func_meta_off);
cookie_bargs_off--;
}
- invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off,
+ invoke_bpf_prog(ctx, fexit->nodes[i], bargs_off, retval_off,
run_ctx_off, false);
}
@@ -2800,7 +2800,7 @@ bool bpf_jit_supports_fsession(void)
}
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *func_addr)
+ struct bpf_tramp_nodes *tnodes, void *func_addr)
{
struct jit_ctx ctx = {
.image = NULL,
@@ -2814,7 +2814,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
if (ret < 0)
return ret;
- ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, m, &aaux, flags);
+ ret = prepare_trampoline(&ctx, &im, tnodes, func_addr, m, &aaux, flags);
if (ret < 0)
return ret;
@@ -2838,7 +2838,7 @@ int arch_protect_bpf_trampoline(void *image, unsigned int size)
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
void *ro_image_end, const struct btf_func_model *m,
- u32 flags, struct bpf_tramp_links *tlinks,
+ u32 flags, struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
u32 size = ro_image_end - ro_image;
@@ -2865,7 +2865,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
ret = calc_arg_aux(m, &aaux);
if (ret)
goto out;
- ret = prepare_trampoline(&ctx, im, tlinks, func_addr, m, &aaux, flags);
+ ret = prepare_trampoline(&ctx, im, tnodes, func_addr, m, &aaux, flags);
if (ret > 0 && validate_code(&ctx) < 0) {
ret = -EINVAL;
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 3bd89f55960d..a2471f42376e 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1480,16 +1480,16 @@ static void restore_args(struct jit_ctx *ctx, int nargs, int args_off)
}
}
-static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
+static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_node *n,
int args_off, int retval_off, int run_ctx_off, bool save_ret)
{
int ret;
u32 *branch;
- struct bpf_prog *p = l->link.prog;
+ struct bpf_prog *p = n->link->prog;
int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
- if (l->cookie) {
- move_imm(ctx, LOONGARCH_GPR_T1, l->cookie, false);
+ if (n->cookie) {
+ move_imm(ctx, LOONGARCH_GPR_T1, n->cookie, false);
emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off);
} else {
emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off);
@@ -1544,14 +1544,14 @@ static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
return ret;
}
-static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
+static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_nodes *tn,
int args_off, int retval_off, int run_ctx_off, u32 **branches)
{
int i;
emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off);
- for (i = 0; i < tl->nr_links; i++) {
- invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, run_ctx_off, true);
+ for (i = 0; i < tn->nr_nodes; i++) {
+ invoke_bpf_prog(ctx, tn->nodes[i], args_off, retval_off, run_ctx_off, true);
emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -retval_off);
branches[i] = (u32 *)ctx->image + ctx->idx;
emit_insn(ctx, nop);
@@ -1600,7 +1600,7 @@ static void sign_extend(struct jit_ctx *ctx, int rd, int rj, u8 size, bool sign)
}
static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
- const struct btf_func_model *m, struct bpf_tramp_links *tlinks,
+ const struct btf_func_model *m, struct bpf_tramp_nodes *tnodes,
void *func_addr, u32 flags)
{
int i, ret, save_ret;
@@ -1608,9 +1608,9 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
int retval_off, args_off, nargs_off, ip_off, run_ctx_off, sreg_off, tcc_ptr_off;
bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT;
void *orig_call = func_addr;
- struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
- struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
- struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN];
u32 **branches = NULL;
/*
@@ -1753,14 +1753,14 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
return ret;
}
- for (i = 0; i < fentry->nr_links; i++) {
- ret = invoke_bpf_prog(ctx, fentry->links[i], args_off, retval_off,
+ for (i = 0; i < fentry->nr_nodes; i++) {
+ ret = invoke_bpf_prog(ctx, fentry->nodes[i], args_off, retval_off,
run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET);
if (ret)
return ret;
}
- if (fmod_ret->nr_links) {
- branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), GFP_KERNEL);
+ if (fmod_ret->nr_nodes) {
+ branches = kcalloc(fmod_ret->nr_nodes, sizeof(u32 *), GFP_KERNEL);
if (!branches)
return -ENOMEM;
@@ -1784,13 +1784,13 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
emit_insn(ctx, nop);
}
- for (i = 0; ctx->image && i < fmod_ret->nr_links; i++) {
+ for (i = 0; ctx->image && i < fmod_ret->nr_nodes; i++) {
int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branches[i];
*branches[i] = larch_insn_gen_bne(LOONGARCH_GPR_T1, LOONGARCH_GPR_ZERO, offset);
}
- for (i = 0; i < fexit->nr_links; i++) {
- ret = invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off, run_ctx_off, false);
+ for (i = 0; i < fexit->nr_nodes; i++) {
+ ret = invoke_bpf_prog(ctx, fexit->nodes[i], args_off, retval_off, run_ctx_off, false);
if (ret)
goto out;
}
@@ -1858,7 +1858,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
void *ro_image_end, const struct btf_func_model *m,
- u32 flags, struct bpf_tramp_links *tlinks, void *func_addr)
+ u32 flags, struct bpf_tramp_nodes *tnodes, void *func_addr)
{
int ret, size;
void *image, *tmp;
@@ -1874,7 +1874,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
ctx.idx = 0;
jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
- ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tlinks, func_addr, flags);
+ ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tnodes, func_addr, flags);
if (ret < 0)
goto out;
@@ -1895,7 +1895,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
}
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *func_addr)
+ struct bpf_tramp_nodes *tnodes, void *func_addr)
{
int ret;
struct jit_ctx ctx;
@@ -1904,7 +1904,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
ctx.image = NULL;
ctx.idx = 0;
- ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tlinks, func_addr, flags);
+ ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tnodes, func_addr, flags);
return ret < 0 ? ret : ret * LOONGARCH_INSN_SIZE;
}
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 52162e4a7f84..462344a58902 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -512,22 +512,22 @@ int arch_protect_bpf_trampoline(void *image, unsigned int size)
}
static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ctx,
- struct bpf_tramp_link *l, int regs_off, int retval_off,
+ struct bpf_tramp_node *n, int regs_off, int retval_off,
int run_ctx_off, bool save_ret)
{
- struct bpf_prog *p = l->link.prog;
+ struct bpf_prog *p = n->link->prog;
ppc_inst_t branch_insn;
u32 jmp_idx;
int ret = 0;
/* Save cookie */
if (IS_ENABLED(CONFIG_PPC64)) {
- PPC_LI64(_R3, l->cookie);
+ PPC_LI64(_R3, n->cookie);
EMIT(PPC_RAW_STD(_R3, _R1, run_ctx_off + offsetof(struct bpf_tramp_run_ctx,
bpf_cookie)));
} else {
- PPC_LI32(_R3, l->cookie >> 32);
- PPC_LI32(_R4, l->cookie);
+ PPC_LI32(_R3, n->cookie >> 32);
+ PPC_LI32(_R4, n->cookie);
EMIT(PPC_RAW_STW(_R3, _R1,
run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie)));
EMIT(PPC_RAW_STW(_R4, _R1,
@@ -594,7 +594,7 @@ static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ct
}
static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context *ctx,
- struct bpf_tramp_links *tl, int regs_off, int retval_off,
+ struct bpf_tramp_nodes *tn, int regs_off, int retval_off,
int run_ctx_off, u32 *branches)
{
int i;
@@ -605,8 +605,8 @@ static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context
*/
EMIT(PPC_RAW_LI(_R3, 0));
EMIT(PPC_RAW_STL(_R3, _R1, retval_off));
- for (i = 0; i < tl->nr_links; i++) {
- if (invoke_bpf_prog(image, ro_image, ctx, tl->links[i], regs_off, retval_off,
+ for (i = 0; i < tn->nr_nodes; i++) {
+ if (invoke_bpf_prog(image, ro_image, ctx, tn->nodes[i], regs_off, retval_off,
run_ctx_off, true))
return -EINVAL;
@@ -737,14 +737,14 @@ static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context
static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image,
void *rw_image_end, void *ro_image,
const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
int regs_off, nregs_off, ip_off, run_ctx_off, retval_off, nvr_off, alt_lr_off, r4_off = 0;
int i, ret, nr_regs, bpf_frame_size = 0, bpf_dummy_frame_size = 0, func_frame_offset;
- struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
- struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
- struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT];
struct codegen_context codegen_ctx, *ctx;
u32 *image = (u32 *)rw_image;
ppc_inst_t branch_insn;
@@ -938,13 +938,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
return ret;
}
- for (i = 0; i < fentry->nr_links; i++)
- if (invoke_bpf_prog(image, ro_image, ctx, fentry->links[i], regs_off, retval_off,
+ for (i = 0; i < fentry->nr_nodes; i++)
+ if (invoke_bpf_prog(image, ro_image, ctx, fentry->nodes[i], regs_off, retval_off,
run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET))
return -EINVAL;
- if (fmod_ret->nr_links) {
- branches = kcalloc(fmod_ret->nr_links, sizeof(u32), GFP_KERNEL);
+ if (fmod_ret->nr_nodes) {
+ branches = kcalloc(fmod_ret->nr_nodes, sizeof(u32), GFP_KERNEL);
if (!branches)
return -ENOMEM;
@@ -994,7 +994,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
/* Update branches saved in invoke_bpf_mod_ret with address of do_fexit */
- for (i = 0; i < fmod_ret->nr_links && image; i++) {
+ for (i = 0; i < fmod_ret->nr_nodes && image; i++) {
if (create_cond_branch(&branch_insn, &image[branches[i]],
(unsigned long)&image[ctx->idx], COND_NE << 16)) {
ret = -EINVAL;
@@ -1004,8 +1004,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
image[branches[i]] = ppc_inst_val(branch_insn);
}
- for (i = 0; i < fexit->nr_links; i++)
- if (invoke_bpf_prog(image, ro_image, ctx, fexit->links[i], regs_off, retval_off,
+ for (i = 0; i < fexit->nr_nodes; i++)
+ if (invoke_bpf_prog(image, ro_image, ctx, fexit->nodes[i], regs_off, retval_off,
run_ctx_off, false)) {
ret = -EINVAL;
goto cleanup;
@@ -1071,18 +1071,18 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *func_addr)
+ struct bpf_tramp_nodes *tnodes, void *func_addr)
{
struct bpf_tramp_image im;
int ret;
- ret = __arch_prepare_bpf_trampoline(&im, NULL, NULL, NULL, m, flags, tlinks, func_addr);
+ ret = __arch_prepare_bpf_trampoline(&im, NULL, NULL, NULL, m, flags, tnodes, func_addr);
return ret;
}
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
u32 size = image_end - image;
@@ -1098,7 +1098,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
return -ENOMEM;
ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m,
- flags, tlinks, func_addr);
+ flags, tnodes, func_addr);
if (ret < 0)
goto out;
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 2f1109dbf105..461b902a5f92 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -934,15 +934,15 @@ static void emit_store_stack_imm64(u8 reg, int stack_off, u64 imm64,
emit_sd(RV_REG_FP, stack_off, reg, ctx);
}
-static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_off,
+static int invoke_bpf_prog(struct bpf_tramp_node *node, int args_off, int retval_off,
int run_ctx_off, bool save_ret, struct rv_jit_context *ctx)
{
int ret, branch_off;
- struct bpf_prog *p = l->link.prog;
+ struct bpf_prog *p = node->link->prog;
int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
- if (l->cookie)
- emit_store_stack_imm64(RV_REG_T1, -run_ctx_off + cookie_off, l->cookie, ctx);
+ if (node->cookie)
+ emit_store_stack_imm64(RV_REG_T1, -run_ctx_off + cookie_off, node->cookie, ctx);
else
emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_ZERO, ctx);
@@ -996,22 +996,22 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of
return ret;
}
-static int invoke_bpf(struct bpf_tramp_links *tl, int args_off, int retval_off,
+static int invoke_bpf(struct bpf_tramp_nodes *tn, int args_off, int retval_off,
int run_ctx_off, int func_meta_off, bool save_ret, u64 func_meta,
int cookie_off, struct rv_jit_context *ctx)
{
int i, cur_cookie = (cookie_off - args_off) / 8;
- for (i = 0; i < tl->nr_links; i++) {
+ for (i = 0; i < tn->nr_nodes; i++) {
int err;
- if (bpf_prog_calls_session_cookie(tl->links[i])) {
+ if (bpf_prog_calls_session_cookie(tn->nodes[i])) {
u64 meta = func_meta | ((u64)cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT);
emit_store_stack_imm64(RV_REG_T1, -func_meta_off, meta, ctx);
cur_cookie--;
}
- err = invoke_bpf_prog(tl->links[i], args_off, retval_off, run_ctx_off,
+ err = invoke_bpf_prog(tn->nodes[i], args_off, retval_off, run_ctx_off,
save_ret, ctx);
if (err)
return err;
@@ -1021,7 +1021,7 @@ static int invoke_bpf(struct bpf_tramp_links *tl, int args_off, int retval_off,
static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
const struct btf_func_model *m,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr, u32 flags,
struct rv_jit_context *ctx)
{
@@ -1030,9 +1030,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
int stack_size = 0, nr_arg_slots = 0;
int retval_off, args_off, func_meta_off, ip_off, run_ctx_off, sreg_off, stk_arg_off;
int cookie_off, cookie_cnt;
- struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
- struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
- struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN];
bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT;
void *orig_call = func_addr;
bool save_ret;
@@ -1115,7 +1115,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
ip_off = stack_size;
}
- cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
+ cookie_cnt = bpf_fsession_cookie_cnt(tnodes);
/* room for session cookies */
stack_size += cookie_cnt * 8;
cookie_off = stack_size;
@@ -1172,7 +1172,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
store_args(nr_arg_slots, args_off, ctx);
- if (bpf_fsession_cnt(tlinks)) {
+ if (bpf_fsession_cnt(tnodes)) {
/* clear all session cookies' value */
for (i = 0; i < cookie_cnt; i++)
emit_sd(RV_REG_FP, -cookie_off + 8 * i, RV_REG_ZERO, ctx);
@@ -1187,22 +1187,22 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
return ret;
}
- if (fentry->nr_links) {
+ if (fentry->nr_nodes) {
ret = invoke_bpf(fentry, args_off, retval_off, run_ctx_off, func_meta_off,
flags & BPF_TRAMP_F_RET_FENTRY_RET, func_meta, cookie_off, ctx);
if (ret)
return ret;
}
- if (fmod_ret->nr_links) {
- branches_off = kzalloc_objs(int, fmod_ret->nr_links);
+ if (fmod_ret->nr_nodes) {
+ branches_off = kzalloc_objs(int, fmod_ret->nr_nodes);
if (!branches_off)
return -ENOMEM;
/* cleanup to avoid garbage return value confusion */
emit_sd(RV_REG_FP, -retval_off, RV_REG_ZERO, ctx);
- for (i = 0; i < fmod_ret->nr_links; i++) {
- ret = invoke_bpf_prog(fmod_ret->links[i], args_off, retval_off,
+ for (i = 0; i < fmod_ret->nr_nodes; i++) {
+ ret = invoke_bpf_prog(fmod_ret->nodes[i], args_off, retval_off,
run_ctx_off, true, ctx);
if (ret)
goto out;
@@ -1230,7 +1230,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
}
/* update branches saved in invoke_bpf_mod_ret with bnez */
- for (i = 0; ctx->insns && i < fmod_ret->nr_links; i++) {
+ for (i = 0; ctx->insns && i < fmod_ret->nr_nodes; i++) {
offset = ninsns_rvoff(ctx->ninsns - branches_off[i]);
insn = rv_bne(RV_REG_T1, RV_REG_ZERO, offset >> 1);
*(u32 *)(ctx->insns + branches_off[i]) = insn;
@@ -1238,10 +1238,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
/* set "is_return" flag for fsession */
func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT);
- if (bpf_fsession_cnt(tlinks))
+ if (bpf_fsession_cnt(tnodes))
emit_store_stack_imm64(RV_REG_T1, -func_meta_off, func_meta, ctx);
- if (fexit->nr_links) {
+ if (fexit->nr_nodes) {
ret = invoke_bpf(fexit, args_off, retval_off, run_ctx_off, func_meta_off,
false, func_meta, cookie_off, ctx);
if (ret)
@@ -1305,7 +1305,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
}
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *func_addr)
+ struct bpf_tramp_nodes *tnodes, void *func_addr)
{
struct bpf_tramp_image im;
struct rv_jit_context ctx;
@@ -1314,7 +1314,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
ctx.ninsns = 0;
ctx.insns = NULL;
ctx.ro_insns = NULL;
- ret = __arch_prepare_bpf_trampoline(&im, m, tlinks, func_addr, flags, &ctx);
+ ret = __arch_prepare_bpf_trampoline(&im, m, tnodes, func_addr, flags, &ctx);
return ret < 0 ? ret : ninsns_rvoff(ctx.ninsns);
}
@@ -1331,7 +1331,7 @@ void arch_free_bpf_trampoline(void *image, unsigned int size)
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
void *ro_image_end, const struct btf_func_model *m,
- u32 flags, struct bpf_tramp_links *tlinks,
+ u32 flags, struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
int ret;
@@ -1346,7 +1346,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
ctx.ninsns = 0;
ctx.insns = image;
ctx.ro_insns = ro_image;
- ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
+ ret = __arch_prepare_bpf_trampoline(im, m, tnodes, func_addr, flags, &ctx);
if (ret < 0)
goto out;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 1f9a6b728beb..888e9d717dd5 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -2522,19 +2522,19 @@ static void emit_store_stack_imm64(struct bpf_jit *jit, int tmp_reg, int stack_o
static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
const struct btf_func_model *m,
- struct bpf_tramp_link *tlink, bool save_ret)
+ struct bpf_tramp_node *node, bool save_ret)
{
struct bpf_jit *jit = &tjit->common;
int cookie_off = tjit->run_ctx_off +
offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
- struct bpf_prog *p = tlink->link.prog;
+ struct bpf_prog *p = node->link->prog;
int patch;
/*
- * run_ctx.cookie = tlink->cookie;
+ * run_ctx.cookie = node->cookie;
*/
- emit_store_stack_imm64(jit, REG_W0, cookie_off, tlink->cookie);
+ emit_store_stack_imm64(jit, REG_W0, cookie_off, node->cookie);
/*
* if ((start = __bpf_prog_enter(p, &run_ctx)) == 0)
@@ -2594,20 +2594,20 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
static int invoke_bpf(struct bpf_tramp_jit *tjit,
const struct btf_func_model *m,
- struct bpf_tramp_links *tl, bool save_ret,
+ struct bpf_tramp_nodes *tn, bool save_ret,
u64 func_meta, int cookie_off)
{
int i, cur_cookie = (tjit->bpf_args_off - cookie_off) / sizeof(u64);
struct bpf_jit *jit = &tjit->common;
- for (i = 0; i < tl->nr_links; i++) {
- if (bpf_prog_calls_session_cookie(tl->links[i])) {
+ for (i = 0; i < tn->nr_nodes; i++) {
+ if (bpf_prog_calls_session_cookie(tn->nodes[i])) {
u64 meta = func_meta | ((u64)cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT);
emit_store_stack_imm64(jit, REG_0, tjit->func_meta_off, meta);
cur_cookie--;
}
- if (invoke_bpf_prog(tjit, m, tl->links[i], save_ret))
+ if (invoke_bpf_prog(tjit, m, tn->nodes[i], save_ret))
return -EINVAL;
}
@@ -2636,12 +2636,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
struct bpf_tramp_jit *tjit,
const struct btf_func_model *m,
u32 flags,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
- struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
- struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
- struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT];
int nr_bpf_args, nr_reg_args, nr_stack_args;
int cookie_cnt, cookie_off, fsession_cnt;
struct bpf_jit *jit = &tjit->common;
@@ -2678,8 +2678,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
return -ENOTSUPP;
}
- cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
- fsession_cnt = bpf_fsession_cnt(tlinks);
+ cookie_cnt = bpf_fsession_cookie_cnt(tnodes);
+ fsession_cnt = bpf_fsession_cnt(tnodes);
/*
* Calculate the stack layout.
@@ -2814,7 +2814,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
func_meta, cookie_off))
return -EINVAL;
- if (fmod_ret->nr_links) {
+ if (fmod_ret->nr_nodes) {
/*
* retval = 0;
*/
@@ -2823,8 +2823,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
_EMIT6(0xd707f000 | tjit->retval_off,
0xf000 | tjit->retval_off);
- for (i = 0; i < fmod_ret->nr_links; i++) {
- if (invoke_bpf_prog(tjit, m, fmod_ret->links[i], true))
+ for (i = 0; i < fmod_ret->nr_nodes; i++) {
+ if (invoke_bpf_prog(tjit, m, fmod_ret->nodes[i], true))
return -EINVAL;
/*
@@ -2949,7 +2949,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
}
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *orig_call)
+ struct bpf_tramp_nodes *tnodes, void *orig_call)
{
struct bpf_tramp_image im;
struct bpf_tramp_jit tjit;
@@ -2958,14 +2958,14 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
memset(&tjit, 0, sizeof(tjit));
ret = __arch_prepare_bpf_trampoline(&im, &tjit, m, flags,
- tlinks, orig_call);
+ tnodes, orig_call);
return ret < 0 ? ret : tjit.common.prg;
}
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
void *image_end, const struct btf_func_model *m,
- u32 flags, struct bpf_tramp_links *tlinks,
+ u32 flags, struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
struct bpf_tramp_jit tjit;
@@ -2974,7 +2974,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
/* Compute offsets, check whether the code fits. */
memset(&tjit, 0, sizeof(tjit));
ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
- tlinks, func_addr);
+ tnodes, func_addr);
if (ret < 0)
return ret;
@@ -2988,7 +2988,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
tjit.common.prg = 0;
tjit.common.prg_buf = image;
ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
- tlinks, func_addr);
+ tnodes, func_addr);
return ret < 0 ? ret : tjit.common.prg;
}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e9b78040d703..dc3f2e8d5ca7 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -2969,15 +2969,15 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog,
}
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
- struct bpf_tramp_link *l, int stack_size,
+ struct bpf_tramp_node *node, int stack_size,
int run_ctx_off, bool save_ret,
void *image, void *rw_image)
{
u8 *prog = *pprog;
u8 *jmp_insn;
int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
- struct bpf_prog *p = l->link.prog;
- u64 cookie = l->cookie;
+ struct bpf_prog *p = node->link->prog;
+ u64 cookie = node->cookie;
/* mov rdi, cookie */
emit_mov_imm64(&prog, BPF_REG_1, (long) cookie >> 32, (u32) (long) cookie);
@@ -3084,7 +3084,7 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
}
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
- struct bpf_tramp_links *tl, int stack_size,
+ struct bpf_tramp_nodes *tl, int stack_size,
int run_ctx_off, int func_meta_off, bool save_ret,
void *image, void *rw_image, u64 func_meta,
int cookie_off)
@@ -3092,13 +3092,13 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
int i, cur_cookie = (cookie_off - stack_size) / 8;
u8 *prog = *pprog;
- for (i = 0; i < tl->nr_links; i++) {
- if (tl->links[i]->link.prog->call_session_cookie) {
+ for (i = 0; i < tl->nr_nodes; i++) {
+ if (tl->nodes[i]->link->prog->call_session_cookie) {
emit_store_stack_imm64(&prog, BPF_REG_0, -func_meta_off,
func_meta | (cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT));
cur_cookie--;
}
- if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
+ if (invoke_bpf_prog(m, &prog, tl->nodes[i], stack_size,
run_ctx_off, save_ret, image, rw_image))
return -EINVAL;
}
@@ -3107,7 +3107,7 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
}
static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
- struct bpf_tramp_links *tl, int stack_size,
+ struct bpf_tramp_nodes *tl, int stack_size,
int run_ctx_off, u8 **branches,
void *image, void *rw_image)
{
@@ -3119,8 +3119,8 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
*/
emit_mov_imm32(&prog, false, BPF_REG_0, 0);
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
- for (i = 0; i < tl->nr_links; i++) {
- if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true,
+ for (i = 0; i < tl->nr_nodes; i++) {
+ if (invoke_bpf_prog(m, &prog, tl->nodes[i], stack_size, run_ctx_off, true,
image, rw_image))
return -EINVAL;
@@ -3211,14 +3211,14 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image,
void *rw_image_end, void *image,
const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
int i, ret, nr_regs = m->nr_args, stack_size = 0;
int regs_off, func_meta_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
- struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
- struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
- struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN];
void *orig_call = func_addr;
int cookie_off, cookie_cnt;
u8 **branches = NULL;
@@ -3290,7 +3290,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
ip_off = stack_size;
- cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
+ cookie_cnt = bpf_fsession_cookie_cnt(tnodes);
/* room for session cookies */
stack_size += cookie_cnt * 8;
cookie_off = stack_size;
@@ -3383,7 +3383,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
}
- if (bpf_fsession_cnt(tlinks)) {
+ if (bpf_fsession_cnt(tnodes)) {
/* clear all the session cookies' value */
for (int i = 0; i < cookie_cnt; i++)
emit_store_stack_imm64(&prog, BPF_REG_0, -cookie_off + 8 * i, 0);
@@ -3391,15 +3391,15 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
emit_store_stack_imm64(&prog, BPF_REG_0, -8, 0);
}
- if (fentry->nr_links) {
+ if (fentry->nr_nodes) {
if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off, func_meta_off,
flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image,
func_meta, cookie_off))
return -EINVAL;
}
- if (fmod_ret->nr_links) {
- branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *),
+ if (fmod_ret->nr_nodes) {
+ branches = kcalloc(fmod_ret->nr_nodes, sizeof(u8 *),
GFP_KERNEL);
if (!branches)
return -ENOMEM;
@@ -3438,7 +3438,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
emit_nops(&prog, X86_PATCH_SIZE);
}
- if (fmod_ret->nr_links) {
+ if (fmod_ret->nr_nodes) {
/* From Intel 64 and IA-32 Architectures Optimization
* Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
* Coding Rule 11: All branch targets should be 16-byte
@@ -3448,7 +3448,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
/* Update the branches saved in invoke_bpf_mod_ret with the
* aligned address of do_fexit.
*/
- for (i = 0; i < fmod_ret->nr_links; i++) {
+ for (i = 0; i < fmod_ret->nr_nodes; i++) {
emit_cond_near_jump(&branches[i], image + (prog - (u8 *)rw_image),
image + (branches[i] - (u8 *)rw_image), X86_JNE);
}
@@ -3456,10 +3456,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
/* set the "is_return" flag for fsession */
func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT);
- if (bpf_fsession_cnt(tlinks))
+ if (bpf_fsession_cnt(tnodes))
emit_store_stack_imm64(&prog, BPF_REG_0, -func_meta_off, func_meta);
- if (fexit->nr_links) {
+ if (fexit->nr_nodes) {
if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, func_meta_off,
false, image, rw_image, func_meta, cookie_off)) {
ret = -EINVAL;
@@ -3533,7 +3533,7 @@ int arch_protect_bpf_trampoline(void *image, unsigned int size)
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
void *rw_image, *tmp;
@@ -3548,7 +3548,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
return -ENOMEM;
ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m,
- flags, tlinks, func_addr);
+ flags, tnodes, func_addr);
if (ret < 0)
goto out;
@@ -3561,7 +3561,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
}
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *func_addr)
+ struct bpf_tramp_nodes *tnodes, void *func_addr)
{
struct bpf_tramp_image im;
void *image;
@@ -3579,7 +3579,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
return -ENOMEM;
ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
- m, flags, tlinks, func_addr);
+ m, flags, tnodes, func_addr);
bpf_jit_free_exec(image);
return ret;
}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1d900f49aff5..f97aa34ee4c2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1233,9 +1233,9 @@ enum {
#define BPF_TRAMP_COOKIE_INDEX_SHIFT 8
#define BPF_TRAMP_IS_RETURN_SHIFT 63
-struct bpf_tramp_links {
- struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS];
- int nr_links;
+struct bpf_tramp_nodes {
+ struct bpf_tramp_node *nodes[BPF_MAX_TRAMP_LINKS];
+ int nr_nodes;
};
struct bpf_tramp_run_ctx;
@@ -1263,13 +1263,13 @@ struct bpf_tramp_run_ctx;
struct bpf_tramp_image;
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr);
void *arch_alloc_bpf_trampoline(unsigned int size);
void arch_free_bpf_trampoline(void *image, unsigned int size);
int __must_check arch_protect_bpf_trampoline(void *image, unsigned int size);
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *func_addr);
+ struct bpf_tramp_nodes *tnodes, void *func_addr);
u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx);
@@ -1453,10 +1453,10 @@ static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u6
}
#ifdef CONFIG_BPF_JIT
-int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+int bpf_trampoline_link_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog);
-int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
+int bpf_trampoline_unlink_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog);
struct bpf_trampoline *bpf_trampoline_get(u64 key,
@@ -1540,13 +1540,13 @@ int bpf_jit_charge_modmem(u32 size);
void bpf_jit_uncharge_modmem(u32 size);
bool bpf_prog_has_trampoline(const struct bpf_prog *prog);
#else
-static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+static inline int bpf_trampoline_link_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog)
{
return -ENOTSUPP;
}
-static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
+static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog)
{
@@ -1865,12 +1865,17 @@ struct bpf_link_ops {
__poll_t (*poll)(struct file *file, struct poll_table_struct *pts);
};
-struct bpf_tramp_link {
- struct bpf_link link;
+struct bpf_tramp_node {
+ struct bpf_link *link;
struct hlist_node tramp_hlist;
u64 cookie;
};
+struct bpf_tramp_link {
+ struct bpf_link link;
+ struct bpf_tramp_node node;
+};
+
struct bpf_shim_tramp_link {
struct bpf_tramp_link link;
struct bpf_trampoline *trampoline;
@@ -2088,8 +2093,8 @@ void bpf_struct_ops_put(const void *kdata);
int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff);
int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
void *value);
-int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
- struct bpf_tramp_link *link,
+int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_nodes *tnodes,
+ struct bpf_tramp_node *node,
const struct btf_func_model *model,
void *stub_func,
void **image, u32 *image_off,
@@ -2181,31 +2186,31 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op
#endif
-static inline int bpf_fsession_cnt(struct bpf_tramp_links *links)
+static inline int bpf_fsession_cnt(struct bpf_tramp_nodes *nodes)
{
- struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes fentries = nodes[BPF_TRAMP_FENTRY];
int cnt = 0;
- for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
- if (fentries.links[i]->link.prog->expected_attach_type == BPF_TRACE_FSESSION)
+ for (int i = 0; i < nodes[BPF_TRAMP_FENTRY].nr_nodes; i++) {
+ if (fentries.nodes[i]->link->prog->expected_attach_type == BPF_TRACE_FSESSION)
cnt++;
}
return cnt;
}
-static inline bool bpf_prog_calls_session_cookie(struct bpf_tramp_link *link)
+static inline bool bpf_prog_calls_session_cookie(struct bpf_tramp_node *node)
{
- return link->link.prog->call_session_cookie;
+ return node->link->prog->call_session_cookie;
}
-static inline int bpf_fsession_cookie_cnt(struct bpf_tramp_links *links)
+static inline int bpf_fsession_cookie_cnt(struct bpf_tramp_nodes *nodes)
{
- struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes fentries = nodes[BPF_TRAMP_FENTRY];
int cnt = 0;
- for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
- if (bpf_prog_calls_session_cookie(fentries.links[i]))
+ for (int i = 0; i < nodes[BPF_TRAMP_FENTRY].nr_nodes; i++) {
+ if (bpf_prog_calls_session_cookie(fentries.nodes[i]))
cnt++;
}
@@ -2758,6 +2763,9 @@ void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type,
const struct bpf_link_ops *ops, struct bpf_prog *prog,
enum bpf_attach_type attach_type, bool sleepable);
+void bpf_tramp_link_init(struct bpf_tramp_link *link, enum bpf_link_type type,
+ const struct bpf_link_ops *ops, struct bpf_prog *prog,
+ enum bpf_attach_type attach_type, u64 cookie);
int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer);
int bpf_link_settle(struct bpf_link_primer *primer);
void bpf_link_cleanup(struct bpf_link_primer *primer);
@@ -3123,6 +3131,12 @@ static inline void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_
{
}
+static inline void bpf_tramp_link_init(struct bpf_tramp_link *link, enum bpf_link_type type,
+ const struct bpf_link_ops *ops, struct bpf_prog *prog,
+ enum bpf_attach_type attach_type, u64 cookie)
+{
+}
+
static inline int bpf_link_prime(struct bpf_link *link,
struct bpf_link_primer *primer)
{
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 05b366b821c3..10a9301615ba 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -594,8 +594,8 @@ const struct bpf_link_ops bpf_struct_ops_link_lops = {
.dealloc = bpf_struct_ops_link_dealloc,
};
-int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
- struct bpf_tramp_link *link,
+int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_nodes *tnodes,
+ struct bpf_tramp_node *node,
const struct btf_func_model *model,
void *stub_func,
void **_image, u32 *_image_off,
@@ -605,13 +605,13 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
void *image = *_image;
int size;
- tlinks[BPF_TRAMP_FENTRY].links[0] = link;
- tlinks[BPF_TRAMP_FENTRY].nr_links = 1;
+ tnodes[BPF_TRAMP_FENTRY].nodes[0] = node;
+ tnodes[BPF_TRAMP_FENTRY].nr_nodes = 1;
if (model->ret_size > 0)
flags |= BPF_TRAMP_F_RET_FENTRY_RET;
- size = arch_bpf_trampoline_size(model, flags, tlinks, stub_func);
+ size = arch_bpf_trampoline_size(model, flags, tnodes, stub_func);
if (size <= 0)
return size ? : -EFAULT;
@@ -628,7 +628,7 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
size = arch_prepare_bpf_trampoline(NULL, image + image_off,
image + image_off + size,
- model, flags, tlinks, stub_func);
+ model, flags, tnodes, stub_func);
if (size <= 0) {
if (image != *_image)
bpf_struct_ops_image_free(image);
@@ -693,7 +693,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
const struct btf_type *module_type;
const struct btf_member *member;
const struct btf_type *t = st_ops_desc->type;
- struct bpf_tramp_links *tlinks;
+ struct bpf_tramp_nodes *tnodes;
void *udata, *kdata;
int prog_fd, err;
u32 i, trampoline_start, image_off = 0;
@@ -720,8 +720,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
if (uvalue->common.state || refcount_read(&uvalue->common.refcnt))
return -EINVAL;
- tlinks = kzalloc_objs(*tlinks, BPF_TRAMP_MAX);
- if (!tlinks)
+ tnodes = kzalloc_objs(*tnodes, BPF_TRAMP_MAX);
+ if (!tnodes)
return -ENOMEM;
uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
@@ -820,8 +820,9 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
err = -ENOMEM;
goto reset_unlock;
}
- bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS,
- &bpf_struct_ops_link_lops, prog, prog->expected_attach_type);
+ bpf_tramp_link_init(link, BPF_LINK_TYPE_STRUCT_OPS,
+ &bpf_struct_ops_link_lops, prog, prog->expected_attach_type, 0);
+
*plink++ = &link->link;
ksym = kzalloc_obj(*ksym, GFP_USER);
@@ -832,7 +833,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
*pksym++ = ksym;
trampoline_start = image_off;
- err = bpf_struct_ops_prepare_trampoline(tlinks, link,
+ err = bpf_struct_ops_prepare_trampoline(tnodes, &link->node,
&st_ops->func_models[i],
*(void **)(st_ops->cfi_stubs + moff),
&image, &image_off,
@@ -910,7 +911,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
memset(uvalue, 0, map->value_size);
memset(kvalue, 0, map->value_size);
unlock:
- kfree(tlinks);
+ kfree(tnodes);
mutex_unlock(&st_map->lock);
if (!err)
bpf_struct_ops_map_add_ksyms(st_map);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 274039e36465..6db6d1e74379 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3209,6 +3209,15 @@ void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
bpf_link_init_sleepable(link, type, ops, prog, attach_type, false);
}
+void bpf_tramp_link_init(struct bpf_tramp_link *link, enum bpf_link_type type,
+ const struct bpf_link_ops *ops, struct bpf_prog *prog,
+ enum bpf_attach_type attach_type, u64 cookie)
+{
+ bpf_link_init(&link->link, type, ops, prog, attach_type);
+ link->node.link = &link->link;
+ link->node.cookie = cookie;
+}
+
static void bpf_link_free_id(int id)
{
if (!id)
@@ -3502,7 +3511,7 @@ static void bpf_tracing_link_release(struct bpf_link *link)
struct bpf_tracing_link *tr_link =
container_of(link, struct bpf_tracing_link, link.link);
- WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link.node,
tr_link->trampoline,
tr_link->tgt_prog));
@@ -3515,8 +3524,7 @@ static void bpf_tracing_link_release(struct bpf_link *link)
static void bpf_tracing_link_dealloc(struct bpf_link *link)
{
- struct bpf_tracing_link *tr_link =
- container_of(link, struct bpf_tracing_link, link.link);
+ struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link.link);
kfree(tr_link);
}
@@ -3524,8 +3532,8 @@ static void bpf_tracing_link_dealloc(struct bpf_link *link)
static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link,
struct seq_file *seq)
{
- struct bpf_tracing_link *tr_link =
- container_of(link, struct bpf_tracing_link, link.link);
+ struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link.link);
+
u32 target_btf_id, target_obj_id;
bpf_trampoline_unpack_key(tr_link->trampoline->key,
@@ -3538,17 +3546,16 @@ static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link,
link->attach_type,
target_obj_id,
target_btf_id,
- tr_link->link.cookie);
+ tr_link->link.node.cookie);
}
static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
struct bpf_link_info *info)
{
- struct bpf_tracing_link *tr_link =
- container_of(link, struct bpf_tracing_link, link.link);
+ struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link.link);
info->tracing.attach_type = link->attach_type;
- info->tracing.cookie = tr_link->link.cookie;
+ info->tracing.cookie = tr_link->link.node.cookie;
bpf_trampoline_unpack_key(tr_link->trampoline->key,
&info->tracing.target_obj_id,
&info->tracing.target_btf_id);
@@ -3635,9 +3642,9 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
fslink = kzalloc_obj(*fslink, GFP_USER);
if (fslink) {
- bpf_link_init(&fslink->fexit.link, BPF_LINK_TYPE_TRACING,
- &bpf_tracing_link_lops, prog, attach_type);
- fslink->fexit.cookie = bpf_cookie;
+ bpf_tramp_link_init(&fslink->fexit, BPF_LINK_TYPE_TRACING,
+ &bpf_tracing_link_lops, prog, attach_type,
+ bpf_cookie);
link = &fslink->link;
} else {
link = NULL;
@@ -3649,10 +3656,8 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
err = -ENOMEM;
goto out_put_prog;
}
- bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING,
- &bpf_tracing_link_lops, prog, attach_type);
-
- link->link.cookie = bpf_cookie;
+ bpf_tramp_link_init(&link->link, BPF_LINK_TYPE_TRACING,
+ &bpf_tracing_link_lops, prog, attach_type, bpf_cookie);
mutex_lock(&prog->aux->dst_mutex);
@@ -3738,7 +3743,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
if (err)
goto out_unlock;
- err = bpf_trampoline_link_prog(&link->link, tr, tgt_prog);
+ err = bpf_trampoline_link_prog(&link->link.node, tr, tgt_prog);
if (err) {
bpf_link_cleanup(&link_primer);
link = NULL;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index d72057c715bd..3739938d2211 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -482,30 +482,29 @@ static struct bpf_trampoline_ops trampoline_ops = {
.modify_fentry = modify_fentry,
};
-static struct bpf_tramp_links *
+static struct bpf_tramp_nodes *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
{
- struct bpf_tramp_link *link;
- struct bpf_tramp_links *tlinks;
- struct bpf_tramp_link **links;
+ struct bpf_tramp_node *node, **nodes;
+ struct bpf_tramp_nodes *tnodes;
int kind;
*total = 0;
- tlinks = kzalloc_objs(*tlinks, BPF_TRAMP_MAX);
- if (!tlinks)
+ tnodes = kzalloc_objs(*tnodes, BPF_TRAMP_MAX);
+ if (!tnodes)
return ERR_PTR(-ENOMEM);
for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
- tlinks[kind].nr_links = tr->progs_cnt[kind];
+ tnodes[kind].nr_nodes = tr->progs_cnt[kind];
*total += tr->progs_cnt[kind];
- links = tlinks[kind].links;
+ nodes = tnodes[kind].nodes;
- hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
- *ip_arg |= link->link.prog->call_get_func_ip;
- *links++ = link;
+ hlist_for_each_entry(node, &tr->progs_hlist[kind], tramp_hlist) {
+ *ip_arg |= node->link->prog->call_get_func_ip;
+ *nodes++ = node;
}
}
- return tlinks;
+ return tnodes;
}
static void bpf_tramp_image_free(struct bpf_tramp_image *im)
@@ -653,14 +652,14 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
struct bpf_trampoline_ops *ops, void *data)
{
struct bpf_tramp_image *im;
- struct bpf_tramp_links *tlinks;
+ struct bpf_tramp_nodes *tnodes;
u32 orig_flags = tr->flags;
bool ip_arg = false;
int err, total, size;
- tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg);
- if (IS_ERR(tlinks))
- return PTR_ERR(tlinks);
+ tnodes = bpf_trampoline_get_progs(tr, &total, &ip_arg);
+ if (IS_ERR(tnodes))
+ return PTR_ERR(tnodes);
if (total == 0) {
err = ops->unregister_fentry(tr, orig_flags, tr->cur_image->image, data);
@@ -672,8 +671,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
/* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);
- if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
- tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
+ if (tnodes[BPF_TRAMP_FEXIT].nr_nodes ||
+ tnodes[BPF_TRAMP_MODIFY_RETURN].nr_nodes) {
/* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME
* should not be set together.
*/
@@ -704,7 +703,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
#endif
size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
- tlinks, tr->func.addr);
+ tnodes, tr->func.addr);
if (size < 0) {
err = size;
goto out;
@@ -722,7 +721,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
}
err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
- &tr->func.model, tr->flags, tlinks,
+ &tr->func.model, tr->flags, tnodes,
tr->func.addr);
if (err < 0)
goto out_free;
@@ -760,7 +759,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
/* If any error happens, restore previous flags */
if (err)
tr->flags = orig_flags;
- kfree(tlinks);
+ kfree(tnodes);
return err;
out_free:
@@ -810,7 +809,7 @@ static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog)
return 0;
}
-static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+static int __bpf_trampoline_link_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog,
struct bpf_trampoline_ops *ops,
@@ -818,12 +817,12 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
{
struct bpf_fsession_link *fslink = NULL;
enum bpf_tramp_prog_type kind;
- struct bpf_tramp_link *link_exiting;
+ struct bpf_tramp_node *node_existing;
struct hlist_head *prog_list;
int err = 0;
int cnt = 0, i;
- kind = bpf_attach_type_to_tramp(link->link.prog);
+ kind = bpf_attach_type_to_tramp(node->link->prog);
if (tr->extension_prog)
/* cannot attach fentry/fexit if extension prog is attached.
* cannot overwrite extension prog either.
@@ -840,10 +839,10 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
err = bpf_freplace_check_tgt_prog(tgt_prog);
if (err)
return err;
- tr->extension_prog = link->link.prog;
+ tr->extension_prog = node->link->prog;
return bpf_arch_text_poke(tr->func.addr, BPF_MOD_NOP,
BPF_MOD_JUMP, NULL,
- link->link.prog->bpf_func);
+ node->link->prog->bpf_func);
}
if (kind == BPF_TRAMP_FSESSION) {
prog_list = &tr->progs_hlist[BPF_TRAMP_FENTRY];
@@ -853,31 +852,31 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
}
if (cnt >= BPF_MAX_TRAMP_LINKS)
return -E2BIG;
- if (!hlist_unhashed(&link->tramp_hlist))
+ if (!hlist_unhashed(&node->tramp_hlist))
/* prog already linked */
return -EBUSY;
- hlist_for_each_entry(link_exiting, prog_list, tramp_hlist) {
- if (link_exiting->link.prog != link->link.prog)
+ hlist_for_each_entry(node_existing, prog_list, tramp_hlist) {
+ if (node_existing->link->prog != node->link->prog)
continue;
/* prog already linked */
return -EBUSY;
}
- hlist_add_head(&link->tramp_hlist, prog_list);
+ hlist_add_head(&node->tramp_hlist, prog_list);
if (kind == BPF_TRAMP_FSESSION) {
tr->progs_cnt[BPF_TRAMP_FENTRY]++;
- fslink = container_of(link, struct bpf_fsession_link, link.link);
- hlist_add_head(&fslink->fexit.tramp_hlist, &tr->progs_hlist[BPF_TRAMP_FEXIT]);
+ fslink = container_of(node, struct bpf_fsession_link, link.link.node);
+ hlist_add_head(&fslink->fexit.node.tramp_hlist, &tr->progs_hlist[BPF_TRAMP_FEXIT]);
tr->progs_cnt[BPF_TRAMP_FEXIT]++;
} else {
tr->progs_cnt[kind]++;
}
err = bpf_trampoline_update(tr, true /* lock_direct_mutex */, ops, data);
if (err) {
- hlist_del_init(&link->tramp_hlist);
+ hlist_del_init(&node->tramp_hlist);
if (kind == BPF_TRAMP_FSESSION) {
tr->progs_cnt[BPF_TRAMP_FENTRY]--;
- hlist_del_init(&fslink->fexit.tramp_hlist);
+ hlist_del_init(&fslink->fexit.node.tramp_hlist);
tr->progs_cnt[BPF_TRAMP_FEXIT]--;
} else {
tr->progs_cnt[kind]--;
@@ -886,19 +885,19 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
return err;
}
-int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+int bpf_trampoline_link_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog)
{
int err;
trampoline_lock(tr);
- err = __bpf_trampoline_link_prog(link, tr, tgt_prog, &trampoline_ops, NULL);
+ err = __bpf_trampoline_link_prog(node, tr, tgt_prog, &trampoline_ops, NULL);
trampoline_unlock(tr);
return err;
}
-static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
+static int __bpf_trampoline_unlink_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog,
struct bpf_trampoline_ops *ops,
@@ -907,7 +906,7 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
enum bpf_tramp_prog_type kind;
int err;
- kind = bpf_attach_type_to_tramp(link->link.prog);
+ kind = bpf_attach_type_to_tramp(node->link->prog);
if (kind == BPF_TRAMP_REPLACE) {
WARN_ON_ONCE(!tr->extension_prog);
err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
@@ -919,26 +918,26 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
return err;
} else if (kind == BPF_TRAMP_FSESSION) {
struct bpf_fsession_link *fslink =
- container_of(link, struct bpf_fsession_link, link.link);
+ container_of(node, struct bpf_fsession_link, link.link.node);
- hlist_del_init(&fslink->fexit.tramp_hlist);
+ hlist_del_init(&fslink->fexit.node.tramp_hlist);
tr->progs_cnt[BPF_TRAMP_FEXIT]--;
kind = BPF_TRAMP_FENTRY;
}
- hlist_del_init(&link->tramp_hlist);
+ hlist_del_init(&node->tramp_hlist);
tr->progs_cnt[kind]--;
return bpf_trampoline_update(tr, true /* lock_direct_mutex */, ops, data);
}
/* bpf_trampoline_unlink_prog() should never fail. */
-int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
+int bpf_trampoline_unlink_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog)
{
int err;
trampoline_lock(tr);
- err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog, &trampoline_ops, NULL);
+ err = __bpf_trampoline_unlink_prog(node, tr, tgt_prog, &trampoline_ops, NULL);
trampoline_unlock(tr);
return err;
}
@@ -953,7 +952,7 @@ static void bpf_shim_tramp_link_release(struct bpf_link *link)
if (!shim_link->trampoline)
return;
- WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline, NULL));
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link.node, shim_link->trampoline, NULL));
bpf_trampoline_put(shim_link->trampoline);
}
@@ -999,8 +998,8 @@ static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog
p->type = BPF_PROG_TYPE_LSM;
p->expected_attach_type = BPF_LSM_MAC;
bpf_prog_inc(p);
- bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC,
- &bpf_shim_tramp_link_lops, p, attach_type);
+ bpf_tramp_link_init(&shim_link->link, BPF_LINK_TYPE_UNSPEC,
+ &bpf_shim_tramp_link_lops, p, attach_type, 0);
bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);
return shim_link;
@@ -1009,15 +1008,15 @@ static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog
static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
bpf_func_t bpf_func)
{
- struct bpf_tramp_link *link;
+ struct bpf_tramp_node *node;
int kind;
for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
- hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
- struct bpf_prog *p = link->link.prog;
+ hlist_for_each_entry(node, &tr->progs_hlist[kind], tramp_hlist) {
+ struct bpf_prog *p = node->link->prog;
if (p->bpf_func == bpf_func)
- return container_of(link, struct bpf_shim_tramp_link, link);
+ return container_of(node, struct bpf_shim_tramp_link, link.node);
}
}
@@ -1067,7 +1066,7 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
goto err;
}
- err = __bpf_trampoline_link_prog(&shim_link->link, tr, NULL, &trampoline_ops, NULL);
+ err = __bpf_trampoline_link_prog(&shim_link->link.node, tr, NULL, &trampoline_ops, NULL);
if (err)
goto err;
@@ -1382,7 +1381,7 @@ bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
int __weak
arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
return -ENOTSUPP;
@@ -1416,7 +1415,7 @@ int __weak arch_protect_bpf_trampoline(void *image, unsigned int size)
}
int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *func_addr)
+ struct bpf_tramp_nodes *tnodes, void *func_addr)
{
return -ENOTSUPP;
}
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index ae5a54c350b9..191a6b3ee254 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -132,7 +132,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops;
const struct btf_type *func_proto;
struct bpf_dummy_ops_test_args *args;
- struct bpf_tramp_links *tlinks = NULL;
+ struct bpf_tramp_nodes *tnodes = NULL;
struct bpf_tramp_link *link = NULL;
void *image = NULL;
unsigned int op_idx;
@@ -158,8 +158,8 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
if (err)
goto out;
- tlinks = kzalloc_objs(*tlinks, BPF_TRAMP_MAX);
- if (!tlinks) {
+ tnodes = kzalloc_objs(*tnodes, BPF_TRAMP_MAX);
+ if (!tnodes) {
err = -ENOMEM;
goto out;
}
@@ -171,11 +171,11 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
}
/* prog doesn't take the ownership of the reference from caller */
bpf_prog_inc(prog);
- bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_link_lops, prog,
- prog->expected_attach_type);
+ bpf_tramp_link_init(link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_link_lops,
+ prog, prog->expected_attach_type, 0);
op_idx = prog->expected_attach_type;
- err = bpf_struct_ops_prepare_trampoline(tlinks, link,
+ err = bpf_struct_ops_prepare_trampoline(tnodes, &link->node,
&st_ops->func_models[op_idx],
&dummy_ops_test_ret_function,
&image, &image_off,
@@ -198,7 +198,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
bpf_struct_ops_image_free(image);
if (link)
bpf_link_put(&link->link);
- kfree(tlinks);
+ kfree(tnodes);
return err;
}
--
2.53.0
^ permalink raw reply related
* [PATCHv3 bpf-next 03/24] bpf: Add struct bpf_trampoline_ops object
From: Jiri Olsa @ 2026-03-16 7:51 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260316075138.465430-1-jolsa@kernel.org>
In following changes we will need to override ftrace direct attachment
behaviour. In order to do that we are adding struct bpf_trampoline_ops
object that defines callbacks for ftrace direct attachment:
register_fentry
unregister_fentry
modify_fentry
The new struct bpf_trampoline_ops object is passed as an argument to
__bpf_trampoline_link/unlink_prog functions.
At the moment the default trampoline_ops is set to the current ftrace
direct attachment functions, so there's no functional change for the
current code.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
kernel/bpf/trampoline.c | 59 ++++++++++++++++++++++++++++-------------
1 file changed, 41 insertions(+), 18 deletions(-)
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 9923703a1544..d72057c715bd 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -58,8 +58,18 @@ static void trampoline_unlock(struct bpf_trampoline *tr)
mutex_unlock(select_trampoline_lock(tr));
}
+struct bpf_trampoline_ops {
+ int (*register_fentry)(struct bpf_trampoline *tr, void *new_addr, void *data);
+ int (*unregister_fentry)(struct bpf_trampoline *tr, u32 orig_flags, void *old_addr,
+ void *data);
+ int (*modify_fentry)(struct bpf_trampoline *tr, u32 orig_flags, void *old_addr,
+ void *new_addr, bool lock_direct_mutex, void *data);
+};
+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
-static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
+static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex,
+ struct bpf_trampoline_ops *ops, void *data);
+static struct bpf_trampoline_ops trampoline_ops;
#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
@@ -144,13 +154,15 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
!(tr->flags & BPF_TRAMP_F_ORIG_STACK))
- ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
+ ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */,
+ &trampoline_ops, NULL);
break;
case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;
if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
- ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
+ ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */,
+ &trampoline_ops, NULL);
break;
default:
ret = -EINVAL;
@@ -414,7 +426,7 @@ static int bpf_trampoline_update_fentry(struct bpf_trampoline *tr, u32 orig_flag
}
static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags,
- void *old_addr)
+ void *old_addr, void *data)
{
int ret;
@@ -428,7 +440,7 @@ static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags,
static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags,
void *old_addr, void *new_addr,
- bool lock_direct_mutex)
+ bool lock_direct_mutex, void *data __maybe_unused)
{
int ret;
@@ -442,7 +454,7 @@ static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags,
}
/* first time registering */
-static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
+static int register_fentry(struct bpf_trampoline *tr, void *new_addr, void *data __maybe_unused)
{
void *ip = tr->func.addr;
unsigned long faddr;
@@ -464,6 +476,12 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
return ret;
}
+static struct bpf_trampoline_ops trampoline_ops = {
+ .register_fentry = register_fentry,
+ .unregister_fentry = unregister_fentry,
+ .modify_fentry = modify_fentry,
+};
+
static struct bpf_tramp_links *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
{
@@ -631,7 +649,8 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
return ERR_PTR(err);
}
-static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
+static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex,
+ struct bpf_trampoline_ops *ops, void *data)
{
struct bpf_tramp_image *im;
struct bpf_tramp_links *tlinks;
@@ -644,7 +663,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
return PTR_ERR(tlinks);
if (total == 0) {
- err = unregister_fentry(tr, orig_flags, tr->cur_image->image);
+ err = ops->unregister_fentry(tr, orig_flags, tr->cur_image->image, data);
bpf_tramp_image_put(tr->cur_image);
tr->cur_image = NULL;
goto out;
@@ -715,11 +734,11 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
WARN_ON(tr->cur_image && total == 0);
if (tr->cur_image)
/* progs already running at this address */
- err = modify_fentry(tr, orig_flags, tr->cur_image->image,
- im->image, lock_direct_mutex);
+ err = ops->modify_fentry(tr, orig_flags, tr->cur_image->image,
+ im->image, lock_direct_mutex, data);
else
/* first time registering */
- err = register_fentry(tr, im->image);
+ err = ops->register_fentry(tr, im->image, data);
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
if (err == -EAGAIN) {
@@ -793,7 +812,9 @@ static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog)
static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
struct bpf_trampoline *tr,
- struct bpf_prog *tgt_prog)
+ struct bpf_prog *tgt_prog,
+ struct bpf_trampoline_ops *ops,
+ void *data)
{
struct bpf_fsession_link *fslink = NULL;
enum bpf_tramp_prog_type kind;
@@ -851,7 +872,7 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
} else {
tr->progs_cnt[kind]++;
}
- err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
+ err = bpf_trampoline_update(tr, true /* lock_direct_mutex */, ops, data);
if (err) {
hlist_del_init(&link->tramp_hlist);
if (kind == BPF_TRAMP_FSESSION) {
@@ -872,14 +893,16 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
int err;
trampoline_lock(tr);
- err = __bpf_trampoline_link_prog(link, tr, tgt_prog);
+ err = __bpf_trampoline_link_prog(link, tr, tgt_prog, &trampoline_ops, NULL);
trampoline_unlock(tr);
return err;
}
static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
struct bpf_trampoline *tr,
- struct bpf_prog *tgt_prog)
+ struct bpf_prog *tgt_prog,
+ struct bpf_trampoline_ops *ops,
+ void *data)
{
enum bpf_tramp_prog_type kind;
int err;
@@ -904,7 +927,7 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
}
hlist_del_init(&link->tramp_hlist);
tr->progs_cnt[kind]--;
- return bpf_trampoline_update(tr, true /* lock_direct_mutex */);
+ return bpf_trampoline_update(tr, true /* lock_direct_mutex */, ops, data);
}
/* bpf_trampoline_unlink_prog() should never fail. */
@@ -915,7 +938,7 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
int err;
trampoline_lock(tr);
- err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog);
+ err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog, &trampoline_ops, NULL);
trampoline_unlock(tr);
return err;
}
@@ -1044,7 +1067,7 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
goto err;
}
- err = __bpf_trampoline_link_prog(&shim_link->link, tr, NULL);
+ err = __bpf_trampoline_link_prog(&shim_link->link, tr, NULL, &trampoline_ops, NULL);
if (err)
goto err;
--
2.53.0
^ permalink raw reply related
* [PATCHv3 bpf-next 02/24] bpf: Use mutex lock pool for bpf trampolines
From: Jiri Olsa @ 2026-03-16 7:51 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260316075138.465430-1-jolsa@kernel.org>
Adding mutex lock pool that replaces bpf trampolines mutex.
For tracing_multi link coming in following changes we need to lock all
the involved trampolines during the attachment. This could mean thousands
of mutex locks, which is not convenient.
As suggested by Andrii we can replace bpf trampolines mutex with mutex
pool, where each trampoline is hashed to one of the locks from the pool.
It's better to lock all the pool mutexes (32 at the moment) than
thousands of them.
A task is allowed to hold at most 48 (MAX_LOCK_DEPTH) locks simultaneously,
so we keep 32 mutexes (5 bits) in the pool; that way lockdep won't complain
when we lock them all in the following changes.
Removing the mutex_is_locked in bpf_trampoline_put, because we removed
the mutex from bpf_trampoline.
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
include/linux/bpf.h | 2 --
kernel/bpf/trampoline.c | 76 ++++++++++++++++++++++++++++-------------
2 files changed, 52 insertions(+), 26 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 05b34a6355b0..1d900f49aff5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1335,8 +1335,6 @@ struct bpf_trampoline {
/* hlist for trampoline_ip_table */
struct hlist_node hlist_ip;
struct ftrace_ops *fops;
- /* serializes access to fields of this trampoline */
- struct mutex mutex;
refcount_t refcnt;
u32 flags;
u64 key;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index f02254a21585..9923703a1544 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -30,6 +30,34 @@ static struct hlist_head trampoline_ip_table[TRAMPOLINE_TABLE_SIZE];
/* serializes access to trampoline tables */
static DEFINE_MUTEX(trampoline_mutex);
+/*
+ * We keep 32 trampoline locks (5 bits) in the pool, because there
+ * is 48 (MAX_LOCK_DEPTH) locks limit allowed to be simultaneously
+ * held by task.
+ */
+#define TRAMPOLINE_LOCKS_BITS 5
+#define TRAMPOLINE_LOCKS_TABLE_SIZE (1 << TRAMPOLINE_LOCKS_BITS)
+
+static struct {
+ struct mutex mutex;
+ struct lock_class_key key;
+} trampoline_locks[TRAMPOLINE_LOCKS_TABLE_SIZE];
+
+static struct mutex *select_trampoline_lock(struct bpf_trampoline *tr)
+{
+ return &trampoline_locks[hash_64((u64)(uintptr_t) tr, TRAMPOLINE_LOCKS_BITS)].mutex;
+}
+
+static void trampoline_lock(struct bpf_trampoline *tr)
+{
+ mutex_lock(select_trampoline_lock(tr));
+}
+
+static void trampoline_unlock(struct bpf_trampoline *tr)
+{
+ mutex_unlock(select_trampoline_lock(tr));
+}
+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
@@ -69,9 +97,9 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
/* This is called inside register_ftrace_direct_multi(), so
- * tr->mutex is already locked.
+ * trampoline's mutex is already locked.
*/
- lockdep_assert_held_once(&tr->mutex);
+ lockdep_assert_held_once(select_trampoline_lock(tr));
/* Instead of updating the trampoline here, we propagate
* -EAGAIN to register_ftrace_direct(). Then we can
@@ -91,7 +119,7 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
}
/* The normal locking order is
- * tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
+ * select_trampoline_lock(tr) => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
*
* The following two commands are called from
*
@@ -99,12 +127,12 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
* cleanup_direct_functions_after_ipmodify
*
* In both cases, direct_mutex is already locked. Use
- * mutex_trylock(&tr->mutex) to avoid deadlock in race condition
- * (something else is making changes to this same trampoline).
+ * mutex_trylock(select_trampoline_lock(tr)) to avoid deadlock in race condition
+ * (something else holds the same pool lock).
*/
- if (!mutex_trylock(&tr->mutex)) {
- /* sleep 1 ms to make sure whatever holding tr->mutex makes
- * some progress.
+ if (!mutex_trylock(select_trampoline_lock(tr))) {
+ /* sleep 1 ms to make sure whatever holding select_trampoline_lock(tr)
+ * makes some progress.
*/
msleep(1);
return -EAGAIN;
@@ -129,7 +157,7 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
break;
}
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
return ret;
}
#endif
@@ -359,7 +387,6 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
head = &trampoline_ip_table[hash_64(tr->ip, TRAMPOLINE_HASH_BITS)];
hlist_add_head(&tr->hlist_ip, head);
refcount_set(&tr->refcnt, 1);
- mutex_init(&tr->mutex);
for (i = 0; i < BPF_TRAMP_MAX; i++)
INIT_HLIST_HEAD(&tr->progs_hlist[i]);
out:
@@ -844,9 +871,9 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
{
int err;
- mutex_lock(&tr->mutex);
+ trampoline_lock(tr);
err = __bpf_trampoline_link_prog(link, tr, tgt_prog);
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
return err;
}
@@ -887,9 +914,9 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
{
int err;
- mutex_lock(&tr->mutex);
+ trampoline_lock(tr);
err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog);
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
return err;
}
@@ -999,12 +1026,12 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
if (!tr)
return -ENOMEM;
- mutex_lock(&tr->mutex);
+ trampoline_lock(tr);
shim_link = cgroup_shim_find(tr, bpf_func);
if (shim_link && !IS_ERR(bpf_link_inc_not_zero(&shim_link->link.link))) {
/* Reusing existing shim attached by the other program. */
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
bpf_trampoline_put(tr); /* bpf_trampoline_get above */
return 0;
}
@@ -1024,16 +1051,16 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
shim_link->trampoline = tr;
/* note, we're still holding tr refcnt from above */
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
return 0;
err:
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
if (shim_link)
bpf_link_put(&shim_link->link.link);
- /* have to release tr while _not_ holding its mutex */
+ /* have to release tr while _not_ holding pool mutex for trampoline */
bpf_trampoline_put(tr); /* bpf_trampoline_get above */
return err;
@@ -1054,9 +1081,9 @@ void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
if (WARN_ON_ONCE(!tr))
return;
- mutex_lock(&tr->mutex);
+ trampoline_lock(tr);
shim_link = cgroup_shim_find(tr, bpf_func);
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
if (shim_link)
bpf_link_put(&shim_link->link.link);
@@ -1074,14 +1101,14 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key,
if (!tr)
return NULL;
- mutex_lock(&tr->mutex);
+ trampoline_lock(tr);
if (tr->func.addr)
goto out;
memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
tr->func.addr = (void *)tgt_info->tgt_addr;
out:
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
return tr;
}
@@ -1094,7 +1121,6 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
mutex_lock(&trampoline_mutex);
if (!refcount_dec_and_test(&tr->refcnt))
goto out;
- WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
for (i = 0; i < BPF_TRAMP_MAX; i++)
if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
@@ -1380,6 +1406,8 @@ static int __init init_trampolines(void)
INIT_HLIST_HEAD(&trampoline_key_table[i]);
for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
INIT_HLIST_HEAD(&trampoline_ip_table[i]);
+ for (i = 0; i < TRAMPOLINE_LOCKS_TABLE_SIZE; i++)
+ __mutex_init(&trampoline_locks[i].mutex, "trampoline_lock", &trampoline_locks[i].key);
return 0;
}
late_initcall(init_trampolines);
--
2.53.0
^ permalink raw reply related
* [PATCHv3 bpf-next 01/24] ftrace: Add ftrace_hash_count function
From: Jiri Olsa @ 2026-03-16 7:51 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260316075138.465430-1-jolsa@kernel.org>
Adding external ftrace_hash_count function so we could get hash
count outside of ftrace object.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
include/linux/ftrace.h | 1 +
kernel/trace/ftrace.c | 7 ++++++-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index c242fe49af4c..401f8dfd05d3 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -415,6 +415,7 @@ struct ftrace_hash *alloc_ftrace_hash(int size_bits);
void free_ftrace_hash(struct ftrace_hash *hash);
struct ftrace_func_entry *add_ftrace_hash_entry_direct(struct ftrace_hash *hash,
unsigned long ip, unsigned long direct);
+unsigned long ftrace_hash_count(struct ftrace_hash *hash);
/* The hash used to know what functions callbacks trace */
struct ftrace_ops_hash {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 71dcbfeac86c..2240c38e7216 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6288,11 +6288,16 @@ int modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
}
EXPORT_SYMBOL_GPL(modify_ftrace_direct);
-static unsigned long hash_count(struct ftrace_hash *hash)
+static inline unsigned long hash_count(struct ftrace_hash *hash)
{
return hash ? hash->count : 0;
}
+unsigned long ftrace_hash_count(struct ftrace_hash *hash)
+{
+ return hash_count(hash);
+}
+
/**
* hash_add - adds two struct ftrace_hash and returns the result
* @a: struct ftrace_hash object
--
2.53.0
^ permalink raw reply related
* [PATCHv3 bpf-next 00/24] bpf: tracing_multi link
From: Jiri Olsa @ 2026-03-16 7:51 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: Hengqi Chen, bpf, linux-trace-kernel, Martin KaFai Lau,
Eduard Zingerman, Song Liu, Yonghong Song, Menglong Dong,
Steven Rostedt
hi,
adding tracing_multi link support that allows fast attachment
of tracing program to many functions.
RFC: https://lore.kernel.org/bpf/20260203093819.2105105-1-jolsa@kernel.org/
v1: https://lore.kernel.org/bpf/20260220100649.628307-1-jolsa@kernel.org/
v2: https://lore.kernel.org/bpf/20260304222141.497203-1-jolsa@kernel.org/
v3 changes:
- fix module parsing [Leon Hwang]
- use function traceable check from libbpf [Leon Hwang]
- use ptr_to_u64 and fix/updated few comments [ci]
- display cookies as decimal numbers [ci]
- added link_create.flags check [ci]
- fix error path in bpf_trampoline_multi_detach [ci]
- make fentry/fexit.multi not extendable [ci]
- add missing OPTS_VALID to bpf_program__attach_tracing_multi [ci]
v2 changes:
- allocate data.unreg in bpf_trampoline_multi_attach for rollback path [ci]
and fixed link count setup in rollback path [ci]
- several small assorted fixes [ci]
- added loongarch and powerpc changes for struct bpf_tramp_node change
- added support to attach functions from modules
- added tests for sleepable programs
- added rollback tests
v1 changes:
- added ftrace_hash_count as wrapper for hash_count [Steven]
- added trampoline mutex pool [Andrii]
- reworked 'struct bpf_tramp_node' separation [Andrii]
- the 'struct bpf_tramp_node' now holds pointer to bpf_link,
which is similar to what we do for uprobe_multi;
I understand it's not a fundamental change compared to previous
version which used bpf_prog pointer instead, but I don't see better
way of doing this.. I'm happy to discuss this further if there's
better idea
- reworked 'struct bpf_fsession_link' based on bpf_tramp_node
- made btf__find_by_glob_kind function internal helper [Andrii]
- many small assorted fixes [Andrii,CI]
- added session support [Leon Hwang]
- added cookies support
- added more tests
Note I plan to send linkinfo support separately, the patchset is big enough.
thanks,
jirka
Cc: Hengqi Chen <hengqi.chen@gmail.com>
---
Jiri Olsa (24):
ftrace: Add ftrace_hash_count function
bpf: Use mutex lock pool for bpf trampolines
bpf: Add struct bpf_trampoline_ops object
bpf: Add struct bpf_tramp_node object
bpf: Factor fsession link to use struct bpf_tramp_node
bpf: Add multi tracing attach types
bpf: Move sleepable verification code to btf_id_allow_sleepable
bpf: Add bpf_trampoline_multi_attach/detach functions
bpf: Add support for tracing multi link
bpf: Add support for tracing_multi link cookies
bpf: Add support for tracing_multi link session
bpf: Add support for tracing_multi link fdinfo
libbpf: Add bpf_object_cleanup_btf function
libbpf: Add bpf_link_create support for tracing_multi link
libbpf: Add btf_type_is_traceable_func function
libbpf: Add support to create tracing multi link
selftests/bpf: Add tracing multi skel/pattern/ids attach tests
selftests/bpf: Add tracing multi skel/pattern/ids module attach tests
selftests/bpf: Add tracing multi intersect tests
selftests/bpf: Add tracing multi cookies test
selftests/bpf: Add tracing multi session test
selftests/bpf: Add tracing multi attach fails test
selftests/bpf: Add tracing multi attach benchmark test
selftests/bpf: Add tracing multi attach rollback tests
arch/arm64/net/bpf_jit_comp.c | 58 +++---
arch/loongarch/net/bpf_jit.c | 44 ++---
arch/powerpc/net/bpf_jit_comp.c | 46 ++---
arch/riscv/net/bpf_jit_comp64.c | 52 +++---
arch/s390/net/bpf_jit_comp.c | 44 ++---
arch/x86/net/bpf_jit_comp.c | 54 +++---
include/linux/bpf.h | 91 ++++++---
include/linux/bpf_types.h | 1 +
include/linux/bpf_verifier.h | 3 +
include/linux/btf_ids.h | 1 +
include/linux/ftrace.h | 1 +
include/linux/trace_events.h | 6 +
include/uapi/linux/bpf.h | 9 +
kernel/bpf/bpf_struct_ops.c | 27 +--
kernel/bpf/btf.c | 4 +
kernel/bpf/syscall.c | 88 +++++----
kernel/bpf/trampoline.c | 512 ++++++++++++++++++++++++++++++++++++++++----------
kernel/bpf/verifier.c | 124 +++++++++---
kernel/trace/bpf_trace.c | 149 ++++++++++++++-
kernel/trace/ftrace.c | 7 +-
net/bpf/bpf_dummy_struct_ops.c | 14 +-
net/bpf/test_run.c | 3 +
tools/include/uapi/linux/bpf.h | 10 +
tools/lib/bpf/bpf.c | 9 +
tools/lib/bpf/bpf.h | 5 +
tools/lib/bpf/libbpf.c | 337 ++++++++++++++++++++++++++++++++-
tools/lib/bpf/libbpf.h | 15 ++
tools/lib/bpf/libbpf.map | 1 +
tools/lib/bpf/libbpf_internal.h | 1 +
tools/testing/selftests/bpf/Makefile | 9 +-
tools/testing/selftests/bpf/prog_tests/tracing_multi.c | 860 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
tools/testing/selftests/bpf/progs/tracing_multi_attach.c | 40 ++++
tools/testing/selftests/bpf/progs/tracing_multi_attach_module.c | 26 +++
tools/testing/selftests/bpf/progs/tracing_multi_bench.c | 13 ++
tools/testing/selftests/bpf/progs/tracing_multi_check.c | 213 +++++++++++++++++++++
tools/testing/selftests/bpf/progs/tracing_multi_fail.c | 19 ++
tools/testing/selftests/bpf/progs/tracing_multi_intersect_attach.c | 42 +++++
tools/testing/selftests/bpf/progs/tracing_multi_rollback.c | 38 ++++
tools/testing/selftests/bpf/progs/tracing_multi_session_attach.c | 43 +++++
tools/testing/selftests/bpf/trace_helpers.c | 6 +-
tools/testing/selftests/bpf/trace_helpers.h | 1 +
41 files changed, 2661 insertions(+), 365 deletions(-)
create mode 100644 tools/testing/selftests/bpf/prog_tests/tracing_multi.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_attach.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_attach_module.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_bench.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_check.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_fail.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_intersect_attach.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_rollback.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_session_attach.c
^ permalink raw reply
* Re: [PATCH V2] tracing: Revert "tracing: Remove pid in task_rename tracing output"
From: Xuewen Yan @ 2026-03-16 2:00 UTC (permalink / raw)
To: Steven Rostedt
Cc: Xuewen Yan, mhiramat, mathieu.desnoyers, elver, kees,
lorenzo.stoakes, brauner, schuster.simon, david, linux-kernel,
linux-trace-kernel, guohua.yan, ke.wang, jing.xia
In-Reply-To: <20260306100625.2211675a@gandalf.local.home>
Hi Steven,
Unless there are any further comments, could you please help to take
this through the tracing tree?
Thanks!
On Fri, Mar 6, 2026 at 11:06 PM Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Fri, 6 Mar 2026 15:59:54 +0800
> Xuewen Yan <xuewen.yan@unisoc.com> wrote:
>
> > This reverts commit e3f6a42272e028c46695acc83fc7d7c42f2750ad.
> >
> > The commit says that the tracepoint only deals with the current task,
> > however the following case is not current task:
> >
> > comm_write() {
> > p = get_proc_task(inode);
> > if (!p)
> > return -ESRCH;
> >
> > if (same_thread_group(current, p))
> > set_task_comm(p, buffer);
> > }
> > where set_task_comm() calls __set_task_comm() which records
> > the update of p and not current.
> >
> > So revert the patch to show pid.
> >
> > Fixes: e3f6a42272e0 ("tracing: Remove pid in task_rename tracing output")
> > Reported-by: Guohua Yan <guohua.yan@unisoc.com>
> > Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
>
> Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
>
> -- Steve
>
> > ---
> > v2:
> > - update commit message (Steven)
> > ---
> > include/trace/events/task.h | 7 +++++--
> > 1 file changed, 5 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/trace/events/task.h b/include/trace/events/task.h
> > index 4f0759634306..b9a129eb54d9 100644
> > --- a/include/trace/events/task.h
> > +++ b/include/trace/events/task.h
> > @@ -38,19 +38,22 @@ TRACE_EVENT(task_rename,
> > TP_ARGS(task, comm),
> >
> > TP_STRUCT__entry(
> > + __field( pid_t, pid)
> > __array( char, oldcomm, TASK_COMM_LEN)
> > __array( char, newcomm, TASK_COMM_LEN)
> > __field( short, oom_score_adj)
> > ),
> >
> > TP_fast_assign(
> > + __entry->pid = task->pid;
> > memcpy(entry->oldcomm, task->comm, TASK_COMM_LEN);
> > strscpy(entry->newcomm, comm, TASK_COMM_LEN);
> > __entry->oom_score_adj = task->signal->oom_score_adj;
> > ),
> >
> > - TP_printk("oldcomm=%s newcomm=%s oom_score_adj=%hd",
> > - __entry->oldcomm, __entry->newcomm, __entry->oom_score_adj)
> > + TP_printk("pid=%d oldcomm=%s newcomm=%s oom_score_adj=%hd",
> > + __entry->pid, __entry->oldcomm,
> > + __entry->newcomm, __entry->oom_score_adj)
> > );
> >
> > /**
>
^ permalink raw reply
* Re: [RFC PATCH 0/4] Enable Clang's Source-based Code Coverage and MC/DC for x86-64
From: Sasha Levin @ 2026-03-15 14:15 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Chuck Wolber, nathan, Matt.Kelly2, akpm, andrew.j.oppelt,
anton.ivanov, ardb, arnd, bhelgaas, bp, chuck.wolber, dave.hansen,
dvyukov, hpa, jinghao7, johannes, jpoimboe, justinstitt, kees,
kent.overstreet, linux-arch, linux-efi, linux-kbuild,
linux-kernel, linux-trace-kernel, linux-um, llvm, luto, marinov,
masahiroy, maskray, mathieu.desnoyers, matthew.l.weber3, mhiramat,
mingo, morbo, ndesaulniers, oberpar, paulmck, richard, rostedt,
samitolvanen, samuel.sarkisian, steven.h.vanderleest, tglx,
tingxur, tyxu, wentaoz5, x86
In-Reply-To: <20251015092145.GB3419281@noisy.programming.kicks-ass.net>
On Wed, Oct 15, 2025 at 11:21:45AM +0200, Peter Zijlstra wrote:
>On Wed, Oct 15, 2025 at 08:26:50AM +0000, Chuck Wolber wrote:
>> Optimization makes it nearly impossible to correlate GCov results back to
>> actual lines of source. llvm-cov instruments at the AST level which enables
>> precise mapping back to source code regardless of optimization level.
>>
>>
>> A detailed rundown on this issue can be found here[1], with the most relevant
>> excerpt reproduced here:
>
>Yes read and understand this, but that doesn't mean you have to have 3
>different kernel interfaces for all of this, right?
To clarify, are you suggesting that we'll have something like a single
/sys/kernel/debug/coverage interface that is producing the same structured
output whether we use gcov or llvm?
--
Thanks,
Sasha
^ permalink raw reply
* Re: [PATCH 16/53] ovl: drop dir lock for lookups in impure readdir
From: Amir Goldstein @ 2026-03-15 13:51 UTC (permalink / raw)
To: NeilBrown
Cc: Linus Torvalds, Alexander Viro, Christian Brauner, Jan Kara,
Jeff Layton, Trond Myklebust, Anna Schumaker, Carlos Maiolino,
Miklos Szeredi, Jan Harkes, Hugh Dickins, Baolin Wang,
David Howells, Marc Dionne, Steve French, Namjae Jeon,
Sungjong Seo, Yuezhang Mo, Andreas Hindborg, Breno Leitao,
Theodore Ts'o, Andreas Dilger, Steven Rostedt,
Masami Hiramatsu, Ilya Dryomov, Alex Markuze, Viacheslav Dubeyko,
Tyler Hicks, Andreas Gruenbacher, Richard Weinberger,
Anton Ivanov, Johannes Berg, Jeremy Kerr, Ard Biesheuvel,
linux-fsdevel, linux-nfs, linux-xfs, linux-unionfs, coda,
linux-mm, linux-afs, linux-cifs, linux-ext4, linux-kernel,
linux-trace-kernel, ceph-devel, ecryptfs, gfs2, linux-um,
linux-efi
In-Reply-To: <20260312214330.3885211-17-neilb@ownmail.net>
On Thu, Mar 12, 2026 at 10:49 PM NeilBrown <neilb@ownmail.net> wrote:
>
> From: NeilBrown <neil@brown.name>
>
> When performing an "impure" readdir, ovl needs to perform a lookup on some
> of the names that it found.
> With proposed locking changes it will not be possible to perform this
> lookup (in particular, not safe to wait for d_alloc_parallel()) while
> holding a lock on the directory.
>
> ovl doesn't really need the lock at this point.
Not exactly; see below.
> It has already iterated
> the directory and has cached a list of the contents. It now needs to
> gather extra information about some contents. It can do this without
> the lock.
>
> After gathering that info it needs to retake the lock for API
> correctness. After doing this it must check IS_DEADDIR() again to
> ensure readdir always returns -ENOENT on a removed directory.
>
> Note that while ->iterate_shared is called with a shared lock, ovl uses
> WRAP_DIR_ITER() so an exclusive lock is held and so we drop and retake
> that exclusive lock.
>
> As the directory is no longer locked in ovl_cache_update() we need
> dget_parent() to get a reference to the parent.
>
> Signed-off-by: NeilBrown <neil@brown.name>
> ---
> fs/overlayfs/readdir.c | 19 ++++++++++++-------
> 1 file changed, 12 insertions(+), 7 deletions(-)
>
> diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
> index 1dcc75b3a90f..d5123b37921c 100644
> --- a/fs/overlayfs/readdir.c
> +++ b/fs/overlayfs/readdir.c
> @@ -568,13 +568,12 @@ static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p,
> goto get;
> }
> if (p->len == 2) {
> - /* we shall not be moved */
> - this = dget(dir->d_parent);
> + this = dget_parent(dir);
> goto get;
> }
> }
> /* This checks also for xwhiteouts */
> - this = lookup_one(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir);
> + this = lookup_one_unlocked(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir);
ovl_cache_update() is also called from ovl_iterate_merged() where inode
is locked.
> if (IS_ERR_OR_NULL(this) || !this->d_inode) {
> /* Mark a stale entry */
> p->is_whiteout = true;
> @@ -666,11 +665,12 @@ static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
> if (err)
> return err;
>
> + inode_unlock(path->dentry->d_inode);
> list_for_each_entry_safe(p, n, list, l_node) {
> if (!name_is_dot_dotdot(p->name, p->len)) {
> err = ovl_cache_update(path, p, true);
> if (err)
> - return err;
> + break;
> }
> if (p->ino == p->real_ino) {
> list_del(&p->l_node);
> @@ -680,14 +680,19 @@ static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
> struct rb_node *parent = NULL;
>
> if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
> - &newp, &parent)))
> - return -EIO;
> + &newp, &parent))) {
> + err = -EIO;
> + break;
> + }
>
> rb_link_node(&p->node, parent, newp);
> rb_insert_color(&p->node, root);
> }
> }
> - return 0;
> + inode_lock(path->dentry->d_inode);
> + if (IS_DEADDIR(path->dentry->d_inode))
> + err = -ENOENT;
> + return err;
> }
>
> static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
> --
You missed the fact that overlayfs uses the dir inode lock
to protect the readdir inode cache, so your patch introduces
a risk of storing a stale readdir cache when dir modify operations
invalidate the readdir cache version while the lock is dropped,
and also introduces a memory leak when the cache is stomped
without freeing the cache created by a competing thread.
I think something like the untested patch below should fix this.
I did not look into ovl_iterate_merged() to see if it has a simple
fix and I am not 100% sure that this fix for impure dir is enough.
Thanks,
Amir.
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index d5123b37921c8..9e90064b252ce 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -702,15 +702,13 @@ static struct ovl_dir_cache
*ovl_cache_get_impure(const struct path *path)
struct inode *inode = d_inode(dentry);
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct ovl_dir_cache *cache;
+ /* Snapshot version before ovl_dir_read_impure() drops i_rwsem */
+ u64 version = ovl_inode_version_get(inode);
cache = ovl_dir_cache(inode);
- if (cache && ovl_inode_version_get(inode) == cache->version)
+ if (cache && version == cache->version)
return cache;
- /* Impure cache is not refcounted, free it here */
- ovl_dir_cache_free(inode);
- ovl_set_dir_cache(inode, NULL);
-
cache = kzalloc_obj(struct ovl_dir_cache);
if (!cache)
return ERR_PTR(-ENOMEM);
@@ -721,6 +719,14 @@ static struct ovl_dir_cache
*ovl_cache_get_impure(const struct path *path)
kfree(cache);
return ERR_PTR(res);
}
+
+ /*
+ * Impure cache is not refcounted, free it here.
+ * Also frees cache stored by concurrent readdir during i_rwsem drop.
+ */
+ ovl_dir_cache_free(inode);
+ ovl_set_dir_cache(inode, NULL);
+
if (list_empty(&cache->entries)) {
/*
* A good opportunity to get rid of an unneeded "impure" flag.
@@ -736,7 +742,7 @@ static struct ovl_dir_cache
*ovl_cache_get_impure(const struct path *path)
return NULL;
}
- cache->version = ovl_inode_version_get(inode);
+ cache->version = version;
ovl_set_dir_cache(inode, cache);
return cache;
^ permalink raw reply related
* [PATCH v6 09/17] lib/bootconfig: validate child node index in xbc_verify_tree()
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
xbc_verify_tree() validates that each node's next index is within
bounds, but does not check the child index. Add the same bounds
check for the child field.
Without this check, a corrupt bootconfig that passes next-index
validation could still trigger an out-of-bounds memory access via an
invalid child index when xbc_node_get_child() is called during tree
traversal at boot time.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 0823491221f4..038f56689a48 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -823,6 +823,10 @@ static int __init xbc_verify_tree(void)
return xbc_parse_error("No closing brace",
xbc_node_get_data(xbc_nodes + i));
}
+ if (xbc_nodes[i].child >= xbc_node_num) {
+ return xbc_parse_error("Broken child node",
+ xbc_node_get_data(xbc_nodes + i));
+ }
}
/* Key tree limitation check */
--
2.34.1
^ permalink raw reply related
* [PATCH v6 17/17] lib/bootconfig: change xbc_node_index() return type to uint16_t
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
lib/bootconfig.c:136:21: warning: conversion from 'long int' to
'int' may change value [-Wconversion]
lib/bootconfig.c:308:33: warning: conversion from 'int' to 'uint16_t'
may change value [-Wconversion]
lib/bootconfig.c:467:37: warning: conversion from 'int' to 'uint16_t'
may change value [-Wconversion]
lib/bootconfig.c:469:40: warning: conversion from 'int' to 'uint16_t'
may change value [-Wconversion]
lib/bootconfig.c:472:54: warning: conversion from 'int' to 'uint16_t'
may change value [-Wconversion]
lib/bootconfig.c:476:45: warning: conversion from 'int' to 'uint16_t'
may change value [-Wconversion]
xbc_node_index() returns the position of a node in the xbc_nodes array,
which has at most XBC_NODE_MAX (8192) entries, well within uint16_t
range. Every caller stores the result in a uint16_t field (node->parent,
node->child, node->next, or the keys[] array in compose_key_after), so
the int return type causes narrowing warnings at all six call sites.
Change the return type to uint16_t and add an explicit cast on the
pointer subtraction to match the storage width and eliminate the
warnings.
Signed-off-by: Josh Law <objecting@objecting.org>
---
include/linux/bootconfig.h | 2 +-
lib/bootconfig.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 23a96c5edcf3..692a5acc2ffc 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -66,7 +66,7 @@ struct xbc_node {
/* Node tree access raw APIs */
struct xbc_node * __init xbc_root_node(void);
-int __init xbc_node_index(struct xbc_node *node);
+uint16_t __init xbc_node_index(struct xbc_node *node);
struct xbc_node * __init xbc_node_get_parent(struct xbc_node *node);
struct xbc_node * __init xbc_node_get_child(struct xbc_node *node);
struct xbc_node * __init xbc_node_get_next(struct xbc_node *node);
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 68a72dbc38fa..148084abae12 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -131,9 +131,9 @@ struct xbc_node * __init xbc_root_node(void)
*
* Return the index number of @node in XBC node list.
*/
-int __init xbc_node_index(struct xbc_node *node)
+uint16_t __init xbc_node_index(struct xbc_node *node)
{
- return node - &xbc_nodes[0];
+ return (uint16_t)(node - &xbc_nodes[0]);
}
/**
--
2.34.1
^ permalink raw reply related
* [PATCH v6 16/17] lib/bootconfig: fix sign-compare in xbc_node_compose_key_after()
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
lib/bootconfig.c:322:25: warning: comparison of integer expressions
of different signedness: 'int' and 'size_t' [-Wsign-compare]
lib/bootconfig.c:325:30: warning: conversion to 'size_t' from 'int'
may change the sign of the result [-Wsign-conversion]
snprintf() returns int but size is size_t, so comparing ret >= size
and subtracting size -= ret involve mixed-sign operations. Cast size
to int at the comparison and ret to size_t at the subtraction; ret is
known non-negative at this point because the ret < 0 early return has
already been taken.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index e318b236e728..68a72dbc38fa 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -319,10 +319,10 @@ int __init xbc_node_compose_key_after(struct xbc_node *root,
depth ? "." : "");
if (ret < 0)
return ret;
- if (ret >= size) {
+ if (ret >= (int)size) {
size = 0;
} else {
- size -= ret;
+ size -= (size_t)ret;
buf += ret;
}
total += ret;
--
2.34.1
^ permalink raw reply related
* [PATCH v6 15/17] lib/bootconfig: use size_t for key length tracking in xbc_verify_tree()
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
lib/bootconfig.c:839:24: warning: conversion from 'size_t' to 'int'
may change value [-Wconversion]
lib/bootconfig.c:860:32: warning: conversion from 'size_t' to 'int'
may change value [-Wconversion]
lib/bootconfig.c:860:29: warning: conversion to 'size_t' from 'int'
may change the sign of the result [-Wsign-conversion]
The key length variables len and wlen accumulate strlen() results but
were declared as int, causing truncation and sign-conversion warnings.
Change both to size_t to match the strlen() return type and avoid
mixed-sign arithmetic.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 7296df003459..e318b236e728 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -803,7 +803,8 @@ static int __init xbc_close_brace(char **k, char *n)
static int __init xbc_verify_tree(void)
{
- int i, depth, len, wlen;
+ int i, depth;
+ size_t len, wlen;
struct xbc_node *n, *m;
/* Brace closing */
--
2.34.1
^ permalink raw reply related
* [PATCH v6 14/17] lib/bootconfig: narrow offset type in xbc_init_node()
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
lib/bootconfig.c:415:32: warning: conversion to 'long unsigned int'
from 'long int' may change the sign of the result [-Wsign-conversion]
Pointer subtraction yields ptrdiff_t (signed long), which was stored in
unsigned long. The offset is immediately checked against XBC_DATA_MAX
(32767) and then truncated to uint16_t, so unsigned int is sufficient.
Add an explicit cast on the subtraction to suppress the sign-conversion
warning.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 995c2ec94cbe..7296df003459 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -412,7 +412,7 @@ const char * __init xbc_node_find_next_key_value(struct xbc_node *root,
static int __init xbc_init_node(struct xbc_node *node, char *data, uint16_t flag)
{
- unsigned long offset = data - xbc_data;
+ unsigned int offset = (unsigned int)(data - xbc_data);
if (WARN_ON(offset >= XBC_DATA_MAX))
return -EINVAL;
--
2.34.1
^ permalink raw reply related
* [PATCH v6 13/17] lib/bootconfig: use size_t for strlen result in xbc_node_match_prefix()
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
lib/bootconfig.c:198:19: warning: conversion from 'size_t' to 'int'
may change value [-Wconversion]
lib/bootconfig.c:200:33: warning: conversion to '__kernel_size_t'
from 'int' may change the sign of the result [-Wsign-conversion]
strlen() returns size_t but the result was stored in an int. The value
is then passed back to strncmp() which expects size_t, causing a second
sign-conversion warning on the round-trip. Use size_t throughout to
match the API types.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 806a8f038d24..995c2ec94cbe 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -195,7 +195,7 @@ static bool __init
xbc_node_match_prefix(struct xbc_node *node, const char **prefix)
{
const char *p = xbc_node_get_data(node);
- int len = strlen(p);
+ size_t len = strlen(p);
if (strncmp(*prefix, p, len))
return false;
--
2.34.1
^ permalink raw reply related
* [PATCH v6 10/17] lib/bootconfig: check xbc_init_node() return in override path
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
The ':=' override path in xbc_parse_kv() calls xbc_init_node() to
re-initialize an existing value node but does not check the return
value. If xbc_init_node() fails (data offset out of range), parsing
silently continues with stale node data.
Add the missing error check to match the xbc_add_node() call path
which already checks for failure.
In practice, a bootconfig using ':=' to override a value near the
32KB data limit could silently retain the old value, meaning a
security-relevant boot parameter override (e.g., a trace filter or
debug setting) would not take effect as intended.
Fixes: e5efaeb8a8f5 ("bootconfig: Support mixing a value and subkeys under a key")
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 038f56689a48..182d9d9bc5a6 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -728,7 +728,8 @@ static int __init xbc_parse_kv(char **k, char *v, int op)
if (op == ':') {
unsigned short nidx = child->next;
- xbc_init_node(child, v, XBC_VALUE);
+ if (xbc_init_node(child, v, XBC_VALUE) < 0)
+ return xbc_parse_error("Failed to override value", v);
child->next = nidx; /* keep subkeys */
goto array;
}
--
2.34.1
^ permalink raw reply related
* [PATCH v6 12/17] lib/bootconfig: fix signed comparison in xbc_node_get_data()
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
lib/bootconfig.c:188:28: warning: comparison of integer expressions
of different signedness: 'int' and 'size_t' [-Wsign-compare]
The local variable 'offset' is declared as int, but xbc_data_size is
size_t. Using ~XBC_VALUE as the mask also involves integer promotion
rules that obscure intent.
Change the type to unsigned int and mask with XBC_DATA_MAX (which is
the 15-bit data mask) instead of ~XBC_VALUE, making the expression
self-documenting and eliminating the signed/unsigned comparison.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 182d9d9bc5a6..806a8f038d24 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -183,7 +183,7 @@ struct xbc_node * __init xbc_node_get_next(struct xbc_node *node)
*/
const char * __init xbc_node_get_data(struct xbc_node *node)
{
- int offset = node->data & ~XBC_VALUE;
+ unsigned int offset = node->data & XBC_DATA_MAX;
if (WARN_ON(offset >= xbc_data_size))
return NULL;
--
2.34.1
^ permalink raw reply related
* [PATCH v6 06/17] lib/bootconfig: drop redundant memset of xbc_nodes
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
memblock_alloc() already returns zeroed memory, so the explicit memset
in xbc_init() is redundant. Switch the userspace xbc_alloc_mem() from
malloc() to calloc() so both paths return zeroed memory, and remove
the separate memset call.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 06e8a79ab472..fe1053043752 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -71,7 +71,7 @@ static inline void __init xbc_free_mem(void *addr, size_t size, bool early)
static inline void *xbc_alloc_mem(size_t size)
{
- return malloc(size);
+ return calloc(1, size);
}
static inline void xbc_free_mem(void *addr, size_t size, bool early)
@@ -982,7 +982,6 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
_xbc_exit(true);
return -ENOMEM;
}
- memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX);
ret = xbc_parse_tree();
if (!ret)
--
2.34.1
^ permalink raw reply related
* [PATCH v6 08/17] lib/bootconfig: replace linux/kernel.h with specific includes
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
linux/kernel.h is a legacy catch-all header. Replace it with the
specific headers actually needed: linux/cache.h for SMP_CACHE_BYTES,
linux/compiler.h for unlikely(), and linux/sprintf.h for snprintf().
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index fe1053043752..0823491221f4 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -17,7 +17,9 @@
#include <linux/bug.h>
#include <linux/ctype.h>
#include <linux/errno.h>
-#include <linux/kernel.h>
+#include <linux/cache.h>
+#include <linux/compiler.h>
+#include <linux/sprintf.h>
#include <linux/memblock.h>
#include <linux/string.h>
--
2.34.1
^ permalink raw reply related
* [PATCH v6 11/17] tools/bootconfig: fix fd leak in load_xbc_file() on fstat failure
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
If fstat() fails after open() succeeds, load_xbc_file() returns
-errno without closing the file descriptor. Add the missing close()
call on the error path.
Fixes: 950313ebf79c ("tools: bootconfig: Add bootconfig command")
Signed-off-by: Josh Law <objecting@objecting.org>
---
tools/bootconfig/main.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 55d59ed507d5..8078fee0b75b 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -162,8 +162,10 @@ static int load_xbc_file(const char *path, char **buf)
if (fd < 0)
return -errno;
ret = fstat(fd, &stat);
- if (ret < 0)
+ if (ret < 0) {
+ close(fd);
return -errno;
+ }
ret = load_xbc_fd(fd, buf, stat.st_size);
--
2.34.1
^ permalink raw reply related
* [PATCH v6 05/17] lib/bootconfig: increment xbc_node_num after node init succeeds
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
Move the xbc_node_num increment to after xbc_init_node() so a failed
init does not leave a partially initialized node counted in the array.
If xbc_init_node() fails on a data offset at the boundary of a
maximum-size bootconfig, the pre-incremented count causes subsequent
tree verification and traversal to consider the uninitialized node as
valid, potentially leading to an out-of-bounds read or unpredictable
boot behavior.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 56fbedc9e725..06e8a79ab472 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -429,9 +429,10 @@ static struct xbc_node * __init xbc_add_node(char *data, uint16_t flag)
if (xbc_node_num == XBC_NODE_MAX)
return NULL;
- node = &xbc_nodes[xbc_node_num++];
+ node = &xbc_nodes[xbc_node_num];
if (xbc_init_node(node, data, flag) < 0)
return NULL;
+ xbc_node_num++;
return node;
}
--
2.34.1
^ permalink raw reply related
* [PATCH v6 07/17] bootconfig: constify xbc_calc_checksum() data parameter
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
xbc_calc_checksum() only reads the data buffer, so mark the parameter
as const void * and the internal pointer as const unsigned char *.
Signed-off-by: Josh Law <objecting@objecting.org>
---
include/linux/bootconfig.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 25df9260d206..23a96c5edcf3 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -36,9 +36,9 @@ bool __init cmdline_has_extra_options(void);
* The checksum will be used with the BOOTCONFIG_MAGIC and the size for
* embedding the bootconfig in the initrd image.
*/
-static inline __init uint32_t xbc_calc_checksum(void *data, uint32_t size)
+static inline __init uint32_t xbc_calc_checksum(const void *data, uint32_t size)
{
- unsigned char *p = data;
+ const unsigned char *p = data;
uint32_t ret = 0;
while (size--)
--
2.34.1
^ permalink raw reply related
* [PATCH v6 02/17] lib/bootconfig: fix typos, kerneldoc, and inconsistent if/else bracing
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
Fix comment typos ("initiized" -> "initialized" in xbc_root_node(),
"uder" -> "under" in xbc_node_find_next_leaf()), add a missing blank
line before the xbc_get_info() kerneldoc block, and add braces to
if/else blocks where one branch uses braces but the other does not,
per coding-style section 3.1.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 51fd2299ec0f..80de9540245d 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -79,6 +79,7 @@ static inline void xbc_free_mem(void *addr, size_t size, bool early)
free(addr);
}
#endif
+
/**
* xbc_get_info() - Get the information of loaded boot config
* @node_size: A pointer to store the number of nodes.
@@ -112,7 +113,7 @@ static int __init xbc_parse_error(const char *msg, const char *p)
* xbc_root_node() - Get the root node of extended boot config
*
* Return the address of root node of extended boot config. If the
- * extended boot config is not initiized, return NULL.
+ * extended boot config is not initialized, return NULL.
*/
struct xbc_node * __init xbc_root_node(void)
{
@@ -364,7 +365,7 @@ struct xbc_node * __init xbc_node_find_next_leaf(struct xbc_node *root,
node = xbc_node_get_parent(node);
if (node == root)
return NULL;
- /* User passed a node which is not uder parent */
+ /* User passed a node which is not under parent */
if (WARN_ON(!node))
return NULL;
}
@@ -472,8 +473,9 @@ static struct xbc_node * __init __xbc_add_sibling(char *data, uint32_t flag, boo
sib->next = xbc_node_index(node);
}
}
- } else
+ } else {
xbc_parse_error("Too many nodes", data);
+ }
return node;
}
@@ -655,9 +657,9 @@ static int __init __xbc_add_key(char *k)
if (unlikely(xbc_node_num == 0))
goto add_node;
- if (!last_parent) /* the first level */
+ if (!last_parent) { /* the first level */
node = find_match_node(xbc_nodes, k);
- else {
+ } else {
child = xbc_node_get_child(last_parent);
/* Since the value node is the first child, skip it. */
if (child && xbc_node_is_value(child))
@@ -665,9 +667,9 @@ static int __init __xbc_add_key(char *k)
node = find_match_node(child, k);
}
- if (node)
+ if (node) {
last_parent = node;
- else {
+ } else {
add_node:
node = xbc_add_child(k, XBC_KEY);
if (!node)
@@ -991,8 +993,9 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
if (emsg)
*emsg = xbc_err_msg;
_xbc_exit(true);
- } else
+ } else {
ret = xbc_node_num;
+ }
return ret;
}
--
2.34.1
^ permalink raw reply related
* [PATCH v6 04/17] lib/bootconfig: fix off-by-one in xbc_verify_tree() next node check
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
Valid node indices are 0 to xbc_node_num-1, so a next value equal to
xbc_node_num is out of bounds. Use >= instead of > to catch this.
A malformed or corrupt bootconfig could pass tree verification with
an out-of-bounds next index. On subsequent tree traversal at boot
time, xbc_node_get_next() would return a pointer past the allocated
xbc_nodes array, causing an out-of-bounds read of kernel memory.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 58d6ae297280..56fbedc9e725 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -816,7 +816,7 @@ static int __init xbc_verify_tree(void)
}
for (i = 0; i < xbc_node_num; i++) {
- if (xbc_nodes[i].next > xbc_node_num) {
+ if (xbc_nodes[i].next >= xbc_node_num) {
return xbc_parse_error("No closing brace",
xbc_node_get_data(xbc_nodes + i));
}
--
2.34.1
^ permalink raw reply related
* [PATCH v6 03/17] lib/bootconfig: narrow flag parameter type from uint32_t to uint16_t
From: Josh Law @ 2026-03-15 12:20 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
The flag parameter in the node creation helpers only ever carries
XBC_KEY (0) or XBC_VALUE (0x8000), both of which fit in uint16_t.
Using uint16_t matches the width of xbc_node.data where the flag is
ultimately stored.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 80de9540245d..58d6ae297280 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -408,7 +408,7 @@ const char * __init xbc_node_find_next_key_value(struct xbc_node *root,
/* XBC parse and tree build */
-static int __init xbc_init_node(struct xbc_node *node, char *data, uint32_t flag)
+static int __init xbc_init_node(struct xbc_node *node, char *data, uint16_t flag)
{
unsigned long offset = data - xbc_data;
@@ -422,7 +422,7 @@ static int __init xbc_init_node(struct xbc_node *node, char *data, uint32_t flag
return 0;
}
-static struct xbc_node * __init xbc_add_node(char *data, uint32_t flag)
+static struct xbc_node * __init xbc_add_node(char *data, uint16_t flag)
{
struct xbc_node *node;
@@ -452,7 +452,7 @@ static inline __init struct xbc_node *xbc_last_child(struct xbc_node *node)
return node;
}
-static struct xbc_node * __init __xbc_add_sibling(char *data, uint32_t flag, bool head)
+static struct xbc_node * __init __xbc_add_sibling(char *data, uint16_t flag, bool head)
{
struct xbc_node *sib, *node = xbc_add_node(data, flag);
@@ -480,17 +480,17 @@ static struct xbc_node * __init __xbc_add_sibling(char *data, uint32_t flag, boo
return node;
}
-static inline struct xbc_node * __init xbc_add_sibling(char *data, uint32_t flag)
+static inline struct xbc_node * __init xbc_add_sibling(char *data, uint16_t flag)
{
return __xbc_add_sibling(data, flag, false);
}
-static inline struct xbc_node * __init xbc_add_head_sibling(char *data, uint32_t flag)
+static inline struct xbc_node * __init xbc_add_head_sibling(char *data, uint16_t flag)
{
return __xbc_add_sibling(data, flag, true);
}
-static inline __init struct xbc_node *xbc_add_child(char *data, uint32_t flag)
+static inline __init struct xbc_node *xbc_add_child(char *data, uint16_t flag)
{
struct xbc_node *node = xbc_add_sibling(data, flag);
--
2.34.1
^ permalink raw reply related
* [PATCH v6 01/17] lib/bootconfig: add missing __init annotations to static helpers
From: Josh Law @ 2026-03-15 12:19 UTC (permalink / raw)
To: Masami Hiramatsu, Andrew Morton
Cc: Steven Rostedt, linux-kernel, linux-trace-kernel, Josh Law
In-Reply-To: <20260315122015.55965-1-objecting@objecting.org>
skip_comment() and skip_spaces_until_newline() are static functions
called exclusively from __init code paths but lack the __init
annotation themselves. Add it so their memory can be reclaimed after
init.
Signed-off-by: Josh Law <objecting@objecting.org>
---
lib/bootconfig.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index b0ef1e74e98a..51fd2299ec0f 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -509,7 +509,7 @@ static inline __init bool xbc_valid_keyword(char *key)
return *key == '\0';
}
-static char *skip_comment(char *p)
+static char __init *skip_comment(char *p)
{
char *ret;
@@ -522,7 +522,7 @@ static char *skip_comment(char *p)
return ret;
}
-static char *skip_spaces_until_newline(char *p)
+static char __init *skip_spaces_until_newline(char *p)
{
while (isspace(*p) && *p != '\n')
p++;
--
2.34.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox