* [PATCHv2 bpf-next 11/23] bpf: Add support for tracing_multi link session
From: Jiri Olsa @ 2026-03-04 22:21 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260304222141.497203-1-jolsa@kernel.org>
Add support for session attachment with the tracing_multi link.
Add a new BPF_TRACE_FSESSION_MULTI program attach type that follows
the BPF_TRACE_FSESSION behaviour, but on the tracing_multi link.
Such a program is called on both entry and exit of the attached function
and can pass a cookie value from the entry to the exit execution.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
include/linux/bpf.h | 6 ++++-
include/uapi/linux/bpf.h | 1 +
kernel/bpf/btf.c | 2 ++
kernel/bpf/syscall.c | 1 +
kernel/bpf/trampoline.c | 43 +++++++++++++++++++++++++++-------
kernel/bpf/verifier.c | 17 ++++++++++----
kernel/trace/bpf_trace.c | 15 +++++++++++-
net/bpf/test_run.c | 1 +
tools/include/uapi/linux/bpf.h | 1 +
tools/lib/bpf/libbpf.c | 1 +
10 files changed, 73 insertions(+), 15 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a919143a8b35..55f373464da3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1902,6 +1902,7 @@ struct bpf_tracing_multi_node {
struct bpf_tracing_multi_link {
struct bpf_link link;
u64 *cookies;
+ struct bpf_tramp_node *fexits;
int nodes_cnt;
struct bpf_tracing_multi_node nodes[] __counted_by(nodes_cnt);
};
@@ -2136,7 +2137,8 @@ u32 bpf_struct_ops_id(const void *kdata);
static inline bool is_tracing_multi(enum bpf_attach_type type)
{
- return type == BPF_TRACE_FENTRY_MULTI || type == BPF_TRACE_FEXIT_MULTI;
+ return type == BPF_TRACE_FENTRY_MULTI || type == BPF_TRACE_FEXIT_MULTI ||
+ type == BPF_TRACE_FSESSION_MULTI;
}
#ifdef CONFIG_NET
@@ -2213,6 +2215,8 @@ static inline int bpf_fsession_cnt(struct bpf_tramp_nodes *nodes)
for (int i = 0; i < nodes[BPF_TRAMP_FENTRY].nr_nodes; i++) {
if (fentries.nodes[i]->link->prog->expected_attach_type == BPF_TRACE_FSESSION)
cnt++;
+ if (fentries.nodes[i]->link->prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)
+ cnt++;
}
return cnt;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e28722ddeb5b..4520830fda06 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1156,6 +1156,7 @@ enum bpf_attach_type {
BPF_TRACE_FSESSION,
BPF_TRACE_FENTRY_MULTI,
BPF_TRACE_FEXIT_MULTI,
+ BPF_TRACE_FSESSION_MULTI,
__MAX_BPF_ATTACH_TYPE
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index c8738834bbc9..668a31952510 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6221,6 +6221,7 @@ static int btf_validate_prog_ctx_type(struct bpf_verifier_log *log, const struct
case BPF_TRACE_FEXIT:
case BPF_MODIFY_RETURN:
case BPF_TRACE_FSESSION:
+ case BPF_TRACE_FSESSION_MULTI:
case BPF_TRACE_FENTRY_MULTI:
case BPF_TRACE_FEXIT_MULTI:
/* allow u64* as ctx */
@@ -6825,6 +6826,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
case BPF_LSM_CGROUP:
case BPF_TRACE_FEXIT:
case BPF_TRACE_FSESSION:
+ case BPF_TRACE_FSESSION_MULTI:
/* When LSM programs are attached to void LSM hooks
* they use FEXIT trampolines and when attached to
* int LSM hooks, they use MODIFY_RETURN trampolines.
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 94c6a9c81ef0..c13cb812a1d3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4388,6 +4388,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
case BPF_TRACE_FSESSION:
+ case BPF_TRACE_FSESSION_MULTI:
case BPF_TRACE_FENTRY_MULTI:
case BPF_TRACE_FEXIT_MULTI:
case BPF_MODIFY_RETURN:
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 1aa723dac209..13a2eb62a544 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -199,7 +199,8 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
case BPF_PROG_TYPE_TRACING:
if (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
eatype == BPF_MODIFY_RETURN || eatype == BPF_TRACE_FSESSION ||
- eatype == BPF_TRACE_FENTRY_MULTI || eatype == BPF_TRACE_FEXIT_MULTI)
+ eatype == BPF_TRACE_FENTRY_MULTI || eatype == BPF_TRACE_FEXIT_MULTI ||
+ eatype == BPF_TRACE_FSESSION_MULTI)
return true;
return false;
case BPF_PROG_TYPE_LSM:
@@ -796,6 +797,7 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
case BPF_TRACE_FEXIT_MULTI:
return BPF_TRAMP_FEXIT;
case BPF_TRACE_FSESSION:
+ case BPF_TRACE_FSESSION_MULTI:
return BPF_TRAMP_FSESSION;
case BPF_LSM_MAC:
if (!prog->aux->attach_func_proto->type)
@@ -828,15 +830,34 @@ static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog)
return 0;
}
+static struct bpf_tramp_node *fsession_exit(struct bpf_tramp_node *node)
+{
+ if (node->link->type == BPF_LINK_TYPE_TRACING) {
+ struct bpf_tracing_link *link;
+
+ link = container_of(node->link, struct bpf_tracing_link, link.link);
+ return &link->fexit;
+ } else if (node->link->type == BPF_LINK_TYPE_TRACING_MULTI) {
+ struct bpf_tracing_multi_link *link;
+ struct bpf_tracing_multi_node *mnode;
+
+ link = container_of(node->link, struct bpf_tracing_multi_link, link);
+ mnode = container_of(node, struct bpf_tracing_multi_node, node);
+ return &link->fexits[mnode - link->nodes];
+ }
+
+ WARN_ON_ONCE(1);
+ return NULL;
+}
+
static int __bpf_trampoline_link_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog,
struct bpf_trampoline_ops *ops,
void *data)
{
- struct bpf_tracing_link *tr_link = NULL;
enum bpf_tramp_prog_type kind;
- struct bpf_tramp_node *node_existing;
+ struct bpf_tramp_node *node_existing, *fexit;
struct hlist_head *prog_list;
int err = 0;
int cnt = 0, i;
@@ -884,8 +905,8 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_node *node,
hlist_add_head(&node->tramp_hlist, prog_list);
if (kind == BPF_TRAMP_FSESSION) {
tr->progs_cnt[BPF_TRAMP_FENTRY]++;
- tr_link = container_of(node, struct bpf_tracing_link, link.node);
- hlist_add_head(&tr_link->fexit.tramp_hlist, &tr->progs_hlist[BPF_TRAMP_FEXIT]);
+ fexit = fsession_exit(node);
+ hlist_add_head(&fexit->tramp_hlist, &tr->progs_hlist[BPF_TRAMP_FEXIT]);
tr->progs_cnt[BPF_TRAMP_FEXIT]++;
} else {
tr->progs_cnt[kind]++;
@@ -895,7 +916,7 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_node *node,
hlist_del_init(&node->tramp_hlist);
if (kind == BPF_TRAMP_FSESSION) {
tr->progs_cnt[BPF_TRAMP_FENTRY]--;
- hlist_del_init(&tr_link->fexit.tramp_hlist);
+ hlist_del_init(&fexit->tramp_hlist);
tr->progs_cnt[BPF_TRAMP_FEXIT]--;
} else {
tr->progs_cnt[kind]--;
@@ -936,10 +957,9 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_node *node,
tgt_prog->aux->is_extended = false;
return err;
} else if (kind == BPF_TRAMP_FSESSION) {
- struct bpf_tracing_link *tr_link =
- container_of(node, struct bpf_tracing_link, link.node);
+ struct bpf_tramp_node *fexit = fsession_exit(node);
- hlist_del_init(&tr_link->fexit.tramp_hlist);
+ hlist_del_init(&fexit->tramp_hlist);
tr->progs_cnt[BPF_TRAMP_FEXIT]--;
kind = BPF_TRAMP_FENTRY;
}
@@ -1583,6 +1603,11 @@ int bpf_trampoline_multi_attach(struct bpf_prog *prog, u32 *ids,
mnode->trampoline = tr;
mnode->node.link = &link->link;
mnode->node.cookie = link->cookies ? link->cookies[i] : 0;
+
+ if (prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) {
+ link->fexits[i].link = &link->link;
+ link->fexits[i].cookie = link->cookies ? link->cookies[i] : 0;
+ }
}
trampoline_lock_all();
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ff29e27a85d0..aea514b2c12b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -17921,6 +17921,7 @@ static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_
case BPF_TRACE_FSESSION:
case BPF_TRACE_FENTRY_MULTI:
case BPF_TRACE_FEXIT_MULTI:
+ case BPF_TRACE_FSESSION_MULTI:
*range = retval_range(0, 0);
break;
case BPF_TRACE_RAW_TP:
@@ -23310,7 +23311,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
*cnt = 1;
} else if (desc->func_id == special_kfunc_list[KF_bpf_session_is_return] &&
- env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
+ (env->prog->expected_attach_type == BPF_TRACE_FSESSION ||
+ env->prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) {
/*
* inline the bpf_session_is_return() for fsession:
* bool bpf_session_is_return(void *ctx)
@@ -23323,7 +23325,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
insn_buf[2] = BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1);
*cnt = 3;
} else if (desc->func_id == special_kfunc_list[KF_bpf_session_cookie] &&
- env->prog->expected_attach_type == BPF_TRACE_FSESSION) {
+ (env->prog->expected_attach_type == BPF_TRACE_FSESSION ||
+ env->prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) {
/*
* inline bpf_session_cookie() for fsession:
* __u64 *bpf_session_cookie(void *ctx)
@@ -24111,6 +24114,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
if (eatype == BPF_TRACE_FEXIT ||
eatype == BPF_TRACE_FSESSION ||
eatype == BPF_TRACE_FEXIT_MULTI ||
+ eatype == BPF_TRACE_FSESSION_MULTI ||
eatype == BPF_MODIFY_RETURN) {
/* Load nr_args from ctx - 8 */
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
@@ -25122,7 +25126,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
prog_extension &&
(tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
tgt_prog->expected_attach_type == BPF_TRACE_FEXIT ||
- tgt_prog->expected_attach_type == BPF_TRACE_FSESSION)) {
+ tgt_prog->expected_attach_type == BPF_TRACE_FSESSION ||
+ tgt_prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI)) {
/* Program extensions can extend all program types
* except fentry/fexit. The reason is the following.
* The fentry/fexit programs are used for performance
@@ -25222,9 +25227,11 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
case BPF_TRACE_FSESSION:
+ case BPF_TRACE_FSESSION_MULTI:
case BPF_TRACE_FENTRY_MULTI:
case BPF_TRACE_FEXIT_MULTI:
- if (prog->expected_attach_type == BPF_TRACE_FSESSION &&
+ if ((prog->expected_attach_type == BPF_TRACE_FSESSION ||
+ prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) &&
!bpf_jit_supports_fsession()) {
bpf_log(log, "JIT does not support fsession\n");
return -EOPNOTSUPP;
@@ -25376,6 +25383,7 @@ static bool can_be_sleepable(struct bpf_prog *prog)
case BPF_MODIFY_RETURN:
case BPF_TRACE_ITER:
case BPF_TRACE_FSESSION:
+ case BPF_TRACE_FSESSION_MULTI:
case BPF_TRACE_FENTRY_MULTI:
case BPF_TRACE_FEXIT_MULTI:
return true;
@@ -25462,6 +25470,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
return -EINVAL;
} else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
prog->expected_attach_type == BPF_TRACE_FSESSION ||
+ prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI ||
prog->expected_attach_type == BPF_MODIFY_RETURN) &&
btf_id_set_contains(&noreturn_deny, btf_id)) {
verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n",
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b8932434e306..3070b9de174d 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1306,7 +1306,8 @@ static inline bool is_uprobe_session(const struct bpf_prog *prog)
static inline bool is_trace_fsession(const struct bpf_prog *prog)
{
return prog->type == BPF_PROG_TYPE_TRACING &&
- prog->expected_attach_type == BPF_TRACE_FSESSION;
+ (prog->expected_attach_type == BPF_TRACE_FSESSION ||
+ prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI);
}
static const struct bpf_func_proto *
@@ -3612,6 +3613,7 @@ static void bpf_tracing_multi_link_dealloc(struct bpf_link *link)
struct bpf_tracing_multi_link *tr_link =
container_of(link, struct bpf_tracing_multi_link, link);
+ kvfree(tr_link->fexits);
kvfree(tr_link->cookies);
kvfree(tr_link);
}
@@ -3624,6 +3626,7 @@ static const struct bpf_link_ops bpf_tracing_multi_link_lops = {
int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
{
struct bpf_tracing_multi_link *link = NULL;
+ struct bpf_tramp_node *fexits = NULL;
struct bpf_link_primer link_primer;
u32 cnt, *ids = NULL;
u64 *cookies = NULL;
@@ -3661,6 +3664,14 @@ int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
}
}
+ if (prog->expected_attach_type == BPF_TRACE_FSESSION_MULTI) {
+ fexits = kvmalloc_array(cnt, sizeof(*fexits), GFP_KERNEL);
+ if (!fexits) {
+ err = -ENOMEM;
+ goto error;
+ }
+ }
+
link = kvzalloc(struct_size(link, nodes, cnt), GFP_KERNEL);
if (!link) {
err = -ENOMEM;
@@ -3676,6 +3687,7 @@ int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
link->nodes_cnt = cnt;
link->cookies = cookies;
+ link->fexits = fexits;
err = bpf_trampoline_multi_attach(prog, ids, link);
kvfree(ids);
@@ -3686,6 +3698,7 @@ int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
return bpf_link_settle(&link_primer);
error:
+ kvfree(fexits);
kvfree(cookies);
kvfree(ids);
kvfree(link);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 3373450132f0..1aa07d40c80c 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -688,6 +688,7 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
case BPF_TRACE_FSESSION:
case BPF_TRACE_FENTRY_MULTI:
case BPF_TRACE_FEXIT_MULTI:
+ case BPF_TRACE_FSESSION_MULTI:
if (bpf_fentry_test1(1) != 2 ||
bpf_fentry_test2(2, 3) != 5 ||
bpf_fentry_test3(4, 5, 6) != 15 ||
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e28722ddeb5b..4520830fda06 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1156,6 +1156,7 @@ enum bpf_attach_type {
BPF_TRACE_FSESSION,
BPF_TRACE_FENTRY_MULTI,
BPF_TRACE_FEXIT_MULTI,
+ BPF_TRACE_FSESSION_MULTI,
__MAX_BPF_ATTACH_TYPE
};
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 74e579d7f310..1eb3869e3444 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -138,6 +138,7 @@ static const char * const attach_type_name[] = {
[BPF_TRACE_UPROBE_SESSION] = "trace_uprobe_session",
[BPF_TRACE_FENTRY_MULTI] = "trace_fentry_multi",
[BPF_TRACE_FEXIT_MULTI] = "trace_fexit_multi",
+ [BPF_TRACE_FSESSION_MULTI] = "trace_fsession_multi",
};
static const char * const link_type_name[] = {
--
2.53.0
^ permalink raw reply related
* [PATCHv2 bpf-next 10/23] bpf: Add support for tracing_multi link cookies
From: Jiri Olsa @ 2026-03-04 22:21 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260304222141.497203-1-jolsa@kernel.org>
Add support for specifying cookies for the tracing_multi link.
Cookies are provided in an array where each value is paired with the
BTF ID value at the same array index.
Such a cookie can be retrieved by the bpf program with the
bpf_get_attach_cookie helper call.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 1 +
kernel/bpf/trampoline.c | 1 +
kernel/trace/bpf_trace.c | 18 ++++++++++++++++++
tools/include/uapi/linux/bpf.h | 1 +
5 files changed, 22 insertions(+)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f22b9400a915..a919143a8b35 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1901,6 +1901,7 @@ struct bpf_tracing_multi_node {
struct bpf_tracing_multi_link {
struct bpf_link link;
+ u64 *cookies;
int nodes_cnt;
struct bpf_tracing_multi_node nodes[] __counted_by(nodes_cnt);
};
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7f5c51f27a36..e28722ddeb5b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1866,6 +1866,7 @@ union bpf_attr {
} cgroup;
struct {
__aligned_u64 ids;
+ __aligned_u64 cookies;
__u32 cnt;
} tracing_multi;
};
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index c42bf16b6807..1aa723dac209 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -1582,6 +1582,7 @@ int bpf_trampoline_multi_attach(struct bpf_prog *prog, u32 *ids,
mnode = &link->nodes[i];
mnode->trampoline = tr;
mnode->node.link = &link->link;
+ mnode->node.cookie = link->cookies ? link->cookies[i] : 0;
}
trampoline_lock_all();
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a18aef19c3b5..b8932434e306 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -3612,6 +3612,7 @@ static void bpf_tracing_multi_link_dealloc(struct bpf_link *link)
struct bpf_tracing_multi_link *tr_link =
container_of(link, struct bpf_tracing_multi_link, link);
+ kvfree(tr_link->cookies);
kvfree(tr_link);
}
@@ -3625,6 +3626,8 @@ int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
struct bpf_tracing_multi_link *link = NULL;
struct bpf_link_primer link_primer;
u32 cnt, *ids = NULL;
+ u64 *cookies = NULL;
+ void __user *ucookies;
u32 __user *uids;
int err;
@@ -3645,6 +3648,19 @@ int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
goto error;
}
+ ucookies = u64_to_user_ptr(attr->link_create.tracing_multi.cookies);
+ if (ucookies) {
+ cookies = kvmalloc_array(cnt, sizeof(*cookies), GFP_KERNEL);
+ if (!cookies) {
+ err = -ENOMEM;
+ goto error;
+ }
+ if (copy_from_user(cookies, ucookies, cnt * sizeof(*cookies))) {
+ err = -EFAULT;
+ goto error;
+ }
+ }
+
link = kvzalloc(struct_size(link, nodes, cnt), GFP_KERNEL);
if (!link) {
err = -ENOMEM;
@@ -3659,6 +3675,7 @@ int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
goto error;
link->nodes_cnt = cnt;
+ link->cookies = cookies;
err = bpf_trampoline_multi_attach(prog, ids, link);
kvfree(ids);
@@ -3669,6 +3686,7 @@ int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
return bpf_link_settle(&link_primer);
error:
+ kvfree(cookies);
kvfree(ids);
kvfree(link);
return err;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7f5c51f27a36..e28722ddeb5b 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1866,6 +1866,7 @@ union bpf_attr {
} cgroup;
struct {
__aligned_u64 ids;
+ __aligned_u64 cookies;
__u32 cnt;
} tracing_multi;
};
--
2.53.0
^ permalink raw reply related
* [PATCHv2 bpf-next 09/23] bpf: Add support for tracing multi link
From: Jiri Olsa @ 2026-03-04 22:21 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260304222141.497203-1-jolsa@kernel.org>
Add a new link that allows attaching a program to multiple function
BTF IDs. The link is represented by struct bpf_tracing_multi_link.
To configure the link, new fields are added to bpf_attr::link_create
to pass an array of BTF IDs:
struct {
__aligned_u64 ids;
__u32 cnt;
} tracing_multi;
Each BTF ID represents a function (BTF_KIND_FUNC) that the link will
attach the bpf program to.
We use the previously added bpf_trampoline_multi_attach/detach functions
to attach/detach the link.
The linkinfo/fdinfo callbacks will be implemented in the following changes.
Note this is supported only on archs (x86_64) that have ftrace direct
calls and single direct ops support, i.e. with both:
CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS &&
CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
include/linux/bpf_types.h | 1 +
include/linux/trace_events.h | 6 +++
include/uapi/linux/bpf.h | 5 ++
kernel/bpf/syscall.c | 2 +
kernel/trace/bpf_trace.c | 88 ++++++++++++++++++++++++++++++++++
tools/include/uapi/linux/bpf.h | 6 +++
tools/lib/bpf/libbpf.c | 1 +
7 files changed, 109 insertions(+)
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index b13de31e163f..96575b5b563e 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -155,3 +155,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf)
BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi)
BPF_LINK_TYPE(BPF_LINK_TYPE_STRUCT_OPS, struct_ops)
BPF_LINK_TYPE(BPF_LINK_TYPE_UPROBE_MULTI, uprobe_multi)
+BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING_MULTI, tracing_multi)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 37eb2f0f3dd8..b6d4c745bdac 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -783,6 +783,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
unsigned long *missed);
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr);
#else
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
@@ -835,6 +836,11 @@ bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
return -EOPNOTSUPP;
}
+static inline int
+bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
+{
+ return -EOPNOTSUPP;
+}
#endif
enum {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 68600972a778..7f5c51f27a36 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1180,6 +1180,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_UPROBE_MULTI = 12,
BPF_LINK_TYPE_NETKIT = 13,
BPF_LINK_TYPE_SOCKMAP = 14,
+ BPF_LINK_TYPE_TRACING_MULTI = 15,
__MAX_BPF_LINK_TYPE,
};
@@ -1863,6 +1864,10 @@ union bpf_attr {
};
__u64 expected_revision;
} cgroup;
+ struct {
+ __aligned_u64 ids;
+ __u32 cnt;
+ } tracing_multi;
};
} link_create;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2680740e9c09..94c6a9c81ef0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -5749,6 +5749,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
ret = bpf_iter_link_attach(attr, uattr, prog);
else if (prog->expected_attach_type == BPF_LSM_CGROUP)
ret = cgroup_bpf_link_attach(attr, prog);
+ else if (is_tracing_multi(prog->expected_attach_type))
+ ret = bpf_tracing_multi_attach(prog, attr);
else
ret = bpf_tracing_prog_attach(prog,
attr->link_create.target_fd,
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0b040a417442..a18aef19c3b5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -42,6 +42,7 @@
#define MAX_UPROBE_MULTI_CNT (1U << 20)
#define MAX_KPROBE_MULTI_CNT (1U << 20)
+#define MAX_TRACING_MULTI_CNT (1U << 20)
#ifdef CONFIG_MODULES
struct bpf_trace_module {
@@ -3594,3 +3595,90 @@ __bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u64
}
__bpf_kfunc_end_defs();
+
+#if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) && \
+ defined(CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS)
+
+static void bpf_tracing_multi_link_release(struct bpf_link *link)
+{
+ struct bpf_tracing_multi_link *tr_link =
+ container_of(link, struct bpf_tracing_multi_link, link);
+
+ WARN_ON_ONCE(bpf_trampoline_multi_detach(link->prog, tr_link));
+}
+
+static void bpf_tracing_multi_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_tracing_multi_link *tr_link =
+ container_of(link, struct bpf_tracing_multi_link, link);
+
+ kvfree(tr_link);
+}
+
+static const struct bpf_link_ops bpf_tracing_multi_link_lops = {
+ .release = bpf_tracing_multi_link_release,
+ .dealloc_deferred = bpf_tracing_multi_link_dealloc,
+};
+
+int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
+{
+ struct bpf_tracing_multi_link *link = NULL;
+ struct bpf_link_primer link_primer;
+ u32 cnt, *ids = NULL;
+ u32 __user *uids;
+ int err;
+
+ uids = u64_to_user_ptr(attr->link_create.tracing_multi.ids);
+ cnt = attr->link_create.tracing_multi.cnt;
+
+ if (!cnt || !uids)
+ return -EINVAL;
+ if (cnt > MAX_TRACING_MULTI_CNT)
+ return -E2BIG;
+
+ ids = kvmalloc_array(cnt, sizeof(*ids), GFP_KERNEL);
+ if (!ids)
+ return -ENOMEM;
+
+ if (copy_from_user(ids, uids, cnt * sizeof(*ids))) {
+ err = -EFAULT;
+ goto error;
+ }
+
+ link = kvzalloc(struct_size(link, nodes, cnt), GFP_KERNEL);
+ if (!link) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING_MULTI,
+ &bpf_tracing_multi_link_lops, prog, prog->expected_attach_type);
+
+ err = bpf_link_prime(&link->link, &link_primer);
+ if (err)
+ goto error;
+
+ link->nodes_cnt = cnt;
+
+ err = bpf_trampoline_multi_attach(prog, ids, link);
+ kvfree(ids);
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ return err;
+ }
+ return bpf_link_settle(&link_primer);
+
+error:
+ kvfree(ids);
+ kvfree(link);
+ return err;
+}
+
+#else
+
+int bpf_tracing_multi_attach(struct bpf_prog *prog, const union bpf_attr *attr)
+{
+ return -EOPNOTSUPP;
+}
+
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS && CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 61f0fe5bc0aa..7f5c51f27a36 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1180,6 +1180,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_UPROBE_MULTI = 12,
BPF_LINK_TYPE_NETKIT = 13,
BPF_LINK_TYPE_SOCKMAP = 14,
+ BPF_LINK_TYPE_TRACING_MULTI = 15,
__MAX_BPF_LINK_TYPE,
};
@@ -1863,6 +1864,10 @@ union bpf_attr {
};
__u64 expected_revision;
} cgroup;
+ struct {
+ __aligned_u64 ids;
+ __u32 cnt;
+ } tracing_multi;
};
} link_create;
@@ -7236,6 +7241,7 @@ enum {
TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */
SK_BPF_BYPASS_PROT_MEM = 1010, /* Get or Set sk->sk_bypass_prot_mem */
+
};
enum {
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 1e19c7b861ec..74e579d7f310 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -156,6 +156,7 @@ static const char * const link_type_name[] = {
[BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi",
[BPF_LINK_TYPE_NETKIT] = "netkit",
[BPF_LINK_TYPE_SOCKMAP] = "sockmap",
+ [BPF_LINK_TYPE_TRACING_MULTI] = "tracing_multi",
};
static const char * const map_type_name[] = {
--
2.53.0
^ permalink raw reply related
* [PATCHv2 bpf-next 08/23] bpf: Add bpf_trampoline_multi_attach/detach functions
From: Jiri Olsa @ 2026-03-04 22:21 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260304222141.497203-1-jolsa@kernel.org>
Add bpf_trampoline_multi_attach/detach functions that allow
attaching/detaching a tracing program to/from multiple functions/trampolines.
The attachment is defined by a bpf program and an array of BTF IDs of
the functions to attach the bpf program to.
Add the bpf_tracing_multi_link object that holds all the attached
trampolines; it is initialized in attach and used in detach.
The attachment allocates a new trampoline or uses the existing one
for each function to attach, and links it with the bpf program.
The attach works as follows:
- we get all the needed trampolines
- we lock them and add the bpf program to each (__bpf_trampoline_link_prog)
- the trampoline_multi_ops passed to __bpf_trampoline_link_prog gather
ftrace_hash (ip -> trampoline) objects
- we call update_ftrace_direct_add/mod to update the needed locations
- we unlock all the trampolines
The detach works as follows:
- we lock all the needed trampolines
- we remove the program from each (__bpf_trampoline_unlink_prog)
- the trampoline_multi_ops passed to __bpf_trampoline_unlink_prog gather
ftrace_hash (ip -> trampoline) objects
- we call update_ftrace_direct_del/mod to update the needed locations
- we unlock and put all the trampolines
Add trampoline_(un)lock_all functions that (un)lock all trampolines
to gate the tracing_multi attachment.
Note this is supported only on archs (x86_64) that have ftrace direct
calls and single direct ops support, i.e. with both:
CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS &&
CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
include/linux/bpf.h | 17 +++
kernel/bpf/trampoline.c | 243 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 260 insertions(+)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c401b308a325..f22b9400a915 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1464,6 +1464,12 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key,
void bpf_trampoline_put(struct bpf_trampoline *tr);
int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs);
+struct bpf_tracing_multi_link;
+int bpf_trampoline_multi_attach(struct bpf_prog *prog, u32 *ids,
+ struct bpf_tracing_multi_link *link);
+int bpf_trampoline_multi_detach(struct bpf_prog *prog,
+ struct bpf_tracing_multi_link *link);
+
/*
* When the architecture supports STATIC_CALL replace the bpf_dispatcher_fn
* indirection with a direct call to the bpf program. If the architecture does
@@ -1888,6 +1894,17 @@ struct bpf_tracing_link {
struct bpf_prog *tgt_prog;
};
+struct bpf_tracing_multi_node {
+ struct bpf_tramp_node node;
+ struct bpf_trampoline *trampoline;
+};
+
+struct bpf_tracing_multi_link {
+ struct bpf_link link;
+ int nodes_cnt;
+ struct bpf_tracing_multi_node nodes[] __counted_by(nodes_cnt);
+};
+
struct bpf_raw_tp_link {
struct bpf_link link;
struct bpf_raw_event_map *btp;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index e2f4a15886b0..c42bf16b6807 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -88,6 +88,22 @@ static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsig
mutex_unlock(&trampoline_mutex);
return tr;
}
+
+static void trampoline_lock_all(void)
+{
+ int i;
+
+ for (i = 0; i < TRAMPOLINE_LOCKS_TABLE_SIZE; i++)
+ mutex_lock(&trampoline_locks[i].mutex);
+}
+
+static void trampoline_unlock_all(void)
+{
+ int i;
+
+ for (i = 0; i < TRAMPOLINE_LOCKS_TABLE_SIZE; i++)
+ mutex_unlock(&trampoline_locks[i].mutex);
+}
#else
static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
{
@@ -1426,6 +1442,233 @@ int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
return -ENOTSUPP;
}
+#if defined(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS) && \
+ defined(CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS)
+
+/*
+ * Hashes of (ip, trampoline address) entries collected by the
+ * trampoline_multi_ops callbacks, one hash per kind of ftrace direct
+ * update, so the updates can be issued in batches by the caller.
+ */
+struct fentry_multi_data {
+ struct ftrace_hash *unreg; /* filled by unregister_fentry_multi() */
+ struct ftrace_hash *modify; /* filled by modify_fentry_multi() */
+ struct ftrace_hash *reg; /* filled by register_fentry_multi() */
+};
+
+/*
+ * Release the reg, unreg and modify hashes of @data, in that order.
+ * Callers in this file also pass members that were never allocated (NULL).
+ */
+static void free_fentry_multi_data(struct fentry_multi_data *data)
+{
+	struct ftrace_hash *hashes[] = { data->reg, data->unreg, data->modify };
+
+	for (int idx = 0; idx < ARRAY_SIZE(hashes); idx++)
+		free_ftrace_hash(hashes[idx]);
+}
+
+/*
+ * register_fentry callback of trampoline_multi_ops: only records the
+ * (ip, trampoline address) pair in the 'reg' hash of the
+ * fentry_multi_data passed in @ptr.
+ *
+ * Returns 0, or -ENOMEM when the hash entry could not be added.
+ */
+static int register_fentry_multi(struct bpf_trampoline *tr, void *new_addr, void *ptr)
+{
+	struct fentry_multi_data *mdata = ptr;
+	unsigned long target = (unsigned long) new_addr;
+	unsigned long ip = ftrace_location(tr->ip);
+
+	if (bpf_trampoline_use_jmp(tr->flags))
+		target = ftrace_jmp_set(target);
+
+	if (!add_ftrace_hash_entry_direct(mdata->reg, ip, target))
+		return -ENOMEM;
+	return 0;
+}
+
+/*
+ * unregister_fentry callback of trampoline_multi_ops: only records the
+ * (ip, old trampoline address) pair in the 'unreg' hash of the
+ * fentry_multi_data passed in @ptr. @orig_flags is not used; the jmp
+ * encoding is decided from tr->flags.
+ *
+ * Returns 0, or -ENOMEM when the hash entry could not be added.
+ */
+static int unregister_fentry_multi(struct bpf_trampoline *tr, u32 orig_flags, void *old_addr,
+				   void *ptr)
+{
+	struct fentry_multi_data *mdata = ptr;
+	unsigned long target = (unsigned long) old_addr;
+	unsigned long ip = ftrace_location(tr->ip);
+
+	if (bpf_trampoline_use_jmp(tr->flags))
+		target = ftrace_jmp_set(target);
+
+	if (!add_ftrace_hash_entry_direct(mdata->unreg, ip, target))
+		return -ENOMEM;
+	return 0;
+}
+
+/*
+ * modify_fentry callback of trampoline_multi_ops: only records the
+ * (ip, new trampoline address) pair in the 'modify' hash of the
+ * fentry_multi_data passed in @ptr. @orig_flags, @old_addr and
+ * @lock_direct_mutex are not used here.
+ *
+ * Returns 0, or -ENOMEM when the hash entry could not be added.
+ */
+static int modify_fentry_multi(struct bpf_trampoline *tr, u32 orig_flags, void *old_addr,
+			       void *new_addr, bool lock_direct_mutex, void *ptr)
+{
+	struct fentry_multi_data *mdata = ptr;
+	unsigned long target = (unsigned long) new_addr;
+	unsigned long ip = ftrace_location(tr->ip);
+
+	if (bpf_trampoline_use_jmp(tr->flags))
+		target = ftrace_jmp_set(target);
+
+	if (!add_ftrace_hash_entry_direct(mdata->modify, ip, target))
+		return -ENOMEM;
+	return 0;
+}
+
+/*
+ * Trampoline ops used for multi attach/detach: each callback only adds an
+ * entry to the matching hash in struct fentry_multi_data.
+ */
+static struct bpf_trampoline_ops trampoline_multi_ops = {
+ .register_fentry = register_fentry_multi,
+ .unregister_fentry = unregister_fentry_multi,
+ .modify_fentry = modify_fentry_multi,
+};
+
+/*
+ * Resolve function @btf_id of @btf into @tgt_info: distill the function
+ * model from the function prototype and look up the function address.
+ *
+ * Returns 0 on success, -EINVAL when @btf or @btf_id is unset, when
+ * @btf_id is not a named function with a function prototype, or when a
+ * module BTF is used without prog->aux->mod, -ENOENT when the symbol
+ * address is not found or is not an ftrace location, or the negative
+ * error returned by btf_distill_func_proto().
+ */
+static int bpf_get_btf_id_target(struct btf *btf, struct bpf_prog *prog, u32 btf_id,
+ struct bpf_attach_target_info *tgt_info)
+{
+ const struct btf_type *t;
+ unsigned long addr;
+ const char *tname;
+ int err;
+
+ if (!btf_id || !btf)
+ return -EINVAL;
+ t = btf_type_by_id(btf, btf_id);
+ if (!t)
+ return -EINVAL;
+ tname = btf_name_by_offset(btf, t->name_off);
+ if (!tname)
+ return -EINVAL;
+ if (!btf_type_is_func(t))
+ return -EINVAL;
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_func_proto(t))
+ return -EINVAL;
+ err = btf_distill_func_proto(NULL, btf, t, tname, &tgt_info->fmodel);
+ if (err < 0)
+ return err;
+ if (btf_is_module(btf)) {
+ /* The bpf program already holds a reference to the module. */
+ if (WARN_ON_ONCE(!prog->aux->mod))
+ return -EINVAL;
+ addr = find_kallsyms_symbol_value(prog->aux->mod, tname);
+ } else {
+ addr = kallsyms_lookup_name(tname);
+ }
+ /* Only addresses that ftrace can patch are valid attach targets. */
+ if (!addr || !ftrace_location(addr))
+ return -ENOENT;
+ tgt_info->tgt_addr = addr;
+ return 0;
+}
+
+/*
+ * Attach @prog to all functions listed in @ids through @link.
+ *
+ * @ids holds link->nodes_cnt BTF function ids that are resolved against
+ * prog->aux->attach_btf; link->nodes[i] is set up for ids[i].
+ *
+ * First a trampoline is obtained with bpf_trampoline_get() for each id.
+ * Then, with all trampoline locks held, every node is linked to its
+ * trampoline using trampoline_multi_ops, which only records the needed
+ * ftrace changes in the local hashes; the recorded changes are applied
+ * afterwards, one batched call per hash.
+ *
+ * Returns 0 on success. On failure the nodes linked so far are unlinked,
+ * the trampolines obtained so far are released with bpf_trampoline_put()
+ * and a negative error is returned.
+ */
+int bpf_trampoline_multi_attach(struct bpf_prog *prog, u32 *ids,
+				struct bpf_tracing_multi_link *link)
+{
+	struct bpf_attach_target_info tgt_info = {};
+	struct btf *btf = prog->aux->attach_btf;
+	struct bpf_tracing_multi_node *mnode;
+	int j, i, err, cnt = link->nodes_cnt;
+	struct fentry_multi_data data;
+	struct bpf_trampoline *tr;
+	u32 btf_id;
+	u64 key;
+
+	data.reg = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
+	data.unreg = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
+	data.modify = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
+
+	if (!data.reg || !data.unreg || !data.modify) {
+		free_fentry_multi_data(&data);
+		return -ENOMEM;
+	}
+
+	/* Resolve each id and get its trampoline, no locks are held yet. */
+	for (i = 0; i < cnt; i++) {
+		btf_id = ids[i];
+
+		err = bpf_get_btf_id_target(btf, prog, btf_id, &tgt_info);
+		if (err)
+			goto rollback_put;
+
+		if (prog->sleepable) {
+			err = btf_id_allow_sleepable(btf_id, tgt_info.tgt_addr, prog, btf);
+			if (err)
+				goto rollback_put;
+		}
+
+		key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, btf_id);
+
+		tr = bpf_trampoline_get(key, &tgt_info);
+		if (!tr) {
+			err = -ENOMEM;
+			goto rollback_put;
+		}
+
+		mnode = &link->nodes[i];
+		mnode->trampoline = tr;
+		mnode->node.link = &link->link;
+	}
+
+	trampoline_lock_all();
+
+	for (i = 0; i < cnt; i++) {
+		mnode = &link->nodes[i];
+		err = __bpf_trampoline_link_prog(&mnode->node, mnode->trampoline, NULL,
+						 &trampoline_multi_ops, &data);
+		if (err)
+			goto rollback_unlink;
+	}
+
+	/* All nodes are linked (i == cnt), apply the collected ftrace changes. */
+	if (ftrace_hash_count(data.reg)) {
+		err = update_ftrace_direct_add(&direct_ops, data.reg);
+		if (err)
+			goto rollback_unlink;
+	}
+
+	if (ftrace_hash_count(data.modify)) {
+		err = update_ftrace_direct_mod(&direct_ops, data.modify, true);
+		if (err) {
+			/*
+			 * Undo the registration done above. data.reg is empty
+			 * when only already existing trampolines were modified,
+			 * so guard the call the same way as the other
+			 * update_ftrace_direct_*() calls in this file.
+			 */
+			if (ftrace_hash_count(data.reg))
+				WARN_ON_ONCE(update_ftrace_direct_del(&direct_ops, data.reg));
+			goto rollback_unlink;
+		}
+	}
+
+	trampoline_unlock_all();
+
+	free_fentry_multi_data(&data);
+	return 0;
+
+rollback_unlink:
+	/* Unlink the nodes [0, i) that were linked before the failure. */
+	for (j = 0; j < i; j++) {
+		mnode = &link->nodes[j];
+		WARN_ON_ONCE(__bpf_trampoline_unlink_prog(&mnode->node, mnode->trampoline,
+							  NULL, &trampoline_multi_ops, &data));
+	}
+	trampoline_unlock_all();
+
+	/* Every trampoline was obtained at this point, release all of them. */
+	i = cnt;
+
+rollback_put:
+	for (j = 0; j < i; j++)
+		bpf_trampoline_put(link->nodes[j].trampoline);
+
+	free_fentry_multi_data(&data);
+	return err;
+}
+
+/*
+ * Detach @prog from all trampolines of @link.
+ *
+ * With all trampoline locks held, every node of @link is unlinked from its
+ * trampoline using trampoline_multi_ops, which only records the needed
+ * ftrace changes in the local hashes; the recorded changes are applied
+ * afterwards, one batched call per hash. Finally each trampoline is
+ * released with bpf_trampoline_put().
+ *
+ * Returns 0, or -ENOMEM when the hashes can't be allocated, in which case
+ * nothing is detached.
+ */
+int bpf_trampoline_multi_detach(struct bpf_prog *prog, struct bpf_tracing_multi_link *link)
+{
+	struct bpf_tracing_multi_node *mnode;
+	struct fentry_multi_data data = {};
+	int i, cnt = link->nodes_cnt;
+
+	data.unreg = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
+	data.modify = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
+
+	if (!data.unreg || !data.modify) {
+		/*
+		 * Either allocation may be the one that failed, so free both;
+		 * data.reg is never allocated here and stays NULL.
+		 */
+		free_fentry_multi_data(&data);
+		return -ENOMEM;
+	}
+
+	trampoline_lock_all();
+
+	for (i = 0; i < cnt; i++) {
+		mnode = &link->nodes[i];
+		WARN_ON_ONCE(__bpf_trampoline_unlink_prog(&mnode->node, mnode->trampoline,
+							  NULL, &trampoline_multi_ops, &data));
+	}
+
+	if (ftrace_hash_count(data.unreg))
+		WARN_ON_ONCE(update_ftrace_direct_del(&direct_ops, data.unreg));
+	if (ftrace_hash_count(data.modify))
+		WARN_ON_ONCE(update_ftrace_direct_mod(&direct_ops, data.modify, true));
+
+	trampoline_unlock_all();
+
+	for (i = 0; i < cnt; i++)
+		bpf_trampoline_put(link->nodes[i].trampoline);
+
+	free_fentry_multi_data(&data);
+	return 0;
+}
+
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS && CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS */
+
static int __init init_trampolines(void)
{
int i;
--
2.53.0
^ permalink raw reply related
* [PATCHv2 bpf-next 07/23] bpf: Move sleepable verification code to btf_id_allow_sleepable
From: Jiri Olsa @ 2026-03-04 22:21 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260304222141.497203-1-jolsa@kernel.org>
Move sleepable verification code to btf_id_allow_sleepable
function. It will be used in following changes.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
include/linux/bpf_verifier.h | 3 ++
kernel/bpf/verifier.c | 79 +++++++++++++++++++-----------------
2 files changed, 45 insertions(+), 37 deletions(-)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 090aa26d1c98..186726fcf52a 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -932,6 +932,9 @@ static inline void bpf_trampoline_unpack_key(u64 key, u32 *obj_id, u32 *btf_id)
*btf_id = key & 0x7FFFFFFF;
}
+int btf_id_allow_sleepable(u32 btf_id, unsigned long addr, const struct bpf_prog *prog,
+ const struct btf *btf);
+
int bpf_check_attach_target(struct bpf_verifier_log *log,
const struct bpf_prog *prog,
const struct bpf_prog *tgt_prog,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 832a87da5a86..ff29e27a85d0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -24959,6 +24959,47 @@ static bool is_tracing_multi_id(const struct bpf_prog *prog, u32 btf_id)
return is_tracing_multi(prog->expected_attach_type) && bpf_multi_func_btf_id[0] == btf_id;
}
+/*
+ * Check whether sleepable @prog may be attached to function @btf_id
+ * located at @addr.
+ *
+ * Returns 0 when the attachment is allowed, -EINVAL otherwise.
+ */
+int btf_id_allow_sleepable(u32 btf_id, unsigned long addr, const struct bpf_prog *prog,
+			   const struct btf *btf)
+{
+	u32 *flags;
+
+	switch (prog->type) {
+	case BPF_PROG_TYPE_TRACING:
+		/*
+		 * *.multi sleepable programs pass the initial sleepable
+		 * check, the actually attached btf ids are checked later
+		 * during the link attachment.
+		 */
+		if (is_tracing_multi_id(prog, btf_id))
+			return 0;
+
+		/*
+		 * fentry/fexit/fmod_ret progs can be sleepable if they are
+		 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
+		 */
+		if (!check_non_sleepable_error_inject(btf_id) &&
+		    within_error_injection_list(addr))
+			return 0;
+
+		/*
+		 * fentry/fexit/fmod_ret progs can also be sleepable if they
+		 * are in the fmodret id set with the KF_SLEEPABLE flag.
+		 */
+		flags = btf_kfunc_is_modify_return(btf, btf_id, prog);
+		if (flags && (*flags & KF_SLEEPABLE))
+			return 0;
+		break;
+	case BPF_PROG_TYPE_LSM:
+		/*
+		 * LSM progs check that they are attached to bpf_lsm_*()
+		 * funcs. Only some of them are sleepable.
+		 */
+		if (bpf_lsm_is_sleepable_hook(btf_id))
+			return 0;
+		break;
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
int bpf_check_attach_target(struct bpf_verifier_log *log,
const struct bpf_prog *prog,
const struct bpf_prog *tgt_prog,
@@ -25247,43 +25288,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
}
if (prog->sleepable) {
- ret = -EINVAL;
- switch (prog->type) {
- case BPF_PROG_TYPE_TRACING:
-
- /* *.multi sleepable programs will pass initial sleepable check,
- * the actual attached btf ids are checked later during the link
- * attachment.
- */
- if (is_tracing_multi_id(prog, btf_id))
- ret = 0;
- /* fentry/fexit/fmod_ret progs can be sleepable if they are
- * attached to ALLOW_ERROR_INJECTION and are not in denylist.
- */
- else if (!check_non_sleepable_error_inject(btf_id) &&
- within_error_injection_list(addr))
- ret = 0;
- /* fentry/fexit/fmod_ret progs can also be sleepable if they are
- * in the fmodret id set with the KF_SLEEPABLE flag.
- */
- else {
- u32 *flags = btf_kfunc_is_modify_return(btf, btf_id,
- prog);
-
- if (flags && (*flags & KF_SLEEPABLE))
- ret = 0;
- }
- break;
- case BPF_PROG_TYPE_LSM:
- /* LSM progs check that they are attached to bpf_lsm_*() funcs.
- * Only some of them are sleepable.
- */
- if (bpf_lsm_is_sleepable_hook(btf_id))
- ret = 0;
- break;
- default:
- break;
- }
+ ret = btf_id_allow_sleepable(btf_id, addr, prog, btf);
if (ret) {
module_put(mod);
bpf_log(log, "%s is not sleepable\n", tname);
--
2.53.0
^ permalink raw reply related
* [PATCHv2 bpf-next 06/23] bpf: Add multi tracing attach types
From: Jiri Olsa @ 2026-03-04 22:21 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260304222141.497203-1-jolsa@kernel.org>
Adding new program attach types for multi tracing attachment:
BPF_TRACE_FENTRY_MULTI
BPF_TRACE_FEXIT_MULTI
and their base support in verifier code.
Programs with such attach type will use specific link attachment
interface coming in following changes.
This was suggested by Andrii some (long) time ago and turned out
to be easier than having special program flag for that.
Bpf programs with such types have 'bpf_multi_func' function set as
their attach_btf_id and keep module reference when it's specified
by attach_prog_fd.
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
include/linux/bpf.h | 5 +++++
include/linux/btf_ids.h | 1 +
include/uapi/linux/bpf.h | 2 ++
kernel/bpf/btf.c | 2 ++
kernel/bpf/syscall.c | 33 +++++++++++++++++++++++++++++----
kernel/bpf/trampoline.c | 5 ++++-
kernel/bpf/verifier.c | 34 ++++++++++++++++++++++++++++++++--
net/bpf/test_run.c | 2 ++
tools/include/uapi/linux/bpf.h | 2 ++
tools/lib/bpf/libbpf.c | 2 ++
10 files changed, 81 insertions(+), 7 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d536640aef41..c401b308a325 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2116,6 +2116,11 @@ void bpf_prog_disassoc_struct_ops(struct bpf_prog *prog);
void *bpf_prog_get_assoc_struct_ops(const struct bpf_prog_aux *aux);
u32 bpf_struct_ops_id(const void *kdata);
+static inline bool is_tracing_multi(enum bpf_attach_type type)
+{
+ return type == BPF_TRACE_FENTRY_MULTI || type == BPF_TRACE_FEXIT_MULTI;
+}
+
#ifdef CONFIG_NET
/* Define it here to avoid the use of forward declaration */
struct bpf_dummy_ops_state {
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 139bdececdcf..eb2c4432856d 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -284,5 +284,6 @@ extern u32 bpf_cgroup_btf_id[];
extern u32 bpf_local_storage_map_btf_id[];
extern u32 btf_bpf_map_id[];
extern u32 bpf_kmem_cache_btf_id[];
+extern u32 bpf_multi_func_btf_id[];
#endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c8d400b7680a..68600972a778 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1154,6 +1154,8 @@ enum bpf_attach_type {
BPF_TRACE_KPROBE_SESSION,
BPF_TRACE_UPROBE_SESSION,
BPF_TRACE_FSESSION,
+ BPF_TRACE_FENTRY_MULTI,
+ BPF_TRACE_FEXIT_MULTI,
__MAX_BPF_ATTACH_TYPE
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 09fcbb125155..c8738834bbc9 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6221,6 +6221,8 @@ static int btf_validate_prog_ctx_type(struct bpf_verifier_log *log, const struct
case BPF_TRACE_FEXIT:
case BPF_MODIFY_RETURN:
case BPF_TRACE_FSESSION:
+ case BPF_TRACE_FENTRY_MULTI:
+ case BPF_TRACE_FEXIT_MULTI:
/* allow u64* as ctx */
if (btf_is_int(t) && t->size == 8)
return 0;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 003ad95940c9..2680740e9c09 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -41,6 +41,7 @@
#include <linux/overflow.h>
#include <linux/cookie.h>
#include <linux/verification.h>
+#include <linux/btf_ids.h>
#include <net/netfilter/nf_bpf_link.h>
#include <net/netkit.h>
@@ -2653,7 +2654,8 @@ static int
bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
enum bpf_attach_type expected_attach_type,
struct btf *attach_btf, u32 btf_id,
- struct bpf_prog *dst_prog)
+ struct bpf_prog *dst_prog,
+ bool multi_func)
{
if (btf_id) {
if (btf_id > BTF_MAX_TYPE)
@@ -2673,6 +2675,14 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
}
}
+ if (multi_func) {
+ if (prog_type != BPF_PROG_TYPE_TRACING)
+ return -EINVAL;
+ if (!attach_btf || btf_id)
+ return -EINVAL;
+ return 0;
+ }
+
if (attach_btf && (!btf_id || dst_prog))
return -EINVAL;
@@ -2865,6 +2875,16 @@ static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog)
return 0;
}
+/*
+ * bpf_multi_func is the function whose BTF id (bpf_multi_func_btf_id) is
+ * stored as attach_btf_id of *.multi tracing programs at load time. The
+ * macro emits both the extern declaration and a definition returning 0.
+ */
+#define DEFINE_BPF_MULTI_FUNC(args...) \
+ extern int bpf_multi_func(args); \
+ int __init bpf_multi_func(args) { return 0; }
+
+DEFINE_BPF_MULTI_FUNC(unsigned long a1, unsigned long a2,
+ unsigned long a3, unsigned long a4,
+ unsigned long a5, unsigned long a6)
+
+BTF_ID_LIST_GLOBAL_SINGLE(bpf_multi_func_btf_id, func, bpf_multi_func)
+
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD keyring_id
@@ -2877,6 +2897,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
bool bpf_cap;
int err;
char license[128];
+ bool multi_func;
if (CHECK_ATTR(BPF_PROG_LOAD))
return -EINVAL;
@@ -2943,6 +2964,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON))
goto put_token;
+ multi_func = is_tracing_multi(attr->expected_attach_type);
+
/* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog
* or btf, we need to check which one it is
*/
@@ -2964,7 +2987,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
goto put_token;
}
}
- } else if (attr->attach_btf_id) {
+ } else if (attr->attach_btf_id || multi_func) {
/* fall back to vmlinux BTF, if BTF type ID is specified */
attach_btf = bpf_get_btf_vmlinux();
if (IS_ERR(attach_btf)) {
@@ -2980,7 +3003,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
attach_btf, attr->attach_btf_id,
- dst_prog)) {
+ dst_prog, multi_func)) {
if (dst_prog)
bpf_prog_put(dst_prog);
if (attach_btf)
@@ -3003,7 +3026,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
prog->expected_attach_type = attr->expected_attach_type;
prog->sleepable = !!(attr->prog_flags & BPF_F_SLEEPABLE);
prog->aux->attach_btf = attach_btf;
- prog->aux->attach_btf_id = attr->attach_btf_id;
+ prog->aux->attach_btf_id = multi_func ? bpf_multi_func_btf_id[0] : attr->attach_btf_id;
prog->aux->dst_prog = dst_prog;
prog->aux->dev_bound = !!attr->prog_ifindex;
prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
@@ -4365,6 +4388,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
case BPF_TRACE_FSESSION:
+ case BPF_TRACE_FENTRY_MULTI:
+ case BPF_TRACE_FEXIT_MULTI:
case BPF_MODIFY_RETURN:
return BPF_PROG_TYPE_TRACING;
case BPF_LSM_MAC:
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 2de81e4369a1..e2f4a15886b0 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -182,7 +182,8 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
switch (ptype) {
case BPF_PROG_TYPE_TRACING:
if (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT ||
- eatype == BPF_MODIFY_RETURN || eatype == BPF_TRACE_FSESSION)
+ eatype == BPF_MODIFY_RETURN || eatype == BPF_TRACE_FSESSION ||
+ eatype == BPF_TRACE_FENTRY_MULTI || eatype == BPF_TRACE_FEXIT_MULTI)
return true;
return false;
case BPF_PROG_TYPE_LSM:
@@ -771,10 +772,12 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
{
switch (prog->expected_attach_type) {
case BPF_TRACE_FENTRY:
+ case BPF_TRACE_FENTRY_MULTI:
return BPF_TRAMP_FENTRY;
case BPF_MODIFY_RETURN:
return BPF_TRAMP_MODIFY_RETURN;
case BPF_TRACE_FEXIT:
+ case BPF_TRACE_FEXIT_MULTI:
return BPF_TRAMP_FEXIT;
case BPF_TRACE_FSESSION:
return BPF_TRAMP_FSESSION;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d92cf2821657..832a87da5a86 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -17919,6 +17919,8 @@ static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
case BPF_TRACE_FSESSION:
+ case BPF_TRACE_FENTRY_MULTI:
+ case BPF_TRACE_FEXIT_MULTI:
*range = retval_range(0, 0);
break;
case BPF_TRACE_RAW_TP:
@@ -24108,6 +24110,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
insn->imm == BPF_FUNC_get_func_ret) {
if (eatype == BPF_TRACE_FEXIT ||
eatype == BPF_TRACE_FSESSION ||
+ eatype == BPF_TRACE_FEXIT_MULTI ||
eatype == BPF_MODIFY_RETURN) {
/* Load nr_args from ctx - 8 */
insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
@@ -24951,6 +24954,11 @@ static int check_non_sleepable_error_inject(u32 btf_id)
return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
}
+static bool is_tracing_multi_id(const struct bpf_prog *prog, u32 btf_id)
+{
+ return is_tracing_multi(prog->expected_attach_type) && bpf_multi_func_btf_id[0] == btf_id;
+}
+
int bpf_check_attach_target(struct bpf_verifier_log *log,
const struct bpf_prog *prog,
const struct bpf_prog *tgt_prog,
@@ -25173,6 +25181,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
case BPF_TRACE_FSESSION:
+ case BPF_TRACE_FENTRY_MULTI:
+ case BPF_TRACE_FEXIT_MULTI:
if (prog->expected_attach_type == BPF_TRACE_FSESSION &&
!bpf_jit_supports_fsession()) {
bpf_log(log, "JIT does not support fsession\n");
@@ -25202,7 +25212,17 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
if (ret < 0)
return ret;
- if (tgt_prog) {
+ /* *.multi programs don't need an address during program
+ * verification, we just take the module ref if needed.
+ */
+ if (is_tracing_multi_id(prog, btf_id)) {
+ if (btf_is_module(btf)) {
+ mod = btf_try_get_module(btf);
+ if (!mod)
+ return -ENOENT;
+ }
+ addr = 0;
+ } else if (tgt_prog) {
if (subprog == 0)
addr = (long) tgt_prog->bpf_func;
else
@@ -25231,10 +25251,16 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
switch (prog->type) {
case BPF_PROG_TYPE_TRACING:
+ /* *.multi sleepable programs will pass initial sleepable check,
+ * the actual attached btf ids are checked later during the link
+ * attachment.
+ */
+ if (is_tracing_multi_id(prog, btf_id))
+ ret = 0;
/* fentry/fexit/fmod_ret progs can be sleepable if they are
* attached to ALLOW_ERROR_INJECTION and are not in denylist.
*/
- if (!check_non_sleepable_error_inject(btf_id) &&
+ else if (!check_non_sleepable_error_inject(btf_id) &&
within_error_injection_list(addr))
ret = 0;
/* fentry/fexit/fmod_ret progs can also be sleepable if they are
@@ -25345,6 +25371,8 @@ static bool can_be_sleepable(struct bpf_prog *prog)
case BPF_MODIFY_RETURN:
case BPF_TRACE_ITER:
case BPF_TRACE_FSESSION:
+ case BPF_TRACE_FENTRY_MULTI:
+ case BPF_TRACE_FEXIT_MULTI:
return true;
default:
return false;
@@ -25414,6 +25442,8 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
return 0;
} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
return bpf_iter_prog_supported(prog);
+ } else if (is_tracing_multi(prog->expected_attach_type)) {
+ return prog->type == BPF_PROG_TYPE_TRACING ? 0 : -EINVAL;
}
if (prog->type == BPF_PROG_TYPE_LSM) {
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 178c4738e63b..3373450132f0 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -686,6 +686,8 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
case BPF_TRACE_FSESSION:
+ case BPF_TRACE_FENTRY_MULTI:
+ case BPF_TRACE_FEXIT_MULTI:
if (bpf_fentry_test1(1) != 2 ||
bpf_fentry_test2(2, 3) != 5 ||
bpf_fentry_test3(4, 5, 6) != 15 ||
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 5e38b4887de6..61f0fe5bc0aa 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1154,6 +1154,8 @@ enum bpf_attach_type {
BPF_TRACE_KPROBE_SESSION,
BPF_TRACE_UPROBE_SESSION,
BPF_TRACE_FSESSION,
+ BPF_TRACE_FENTRY_MULTI,
+ BPF_TRACE_FEXIT_MULTI,
__MAX_BPF_ATTACH_TYPE
};
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 0be7017800fe..1e19c7b861ec 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -136,6 +136,8 @@ static const char * const attach_type_name[] = {
[BPF_NETKIT_PEER] = "netkit_peer",
[BPF_TRACE_KPROBE_SESSION] = "trace_kprobe_session",
[BPF_TRACE_UPROBE_SESSION] = "trace_uprobe_session",
+ [BPF_TRACE_FENTRY_MULTI] = "trace_fentry_multi",
+ [BPF_TRACE_FEXIT_MULTI] = "trace_fexit_multi",
};
static const char * const link_type_name[] = {
--
2.53.0
^ permalink raw reply related
* [PATCHv2 bpf-next 05/23] bpf: Factor fsession link to use struct bpf_tramp_node
From: Jiri Olsa @ 2026-03-04 22:21 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260304222141.497203-1-jolsa@kernel.org>
Now that we split trampoline attachment object (bpf_tramp_node) from
the link object (bpf_tramp_link) we can use bpf_tramp_node as fsession's
fexit attachment object and get rid of the bpf_fsession_link object.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
include/linux/bpf.h | 6 +-----
kernel/bpf/syscall.c | 21 ++++++---------------
kernel/bpf/trampoline.c | 14 +++++++-------
3 files changed, 14 insertions(+), 27 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f97aa34ee4c2..d536640aef41 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1883,15 +1883,11 @@ struct bpf_shim_tramp_link {
struct bpf_tracing_link {
struct bpf_tramp_link link;
+ struct bpf_tramp_node fexit;
struct bpf_trampoline *trampoline;
struct bpf_prog *tgt_prog;
};
-struct bpf_fsession_link {
- struct bpf_tracing_link link;
- struct bpf_tramp_link fexit;
-};
-
struct bpf_raw_tp_link {
struct bpf_link link;
struct bpf_raw_event_map *btp;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6db6d1e74379..003ad95940c9 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3637,21 +3637,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id);
}
- if (prog->expected_attach_type == BPF_TRACE_FSESSION) {
- struct bpf_fsession_link *fslink;
-
- fslink = kzalloc_obj(*fslink, GFP_USER);
- if (fslink) {
- bpf_tramp_link_init(&fslink->fexit, BPF_LINK_TYPE_TRACING,
- &bpf_tracing_link_lops, prog, attach_type,
- bpf_cookie);
- link = &fslink->link;
- } else {
- link = NULL;
- }
- } else {
- link = kzalloc_obj(*link, GFP_USER);
- }
+ link = kzalloc_obj(*link, GFP_USER);
if (!link) {
err = -ENOMEM;
goto out_put_prog;
@@ -3659,6 +3645,11 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
bpf_tramp_link_init(&link->link, BPF_LINK_TYPE_TRACING,
&bpf_tracing_link_lops, prog, attach_type, bpf_cookie);
+ if (prog->expected_attach_type == BPF_TRACE_FSESSION) {
+ link->fexit.link = &link->link.link;
+ link->fexit.cookie = bpf_cookie;
+ }
+
mutex_lock(&prog->aux->dst_mutex);
/* There are a few possible cases here:
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index dfbc190056f2..2de81e4369a1 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -815,7 +815,7 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_node *node,
struct bpf_trampoline_ops *ops,
void *data)
{
- struct bpf_fsession_link *fslink = NULL;
+ struct bpf_tracing_link *tr_link = NULL;
enum bpf_tramp_prog_type kind;
struct bpf_tramp_node *node_existing;
struct hlist_head *prog_list;
@@ -865,8 +865,8 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_node *node,
hlist_add_head(&node->tramp_hlist, prog_list);
if (kind == BPF_TRAMP_FSESSION) {
tr->progs_cnt[BPF_TRAMP_FENTRY]++;
- fslink = container_of(node, struct bpf_fsession_link, link.link.node);
- hlist_add_head(&fslink->fexit.node.tramp_hlist, &tr->progs_hlist[BPF_TRAMP_FEXIT]);
+ tr_link = container_of(node, struct bpf_tracing_link, link.node);
+ hlist_add_head(&tr_link->fexit.tramp_hlist, &tr->progs_hlist[BPF_TRAMP_FEXIT]);
tr->progs_cnt[BPF_TRAMP_FEXIT]++;
} else {
tr->progs_cnt[kind]++;
@@ -876,7 +876,7 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_node *node,
hlist_del_init(&node->tramp_hlist);
if (kind == BPF_TRAMP_FSESSION) {
tr->progs_cnt[BPF_TRAMP_FENTRY]--;
- hlist_del_init(&fslink->fexit.node.tramp_hlist);
+ hlist_del_init(&tr_link->fexit.tramp_hlist);
tr->progs_cnt[BPF_TRAMP_FEXIT]--;
} else {
tr->progs_cnt[kind]--;
@@ -917,10 +917,10 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_node *node,
tgt_prog->aux->is_extended = false;
return err;
} else if (kind == BPF_TRAMP_FSESSION) {
- struct bpf_fsession_link *fslink =
- container_of(node, struct bpf_fsession_link, link.link.node);
+ struct bpf_tracing_link *tr_link =
+ container_of(node, struct bpf_tracing_link, link.node);
- hlist_del_init(&fslink->fexit.node.tramp_hlist);
+ hlist_del_init(&tr_link->fexit.tramp_hlist);
tr->progs_cnt[BPF_TRAMP_FEXIT]--;
kind = BPF_TRAMP_FENTRY;
}
--
2.53.0
^ permalink raw reply related
* [PATCHv2 bpf-next 04/23] bpf: Add struct bpf_tramp_node object
From: Jiri Olsa @ 2026-03-04 22:21 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: Hengqi Chen, bpf, linux-trace-kernel, Martin KaFai Lau,
Eduard Zingerman, Song Liu, Yonghong Song, Menglong Dong,
Steven Rostedt
In-Reply-To: <20260304222141.497203-1-jolsa@kernel.org>
Adding struct bpf_tramp_node to decouple the link out of the trampoline
attachment info.
At the moment the object for attaching bpf program to the trampoline is
'struct bpf_tramp_link':
struct bpf_tramp_link {
struct bpf_link link;
struct hlist_node tramp_hlist;
u64 cookie;
}
The link holds the bpf_prog pointer and forces one link - one program
binding logic. In following changes we want to attach program to multiple
trampolines but we want to keep just one bpf_link object.
Splitting struct bpf_tramp_link into:
struct bpf_tramp_link {
struct bpf_link link;
struct bpf_tramp_node node;
};
struct bpf_tramp_node {
struct bpf_link *link;
struct hlist_node tramp_hlist;
u64 cookie;
};
The 'struct bpf_tramp_link' defines standard single trampoline link
and 'struct bpf_tramp_node' is the attachment trampoline object with
pointer to the bpf_link object.
This will allow us to define link for multiple trampolines, like:
struct bpf_tracing_multi_link {
struct bpf_link link;
...
int nodes_cnt;
struct bpf_tracing_multi_node nodes[] __counted_by(nodes_cnt);
};
Cc: Hengqi Chen <hengqi.chen@gmail.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
arch/arm64/net/bpf_jit_comp.c | 58 +++++++++---------
arch/loongarch/net/bpf_jit.c | 44 ++++++-------
arch/powerpc/net/bpf_jit_comp.c | 46 +++++++-------
arch/riscv/net/bpf_jit_comp64.c | 52 ++++++++--------
arch/s390/net/bpf_jit_comp.c | 44 ++++++-------
arch/x86/net/bpf_jit_comp.c | 54 ++++++++--------
include/linux/bpf.h | 60 +++++++++++-------
kernel/bpf/bpf_struct_ops.c | 27 ++++----
kernel/bpf/syscall.c | 39 ++++++------
kernel/bpf/trampoline.c | 105 ++++++++++++++++----------------
net/bpf/bpf_dummy_struct_ops.c | 14 ++---
11 files changed, 281 insertions(+), 262 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index adf84962d579..6d08a6f08a0c 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2288,24 +2288,24 @@ bool bpf_jit_supports_subprog_tailcalls(void)
return true;
}
-static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
+static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_node *node,
int bargs_off, int retval_off, int run_ctx_off,
bool save_ret)
{
__le32 *branch;
u64 enter_prog;
u64 exit_prog;
- struct bpf_prog *p = l->link.prog;
+ struct bpf_prog *p = node->link->prog;
int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
enter_prog = (u64)bpf_trampoline_enter(p);
exit_prog = (u64)bpf_trampoline_exit(p);
- if (l->cookie == 0) {
+ if (node->cookie == 0) {
/* if cookie is zero, one instruction is enough to store it */
emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx);
} else {
- emit_a64_mov_i64(A64_R(10), l->cookie, ctx);
+ emit_a64_mov_i64(A64_R(10), node->cookie, ctx);
emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off),
ctx);
}
@@ -2355,7 +2355,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
emit_call(exit_prog, ctx);
}
-static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
+static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_nodes *tn,
int bargs_off, int retval_off, int run_ctx_off,
__le32 **branches)
{
@@ -2365,8 +2365,8 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
* Set this to 0 to avoid confusing the program.
*/
emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
- for (i = 0; i < tl->nr_links; i++) {
- invoke_bpf_prog(ctx, tl->links[i], bargs_off, retval_off,
+ for (i = 0; i < tn->nr_nodes; i++) {
+ invoke_bpf_prog(ctx, tn->nodes[i], bargs_off, retval_off,
run_ctx_off, true);
/* if (*(u64 *)(sp + retval_off) != 0)
* goto do_fexit;
@@ -2497,10 +2497,10 @@ static void restore_args(struct jit_ctx *ctx, int bargs_off, int nregs)
}
}
-static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links)
+static bool is_struct_ops_tramp(const struct bpf_tramp_nodes *fentry_nodes)
{
- return fentry_links->nr_links == 1 &&
- fentry_links->links[0]->link.type == BPF_LINK_TYPE_STRUCT_OPS;
+ return fentry_nodes->nr_nodes == 1 &&
+ fentry_nodes->nodes[0]->link->type == BPF_LINK_TYPE_STRUCT_OPS;
}
static void store_func_meta(struct jit_ctx *ctx, u64 func_meta, int func_meta_off)
@@ -2521,7 +2521,7 @@ static void store_func_meta(struct jit_ctx *ctx, u64 func_meta, int func_meta_of
*
*/
static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
- struct bpf_tramp_links *tlinks, void *func_addr,
+ struct bpf_tramp_nodes *tnodes, void *func_addr,
const struct btf_func_model *m,
const struct arg_aux *a,
u32 flags)
@@ -2537,14 +2537,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
int run_ctx_off;
int oargs_off;
int nfuncargs;
- struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
- struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
- struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN];
bool save_ret;
__le32 **branches = NULL;
bool is_struct_ops = is_struct_ops_tramp(fentry);
int cookie_off, cookie_cnt, cookie_bargs_off;
- int fsession_cnt = bpf_fsession_cnt(tlinks);
+ int fsession_cnt = bpf_fsession_cnt(tnodes);
u64 func_meta;
/* trampoline stack layout:
@@ -2590,7 +2590,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
cookie_off = stack_size;
/* room for session cookies */
- cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
+ cookie_cnt = bpf_fsession_cookie_cnt(tnodes);
stack_size += cookie_cnt * 8;
ip_off = stack_size;
@@ -2687,20 +2687,20 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
}
cookie_bargs_off = (bargs_off - cookie_off) / 8;
- for (i = 0; i < fentry->nr_links; i++) {
- if (bpf_prog_calls_session_cookie(fentry->links[i])) {
+ for (i = 0; i < fentry->nr_nodes; i++) {
+ if (bpf_prog_calls_session_cookie(fentry->nodes[i])) {
u64 meta = func_meta | (cookie_bargs_off << BPF_TRAMP_COOKIE_INDEX_SHIFT);
store_func_meta(ctx, meta, func_meta_off);
cookie_bargs_off--;
}
- invoke_bpf_prog(ctx, fentry->links[i], bargs_off,
+ invoke_bpf_prog(ctx, fentry->nodes[i], bargs_off,
retval_off, run_ctx_off,
flags & BPF_TRAMP_F_RET_FENTRY_RET);
}
- if (fmod_ret->nr_links) {
- branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *),
+ if (fmod_ret->nr_nodes) {
+ branches = kcalloc(fmod_ret->nr_nodes, sizeof(__le32 *),
GFP_KERNEL);
if (!branches)
return -ENOMEM;
@@ -2724,7 +2724,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
}
/* update the branches saved in invoke_bpf_mod_ret with cbnz */
- for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) {
+ for (i = 0; i < fmod_ret->nr_nodes && ctx->image != NULL; i++) {
int offset = &ctx->image[ctx->idx] - branches[i];
*branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset));
}
@@ -2735,14 +2735,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
store_func_meta(ctx, func_meta, func_meta_off);
cookie_bargs_off = (bargs_off - cookie_off) / 8;
- for (i = 0; i < fexit->nr_links; i++) {
- if (bpf_prog_calls_session_cookie(fexit->links[i])) {
+ for (i = 0; i < fexit->nr_nodes; i++) {
+ if (bpf_prog_calls_session_cookie(fexit->nodes[i])) {
u64 meta = func_meta | (cookie_bargs_off << BPF_TRAMP_COOKIE_INDEX_SHIFT);
store_func_meta(ctx, meta, func_meta_off);
cookie_bargs_off--;
}
- invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off,
+ invoke_bpf_prog(ctx, fexit->nodes[i], bargs_off, retval_off,
run_ctx_off, false);
}
@@ -2800,7 +2800,7 @@ bool bpf_jit_supports_fsession(void)
}
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *func_addr)
+ struct bpf_tramp_nodes *tnodes, void *func_addr)
{
struct jit_ctx ctx = {
.image = NULL,
@@ -2814,7 +2814,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
if (ret < 0)
return ret;
- ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, m, &aaux, flags);
+ ret = prepare_trampoline(&ctx, &im, tnodes, func_addr, m, &aaux, flags);
if (ret < 0)
return ret;
@@ -2838,7 +2838,7 @@ int arch_protect_bpf_trampoline(void *image, unsigned int size)
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
void *ro_image_end, const struct btf_func_model *m,
- u32 flags, struct bpf_tramp_links *tlinks,
+ u32 flags, struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
u32 size = ro_image_end - ro_image;
@@ -2865,7 +2865,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
ret = calc_arg_aux(m, &aaux);
if (ret)
goto out;
- ret = prepare_trampoline(&ctx, im, tlinks, func_addr, m, &aaux, flags);
+ ret = prepare_trampoline(&ctx, im, tnodes, func_addr, m, &aaux, flags);
if (ret > 0 && validate_code(&ctx) < 0) {
ret = -EINVAL;
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 3bd89f55960d..a2471f42376e 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1480,16 +1480,16 @@ static void restore_args(struct jit_ctx *ctx, int nargs, int args_off)
}
}
-static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
+static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_node *n,
int args_off, int retval_off, int run_ctx_off, bool save_ret)
{
int ret;
u32 *branch;
- struct bpf_prog *p = l->link.prog;
+ struct bpf_prog *p = n->link->prog;
int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
- if (l->cookie) {
- move_imm(ctx, LOONGARCH_GPR_T1, l->cookie, false);
+ if (n->cookie) {
+ move_imm(ctx, LOONGARCH_GPR_T1, n->cookie, false);
emit_insn(ctx, std, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off);
} else {
emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -run_ctx_off + cookie_off);
@@ -1544,14 +1544,14 @@ static int invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
return ret;
}
-static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
+static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_nodes *tn,
int args_off, int retval_off, int run_ctx_off, u32 **branches)
{
int i;
emit_insn(ctx, std, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_FP, -retval_off);
- for (i = 0; i < tl->nr_links; i++) {
- invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, run_ctx_off, true);
+ for (i = 0; i < tn->nr_nodes; i++) {
+ invoke_bpf_prog(ctx, tn->nodes[i], args_off, retval_off, run_ctx_off, true);
emit_insn(ctx, ldd, LOONGARCH_GPR_T1, LOONGARCH_GPR_FP, -retval_off);
branches[i] = (u32 *)ctx->image + ctx->idx;
emit_insn(ctx, nop);
@@ -1600,7 +1600,7 @@ static void sign_extend(struct jit_ctx *ctx, int rd, int rj, u8 size, bool sign)
}
static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
- const struct btf_func_model *m, struct bpf_tramp_links *tlinks,
+ const struct btf_func_model *m, struct bpf_tramp_nodes *tnodes,
void *func_addr, u32 flags)
{
int i, ret, save_ret;
@@ -1608,9 +1608,9 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
int retval_off, args_off, nargs_off, ip_off, run_ctx_off, sreg_off, tcc_ptr_off;
bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT;
void *orig_call = func_addr;
- struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
- struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
- struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN];
u32 **branches = NULL;
/*
@@ -1753,14 +1753,14 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
return ret;
}
- for (i = 0; i < fentry->nr_links; i++) {
- ret = invoke_bpf_prog(ctx, fentry->links[i], args_off, retval_off,
+ for (i = 0; i < fentry->nr_nodes; i++) {
+ ret = invoke_bpf_prog(ctx, fentry->nodes[i], args_off, retval_off,
run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET);
if (ret)
return ret;
}
- if (fmod_ret->nr_links) {
- branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), GFP_KERNEL);
+ if (fmod_ret->nr_nodes) {
+ branches = kcalloc(fmod_ret->nr_nodes, sizeof(u32 *), GFP_KERNEL);
if (!branches)
return -ENOMEM;
@@ -1784,13 +1784,13 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
emit_insn(ctx, nop);
}
- for (i = 0; ctx->image && i < fmod_ret->nr_links; i++) {
+ for (i = 0; ctx->image && i < fmod_ret->nr_nodes; i++) {
int offset = (void *)(&ctx->image[ctx->idx]) - (void *)branches[i];
*branches[i] = larch_insn_gen_bne(LOONGARCH_GPR_T1, LOONGARCH_GPR_ZERO, offset);
}
- for (i = 0; i < fexit->nr_links; i++) {
- ret = invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off, run_ctx_off, false);
+ for (i = 0; i < fexit->nr_nodes; i++) {
+ ret = invoke_bpf_prog(ctx, fexit->nodes[i], args_off, retval_off, run_ctx_off, false);
if (ret)
goto out;
}
@@ -1858,7 +1858,7 @@ static int __arch_prepare_bpf_trampoline(struct jit_ctx *ctx, struct bpf_tramp_i
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
void *ro_image_end, const struct btf_func_model *m,
- u32 flags, struct bpf_tramp_links *tlinks, void *func_addr)
+ u32 flags, struct bpf_tramp_nodes *tnodes, void *func_addr)
{
int ret, size;
void *image, *tmp;
@@ -1874,7 +1874,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
ctx.idx = 0;
jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
- ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tlinks, func_addr, flags);
+ ret = __arch_prepare_bpf_trampoline(&ctx, im, m, tnodes, func_addr, flags);
if (ret < 0)
goto out;
@@ -1895,7 +1895,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
}
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *func_addr)
+ struct bpf_tramp_nodes *tnodes, void *func_addr)
{
int ret;
struct jit_ctx ctx;
@@ -1904,7 +1904,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
ctx.image = NULL;
ctx.idx = 0;
- ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tlinks, func_addr, flags);
+ ret = __arch_prepare_bpf_trampoline(&ctx, &im, m, tnodes, func_addr, flags);
return ret < 0 ? ret : ret * LOONGARCH_INSN_SIZE;
}
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 52162e4a7f84..462344a58902 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -512,22 +512,22 @@ int arch_protect_bpf_trampoline(void *image, unsigned int size)
}
static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ctx,
- struct bpf_tramp_link *l, int regs_off, int retval_off,
+ struct bpf_tramp_node *n, int regs_off, int retval_off,
int run_ctx_off, bool save_ret)
{
- struct bpf_prog *p = l->link.prog;
+ struct bpf_prog *p = n->link->prog;
ppc_inst_t branch_insn;
u32 jmp_idx;
int ret = 0;
/* Save cookie */
if (IS_ENABLED(CONFIG_PPC64)) {
- PPC_LI64(_R3, l->cookie);
+ PPC_LI64(_R3, n->cookie);
EMIT(PPC_RAW_STD(_R3, _R1, run_ctx_off + offsetof(struct bpf_tramp_run_ctx,
bpf_cookie)));
} else {
- PPC_LI32(_R3, l->cookie >> 32);
- PPC_LI32(_R4, l->cookie);
+ PPC_LI32(_R3, n->cookie >> 32);
+ PPC_LI32(_R4, n->cookie);
EMIT(PPC_RAW_STW(_R3, _R1,
run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie)));
EMIT(PPC_RAW_STW(_R4, _R1,
@@ -594,7 +594,7 @@ static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ct
}
static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context *ctx,
- struct bpf_tramp_links *tl, int regs_off, int retval_off,
+ struct bpf_tramp_nodes *tn, int regs_off, int retval_off,
int run_ctx_off, u32 *branches)
{
int i;
@@ -605,8 +605,8 @@ static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context
*/
EMIT(PPC_RAW_LI(_R3, 0));
EMIT(PPC_RAW_STL(_R3, _R1, retval_off));
- for (i = 0; i < tl->nr_links; i++) {
- if (invoke_bpf_prog(image, ro_image, ctx, tl->links[i], regs_off, retval_off,
+ for (i = 0; i < tn->nr_nodes; i++) {
+ if (invoke_bpf_prog(image, ro_image, ctx, tn->nodes[i], regs_off, retval_off,
run_ctx_off, true))
return -EINVAL;
@@ -737,14 +737,14 @@ static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context
static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image,
void *rw_image_end, void *ro_image,
const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
int regs_off, nregs_off, ip_off, run_ctx_off, retval_off, nvr_off, alt_lr_off, r4_off = 0;
int i, ret, nr_regs, bpf_frame_size = 0, bpf_dummy_frame_size = 0, func_frame_offset;
- struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
- struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
- struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT];
struct codegen_context codegen_ctx, *ctx;
u32 *image = (u32 *)rw_image;
ppc_inst_t branch_insn;
@@ -938,13 +938,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
return ret;
}
- for (i = 0; i < fentry->nr_links; i++)
- if (invoke_bpf_prog(image, ro_image, ctx, fentry->links[i], regs_off, retval_off,
+ for (i = 0; i < fentry->nr_nodes; i++)
+ if (invoke_bpf_prog(image, ro_image, ctx, fentry->nodes[i], regs_off, retval_off,
run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET))
return -EINVAL;
- if (fmod_ret->nr_links) {
- branches = kcalloc(fmod_ret->nr_links, sizeof(u32), GFP_KERNEL);
+ if (fmod_ret->nr_nodes) {
+ branches = kcalloc(fmod_ret->nr_nodes, sizeof(u32), GFP_KERNEL);
if (!branches)
return -ENOMEM;
@@ -994,7 +994,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
/* Update branches saved in invoke_bpf_mod_ret with address of do_fexit */
- for (i = 0; i < fmod_ret->nr_links && image; i++) {
+ for (i = 0; i < fmod_ret->nr_nodes && image; i++) {
if (create_cond_branch(&branch_insn, &image[branches[i]],
(unsigned long)&image[ctx->idx], COND_NE << 16)) {
ret = -EINVAL;
@@ -1004,8 +1004,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
image[branches[i]] = ppc_inst_val(branch_insn);
}
- for (i = 0; i < fexit->nr_links; i++)
- if (invoke_bpf_prog(image, ro_image, ctx, fexit->links[i], regs_off, retval_off,
+ for (i = 0; i < fexit->nr_nodes; i++)
+ if (invoke_bpf_prog(image, ro_image, ctx, fexit->nodes[i], regs_off, retval_off,
run_ctx_off, false)) {
ret = -EINVAL;
goto cleanup;
@@ -1071,18 +1071,18 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *func_addr)
+ struct bpf_tramp_nodes *tnodes, void *func_addr)
{
struct bpf_tramp_image im;
int ret;
- ret = __arch_prepare_bpf_trampoline(&im, NULL, NULL, NULL, m, flags, tlinks, func_addr);
+ ret = __arch_prepare_bpf_trampoline(&im, NULL, NULL, NULL, m, flags, tnodes, func_addr);
return ret;
}
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
u32 size = image_end - image;
@@ -1098,7 +1098,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
return -ENOMEM;
ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m,
- flags, tlinks, func_addr);
+ flags, tnodes, func_addr);
if (ret < 0)
goto out;
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 2f1109dbf105..461b902a5f92 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -934,15 +934,15 @@ static void emit_store_stack_imm64(u8 reg, int stack_off, u64 imm64,
emit_sd(RV_REG_FP, stack_off, reg, ctx);
}
-static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_off,
+static int invoke_bpf_prog(struct bpf_tramp_node *node, int args_off, int retval_off,
int run_ctx_off, bool save_ret, struct rv_jit_context *ctx)
{
int ret, branch_off;
- struct bpf_prog *p = l->link.prog;
+ struct bpf_prog *p = node->link->prog;
int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
- if (l->cookie)
- emit_store_stack_imm64(RV_REG_T1, -run_ctx_off + cookie_off, l->cookie, ctx);
+ if (node->cookie)
+ emit_store_stack_imm64(RV_REG_T1, -run_ctx_off + cookie_off, node->cookie, ctx);
else
emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_ZERO, ctx);
@@ -996,22 +996,22 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of
return ret;
}
-static int invoke_bpf(struct bpf_tramp_links *tl, int args_off, int retval_off,
+static int invoke_bpf(struct bpf_tramp_nodes *tn, int args_off, int retval_off,
int run_ctx_off, int func_meta_off, bool save_ret, u64 func_meta,
int cookie_off, struct rv_jit_context *ctx)
{
int i, cur_cookie = (cookie_off - args_off) / 8;
- for (i = 0; i < tl->nr_links; i++) {
+ for (i = 0; i < tn->nr_nodes; i++) {
int err;
- if (bpf_prog_calls_session_cookie(tl->links[i])) {
+ if (bpf_prog_calls_session_cookie(tn->nodes[i])) {
u64 meta = func_meta | ((u64)cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT);
emit_store_stack_imm64(RV_REG_T1, -func_meta_off, meta, ctx);
cur_cookie--;
}
- err = invoke_bpf_prog(tl->links[i], args_off, retval_off, run_ctx_off,
+ err = invoke_bpf_prog(tn->nodes[i], args_off, retval_off, run_ctx_off,
save_ret, ctx);
if (err)
return err;
@@ -1021,7 +1021,7 @@ static int invoke_bpf(struct bpf_tramp_links *tl, int args_off, int retval_off,
static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
const struct btf_func_model *m,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr, u32 flags,
struct rv_jit_context *ctx)
{
@@ -1030,9 +1030,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
int stack_size = 0, nr_arg_slots = 0;
int retval_off, args_off, func_meta_off, ip_off, run_ctx_off, sreg_off, stk_arg_off;
int cookie_off, cookie_cnt;
- struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
- struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
- struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN];
bool is_struct_ops = flags & BPF_TRAMP_F_INDIRECT;
void *orig_call = func_addr;
bool save_ret;
@@ -1115,7 +1115,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
ip_off = stack_size;
}
- cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
+ cookie_cnt = bpf_fsession_cookie_cnt(tnodes);
/* room for session cookies */
stack_size += cookie_cnt * 8;
cookie_off = stack_size;
@@ -1172,7 +1172,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
store_args(nr_arg_slots, args_off, ctx);
- if (bpf_fsession_cnt(tlinks)) {
+ if (bpf_fsession_cnt(tnodes)) {
/* clear all session cookies' value */
for (i = 0; i < cookie_cnt; i++)
emit_sd(RV_REG_FP, -cookie_off + 8 * i, RV_REG_ZERO, ctx);
@@ -1187,22 +1187,22 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
return ret;
}
- if (fentry->nr_links) {
+ if (fentry->nr_nodes) {
ret = invoke_bpf(fentry, args_off, retval_off, run_ctx_off, func_meta_off,
flags & BPF_TRAMP_F_RET_FENTRY_RET, func_meta, cookie_off, ctx);
if (ret)
return ret;
}
- if (fmod_ret->nr_links) {
- branches_off = kzalloc_objs(int, fmod_ret->nr_links);
+ if (fmod_ret->nr_nodes) {
+ branches_off = kzalloc_objs(int, fmod_ret->nr_nodes);
if (!branches_off)
return -ENOMEM;
/* cleanup to avoid garbage return value confusion */
emit_sd(RV_REG_FP, -retval_off, RV_REG_ZERO, ctx);
- for (i = 0; i < fmod_ret->nr_links; i++) {
- ret = invoke_bpf_prog(fmod_ret->links[i], args_off, retval_off,
+ for (i = 0; i < fmod_ret->nr_nodes; i++) {
+ ret = invoke_bpf_prog(fmod_ret->nodes[i], args_off, retval_off,
run_ctx_off, true, ctx);
if (ret)
goto out;
@@ -1230,7 +1230,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
}
/* update branches saved in invoke_bpf_mod_ret with bnez */
- for (i = 0; ctx->insns && i < fmod_ret->nr_links; i++) {
+ for (i = 0; ctx->insns && i < fmod_ret->nr_nodes; i++) {
offset = ninsns_rvoff(ctx->ninsns - branches_off[i]);
insn = rv_bne(RV_REG_T1, RV_REG_ZERO, offset >> 1);
*(u32 *)(ctx->insns + branches_off[i]) = insn;
@@ -1238,10 +1238,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
/* set "is_return" flag for fsession */
func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT);
- if (bpf_fsession_cnt(tlinks))
+ if (bpf_fsession_cnt(tnodes))
emit_store_stack_imm64(RV_REG_T1, -func_meta_off, func_meta, ctx);
- if (fexit->nr_links) {
+ if (fexit->nr_nodes) {
ret = invoke_bpf(fexit, args_off, retval_off, run_ctx_off, func_meta_off,
false, func_meta, cookie_off, ctx);
if (ret)
@@ -1305,7 +1305,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
}
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *func_addr)
+ struct bpf_tramp_nodes *tnodes, void *func_addr)
{
struct bpf_tramp_image im;
struct rv_jit_context ctx;
@@ -1314,7 +1314,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
ctx.ninsns = 0;
ctx.insns = NULL;
ctx.ro_insns = NULL;
- ret = __arch_prepare_bpf_trampoline(&im, m, tlinks, func_addr, flags, &ctx);
+ ret = __arch_prepare_bpf_trampoline(&im, m, tnodes, func_addr, flags, &ctx);
return ret < 0 ? ret : ninsns_rvoff(ctx.ninsns);
}
@@ -1331,7 +1331,7 @@ void arch_free_bpf_trampoline(void *image, unsigned int size)
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
void *ro_image_end, const struct btf_func_model *m,
- u32 flags, struct bpf_tramp_links *tlinks,
+ u32 flags, struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
int ret;
@@ -1346,7 +1346,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
ctx.ninsns = 0;
ctx.insns = image;
ctx.ro_insns = ro_image;
- ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
+ ret = __arch_prepare_bpf_trampoline(im, m, tnodes, func_addr, flags, &ctx);
if (ret < 0)
goto out;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 1f9a6b728beb..888e9d717dd5 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -2522,19 +2522,19 @@ static void emit_store_stack_imm64(struct bpf_jit *jit, int tmp_reg, int stack_o
static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
const struct btf_func_model *m,
- struct bpf_tramp_link *tlink, bool save_ret)
+ struct bpf_tramp_node *node, bool save_ret)
{
struct bpf_jit *jit = &tjit->common;
int cookie_off = tjit->run_ctx_off +
offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
- struct bpf_prog *p = tlink->link.prog;
+ struct bpf_prog *p = node->link->prog;
int patch;
/*
- * run_ctx.cookie = tlink->cookie;
+ * run_ctx.cookie = node->cookie;
*/
- emit_store_stack_imm64(jit, REG_W0, cookie_off, tlink->cookie);
+ emit_store_stack_imm64(jit, REG_W0, cookie_off, node->cookie);
/*
* if ((start = __bpf_prog_enter(p, &run_ctx)) == 0)
@@ -2594,20 +2594,20 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
static int invoke_bpf(struct bpf_tramp_jit *tjit,
const struct btf_func_model *m,
- struct bpf_tramp_links *tl, bool save_ret,
+ struct bpf_tramp_nodes *tn, bool save_ret,
u64 func_meta, int cookie_off)
{
int i, cur_cookie = (tjit->bpf_args_off - cookie_off) / sizeof(u64);
struct bpf_jit *jit = &tjit->common;
- for (i = 0; i < tl->nr_links; i++) {
- if (bpf_prog_calls_session_cookie(tl->links[i])) {
+ for (i = 0; i < tn->nr_nodes; i++) {
+ if (bpf_prog_calls_session_cookie(tn->nodes[i])) {
u64 meta = func_meta | ((u64)cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT);
emit_store_stack_imm64(jit, REG_0, tjit->func_meta_off, meta);
cur_cookie--;
}
- if (invoke_bpf_prog(tjit, m, tl->links[i], save_ret))
+ if (invoke_bpf_prog(tjit, m, tn->nodes[i], save_ret))
return -EINVAL;
}
@@ -2636,12 +2636,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
struct bpf_tramp_jit *tjit,
const struct btf_func_model *m,
u32 flags,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
- struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
- struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
- struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT];
int nr_bpf_args, nr_reg_args, nr_stack_args;
int cookie_cnt, cookie_off, fsession_cnt;
struct bpf_jit *jit = &tjit->common;
@@ -2678,8 +2678,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
return -ENOTSUPP;
}
- cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
- fsession_cnt = bpf_fsession_cnt(tlinks);
+ cookie_cnt = bpf_fsession_cookie_cnt(tnodes);
+ fsession_cnt = bpf_fsession_cnt(tnodes);
/*
* Calculate the stack layout.
@@ -2814,7 +2814,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
func_meta, cookie_off))
return -EINVAL;
- if (fmod_ret->nr_links) {
+ if (fmod_ret->nr_nodes) {
/*
* retval = 0;
*/
@@ -2823,8 +2823,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
_EMIT6(0xd707f000 | tjit->retval_off,
0xf000 | tjit->retval_off);
- for (i = 0; i < fmod_ret->nr_links; i++) {
- if (invoke_bpf_prog(tjit, m, fmod_ret->links[i], true))
+ for (i = 0; i < fmod_ret->nr_nodes; i++) {
+ if (invoke_bpf_prog(tjit, m, fmod_ret->nodes[i], true))
return -EINVAL;
/*
@@ -2949,7 +2949,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
}
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *orig_call)
+ struct bpf_tramp_nodes *tnodes, void *orig_call)
{
struct bpf_tramp_image im;
struct bpf_tramp_jit tjit;
@@ -2958,14 +2958,14 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
memset(&tjit, 0, sizeof(tjit));
ret = __arch_prepare_bpf_trampoline(&im, &tjit, m, flags,
- tlinks, orig_call);
+ tnodes, orig_call);
return ret < 0 ? ret : tjit.common.prg;
}
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
void *image_end, const struct btf_func_model *m,
- u32 flags, struct bpf_tramp_links *tlinks,
+ u32 flags, struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
struct bpf_tramp_jit tjit;
@@ -2974,7 +2974,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
/* Compute offsets, check whether the code fits. */
memset(&tjit, 0, sizeof(tjit));
ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
- tlinks, func_addr);
+ tnodes, func_addr);
if (ret < 0)
return ret;
@@ -2988,7 +2988,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
tjit.common.prg = 0;
tjit.common.prg_buf = image;
ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
- tlinks, func_addr);
+ tnodes, func_addr);
return ret < 0 ? ret : tjit.common.prg;
}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 8f10080e6fe3..cc712d8e8a5f 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -2978,15 +2978,15 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog,
}
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
- struct bpf_tramp_link *l, int stack_size,
+ struct bpf_tramp_node *node, int stack_size,
int run_ctx_off, bool save_ret,
void *image, void *rw_image)
{
u8 *prog = *pprog;
u8 *jmp_insn;
int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
- struct bpf_prog *p = l->link.prog;
- u64 cookie = l->cookie;
+ struct bpf_prog *p = node->link->prog;
+ u64 cookie = node->cookie;
/* mov rdi, cookie */
emit_mov_imm64(&prog, BPF_REG_1, (long) cookie >> 32, (u32) (long) cookie);
@@ -3093,7 +3093,7 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
}
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
- struct bpf_tramp_links *tl, int stack_size,
+ struct bpf_tramp_nodes *tl, int stack_size,
int run_ctx_off, int func_meta_off, bool save_ret,
void *image, void *rw_image, u64 func_meta,
int cookie_off)
@@ -3101,13 +3101,13 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
int i, cur_cookie = (cookie_off - stack_size) / 8;
u8 *prog = *pprog;
- for (i = 0; i < tl->nr_links; i++) {
- if (tl->links[i]->link.prog->call_session_cookie) {
+ for (i = 0; i < tl->nr_nodes; i++) {
+ if (tl->nodes[i]->link->prog->call_session_cookie) {
emit_store_stack_imm64(&prog, BPF_REG_0, -func_meta_off,
func_meta | (cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT));
cur_cookie--;
}
- if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
+ if (invoke_bpf_prog(m, &prog, tl->nodes[i], stack_size,
run_ctx_off, save_ret, image, rw_image))
return -EINVAL;
}
@@ -3116,7 +3116,7 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
}
static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
- struct bpf_tramp_links *tl, int stack_size,
+ struct bpf_tramp_nodes *tl, int stack_size,
int run_ctx_off, u8 **branches,
void *image, void *rw_image)
{
@@ -3128,8 +3128,8 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
*/
emit_mov_imm32(&prog, false, BPF_REG_0, 0);
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
- for (i = 0; i < tl->nr_links; i++) {
- if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true,
+ for (i = 0; i < tl->nr_nodes; i++) {
+ if (invoke_bpf_prog(m, &prog, tl->nodes[i], stack_size, run_ctx_off, true,
image, rw_image))
return -EINVAL;
@@ -3220,14 +3220,14 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image,
void *rw_image_end, void *image,
const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
int i, ret, nr_regs = m->nr_args, stack_size = 0;
int regs_off, func_meta_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
- struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
- struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
- struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_nodes *fentry = &tnodes[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes *fexit = &tnodes[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_nodes *fmod_ret = &tnodes[BPF_TRAMP_MODIFY_RETURN];
void *orig_call = func_addr;
int cookie_off, cookie_cnt;
u8 **branches = NULL;
@@ -3299,7 +3299,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
ip_off = stack_size;
- cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
+ cookie_cnt = bpf_fsession_cookie_cnt(tnodes);
/* room for session cookies */
stack_size += cookie_cnt * 8;
cookie_off = stack_size;
@@ -3392,7 +3392,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
}
- if (bpf_fsession_cnt(tlinks)) {
+ if (bpf_fsession_cnt(tnodes)) {
/* clear all the session cookies' value */
for (int i = 0; i < cookie_cnt; i++)
emit_store_stack_imm64(&prog, BPF_REG_0, -cookie_off + 8 * i, 0);
@@ -3400,15 +3400,15 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
emit_store_stack_imm64(&prog, BPF_REG_0, -8, 0);
}
- if (fentry->nr_links) {
+ if (fentry->nr_nodes) {
if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off, func_meta_off,
flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image,
func_meta, cookie_off))
return -EINVAL;
}
- if (fmod_ret->nr_links) {
- branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *),
+ if (fmod_ret->nr_nodes) {
+ branches = kcalloc(fmod_ret->nr_nodes, sizeof(u8 *),
GFP_KERNEL);
if (!branches)
return -ENOMEM;
@@ -3447,7 +3447,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
emit_nops(&prog, X86_PATCH_SIZE);
}
- if (fmod_ret->nr_links) {
+ if (fmod_ret->nr_nodes) {
/* From Intel 64 and IA-32 Architectures Optimization
* Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
* Coding Rule 11: All branch targets should be 16-byte
@@ -3457,7 +3457,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
/* Update the branches saved in invoke_bpf_mod_ret with the
* aligned address of do_fexit.
*/
- for (i = 0; i < fmod_ret->nr_links; i++) {
+ for (i = 0; i < fmod_ret->nr_nodes; i++) {
emit_cond_near_jump(&branches[i], image + (prog - (u8 *)rw_image),
image + (branches[i] - (u8 *)rw_image), X86_JNE);
}
@@ -3465,10 +3465,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
/* set the "is_return" flag for fsession */
func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT);
- if (bpf_fsession_cnt(tlinks))
+ if (bpf_fsession_cnt(tnodes))
emit_store_stack_imm64(&prog, BPF_REG_0, -func_meta_off, func_meta);
- if (fexit->nr_links) {
+ if (fexit->nr_nodes) {
if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, func_meta_off,
false, image, rw_image, func_meta, cookie_off)) {
ret = -EINVAL;
@@ -3542,7 +3542,7 @@ int arch_protect_bpf_trampoline(void *image, unsigned int size)
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
void *rw_image, *tmp;
@@ -3557,7 +3557,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
return -ENOMEM;
ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m,
- flags, tlinks, func_addr);
+ flags, tnodes, func_addr);
if (ret < 0)
goto out;
@@ -3570,7 +3570,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
}
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *func_addr)
+ struct bpf_tramp_nodes *tnodes, void *func_addr)
{
struct bpf_tramp_image im;
void *image;
@@ -3588,7 +3588,7 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
return -ENOMEM;
ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
- m, flags, tlinks, func_addr);
+ m, flags, tnodes, func_addr);
bpf_jit_free_exec(image);
return ret;
}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1d900f49aff5..f97aa34ee4c2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1233,9 +1233,9 @@ enum {
#define BPF_TRAMP_COOKIE_INDEX_SHIFT 8
#define BPF_TRAMP_IS_RETURN_SHIFT 63
-struct bpf_tramp_links {
- struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS];
- int nr_links;
+struct bpf_tramp_nodes {
+ struct bpf_tramp_node *nodes[BPF_MAX_TRAMP_LINKS];
+ int nr_nodes;
};
struct bpf_tramp_run_ctx;
@@ -1263,13 +1263,13 @@ struct bpf_tramp_run_ctx;
struct bpf_tramp_image;
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr);
void *arch_alloc_bpf_trampoline(unsigned int size);
void arch_free_bpf_trampoline(void *image, unsigned int size);
int __must_check arch_protect_bpf_trampoline(void *image, unsigned int size);
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *func_addr);
+ struct bpf_tramp_nodes *tnodes, void *func_addr);
u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx);
@@ -1453,10 +1453,10 @@ static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u6
}
#ifdef CONFIG_BPF_JIT
-int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+int bpf_trampoline_link_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog);
-int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
+int bpf_trampoline_unlink_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog);
struct bpf_trampoline *bpf_trampoline_get(u64 key,
@@ -1540,13 +1540,13 @@ int bpf_jit_charge_modmem(u32 size);
void bpf_jit_uncharge_modmem(u32 size);
bool bpf_prog_has_trampoline(const struct bpf_prog *prog);
#else
-static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+static inline int bpf_trampoline_link_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog)
{
return -ENOTSUPP;
}
-static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
+static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog)
{
@@ -1865,12 +1865,17 @@ struct bpf_link_ops {
__poll_t (*poll)(struct file *file, struct poll_table_struct *pts);
};
-struct bpf_tramp_link {
- struct bpf_link link;
+struct bpf_tramp_node {
+ struct bpf_link *link;
struct hlist_node tramp_hlist;
u64 cookie;
};
+struct bpf_tramp_link {
+ struct bpf_link link;
+ struct bpf_tramp_node node;
+};
+
struct bpf_shim_tramp_link {
struct bpf_tramp_link link;
struct bpf_trampoline *trampoline;
@@ -2088,8 +2093,8 @@ void bpf_struct_ops_put(const void *kdata);
int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff);
int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
void *value);
-int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
- struct bpf_tramp_link *link,
+int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_nodes *tnodes,
+ struct bpf_tramp_node *node,
const struct btf_func_model *model,
void *stub_func,
void **image, u32 *image_off,
@@ -2181,31 +2186,31 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op
#endif
-static inline int bpf_fsession_cnt(struct bpf_tramp_links *links)
+static inline int bpf_fsession_cnt(struct bpf_tramp_nodes *nodes)
{
- struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes fentries = nodes[BPF_TRAMP_FENTRY];
int cnt = 0;
- for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
- if (fentries.links[i]->link.prog->expected_attach_type == BPF_TRACE_FSESSION)
+ for (int i = 0; i < nodes[BPF_TRAMP_FENTRY].nr_nodes; i++) {
+ if (fentries.nodes[i]->link->prog->expected_attach_type == BPF_TRACE_FSESSION)
cnt++;
}
return cnt;
}
-static inline bool bpf_prog_calls_session_cookie(struct bpf_tramp_link *link)
+static inline bool bpf_prog_calls_session_cookie(struct bpf_tramp_node *node)
{
- return link->link.prog->call_session_cookie;
+ return node->link->prog->call_session_cookie;
}
-static inline int bpf_fsession_cookie_cnt(struct bpf_tramp_links *links)
+static inline int bpf_fsession_cookie_cnt(struct bpf_tramp_nodes *nodes)
{
- struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_nodes fentries = nodes[BPF_TRAMP_FENTRY];
int cnt = 0;
- for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) {
- if (bpf_prog_calls_session_cookie(fentries.links[i]))
+ for (int i = 0; i < nodes[BPF_TRAMP_FENTRY].nr_nodes; i++) {
+ if (bpf_prog_calls_session_cookie(fentries.nodes[i]))
cnt++;
}
@@ -2758,6 +2763,9 @@ void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type,
const struct bpf_link_ops *ops, struct bpf_prog *prog,
enum bpf_attach_type attach_type, bool sleepable);
+void bpf_tramp_link_init(struct bpf_tramp_link *link, enum bpf_link_type type,
+ const struct bpf_link_ops *ops, struct bpf_prog *prog,
+ enum bpf_attach_type attach_type, u64 cookie);
int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer);
int bpf_link_settle(struct bpf_link_primer *primer);
void bpf_link_cleanup(struct bpf_link_primer *primer);
@@ -3123,6 +3131,12 @@ static inline void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_
{
}
+static inline void bpf_tramp_link_init(struct bpf_tramp_link *link, enum bpf_link_type type,
+ const struct bpf_link_ops *ops, struct bpf_prog *prog,
+ enum bpf_attach_type attach_type, u64 cookie)
+{
+}
+
static inline int bpf_link_prime(struct bpf_link *link,
struct bpf_link_primer *primer)
{
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 05b366b821c3..10a9301615ba 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -594,8 +594,8 @@ const struct bpf_link_ops bpf_struct_ops_link_lops = {
.dealloc = bpf_struct_ops_link_dealloc,
};
-int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
- struct bpf_tramp_link *link,
+int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_nodes *tnodes,
+ struct bpf_tramp_node *node,
const struct btf_func_model *model,
void *stub_func,
void **_image, u32 *_image_off,
@@ -605,13 +605,13 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
void *image = *_image;
int size;
- tlinks[BPF_TRAMP_FENTRY].links[0] = link;
- tlinks[BPF_TRAMP_FENTRY].nr_links = 1;
+ tnodes[BPF_TRAMP_FENTRY].nodes[0] = node;
+ tnodes[BPF_TRAMP_FENTRY].nr_nodes = 1;
if (model->ret_size > 0)
flags |= BPF_TRAMP_F_RET_FENTRY_RET;
- size = arch_bpf_trampoline_size(model, flags, tlinks, stub_func);
+ size = arch_bpf_trampoline_size(model, flags, tnodes, stub_func);
if (size <= 0)
return size ? : -EFAULT;
@@ -628,7 +628,7 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
size = arch_prepare_bpf_trampoline(NULL, image + image_off,
image + image_off + size,
- model, flags, tlinks, stub_func);
+ model, flags, tnodes, stub_func);
if (size <= 0) {
if (image != *_image)
bpf_struct_ops_image_free(image);
@@ -693,7 +693,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
const struct btf_type *module_type;
const struct btf_member *member;
const struct btf_type *t = st_ops_desc->type;
- struct bpf_tramp_links *tlinks;
+ struct bpf_tramp_nodes *tnodes;
void *udata, *kdata;
int prog_fd, err;
u32 i, trampoline_start, image_off = 0;
@@ -720,8 +720,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
if (uvalue->common.state || refcount_read(&uvalue->common.refcnt))
return -EINVAL;
- tlinks = kzalloc_objs(*tlinks, BPF_TRAMP_MAX);
- if (!tlinks)
+ tnodes = kzalloc_objs(*tnodes, BPF_TRAMP_MAX);
+ if (!tnodes)
return -ENOMEM;
uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
@@ -820,8 +820,9 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
err = -ENOMEM;
goto reset_unlock;
}
- bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS,
- &bpf_struct_ops_link_lops, prog, prog->expected_attach_type);
+ bpf_tramp_link_init(link, BPF_LINK_TYPE_STRUCT_OPS,
+ &bpf_struct_ops_link_lops, prog, prog->expected_attach_type, 0);
+
*plink++ = &link->link;
ksym = kzalloc_obj(*ksym, GFP_USER);
@@ -832,7 +833,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
*pksym++ = ksym;
trampoline_start = image_off;
- err = bpf_struct_ops_prepare_trampoline(tlinks, link,
+ err = bpf_struct_ops_prepare_trampoline(tnodes, &link->node,
&st_ops->func_models[i],
*(void **)(st_ops->cfi_stubs + moff),
&image, &image_off,
@@ -910,7 +911,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
memset(uvalue, 0, map->value_size);
memset(kvalue, 0, map->value_size);
unlock:
- kfree(tlinks);
+ kfree(tnodes);
mutex_unlock(&st_map->lock);
if (!err)
bpf_struct_ops_map_add_ksyms(st_map);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 274039e36465..6db6d1e74379 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3209,6 +3209,15 @@ void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
bpf_link_init_sleepable(link, type, ops, prog, attach_type, false);
}
+void bpf_tramp_link_init(struct bpf_tramp_link *link, enum bpf_link_type type,
+ const struct bpf_link_ops *ops, struct bpf_prog *prog,
+ enum bpf_attach_type attach_type, u64 cookie)
+{
+ bpf_link_init(&link->link, type, ops, prog, attach_type);
+ link->node.link = &link->link;
+ link->node.cookie = cookie;
+}
+
static void bpf_link_free_id(int id)
{
if (!id)
@@ -3502,7 +3511,7 @@ static void bpf_tracing_link_release(struct bpf_link *link)
struct bpf_tracing_link *tr_link =
container_of(link, struct bpf_tracing_link, link.link);
- WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link.node,
tr_link->trampoline,
tr_link->tgt_prog));
@@ -3515,8 +3524,7 @@ static void bpf_tracing_link_release(struct bpf_link *link)
static void bpf_tracing_link_dealloc(struct bpf_link *link)
{
- struct bpf_tracing_link *tr_link =
- container_of(link, struct bpf_tracing_link, link.link);
+ struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link.link);
kfree(tr_link);
}
@@ -3524,8 +3532,8 @@ static void bpf_tracing_link_dealloc(struct bpf_link *link)
static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link,
struct seq_file *seq)
{
- struct bpf_tracing_link *tr_link =
- container_of(link, struct bpf_tracing_link, link.link);
+ struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link.link);
+
u32 target_btf_id, target_obj_id;
bpf_trampoline_unpack_key(tr_link->trampoline->key,
@@ -3538,17 +3546,16 @@ static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link,
link->attach_type,
target_obj_id,
target_btf_id,
- tr_link->link.cookie);
+ tr_link->link.node.cookie);
}
static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
struct bpf_link_info *info)
{
- struct bpf_tracing_link *tr_link =
- container_of(link, struct bpf_tracing_link, link.link);
+ struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link.link);
info->tracing.attach_type = link->attach_type;
- info->tracing.cookie = tr_link->link.cookie;
+ info->tracing.cookie = tr_link->link.node.cookie;
bpf_trampoline_unpack_key(tr_link->trampoline->key,
&info->tracing.target_obj_id,
&info->tracing.target_btf_id);
@@ -3635,9 +3642,9 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
fslink = kzalloc_obj(*fslink, GFP_USER);
if (fslink) {
- bpf_link_init(&fslink->fexit.link, BPF_LINK_TYPE_TRACING,
- &bpf_tracing_link_lops, prog, attach_type);
- fslink->fexit.cookie = bpf_cookie;
+ bpf_tramp_link_init(&fslink->fexit, BPF_LINK_TYPE_TRACING,
+ &bpf_tracing_link_lops, prog, attach_type,
+ bpf_cookie);
link = &fslink->link;
} else {
link = NULL;
@@ -3649,10 +3656,8 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
err = -ENOMEM;
goto out_put_prog;
}
- bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING,
- &bpf_tracing_link_lops, prog, attach_type);
-
- link->link.cookie = bpf_cookie;
+ bpf_tramp_link_init(&link->link, BPF_LINK_TYPE_TRACING,
+ &bpf_tracing_link_lops, prog, attach_type, bpf_cookie);
mutex_lock(&prog->aux->dst_mutex);
@@ -3738,7 +3743,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
if (err)
goto out_unlock;
- err = bpf_trampoline_link_prog(&link->link, tr, tgt_prog);
+ err = bpf_trampoline_link_prog(&link->link.node, tr, tgt_prog);
if (err) {
bpf_link_cleanup(&link_primer);
link = NULL;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 7c2c2e104da2..dfbc190056f2 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -482,30 +482,29 @@ static struct bpf_trampoline_ops trampoline_ops = {
.modify_fentry = modify_fentry,
};
-static struct bpf_tramp_links *
+static struct bpf_tramp_nodes *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
{
- struct bpf_tramp_link *link;
- struct bpf_tramp_links *tlinks;
- struct bpf_tramp_link **links;
+ struct bpf_tramp_node *node, **nodes;
+ struct bpf_tramp_nodes *tnodes;
int kind;
*total = 0;
- tlinks = kzalloc_objs(*tlinks, BPF_TRAMP_MAX);
- if (!tlinks)
+ tnodes = kzalloc_objs(*tnodes, BPF_TRAMP_MAX);
+ if (!tnodes)
return ERR_PTR(-ENOMEM);
for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
- tlinks[kind].nr_links = tr->progs_cnt[kind];
+ tnodes[kind].nr_nodes = tr->progs_cnt[kind];
*total += tr->progs_cnt[kind];
- links = tlinks[kind].links;
+ nodes = tnodes[kind].nodes;
- hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
- *ip_arg |= link->link.prog->call_get_func_ip;
- *links++ = link;
+ hlist_for_each_entry(node, &tr->progs_hlist[kind], tramp_hlist) {
+ *ip_arg |= node->link->prog->call_get_func_ip;
+ *nodes++ = node;
}
}
- return tlinks;
+ return tnodes;
}
static void bpf_tramp_image_free(struct bpf_tramp_image *im)
@@ -653,14 +652,14 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
struct bpf_trampoline_ops *ops, void *data)
{
struct bpf_tramp_image *im;
- struct bpf_tramp_links *tlinks;
+ struct bpf_tramp_nodes *tnodes;
u32 orig_flags = tr->flags;
bool ip_arg = false;
int err, total, size;
- tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg);
- if (IS_ERR(tlinks))
- return PTR_ERR(tlinks);
+ tnodes = bpf_trampoline_get_progs(tr, &total, &ip_arg);
+ if (IS_ERR(tnodes))
+ return PTR_ERR(tnodes);
if (total == 0) {
err = ops->unregister_fentry(tr, orig_flags, tr->cur_image->image, data);
@@ -672,8 +671,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
/* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);
- if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
- tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
+ if (tnodes[BPF_TRAMP_FEXIT].nr_nodes ||
+ tnodes[BPF_TRAMP_MODIFY_RETURN].nr_nodes) {
/* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME
* should not be set together.
*/
@@ -704,7 +703,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
#endif
size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
- tlinks, tr->func.addr);
+ tnodes, tr->func.addr);
if (size < 0) {
err = size;
goto out;
@@ -722,7 +721,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
}
err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
- &tr->func.model, tr->flags, tlinks,
+ &tr->func.model, tr->flags, tnodes,
tr->func.addr);
if (err < 0)
goto out_free;
@@ -760,7 +759,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
/* If any error happens, restore previous flags */
if (err)
tr->flags = orig_flags;
- kfree(tlinks);
+ kfree(tnodes);
return err;
out_free:
@@ -810,7 +809,7 @@ static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog)
return 0;
}
-static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+static int __bpf_trampoline_link_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog,
struct bpf_trampoline_ops *ops,
@@ -818,12 +817,12 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
{
struct bpf_fsession_link *fslink = NULL;
enum bpf_tramp_prog_type kind;
- struct bpf_tramp_link *link_exiting;
+ struct bpf_tramp_node *node_existing;
struct hlist_head *prog_list;
int err = 0;
int cnt = 0, i;
- kind = bpf_attach_type_to_tramp(link->link.prog);
+ kind = bpf_attach_type_to_tramp(node->link->prog);
if (tr->extension_prog)
/* cannot attach fentry/fexit if extension prog is attached.
* cannot overwrite extension prog either.
@@ -840,10 +839,10 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
err = bpf_freplace_check_tgt_prog(tgt_prog);
if (err)
return err;
- tr->extension_prog = link->link.prog;
+ tr->extension_prog = node->link->prog;
return bpf_arch_text_poke(tr->func.addr, BPF_MOD_NOP,
BPF_MOD_JUMP, NULL,
- link->link.prog->bpf_func);
+ node->link->prog->bpf_func);
}
if (kind == BPF_TRAMP_FSESSION) {
prog_list = &tr->progs_hlist[BPF_TRAMP_FENTRY];
@@ -853,31 +852,31 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
}
if (cnt >= BPF_MAX_TRAMP_LINKS)
return -E2BIG;
- if (!hlist_unhashed(&link->tramp_hlist))
+ if (!hlist_unhashed(&node->tramp_hlist))
/* prog already linked */
return -EBUSY;
- hlist_for_each_entry(link_exiting, prog_list, tramp_hlist) {
- if (link_exiting->link.prog != link->link.prog)
+ hlist_for_each_entry(node_existing, prog_list, tramp_hlist) {
+ if (node_existing->link->prog != node->link->prog)
continue;
/* prog already linked */
return -EBUSY;
}
- hlist_add_head(&link->tramp_hlist, prog_list);
+ hlist_add_head(&node->tramp_hlist, prog_list);
if (kind == BPF_TRAMP_FSESSION) {
tr->progs_cnt[BPF_TRAMP_FENTRY]++;
- fslink = container_of(link, struct bpf_fsession_link, link.link);
- hlist_add_head(&fslink->fexit.tramp_hlist, &tr->progs_hlist[BPF_TRAMP_FEXIT]);
+ fslink = container_of(node, struct bpf_fsession_link, link.link.node);
+ hlist_add_head(&fslink->fexit.node.tramp_hlist, &tr->progs_hlist[BPF_TRAMP_FEXIT]);
tr->progs_cnt[BPF_TRAMP_FEXIT]++;
} else {
tr->progs_cnt[kind]++;
}
err = bpf_trampoline_update(tr, true /* lock_direct_mutex */, ops, data);
if (err) {
- hlist_del_init(&link->tramp_hlist);
+ hlist_del_init(&node->tramp_hlist);
if (kind == BPF_TRAMP_FSESSION) {
tr->progs_cnt[BPF_TRAMP_FENTRY]--;
- hlist_del_init(&fslink->fexit.tramp_hlist);
+ hlist_del_init(&fslink->fexit.node.tramp_hlist);
tr->progs_cnt[BPF_TRAMP_FEXIT]--;
} else {
tr->progs_cnt[kind]--;
@@ -886,19 +885,19 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
return err;
}
-int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
+int bpf_trampoline_link_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog)
{
int err;
trampoline_lock(tr);
- err = __bpf_trampoline_link_prog(link, tr, tgt_prog, &trampoline_ops, NULL);
+ err = __bpf_trampoline_link_prog(node, tr, tgt_prog, &trampoline_ops, NULL);
trampoline_unlock(tr);
return err;
}
-static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
+static int __bpf_trampoline_unlink_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog,
struct bpf_trampoline_ops *ops,
@@ -907,7 +906,7 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
enum bpf_tramp_prog_type kind;
int err;
- kind = bpf_attach_type_to_tramp(link->link.prog);
+ kind = bpf_attach_type_to_tramp(node->link->prog);
if (kind == BPF_TRAMP_REPLACE) {
WARN_ON_ONCE(!tr->extension_prog);
err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
@@ -919,26 +918,26 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
return err;
} else if (kind == BPF_TRAMP_FSESSION) {
struct bpf_fsession_link *fslink =
- container_of(link, struct bpf_fsession_link, link.link);
+ container_of(node, struct bpf_fsession_link, link.link.node);
- hlist_del_init(&fslink->fexit.tramp_hlist);
+ hlist_del_init(&fslink->fexit.node.tramp_hlist);
tr->progs_cnt[BPF_TRAMP_FEXIT]--;
kind = BPF_TRAMP_FENTRY;
}
- hlist_del_init(&link->tramp_hlist);
+ hlist_del_init(&node->tramp_hlist);
tr->progs_cnt[kind]--;
return bpf_trampoline_update(tr, true /* lock_direct_mutex */, ops, data);
}
/* bpf_trampoline_unlink_prog() should never fail. */
-int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
+int bpf_trampoline_unlink_prog(struct bpf_tramp_node *node,
struct bpf_trampoline *tr,
struct bpf_prog *tgt_prog)
{
int err;
trampoline_lock(tr);
- err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog, &trampoline_ops, NULL);
+ err = __bpf_trampoline_unlink_prog(node, tr, tgt_prog, &trampoline_ops, NULL);
trampoline_unlock(tr);
return err;
}
@@ -953,7 +952,7 @@ static void bpf_shim_tramp_link_release(struct bpf_link *link)
if (!shim_link->trampoline)
return;
- WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline, NULL));
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link.node, shim_link->trampoline, NULL));
bpf_trampoline_put(shim_link->trampoline);
}
@@ -999,8 +998,8 @@ static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog
p->type = BPF_PROG_TYPE_LSM;
p->expected_attach_type = BPF_LSM_MAC;
bpf_prog_inc(p);
- bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC,
- &bpf_shim_tramp_link_lops, p, attach_type);
+ bpf_tramp_link_init(&shim_link->link, BPF_LINK_TYPE_UNSPEC,
+ &bpf_shim_tramp_link_lops, p, attach_type, 0);
bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);
return shim_link;
@@ -1009,15 +1008,15 @@ static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog
static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
bpf_func_t bpf_func)
{
- struct bpf_tramp_link *link;
+ struct bpf_tramp_node *node;
int kind;
for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
- hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
- struct bpf_prog *p = link->link.prog;
+ hlist_for_each_entry(node, &tr->progs_hlist[kind], tramp_hlist) {
+ struct bpf_prog *p = node->link->prog;
if (p->bpf_func == bpf_func)
- return container_of(link, struct bpf_shim_tramp_link, link);
+ return container_of(node, struct bpf_shim_tramp_link, link.node);
}
}
@@ -1070,7 +1069,7 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
goto err;
}
- err = __bpf_trampoline_link_prog(&shim_link->link, tr, NULL, &trampoline_ops, NULL);
+ err = __bpf_trampoline_link_prog(&shim_link->link.node, tr, NULL, &trampoline_ops, NULL);
if (err)
goto err;
@@ -1385,7 +1384,7 @@ bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
int __weak
arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks,
+ struct bpf_tramp_nodes *tnodes,
void *func_addr)
{
return -ENOTSUPP;
@@ -1419,7 +1418,7 @@ int __weak arch_protect_bpf_trampoline(void *image, unsigned int size)
}
int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks, void *func_addr)
+ struct bpf_tramp_nodes *tnodes, void *func_addr)
{
return -ENOTSUPP;
}
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index ae5a54c350b9..191a6b3ee254 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -132,7 +132,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops;
const struct btf_type *func_proto;
struct bpf_dummy_ops_test_args *args;
- struct bpf_tramp_links *tlinks = NULL;
+ struct bpf_tramp_nodes *tnodes = NULL;
struct bpf_tramp_link *link = NULL;
void *image = NULL;
unsigned int op_idx;
@@ -158,8 +158,8 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
if (err)
goto out;
- tlinks = kzalloc_objs(*tlinks, BPF_TRAMP_MAX);
- if (!tlinks) {
+ tnodes = kzalloc_objs(*tnodes, BPF_TRAMP_MAX);
+ if (!tnodes) {
err = -ENOMEM;
goto out;
}
@@ -171,11 +171,11 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
}
/* prog doesn't take the ownership of the reference from caller */
bpf_prog_inc(prog);
- bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_link_lops, prog,
- prog->expected_attach_type);
+ bpf_tramp_link_init(link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_link_lops,
+ prog, prog->expected_attach_type, 0);
op_idx = prog->expected_attach_type;
- err = bpf_struct_ops_prepare_trampoline(tlinks, link,
+ err = bpf_struct_ops_prepare_trampoline(tnodes, &link->node,
&st_ops->func_models[op_idx],
&dummy_ops_test_ret_function,
&image, &image_off,
@@ -198,7 +198,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
bpf_struct_ops_image_free(image);
if (link)
bpf_link_put(&link->link);
- kfree(tlinks);
+ kfree(tnodes);
return err;
}
--
2.53.0
^ permalink raw reply related
* [PATCHv2 bpf-next 03/23] bpf: Add struct bpf_trampoline_ops object
From: Jiri Olsa @ 2026-03-04 22:21 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260304222141.497203-1-jolsa@kernel.org>
In the following changes we will need to override the ftrace direct
attachment behaviour. In order to do that we are adding a struct
bpf_trampoline_ops object that defines callbacks for ftrace direct
attachment:
register_fentry
unregister_fentry
modify_fentry
The new struct bpf_trampoline_ops object is passed as an argument to the
__bpf_trampoline_link_prog/__bpf_trampoline_unlink_prog functions.
At the moment the default trampoline_ops is set to the current ftrace
direct attachment functions, so there's no functional change for the
current code.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
kernel/bpf/trampoline.c | 59 ++++++++++++++++++++++++++++-------------
1 file changed, 41 insertions(+), 18 deletions(-)
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 28a5a96bccec..7c2c2e104da2 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -58,8 +58,18 @@ static void trampoline_unlock(struct bpf_trampoline *tr)
mutex_unlock(select_trampoline_lock(tr));
}
+struct bpf_trampoline_ops {
+ int (*register_fentry)(struct bpf_trampoline *tr, void *new_addr, void *data);
+ int (*unregister_fentry)(struct bpf_trampoline *tr, u32 orig_flags, void *old_addr,
+ void *data);
+ int (*modify_fentry)(struct bpf_trampoline *tr, u32 orig_flags, void *old_addr,
+ void *new_addr, bool lock_direct_mutex, void *data);
+};
+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
-static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
+static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex,
+ struct bpf_trampoline_ops *ops, void *data);
+static struct bpf_trampoline_ops trampoline_ops;
#ifdef CONFIG_HAVE_SINGLE_FTRACE_DIRECT_OPS
static struct bpf_trampoline *direct_ops_ip_lookup(struct ftrace_ops *ops, unsigned long ip)
@@ -144,13 +154,15 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
!(tr->flags & BPF_TRAMP_F_ORIG_STACK))
- ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
+ ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */,
+ &trampoline_ops, NULL);
break;
case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;
if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
- ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
+ ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */,
+ &trampoline_ops, NULL);
break;
default:
ret = -EINVAL;
@@ -414,7 +426,7 @@ static int bpf_trampoline_update_fentry(struct bpf_trampoline *tr, u32 orig_flag
}
static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags,
- void *old_addr)
+ void *old_addr, void *data)
{
int ret;
@@ -428,7 +440,7 @@ static int unregister_fentry(struct bpf_trampoline *tr, u32 orig_flags,
static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags,
void *old_addr, void *new_addr,
- bool lock_direct_mutex)
+ bool lock_direct_mutex, void *data __maybe_unused)
{
int ret;
@@ -442,7 +454,7 @@ static int modify_fentry(struct bpf_trampoline *tr, u32 orig_flags,
}
/* first time registering */
-static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
+static int register_fentry(struct bpf_trampoline *tr, void *new_addr, void *data __maybe_unused)
{
void *ip = tr->func.addr;
unsigned long faddr;
@@ -464,6 +476,12 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
return ret;
}
+static struct bpf_trampoline_ops trampoline_ops = {
+ .register_fentry = register_fentry,
+ .unregister_fentry = unregister_fentry,
+ .modify_fentry = modify_fentry,
+};
+
static struct bpf_tramp_links *
bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg)
{
@@ -631,7 +649,8 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
return ERR_PTR(err);
}
-static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
+static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex,
+ struct bpf_trampoline_ops *ops, void *data)
{
struct bpf_tramp_image *im;
struct bpf_tramp_links *tlinks;
@@ -644,7 +663,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
return PTR_ERR(tlinks);
if (total == 0) {
- err = unregister_fentry(tr, orig_flags, tr->cur_image->image);
+ err = ops->unregister_fentry(tr, orig_flags, tr->cur_image->image, data);
bpf_tramp_image_put(tr->cur_image);
tr->cur_image = NULL;
goto out;
@@ -715,11 +734,11 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
WARN_ON(tr->cur_image && total == 0);
if (tr->cur_image)
/* progs already running at this address */
- err = modify_fentry(tr, orig_flags, tr->cur_image->image,
- im->image, lock_direct_mutex);
+ err = ops->modify_fentry(tr, orig_flags, tr->cur_image->image,
+ im->image, lock_direct_mutex, data);
else
/* first time registering */
- err = register_fentry(tr, im->image);
+ err = ops->register_fentry(tr, im->image, data);
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
if (err == -EAGAIN) {
@@ -793,7 +812,9 @@ static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog)
static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
struct bpf_trampoline *tr,
- struct bpf_prog *tgt_prog)
+ struct bpf_prog *tgt_prog,
+ struct bpf_trampoline_ops *ops,
+ void *data)
{
struct bpf_fsession_link *fslink = NULL;
enum bpf_tramp_prog_type kind;
@@ -851,7 +872,7 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link,
} else {
tr->progs_cnt[kind]++;
}
- err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
+ err = bpf_trampoline_update(tr, true /* lock_direct_mutex */, ops, data);
if (err) {
hlist_del_init(&link->tramp_hlist);
if (kind == BPF_TRAMP_FSESSION) {
@@ -872,14 +893,16 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
int err;
trampoline_lock(tr);
- err = __bpf_trampoline_link_prog(link, tr, tgt_prog);
+ err = __bpf_trampoline_link_prog(link, tr, tgt_prog, &trampoline_ops, NULL);
trampoline_unlock(tr);
return err;
}
static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
struct bpf_trampoline *tr,
- struct bpf_prog *tgt_prog)
+ struct bpf_prog *tgt_prog,
+ struct bpf_trampoline_ops *ops,
+ void *data)
{
enum bpf_tramp_prog_type kind;
int err;
@@ -904,7 +927,7 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
}
hlist_del_init(&link->tramp_hlist);
tr->progs_cnt[kind]--;
- return bpf_trampoline_update(tr, true /* lock_direct_mutex */);
+ return bpf_trampoline_update(tr, true /* lock_direct_mutex */, ops, data);
}
/* bpf_trampoline_unlink_prog() should never fail. */
@@ -915,7 +938,7 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
int err;
trampoline_lock(tr);
- err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog);
+ err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog, &trampoline_ops, NULL);
trampoline_unlock(tr);
return err;
}
@@ -1047,7 +1070,7 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
goto err;
}
- err = __bpf_trampoline_link_prog(&shim_link->link, tr, NULL);
+ err = __bpf_trampoline_link_prog(&shim_link->link, tr, NULL, &trampoline_ops, NULL);
if (err)
goto err;
--
2.53.0
^ permalink raw reply related
* [PATCHv2 bpf-next 02/23] bpf: Use mutex lock pool for bpf trampolines
From: Jiri Olsa @ 2026-03-04 22:21 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260304222141.497203-1-jolsa@kernel.org>
Adding mutex lock pool that replaces bpf trampolines mutex.
For tracing_multi link coming in following changes we need to lock all
the involved trampolines during the attachment. This could mean thousands
of mutex locks, which is not convenient.
As suggested by Andrii we can replace bpf trampolines mutex with mutex
pool, where each trampoline is hashed to one of the locks from the pool.
It's better to lock all the pool mutexes (32 at the moment) than
thousands of them.
A task is allowed to hold at most 48 (MAX_LOCK_DEPTH) locks simultaneously,
so we need to keep 32 mutexes (5 bits) in the pool, so that lockdep won't
scream when we lock them all in the following changes.
Removing the mutex_is_locked in bpf_trampoline_put, because we removed
the mutex from bpf_trampoline.
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
include/linux/bpf.h | 2 --
kernel/bpf/trampoline.c | 75 ++++++++++++++++++++++++++++-------------
2 files changed, 52 insertions(+), 25 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 05b34a6355b0..1d900f49aff5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1335,8 +1335,6 @@ struct bpf_trampoline {
/* hlist for trampoline_ip_table */
struct hlist_node hlist_ip;
struct ftrace_ops *fops;
- /* serializes access to fields of this trampoline */
- struct mutex mutex;
refcount_t refcnt;
u32 flags;
u64 key;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 84db9e658e52..28a5a96bccec 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -30,6 +30,34 @@ static struct hlist_head trampoline_ip_table[TRAMPOLINE_TABLE_SIZE];
/* serializes access to trampoline tables */
static DEFINE_MUTEX(trampoline_mutex);
+/*
+ * We keep 32 trampoline locks (5 bits) in the pool, because a task
+ * is allowed to hold at most 48 (MAX_LOCK_DEPTH) locks
+ * simultaneously.
+ */
+#define TRAMPOLINE_LOCKS_BITS 5
+#define TRAMPOLINE_LOCKS_TABLE_SIZE (1 << TRAMPOLINE_LOCKS_BITS)
+
+static struct {
+ struct mutex mutex;
+ struct lock_class_key key;
+} trampoline_locks[TRAMPOLINE_LOCKS_TABLE_SIZE];
+
+static struct mutex *select_trampoline_lock(struct bpf_trampoline *tr)
+{
+ return &trampoline_locks[hash_64((u64)(uintptr_t) tr, TRAMPOLINE_LOCKS_BITS)].mutex;
+}
+
+static void trampoline_lock(struct bpf_trampoline *tr)
+{
+ mutex_lock(select_trampoline_lock(tr));
+}
+
+static void trampoline_unlock(struct bpf_trampoline *tr)
+{
+ mutex_unlock(select_trampoline_lock(tr));
+}
+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
@@ -69,9 +97,9 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
/* This is called inside register_ftrace_direct_multi(), so
- * tr->mutex is already locked.
+ * trampoline's mutex is already locked.
*/
- lockdep_assert_held_once(&tr->mutex);
+ lockdep_assert_held_once(select_trampoline_lock(tr));
/* Instead of updating the trampoline here, we propagate
* -EAGAIN to register_ftrace_direct(). Then we can
@@ -91,7 +119,7 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
}
/* The normal locking order is
- * tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
+ * select_trampoline_lock(tr) => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
*
* The following two commands are called from
*
@@ -99,12 +127,12 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
* cleanup_direct_functions_after_ipmodify
*
* In both cases, direct_mutex is already locked. Use
- * mutex_trylock(&tr->mutex) to avoid deadlock in race condition
+ * mutex_trylock(select_trampoline_lock(tr)) to avoid deadlock in race condition
* (something else is making changes to this same trampoline).
*/
- if (!mutex_trylock(&tr->mutex)) {
- /* sleep 1 ms to make sure whatever holding tr->mutex makes
- * some progress.
+ if (!mutex_trylock(select_trampoline_lock(tr))) {
+ /* sleep 1 ms to make sure whatever holding select_trampoline_lock(tr)
+ * makes some progress.
*/
msleep(1);
return -EAGAIN;
@@ -129,7 +157,7 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
break;
}
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
return ret;
}
#endif
@@ -359,7 +387,6 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
head = &trampoline_ip_table[hash_64(tr->ip, TRAMPOLINE_HASH_BITS)];
hlist_add_head(&tr->hlist_ip, head);
refcount_set(&tr->refcnt, 1);
- mutex_init(&tr->mutex);
for (i = 0; i < BPF_TRAMP_MAX; i++)
INIT_HLIST_HEAD(&tr->progs_hlist[i]);
out:
@@ -844,9 +871,9 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
{
int err;
- mutex_lock(&tr->mutex);
+ trampoline_lock(tr);
err = __bpf_trampoline_link_prog(link, tr, tgt_prog);
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
return err;
}
@@ -887,9 +914,9 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
{
int err;
- mutex_lock(&tr->mutex);
+ trampoline_lock(tr);
err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog);
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
return err;
}
@@ -999,14 +1026,15 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
if (!tr)
return -ENOMEM;
- mutex_lock(&tr->mutex);
+ trampoline_lock(tr);
shim_link = cgroup_shim_find(tr, bpf_func);
if (shim_link) {
/* Reusing existing shim attached by the other program. */
bpf_link_inc(&shim_link->link.link);
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
+
bpf_trampoline_put(tr); /* bpf_trampoline_get above */
return 0;
}
@@ -1026,16 +1054,16 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
shim_link->trampoline = tr;
/* note, we're still holding tr refcnt from above */
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
return 0;
err:
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
if (shim_link)
bpf_link_put(&shim_link->link.link);
- /* have to release tr while _not_ holding its mutex */
+ /* have to release tr while _not_ holding pool mutex for trampoline */
bpf_trampoline_put(tr); /* bpf_trampoline_get above */
return err;
@@ -1056,9 +1084,9 @@ void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
if (WARN_ON_ONCE(!tr))
return;
- mutex_lock(&tr->mutex);
+ trampoline_lock(tr);
shim_link = cgroup_shim_find(tr, bpf_func);
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
if (shim_link)
bpf_link_put(&shim_link->link.link);
@@ -1076,14 +1104,14 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key,
if (!tr)
return NULL;
- mutex_lock(&tr->mutex);
+ trampoline_lock(tr);
if (tr->func.addr)
goto out;
memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
tr->func.addr = (void *)tgt_info->tgt_addr;
out:
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
return tr;
}
@@ -1096,7 +1124,6 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
mutex_lock(&trampoline_mutex);
if (!refcount_dec_and_test(&tr->refcnt))
goto out;
- WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
for (i = 0; i < BPF_TRAMP_MAX; i++)
if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
@@ -1382,6 +1409,8 @@ static int __init init_trampolines(void)
INIT_HLIST_HEAD(&trampoline_key_table[i]);
for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
INIT_HLIST_HEAD(&trampoline_ip_table[i]);
+ for (i = 0; i < TRAMPOLINE_LOCKS_TABLE_SIZE; i++)
+ __mutex_init(&trampoline_locks[i].mutex, "trampoline_lock", &trampoline_locks[i].key);
return 0;
}
late_initcall(init_trampolines);
--
2.53.0
^ permalink raw reply related
* [PATCHv2 bpf-next 01/23] ftrace: Add ftrace_hash_count function
From: Jiri Olsa @ 2026-03-04 22:21 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
In-Reply-To: <20260304222141.497203-1-jolsa@kernel.org>
Adding external ftrace_hash_count function so we could get hash
count outside of ftrace object.
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
include/linux/ftrace.h | 1 +
kernel/trace/ftrace.c | 7 ++++++-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index c242fe49af4c..401f8dfd05d3 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -415,6 +415,7 @@ struct ftrace_hash *alloc_ftrace_hash(int size_bits);
void free_ftrace_hash(struct ftrace_hash *hash);
struct ftrace_func_entry *add_ftrace_hash_entry_direct(struct ftrace_hash *hash,
unsigned long ip, unsigned long direct);
+unsigned long ftrace_hash_count(struct ftrace_hash *hash);
/* The hash used to know what functions callbacks trace */
struct ftrace_ops_hash {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8baf61c9be6d..ac06bf17caaf 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6288,11 +6288,16 @@ int modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
}
EXPORT_SYMBOL_GPL(modify_ftrace_direct);
-static unsigned long hash_count(struct ftrace_hash *hash)
+static inline unsigned long hash_count(struct ftrace_hash *hash)
{
return hash ? hash->count : 0;
}
+unsigned long ftrace_hash_count(struct ftrace_hash *hash)
+{
+ return hash_count(hash);
+}
+
/**
* hash_add - adds two struct ftrace_hash and returns the result
* @a: struct ftrace_hash object
--
2.53.0
^ permalink raw reply related
* [PATCHv2 bpf-next 00/23] bpf: tracing_multi link
From: Jiri Olsa @ 2026-03-04 22:21 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
Cc: bpf, linux-trace-kernel, Martin KaFai Lau, Eduard Zingerman,
Song Liu, Yonghong Song, Menglong Dong, Steven Rostedt
hi,
adding tracing_multi link support that allows fast attachment
of tracing program to many functions.
RFC version: https://lore.kernel.org/bpf/20260203093819.2105105-1-jolsa@kernel.org/
v1: https://lore.kernel.org/bpf/20260220100649.628307-1-jolsa@kernel.org/
v2 changes:
- allocate data.unreg in bpf_trampoline_multi_attach for rollback path [ci]
and fixed link count setup in rollback path [ci]
- several small assorted fixes [ci]
- added loongarch and powerpc changes for struct bpf_tramp_node change
- added support to attach functions from modules
- added tests for sleepable programs
- added rollback tests
v1 changes:
- added ftrace_hash_count as wrapper for hash_count [Steven]
- added trampoline mutex pool [Andrii]
- reworked 'struct bpf_tramp_node' separation [Andrii]
- the 'struct bpf_tramp_node' now holds pointer to bpf_link,
which is similar to what we do for uprobe_multi;
I understand it's not a fundamental change compared to previous
version which used bpf_prog pointer instead, but I don't see better
way of doing this.. I'm happy to discuss this further if there's
better idea
- reworked 'struct bpf_fsession_link' based on bpf_tramp_node
- made btf__find_by_glob_kind function internal helper [Andrii]
- many small assorted fixes [Andrii,CI]
- added session support [Leon Hwang]
- added cookies support
- added more tests
Note I plan to send linkinfo support separately, the patchset is big enough.
thanks,
jirka
---
Jiri Olsa (23):
ftrace: Add ftrace_hash_count function
bpf: Use mutex lock pool for bpf trampolines
bpf: Add struct bpf_trampoline_ops object
bpf: Add struct bpf_tramp_node object
bpf: Factor fsession link to use struct bpf_tramp_node
bpf: Add multi tracing attach types
bpf: Move sleepable verification code to btf_id_allow_sleepable
bpf: Add bpf_trampoline_multi_attach/detach functions
bpf: Add support for tracing multi link
bpf: Add support for tracing_multi link cookies
bpf: Add support for tracing_multi link session
bpf: Add support for tracing_multi link fdinfo
libbpf: Add bpf_object_cleanup_btf function
libbpf: Add bpf_link_create support for tracing_multi link
libbpf: Add support to create tracing multi link
selftests/bpf: Add tracing multi skel/pattern/ids attach tests
selftests/bpf: Add tracing multi skel/pattern/ids module attach tests
selftests/bpf: Add tracing multi intersect tests
selftests/bpf: Add tracing multi cookies test
selftests/bpf: Add tracing multi session test
selftests/bpf: Add tracing multi attach fails test
selftests/bpf: Add tracing multi attach benchmark test
selftests/bpf: Add tracing multi rollback tests
arch/arm64/net/bpf_jit_comp.c | 58 +++---
arch/loongarch/net/bpf_jit.c | 44 ++---
arch/powerpc/net/bpf_jit_comp.c | 46 ++---
arch/riscv/net/bpf_jit_comp64.c | 52 ++---
arch/s390/net/bpf_jit_comp.c | 44 ++---
arch/x86/net/bpf_jit_comp.c | 54 +++---
include/linux/bpf.h | 91 ++++++---
include/linux/bpf_types.h | 1 +
include/linux/bpf_verifier.h | 3 +
include/linux/btf_ids.h | 1 +
include/linux/ftrace.h | 1 +
include/linux/trace_events.h | 6 +
include/uapi/linux/bpf.h | 9 +
kernel/bpf/bpf_struct_ops.c | 27 +--
kernel/bpf/btf.c | 4 +
kernel/bpf/syscall.c | 88 ++++++---
kernel/bpf/trampoline.c | 511 +++++++++++++++++++++++++++++++++++++++---------
kernel/bpf/verifier.c | 116 +++++++----
kernel/trace/bpf_trace.c | 147 +++++++++++++-
kernel/trace/ftrace.c | 7 +-
net/bpf/bpf_dummy_struct_ops.c | 14 +-
net/bpf/test_run.c | 3 +
tools/include/uapi/linux/bpf.h | 10 +
tools/lib/bpf/bpf.c | 9 +
tools/lib/bpf/bpf.h | 5 +
tools/lib/bpf/libbpf.c | 329 ++++++++++++++++++++++++++++++-
tools/lib/bpf/libbpf.h | 15 ++
tools/lib/bpf/libbpf.map | 1 +
tools/testing/selftests/bpf/Makefile | 9 +-
tools/testing/selftests/bpf/prog_tests/tracing_multi.c | 896 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
tools/testing/selftests/bpf/progs/tracing_multi_attach.c | 40 ++++
tools/testing/selftests/bpf/progs/tracing_multi_attach_module.c | 26 +++
tools/testing/selftests/bpf/progs/tracing_multi_bench.c | 13 ++
tools/testing/selftests/bpf/progs/tracing_multi_check.c | 215 ++++++++++++++++++++
tools/testing/selftests/bpf/progs/tracing_multi_fail.c | 19 ++
tools/testing/selftests/bpf/progs/tracing_multi_intersect_attach.c | 42 ++++
tools/testing/selftests/bpf/progs/tracing_multi_rollback.c | 25 +++
tools/testing/selftests/bpf/progs/tracing_multi_session_attach.c | 27 +++
38 files changed, 2642 insertions(+), 366 deletions(-)
create mode 100644 tools/testing/selftests/bpf/prog_tests/tracing_multi.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_attach.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_attach_module.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_bench.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_check.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_fail.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_intersect_attach.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_rollback.c
create mode 100644 tools/testing/selftests/bpf/progs/tracing_multi_session_attach.c
^ permalink raw reply
* Re: [PATCH RFC 2/3] locking/percpu-rwsem: Extract __percpu_up_read_slowpath()
From: Peter Zijlstra @ 2026-03-04 22:02 UTC (permalink / raw)
To: Dmitry Ilvokhin
Cc: Dennis Zhou, Tejun Heo, Christoph Lameter, Steven Rostedt,
Masami Hiramatsu, Mathieu Desnoyers, Ingo Molnar, Will Deacon,
Boqun Feng, Waiman Long, linux-mm, linux-kernel,
linux-trace-kernel, kernel-team
In-Reply-To: <6b1f1521ca186d5c402a65619d8f30fe83b93bf6.1772642407.git.d@ilvokhin.com>
On Wed, Mar 04, 2026 at 04:56:16PM +0000, Dmitry Ilvokhin wrote:
> Move the percpu_up_read() slowpath out of the inline function into a new
> __percpu_up_read_slowpath() to avoid binary size increase from adding a
> tracepoint to an inlined function.
>
> Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
> ---
> include/linux/percpu-rwsem.h | 15 +++------------
> kernel/locking/percpu-rwsem.c | 18 ++++++++++++++++++
> 2 files changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
> index c8cb010d655e..89506895365c 100644
> --- a/include/linux/percpu-rwsem.h
> +++ b/include/linux/percpu-rwsem.h
> @@ -107,6 +107,8 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
> return ret;
> }
>
> +void __percpu_up_read_slowpath(struct percpu_rw_semaphore *sem);
> +
extern for consistency with all the other declarations in this header.
s/_slowpath//, the corresponding down function also doesn't have
_slowpath on.
> static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
> {
> rwsem_release(&sem->dep_map, _RET_IP_);
^ permalink raw reply
* Re: [RFC PATCH 1/2] locking: add mutex_lock_nospin()
From: David Laight @ 2026-03-04 21:44 UTC (permalink / raw)
To: Steven Rostedt
Cc: Peter Zijlstra, Yafang Shao, mingo, will, boqun, longman,
mhiramat, mark.rutland, mathieu.desnoyers, linux-kernel,
linux-trace-kernel, bpf
In-Reply-To: <20260304155742.7b4de2d1@gandalf.local.home>
On Wed, 4 Mar 2026 15:57:42 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:
> On Wed, 4 Mar 2026 09:54:15 +0000
> David Laight <david.laight.linux@gmail.com> wrote:
>
> > That might still be an issue if a high priority process is spinning.
> > But a %sys spike doesn't imply a latency spike.
> >
> > Is this using the osq_lock.c code?
> > That will have problems on overprovisioned VMs, it tries to find out
> > whether the hypervisor has switched out - but ISTR that is flawed.
> >
> > In reality a spin lock shouldn't be held for long enough to cause
> > any kind of latency issue.
> > So something in the code that reads the list of filter functions
> > needs to be done differently so that the lock isn't held for as long.
>
> It's not a spinlock, it's an adaptive mutex which spins while the owner of
> the mutex is also still running on the CPU. If the spinner CPU triggers a
> NEED_RESCHED or the owner goes to sleep, the spinner stops spinning and
> goes to sleep too.
I think half my brain knew that - otherwise I wouldn't have mentioned
the osq_lock.c code.
That all reminded me I've a patch that optimises that code a bit.
But I do remember thinking it ought to have an 'I've been spinning long
enough, time to sleep' path.
David
>
> Honestly, this still looks like a non-issue or a corner case that I don't
> think requires these changes.
>
> This looks like one of those "Patient: Doctor it hurts me when I do this.
> Doctor: Then don't do that." cases.
>
> Why is a production system having multiple users cat
> available_filter_functions to begin with?
>
> -- Steve
^ permalink raw reply
* Re: [PATCH v3 08/12] zonefs: widen trace event i_ino fields to u64
From: Damien Le Moal @ 2026-03-04 21:41 UTC (permalink / raw)
To: Jeff Layton, Alexander Viro, Christian Brauner, Jan Kara,
Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Dan Williams,
Eric Biggers, Theodore Y. Ts'o, Muchun Song, Oscar Salvador,
David Hildenbrand, David Howells, Paulo Alcantara, Andreas Dilger,
Jan Kara, Jaegeuk Kim, Chao Yu, Trond Myklebust, Anna Schumaker,
Chuck Lever, NeilBrown, Olga Kornievskaia, Dai Ngo, Tom Talpey,
Steve French, Ronnie Sahlberg, Shyam Prasad N, Bharath SM,
Alexander Aring, Ryusuke Konishi, Viacheslav Dubeyko,
Eric Van Hensbergen, Latchesar Ionkov, Dominique Martinet,
Christian Schoenebeck, David Sterba, Marc Dionne, Ian Kent,
Luis de Bethencourt, Salah Triki, Tigran A. Aivazian,
Ilya Dryomov, Alex Markuze, Jan Harkes, coda, Nicolas Pitre,
Tyler Hicks, Amir Goldstein, Christoph Hellwig,
John Paul Adrian Glaubitz, Yangtao Li, Mikulas Patocka,
David Woodhouse, Richard Weinberger, Dave Kleikamp,
Konstantin Komarov, Mark Fasheh, Joel Becker, Joseph Qi,
Mike Marshall, Martin Brandenburg, Miklos Szeredi, Anders Larsen,
Zhihao Cheng, Naohiro Aota, Johannes Thumshirn, John Johansen,
Paul Moore, James Morris, Serge E. Hallyn, Mimi Zohar,
Roberto Sassu, Dmitry Kasatkin, Eric Snowberg, Fan Wu,
Stephen Smalley, Ondrej Mosnacek, Casey Schaufler, Alex Deucher,
Christian König, David Airlie, Simona Vetter, Sumit Semwal,
Eric Dumazet, Kuniyuki Iwashima, Paolo Abeni, Willem de Bruijn,
David S. Miller, Jakub Kicinski, Simon Horman, Oleg Nesterov,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, James Clark, Darrick J. Wong,
Martin Schiller, Eric Paris, Joerg Reuter, Marcel Holtmann,
Johan Hedberg, Luiz Augusto von Dentz, Oliver Hartkopp,
Marc Kleine-Budde, David Ahern, Neal Cardwell, Steffen Klassert,
Herbert Xu, Remi Denis-Courmont, Marcelo Ricardo Leitner,
Xin Long, Magnus Karlsson, Maciej Fijalkowski, Stanislav Fomichev,
Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
John Fastabend
Cc: linux-fsdevel, linux-kernel, linux-trace-kernel, nvdimm, fsverity,
linux-mm, netfs, linux-ext4, linux-f2fs-devel, linux-nfs,
linux-cifs, samba-technical, linux-nilfs, v9fs, linux-afs, autofs,
ceph-devel, codalist, ecryptfs, linux-mtd, jfs-discussion, ntfs3,
ocfs2-devel, devel, linux-unionfs, apparmor,
linux-security-module, linux-integrity, selinux, amd-gfx,
dri-devel, linux-media, linaro-mm-sig, netdev, linux-perf-users,
linux-fscrypt, linux-xfs, linux-hams, linux-x25, audit,
linux-bluetooth, linux-can, linux-sctp, bpf
In-Reply-To: <20260304-iino-u64-v3-8-2257ad83d372@kernel.org>
On 3/5/26 00:32, Jeff Layton wrote:
> Update zonefs trace event definitions to use u64 instead of
> ino_t/unsigned long for inode number fields.
>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Damien Le Moal <dlemoal@kernel.org>
--
Damien Le Moal
Western Digital Research
^ permalink raw reply
* Re: [PATCH v3 12/12] treewide: change inode->i_ino from unsigned long to u64
From: Damien Le Moal @ 2026-03-04 21:41 UTC (permalink / raw)
To: Jeff Layton, Alexander Viro, Christian Brauner, Jan Kara,
Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers, Dan Williams,
Eric Biggers, Theodore Y. Ts'o, Muchun Song, Oscar Salvador,
David Hildenbrand, David Howells, Paulo Alcantara, Andreas Dilger,
Jan Kara, Jaegeuk Kim, Chao Yu, Trond Myklebust, Anna Schumaker,
Chuck Lever, NeilBrown, Olga Kornievskaia, Dai Ngo, Tom Talpey,
Steve French, Ronnie Sahlberg, Shyam Prasad N, Bharath SM,
Alexander Aring, Ryusuke Konishi, Viacheslav Dubeyko,
Eric Van Hensbergen, Latchesar Ionkov, Dominique Martinet,
Christian Schoenebeck, David Sterba, Marc Dionne, Ian Kent,
Luis de Bethencourt, Salah Triki, Tigran A. Aivazian,
Ilya Dryomov, Alex Markuze, Jan Harkes, coda, Nicolas Pitre,
Tyler Hicks, Amir Goldstein, Christoph Hellwig,
John Paul Adrian Glaubitz, Yangtao Li, Mikulas Patocka,
David Woodhouse, Richard Weinberger, Dave Kleikamp,
Konstantin Komarov, Mark Fasheh, Joel Becker, Joseph Qi,
Mike Marshall, Martin Brandenburg, Miklos Szeredi, Anders Larsen,
Zhihao Cheng, Naohiro Aota, Johannes Thumshirn, John Johansen,
Paul Moore, James Morris, Serge E. Hallyn, Mimi Zohar,
Roberto Sassu, Dmitry Kasatkin, Eric Snowberg, Fan Wu,
Stephen Smalley, Ondrej Mosnacek, Casey Schaufler, Alex Deucher,
Christian König, David Airlie, Simona Vetter, Sumit Semwal,
Eric Dumazet, Kuniyuki Iwashima, Paolo Abeni, Willem de Bruijn,
David S. Miller, Jakub Kicinski, Simon Horman, Oleg Nesterov,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
Ian Rogers, Adrian Hunter, James Clark, Darrick J. Wong,
Martin Schiller, Eric Paris, Joerg Reuter, Marcel Holtmann,
Johan Hedberg, Luiz Augusto von Dentz, Oliver Hartkopp,
Marc Kleine-Budde, David Ahern, Neal Cardwell, Steffen Klassert,
Herbert Xu, Remi Denis-Courmont, Marcelo Ricardo Leitner,
Xin Long, Magnus Karlsson, Maciej Fijalkowski, Stanislav Fomichev,
Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
John Fastabend
Cc: linux-fsdevel, linux-kernel, linux-trace-kernel, nvdimm, fsverity,
linux-mm, netfs, linux-ext4, linux-f2fs-devel, linux-nfs,
linux-cifs, samba-technical, linux-nilfs, v9fs, linux-afs, autofs,
ceph-devel, codalist, ecryptfs, linux-mtd, jfs-discussion, ntfs3,
ocfs2-devel, devel, linux-unionfs, apparmor,
linux-security-module, linux-integrity, selinux, amd-gfx,
dri-devel, linux-media, linaro-mm-sig, netdev, linux-perf-users,
linux-fscrypt, linux-xfs, linux-hams, linux-x25, audit,
linux-bluetooth, linux-can, linux-sctp, bpf
In-Reply-To: <20260304-iino-u64-v3-12-2257ad83d372@kernel.org>
On 3/5/26 00:32, Jeff Layton wrote:
> On 32-bit architectures, unsigned long is only 32 bits wide, which
> causes 64-bit inode numbers to be silently truncated. Several
> filesystems (NFS, XFS, BTRFS, etc.) can generate inode numbers that
> exceed 32 bits, and this truncation can lead to inode number collisions
> and other subtle bugs on 32-bit systems.
>
> Change the type of inode->i_ino from unsigned long to u64 to ensure that
> inode numbers are always represented as 64-bit values regardless of
> architecture. Update all format specifiers treewide from %lu/%lx to
> %llu/%llx to match the new type, along with corresponding local variable
> types.
>
> This is the bulk treewide conversion. Earlier patches in this series
> handled trace events separately to allow trace field reordering for
> better struct packing on 32-bit.
>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
For the zonefs bits:
Acked-by: Damien Le Moal <dlemoal@kernel.org>
--
Damien Le Moal
Western Digital Research
^ permalink raw reply
* Re: [RFC PATCH 1/2] locking: add mutex_lock_nospin()
From: Steven Rostedt @ 2026-03-04 20:57 UTC (permalink / raw)
To: David Laight
Cc: Peter Zijlstra, Yafang Shao, mingo, will, boqun, longman,
mhiramat, mark.rutland, mathieu.desnoyers, linux-kernel,
linux-trace-kernel, bpf
In-Reply-To: <20260304095415.4d5f2528@pumpkin>
On Wed, 4 Mar 2026 09:54:15 +0000
David Laight <david.laight.linux@gmail.com> wrote:
> That might still be an issue if a high priority process is spinning.
> But a %sys spike doesn't imply a latency spike.
>
> Is this using the osq_lock.c code?
> That will have problems on overprovisioned VMs, it tries to find out
> whether the hypervisor has switched out - but ISTR that is flawed.
>
> In reality a spin lock shouldn't be held for long enough to cause
> any kind of latency issue.
> So something in the code that reads the list of filter functions
> needs to be done differently so that the lock isn't held for as long.
It's not a spinlock, it's an adaptive mutex which spins while the owner of
the mutex is also still running on the CPU. If the spinner CPU triggers a
NEED_RESCHED or the owner goes to sleep, the spinner stops spinning and
goes to sleep too.
Honestly, this still looks like a non-issue or a corner case that I don't
think requires these changes.
This looks like one of those "Patient: Doctor it hurts me when I do this.
Doctor: Then don't do that." cases.
Why is a production system having multiple users cat
available_filter_functions to begin with?
-- Steve
^ permalink raw reply
* Re: [PATCH] tracing: Fix WARN_ON in tracing_buffers_mmap_close
From: Lorenzo Stoakes (Oracle) @ 2026-03-04 17:30 UTC (permalink / raw)
To: Steven Rostedt
Cc: Lorenzo Stoakes, Vincent Donnefort, Qing Wang, Masami Hiramatsu,
Mathieu Desnoyers, linux-kernel, linux-trace-kernel,
syzbot+3b5dd2030fe08afdf65d, linux-mm, Andrew Morton,
Vlastimil Babka, David Hildenbrand
In-Reply-To: <20260303102528.744d57ae@gandalf.local.home>
On Tue, Mar 03, 2026 at 10:25:28AM -0500, Steven Rostedt wrote:
> On Tue, 3 Mar 2026 10:19:52 +0000
> Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote:
>
> > > > Setting VM_IO just to trigger a failure case in madvise() feels like a hack? I
> > > > guess it'd do the trick though, but you're not going to be able to reclaim that
> > > > memory, and you might get some unexpected behaviour in code paths that assume
> > > > VM_IO means it's memory-mapped I/O... (for instance GUP will stop working, if
> > > > you need that).
> > >
> > > Well, we don't reclaim that memory anyway.
> >
> > OK so maybe not such an issue then! As long as GUP not working with it wouldn't
> > break anything?
>
> Yeah, we don't ever use get_user_page() for this memory. It's pretty much
> all handled on the kernel side. User space just has it mapped as read only.
> The kernel doesn't care what user space does with it.
OK.
>
>
> > > Yeah, right now the accounting gets screwed up as the mappings get out of
> > > sync when it is forked.
> >
> > Ack, is that in a way that could screw up things from a kernel point of view at
> > all? Presumably there was some report that triggered this work, like an assert
> > fail or something, or a warning?
>
> Yes, it triggered a warning. The start of this thread added a patch to allow
> madvise to remove DONTCOPY from this memory without screwing things up
> (by adding an open() handler for the vm_operations_struct).
I mean that'd be preferable.
>
>
> >
> > If a user is explicitly going to an ftrace buffer and madvise()'ing it in random
> > ways they've only themselves to blame for being so stupid :)
> >
> > As long as it's not exploitable in some way I don't think that's too much of an
> > issue?
> >
> > It'd be nice to keep the semantics of 'don't copy on fork' if we could, even if
> > some crazy users might override it with madvise().
>
> OK, so should we add the VM_IO flag?
Would be preferable not to lie about the mapping if possible :P that'd be a hack.
>
> >
> > Kind of separate from the question, but I mean if it's kernel-allocated memory
> > which you're managing separately it should surely be VM_PFNMAP?
>
> OK, I'm a bit confused. What do you mean "managing separately"? You mean
> managing the user space side of things? if so then yes.
>
> Hmm, I haven't heard of VM_PFNMAP, can you explain more its use case.
VM_PFNMAP means either no struct page (e.g. could be I/O mapped device memory) or
'don't touch struct page, it's not for userspace', e.g. a kernel allocation.
>
> >
> > It depends if you want to have a refcounted folio underneath though. If you do
> > then yeah don't do that :)
>
> I have no idea what a refcounted folio would do here :-p
Well you are currently treating this as a userland folio.
>
> >
> > In general I'd suggest supporting the fork case if you can, or just let things
> > be wrong if a user does something crazy and undoes the VM_DONTCOPY flag.
>
> OK, so don't add these flags and just allow the forking to happen as this
> patch does (if they screw it up, it's their problem).
>
> This patch is here:
>
> https://lore.kernel.org/all/20260227025842.1085206-1-wangqing7171@gmail.com/
>
> I mean, we allow two separate tasks to mmap the same buffer, and they will
> have the same issues as a fork would have. Thus, I guess the answer is to
> apply this patch?
Well note that vm_ops->open is also called on splitting a VMA (ok I guess you
disable this I think you said) and also mremap()'ing (before it removes the old
VMA, if MREMAP_DONTUNMAP is not set).
Probably all that's fine right? If so then good, and yeah something not-VM_IO
would be best I think!
>
> -- Steve
Cheers, Lorenzo
^ permalink raw reply
* [PATCH RFC 3/3] locking: Wire up contended_release tracepoint
From: Dmitry Ilvokhin @ 2026-03-04 16:56 UTC (permalink / raw)
To: Dennis Zhou, Tejun Heo, Christoph Lameter, Steven Rostedt,
Masami Hiramatsu, Mathieu Desnoyers, Peter Zijlstra, Ingo Molnar,
Will Deacon, Boqun Feng, Waiman Long
Cc: linux-mm, linux-kernel, linux-trace-kernel, kernel-team,
Dmitry Ilvokhin
In-Reply-To: <cover.1772642407.git.d@ilvokhin.com>
Add trace_contended_release() calls to the slowpath unlock paths of
sleepable locks: mutex, rtmutex, semaphore, rwsem, percpu-rwsem, and
RT-specific rwbase locks. Each call site fires only when there are
blocked waiters being woken, except percpu_up_write() which always wakes
via __wake_up().
Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
kernel/locking/mutex.c | 1 +
kernel/locking/percpu-rwsem.c | 3 +++
kernel/locking/rtmutex.c | 1 +
kernel/locking/rwbase_rt.c | 8 +++++++-
kernel/locking/rwsem.c | 9 +++++++--
kernel/locking/semaphore.c | 4 +++-
6 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index c867f6c15530..54ca045987a2 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -970,6 +970,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
next = waiter->task;
+ trace_contended_release(lock);
debug_mutex_wake_waiter(lock, waiter);
__clear_task_blocked_on(next, lock);
wake_q_add(&wake_q, next);
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 4190635458da..0f2e8e63d252 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -263,6 +263,8 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, _RET_IP_);
+ trace_contended_release(sem);
+
/*
* Signal the writer is done, no fast path yet.
*
@@ -297,6 +299,7 @@ void __percpu_up_read_slowpath(struct percpu_rw_semaphore *sem)
* writer.
*/
smp_mb(); /* B matches C */
+ trace_contended_release(sem);
/*
* In other words, if they see our decrement (presumably to
* aggregate zero, as that is the only time it matters) they
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index c80902eacd79..e0873f0ed982 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1457,6 +1457,7 @@ static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
raw_spin_lock_irqsave(&lock->wait_lock, flags);
}
+ trace_contended_release(lock);
/*
* The wakeup next waiter path does not suffer from the above
* race. See the comments there.
diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
index 9f4322c07486..42f3658c0059 100644
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -162,8 +162,10 @@ static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
* worst case which can happen is a spurious wakeup.
*/
owner = rt_mutex_owner(rtm);
- if (owner)
+ if (owner) {
+ trace_contended_release(rwb);
rt_mutex_wake_q_add_task(&wqh, owner, state);
+ }
/* Pairs with the preempt_enable in rt_mutex_wake_up_q() */
preempt_disable();
@@ -204,6 +206,8 @@ static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
unsigned long flags;
raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+ if (rt_mutex_has_waiters(rtm))
+ trace_contended_release(rwb);
__rwbase_write_unlock(rwb, WRITER_BIAS, flags);
}
@@ -213,6 +217,8 @@ static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
unsigned long flags;
raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+ if (rt_mutex_has_waiters(rtm))
+ trace_contended_release(rwb);
/* Release it and account current as reader */
__rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
}
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 24df4d98f7d2..4e61dc0bb045 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1360,6 +1360,7 @@ static inline void __up_read(struct rw_semaphore *sem)
if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
RWSEM_FLAG_WAITERS)) {
clear_nonspinnable(sem);
+ trace_contended_release(sem);
rwsem_wake(sem);
}
preempt_enable();
@@ -1383,8 +1384,10 @@ static inline void __up_write(struct rw_semaphore *sem)
preempt_disable();
rwsem_clear_owner(sem);
tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
- if (unlikely(tmp & RWSEM_FLAG_WAITERS))
+ if (unlikely(tmp & RWSEM_FLAG_WAITERS)) {
+ trace_contended_release(sem);
rwsem_wake(sem);
+ }
preempt_enable();
}
@@ -1407,8 +1410,10 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
tmp = atomic_long_fetch_add_release(
-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
rwsem_set_reader_owned(sem);
- if (tmp & RWSEM_FLAG_WAITERS)
+ if (tmp & RWSEM_FLAG_WAITERS) {
+ trace_contended_release(sem);
rwsem_downgrade_wake(sem);
+ }
preempt_enable();
}
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index 3ef032e22f7e..3cef5ba88f7e 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -231,8 +231,10 @@ void __sched up(struct semaphore *sem)
else
__up(sem, &wake_q);
raw_spin_unlock_irqrestore(&sem->lock, flags);
- if (!wake_q_empty(&wake_q))
+ if (!wake_q_empty(&wake_q)) {
+ trace_contended_release(sem);
wake_up_q(&wake_q);
+ }
}
EXPORT_SYMBOL(up);
--
2.47.3
^ permalink raw reply related
* [PATCH RFC 2/3] locking/percpu-rwsem: Extract __percpu_up_read_slowpath()
From: Dmitry Ilvokhin @ 2026-03-04 16:56 UTC (permalink / raw)
To: Dennis Zhou, Tejun Heo, Christoph Lameter, Steven Rostedt,
Masami Hiramatsu, Mathieu Desnoyers, Peter Zijlstra, Ingo Molnar,
Will Deacon, Boqun Feng, Waiman Long
Cc: linux-mm, linux-kernel, linux-trace-kernel, kernel-team,
Dmitry Ilvokhin
In-Reply-To: <cover.1772642407.git.d@ilvokhin.com>
Move the percpu_up_read() slowpath out of the inline function into a new
__percpu_up_read_slowpath() to avoid binary size increase from adding a
tracepoint to an inlined function.
Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
include/linux/percpu-rwsem.h | 15 +++------------
kernel/locking/percpu-rwsem.c | 18 ++++++++++++++++++
2 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index c8cb010d655e..89506895365c 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -107,6 +107,8 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
return ret;
}
+void __percpu_up_read_slowpath(struct percpu_rw_semaphore *sem);
+
static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, _RET_IP_);
@@ -118,18 +120,7 @@ static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
if (likely(rcu_sync_is_idle(&sem->rss))) {
this_cpu_dec(*sem->read_count);
} else {
- /*
- * slowpath; reader will only ever wake a single blocked
- * writer.
- */
- smp_mb(); /* B matches C */
- /*
- * In other words, if they see our decrement (presumably to
- * aggregate zero, as that is the only time it matters) they
- * will also see our critical section.
- */
- this_cpu_dec(*sem->read_count);
- rcuwait_wake_up(&sem->writer);
+ __percpu_up_read_slowpath(sem);
}
preempt_enable();
}
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index ef234469baac..4190635458da 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -288,3 +288,21 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
rcu_sync_exit(&sem->rss);
}
EXPORT_SYMBOL_GPL(percpu_up_write);
+
+void __percpu_up_read_slowpath(struct percpu_rw_semaphore *sem)
+{
+ lockdep_assert_preemption_disabled();
+ /*
+ * slowpath; reader will only ever wake a single blocked
+ * writer.
+ */
+ smp_mb(); /* B matches C */
+ /*
+ * In other words, if they see our decrement (presumably to
+ * aggregate zero, as that is the only time it matters) they
+ * will also see our critical section.
+ */
+ this_cpu_dec(*sem->read_count);
+ rcuwait_wake_up(&sem->writer);
+}
+EXPORT_SYMBOL_GPL(__percpu_up_read_slowpath);
--
2.47.3
^ permalink raw reply related
* [PATCH RFC 0/3] locking: contended_release tracepoint instrumentation
From: Dmitry Ilvokhin @ 2026-03-04 16:56 UTC (permalink / raw)
To: Dennis Zhou, Tejun Heo, Christoph Lameter, Steven Rostedt,
Masami Hiramatsu, Mathieu Desnoyers, Peter Zijlstra, Ingo Molnar,
Will Deacon, Boqun Feng, Waiman Long
Cc: linux-mm, linux-kernel, linux-trace-kernel, kernel-team,
Dmitry Ilvokhin
The existing contention_begin/contention_end tracepoints fire on the
waiter side. The lock holder's identity and stack can be captured at
contention_begin time (e.g. perf lock contention --lock-owner), but
this reflects the holder's state when a waiter arrives, not when the
lock is actually released.
This series adds a contended_release tracepoint that fires on the
holder side when a lock with waiters is released. This provides:
- Hold time estimation: when the holder's own acquisition was
contended, its contention_end (acquisition) and contended_release
can be correlated to measure how long the lock was held under
contention.
- The holder's stack at release time, which may differ from what perf lock
contention --lock-owner captures if the holder does significant work between
the waiter's arrival and the unlock.
The tracepoint is placed exclusively in slowpath unlock paths, so
there is no performance impact on the uncontended fast path and
expected minimal impact on binary size.
Dmitry Ilvokhin (3):
locking: Add contended_release tracepoint
locking/percpu-rwsem: Extract __percpu_up_read_slowpath()
locking: Wire up contended_release tracepoint
include/linux/percpu-rwsem.h | 15 +++------------
include/trace/events/lock.h | 17 +++++++++++++++++
kernel/locking/mutex.c | 1 +
kernel/locking/percpu-rwsem.c | 21 +++++++++++++++++++++
kernel/locking/rtmutex.c | 1 +
kernel/locking/rwbase_rt.c | 8 +++++++-
kernel/locking/rwsem.c | 9 +++++++--
kernel/locking/semaphore.c | 4 +++-
8 files changed, 60 insertions(+), 16 deletions(-)
--
2.47.3
^ permalink raw reply
* [PATCH RFC 1/3] locking: Add contended_release tracepoint
From: Dmitry Ilvokhin @ 2026-03-04 16:56 UTC (permalink / raw)
To: Dennis Zhou, Tejun Heo, Christoph Lameter, Steven Rostedt,
Masami Hiramatsu, Mathieu Desnoyers, Peter Zijlstra, Ingo Molnar,
Will Deacon, Boqun Feng, Waiman Long
Cc: linux-mm, linux-kernel, linux-trace-kernel, kernel-team,
Dmitry Ilvokhin
In-Reply-To: <cover.1772642407.git.d@ilvokhin.com>
Add the contended_release trace event. This tracepoint fires on the
holder side when a contended lock is released, complementing the
existing contention_begin/contention_end tracepoints which fire on the
waiter side.
This enables correlating lock hold time under contention with waiter
events by lock address. Subsequent patches wire this tracepoint into
the individual lock implementations.
Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
include/trace/events/lock.h | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
index 8e89baa3775f..4f28e41977ec 100644
--- a/include/trace/events/lock.h
+++ b/include/trace/events/lock.h
@@ -138,6 +138,23 @@ TRACE_EVENT(contention_end,
TP_printk("%p (ret=%d)", __entry->lock_addr, __entry->ret)
);
+TRACE_EVENT(contended_release,
+
+ TP_PROTO(void *lock),
+
+ TP_ARGS(lock),
+
+ TP_STRUCT__entry(
+ __field(void *, lock_addr)
+ ),
+
+ TP_fast_assign(
+ __entry->lock_addr = lock;
+ ),
+
+ TP_printk("%p", __entry->lock_addr)
+);
+
#endif /* _TRACE_LOCK_H */
/* This part must be outside protection */
--
2.47.3
^ permalink raw reply related
* [PATCH net-next v3 13/13] net/mlx5: Add a shared devlink instance for PFs on same chip
From: Jiri Pirko @ 2026-03-04 16:00 UTC (permalink / raw)
To: netdev
Cc: davem, edumazet, kuba, pabeni, horms, donald.hunter, corbet,
skhan, saeedm, leon, tariqt, mbloch, przemyslaw.kitszel, mschmidt,
andrew+netdev, rostedt, mhiramat, mathieu.desnoyers, chuck.lever,
matttbe, cjubran, daniel.zahka, linux-doc, linux-rdma,
linux-trace-kernel
In-Reply-To: <20260304160022.6114-1-jiri@resnulli.us>
From: Jiri Pirko <jiri@nvidia.com>
Use the previously introduced shared devlink infrastructure to create
a shared devlink instance for mlx5 PFs that reside on the same physical
chip. The shared instance is identified by the chip's serial number
extracted from PCI VPD (V3 keyword, with fallback to serial number
for older devices).
Each PF that probes calls mlx5_shd_init() which extracts the chip serial
number and uses devlink_shd_get() to get or create the shared instance.
When a PF is removed, mlx5_shd_uninit() calls devlink_shd_put()
to release the reference. The shared instance is automatically destroyed
when the last PF is removed.
Make the PF devlink instances nested in this shared devlink instance,
allowing userspace to identify which PFs belong to the same physical
chip.
Example:
pci/0000:08:00.0: index 0
nested_devlink:
auxiliary/mlx5_core.eth.0
devlink_index/1: index 1
nested_devlink:
pci/0000:08:00.0
pci/0000:08:00.1
auxiliary/mlx5_core.eth.0: index 2
pci/0000:08:00.1: index 3
nested_devlink:
auxiliary/mlx5_core.eth.1
auxiliary/mlx5_core.eth.1: index 4
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
---
v2->v3:
- removed "const" from "sn"
- passing driver pointer to devlink_shd_get()
---
.../net/ethernet/mellanox/mlx5/core/Makefile | 5 +-
.../net/ethernet/mellanox/mlx5/core/main.c | 17 ++++++
.../ethernet/mellanox/mlx5/core/sh_devlink.c | 61 +++++++++++++++++++
.../ethernet/mellanox/mlx5/core/sh_devlink.h | 12 ++++
include/linux/mlx5/driver.h | 1 +
5 files changed, 94 insertions(+), 2 deletions(-)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8ffa286a18f5..d39fe9c4a87c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -16,8 +16,9 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
- diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \
- fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o lib/nv_param.o
+ diag/fw_tracer.o diag/crdump.o devlink.o sh_devlink.o diag/rsc_dump.o \
+ diag/reporter_vnic.o fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o \
+ lib/nv_param.o
#
# Netdev basic
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index fdc3ba20912e..1c35c3fc3bb3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -74,6 +74,7 @@
#include "mlx5_irq.h"
#include "hwmon.h"
#include "lag/lag.h"
+#include "sh_devlink.h"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
@@ -1520,10 +1521,16 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
int err;
devl_lock(devlink);
+ if (dev->shd) {
+ err = devl_nested_devlink_set(dev->shd, devlink);
+ if (err)
+ goto unlock;
+ }
devl_register(devlink);
err = mlx5_init_one_devl_locked(dev);
if (err)
devl_unregister(devlink);
+unlock:
devl_unlock(devlink);
return err;
}
@@ -2005,6 +2012,13 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
goto pci_init_err;
}
+ err = mlx5_shd_init(dev);
+ if (err) {
+ mlx5_core_err(dev, "mlx5_shd_init failed with error code %d\n",
+ err);
+ goto shd_init_err;
+ }
+
err = mlx5_init_one(dev);
if (err) {
mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n",
@@ -2018,6 +2032,8 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
return 0;
err_init_one:
+ mlx5_shd_uninit(dev);
+shd_init_err:
mlx5_pci_close(dev);
pci_init_err:
mlx5_mdev_uninit(dev);
@@ -2039,6 +2055,7 @@ static void remove_one(struct pci_dev *pdev)
mlx5_drain_health_wq(dev);
mlx5_sriov_disable(pdev, false);
mlx5_uninit_one(dev);
+ mlx5_shd_uninit(dev);
mlx5_pci_close(dev);
mlx5_mdev_uninit(dev);
mlx5_adev_idx_free(dev->priv.adev_idx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
new file mode 100644
index 000000000000..bc33f95302df
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/mlx5/driver.h>
+#include <net/devlink.h>
+
+#include "sh_devlink.h"
+
+static const struct devlink_ops mlx5_shd_ops = {
+};
+
+int mlx5_shd_init(struct mlx5_core_dev *dev)
+{
+ u8 *vpd_data __free(kfree) = NULL;
+ struct pci_dev *pdev = dev->pdev;
+ unsigned int vpd_size, kw_len;
+ struct devlink *devlink;
+ char *sn, *end;
+ int start;
+ int err;
+
+ if (!mlx5_core_is_pf(dev))
+ return 0;
+
+ vpd_data = pci_vpd_alloc(pdev, &vpd_size);
+ if (IS_ERR(vpd_data)) {
+ err = PTR_ERR(vpd_data);
+ return err == -ENODEV ? 0 : err;
+ }
+ start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, "V3", &kw_len);
+ if (start < 0) {
+ /* Fall-back to SN for older devices. */
+ start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
+ PCI_VPD_RO_KEYWORD_SERIALNO, &kw_len);
+ if (start < 0)
+ return -ENOENT;
+ }
+ sn = kstrndup(vpd_data + start, kw_len, GFP_KERNEL);
+ if (!sn)
+ return -ENOMEM;
+ /* Firmware may return spaces at the end of the string, strip it. */
+ end = strchrnul(sn, ' ');
+ *end = '\0';
+
+ /* Get or create shared devlink instance */
+ devlink = devlink_shd_get(sn, &mlx5_shd_ops, 0, pdev->dev.driver);
+ kfree(sn);
+ if (!devlink)
+ return -ENOMEM;
+
+ dev->shd = devlink;
+ return 0;
+}
+
+void mlx5_shd_uninit(struct mlx5_core_dev *dev)
+{
+ if (!dev->shd)
+ return;
+
+ devlink_shd_put(dev->shd);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h
new file mode 100644
index 000000000000..8ab8d6940227
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sh_devlink.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_SH_DEVLINK_H__
+#define __MLX5_SH_DEVLINK_H__
+
+#include <linux/mlx5/driver.h>
+
+int mlx5_shd_init(struct mlx5_core_dev *dev);
+void mlx5_shd_uninit(struct mlx5_core_dev *dev);
+
+#endif /* __MLX5_SH_DEVLINK_H__ */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 04dcd09f7517..1268fcf35ec7 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -798,6 +798,7 @@ struct mlx5_core_dev {
enum mlx5_wc_state wc_state;
/* sync write combining state */
struct mutex wc_state_lock;
+ struct devlink *shd;
};
struct mlx5_db {
--
2.51.1
^ permalink raw reply related
* [PATCH net-next v3 12/13] documentation: networking: add shared devlink documentation
From: Jiri Pirko @ 2026-03-04 16:00 UTC (permalink / raw)
To: netdev
Cc: davem, edumazet, kuba, pabeni, horms, donald.hunter, corbet,
skhan, saeedm, leon, tariqt, mbloch, przemyslaw.kitszel, mschmidt,
andrew+netdev, rostedt, mhiramat, mathieu.desnoyers, chuck.lever,
matttbe, cjubran, daniel.zahka, linux-doc, linux-rdma,
linux-trace-kernel
In-Reply-To: <20260304160022.6114-1-jiri@resnulli.us>
From: Jiri Pirko <jiri@nvidia.com>
Document shared devlink instances for multiple PFs on the same chip.
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
---
v2->v3:
- describing 2 models of use of shared device, with and without per-PF
instances
v1->v2:
- fixed number of "="'s
---
.../networking/devlink/devlink-shared.rst | 97 +++++++++++++++++++
Documentation/networking/devlink/index.rst | 1 +
2 files changed, 98 insertions(+)
create mode 100644 Documentation/networking/devlink/devlink-shared.rst
diff --git a/Documentation/networking/devlink/devlink-shared.rst b/Documentation/networking/devlink/devlink-shared.rst
new file mode 100644
index 000000000000..16bf6a7d25d9
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-shared.rst
@@ -0,0 +1,97 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+Devlink Shared Instances
+========================
+
+Overview
+========
+
+Shared devlink instances allow multiple physical functions (PFs) on the same
+chip to share a devlink instance for chip-wide operations.
+
+Multiple PFs may reside on the same physical chip, running a single firmware.
+Some of the resources and configurations may be shared among these PFs. The
+shared devlink instance provides an object to pin configuration knobs on.
+
+There are two possible usage models:
+
+1. The shared devlink instance is used alongside individual PF devlink
+ instances, providing chip-wide configuration in addition to per-PF
+ configuration.
+2. The shared devlink instance is the only devlink instance, without
+ per-PF instances.
+
+It is up to the driver to decide which usage model to use.
+
+The shared devlink instance is not backed by any struct *device*.
+
+Implementation
+==============
+
+Architecture
+------------
+
+The implementation uses:
+
+* **Chip identification**: PFs are grouped by chip using a driver-specific identifier
+* **Shared instance management**: Global list of shared instances with reference counting
+
+API Functions
+-------------
+
+The following functions are provided for managing shared devlink instances:
+
+* ``devlink_shd_get()``: Get or create a shared devlink instance identified by a string ID
+* ``devlink_shd_put()``: Release a reference on a shared devlink instance
+* ``devlink_shd_get_priv()``: Get private data from shared devlink instance
+
+Initialization Flow
+-------------------
+
+1. **PF calls shared devlink init** during driver probe
+2. **Chip identification** using driver-specific method to determine device identity
+3. **Get or create shared instance** using ``devlink_shd_get()``:
+
+ * The function looks up existing instance by identifier
+ * If none exists, creates new instance:
+ - Allocates and registers devlink instance
+ - Adds to global shared instances list
+ - Increments reference count
+
+4. **Set nested devlink instance** for the PF devlink instance using
+ ``devl_nested_devlink_set()`` before registering the PF devlink instance
+
+Cleanup Flow
+------------
+
+1. **Cleanup** when PF is removed
+2. **Call** ``devlink_shd_put()`` to release reference (decrements reference count)
+3. **Shared instance is automatically destroyed** when the last PF removes (reference count reaches zero)
+
+Chip Identification
+-------------------
+
+PFs belonging to the same chip are identified using a driver-specific method.
+The driver is free to choose any identifier that is suitable for determining
+whether two PFs are part of the same device. Examples include:
+
+* **PCI VPD serial numbers**: Extract from PCI VPD
+* **Device tree properties**: Read chip identifier from device tree
+* **Other hardware-specific identifiers**: Any unique identifier that groups PFs by chip
+
+Locking
+-------
+
+A global mutex (``shd_mutex``) protects the shared instances list during registration/deregistration.
+
+Similarly to other nested devlink instance relationships, devlink lock of
+the shared instance should be always taken after the devlink lock of PF.
+
+Reference Counting
+------------------
+
+Each shared devlink instance maintains a reference count (``refcount_t refcount``).
+The reference count is incremented when ``devlink_shd_get()`` is called and decremented
+when ``devlink_shd_put()`` is called. When the reference count reaches zero, the shared
+instance is automatically destroyed.
diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst
index 35b12a2bfeba..f7ba7dcf477d 100644
--- a/Documentation/networking/devlink/index.rst
+++ b/Documentation/networking/devlink/index.rst
@@ -68,6 +68,7 @@ general.
devlink-resource
devlink-selftests
devlink-trap
+ devlink-shared
Driver-specific documentation
-----------------------------
--
2.51.1
^ permalink raw reply related
* [PATCH net-next v3 11/13] devlink: introduce shared devlink instance for PFs on same chip
From: Jiri Pirko @ 2026-03-04 16:00 UTC (permalink / raw)
To: netdev
Cc: davem, edumazet, kuba, pabeni, horms, donald.hunter, corbet,
skhan, saeedm, leon, tariqt, mbloch, przemyslaw.kitszel, mschmidt,
andrew+netdev, rostedt, mhiramat, mathieu.desnoyers, chuck.lever,
matttbe, cjubran, daniel.zahka, linux-doc, linux-rdma,
linux-trace-kernel
In-Reply-To: <20260304160022.6114-1-jiri@resnulli.us>
From: Jiri Pirko <jiri@nvidia.com>
Multiple PFs may reside on the same physical chip, running a single
firmware. Some of the resources and configurations may be shared among
these PFs. Currently, there is no good object to pin the configuration
knobs on.
Introduce a shared devlink instance, instantiated upon probe of
the first PF and removed during remove of the last PF. The shared
devlink instance is not backed by any struct device, as there is
no PCI device related to it.
The implementation uses reference counting to manage the lifecycle:
each PF that probes calls devlink_shd_get() to get or create
the shared instance, and calls devlink_shd_put() when it is removed.
The shared instance is automatically destroyed when the last PF is removed.
Example:
pci/0000:08:00.0: index 0
nested_devlink:
auxiliary/mlx5_core.eth.0
devlink_index/1: index 1
nested_devlink:
pci/0000:08:00.0
pci/0000:08:00.1
auxiliary/mlx5_core.eth.0: index 2
pci/0000:08:00.1: index 3
nested_devlink:
auxiliary/mlx5_core.eth.1
auxiliary/mlx5_core.eth.1: index 4
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
---
v2->v3:
- added __counted_by() for priv
- added *driver arg to devlink_shd_get()
- added ops, priv_size and driver pointer consistency check
v1->v2:
- s/err_kstrdup_id/err_devlink_free/
- fixed kernel-doc comment of devlink_shd_get()
- removed NULL arg check in devlink_shd_get/put()
---
include/net/devlink.h | 7 ++
net/devlink/Makefile | 2 +-
net/devlink/sh_dev.c | 161 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 169 insertions(+), 1 deletion(-)
create mode 100644 net/devlink/sh_dev.c
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 45dec7067a8e..3038af6ec017 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1647,6 +1647,13 @@ void devlink_register(struct devlink *devlink);
void devlink_unregister(struct devlink *devlink);
void devlink_free(struct devlink *devlink);
+struct devlink *devlink_shd_get(const char *id,
+ const struct devlink_ops *ops,
+ size_t priv_size,
+ const struct device_driver *driver);
+void devlink_shd_put(struct devlink *devlink);
+void *devlink_shd_get_priv(struct devlink *devlink);
+
/**
* struct devlink_port_ops - Port operations
* @port_split: Callback used to split the port into multiple ones.
diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index 000da622116a..8f2adb5e5836 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
obj-y := core.o netlink.o netlink_gen.o dev.o port.o sb.o dpipe.o \
- resource.o param.o region.o health.o trap.o rate.o linecard.o
+ resource.o param.o region.o health.o trap.o rate.o linecard.o sh_dev.o
diff --git a/net/devlink/sh_dev.c b/net/devlink/sh_dev.c
new file mode 100644
index 000000000000..85acce97e788
--- /dev/null
+++ b/net/devlink/sh_dev.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <net/devlink.h>
+
+#include "devl_internal.h"
+
+static LIST_HEAD(shd_list);
+static DEFINE_MUTEX(shd_mutex); /* Protects shd_list and shd->list */
+
+/* This structure represents a shared devlink instance,
+ * there is one created per identifier (e.g., serial number).
+ */
+struct devlink_shd {
+ struct list_head list; /* Node in shd list */
+ const char *id; /* Identifier string (e.g., serial number) */
+ refcount_t refcount; /* Reference count */
+ size_t priv_size; /* Size of driver private data */
+ char priv[] __aligned(NETDEV_ALIGN) __counted_by(priv_size);
+};
+
+static struct devlink_shd *devlink_shd_lookup(const char *id)
+{
+ struct devlink_shd *shd;
+
+ list_for_each_entry(shd, &shd_list, list) {
+ if (!strcmp(shd->id, id))
+ return shd;
+ }
+
+ return NULL;
+}
+
+static struct devlink_shd *devlink_shd_create(const char *id,
+ const struct devlink_ops *ops,
+ size_t priv_size,
+ const struct device_driver *driver)
+{
+ struct devlink_shd *shd;
+ struct devlink *devlink;
+
+ devlink = __devlink_alloc(ops, sizeof(struct devlink_shd) + priv_size,
+ &init_net, NULL, driver);
+ if (!devlink)
+ return NULL;
+ shd = devlink_priv(devlink);
+
+ shd->id = kstrdup(id, GFP_KERNEL);
+ if (!shd->id)
+ goto err_devlink_free;
+ shd->priv_size = priv_size;
+ refcount_set(&shd->refcount, 1);
+
+ devl_lock(devlink);
+ devl_register(devlink);
+ devl_unlock(devlink);
+
+ list_add_tail(&shd->list, &shd_list);
+
+ return shd;
+
+err_devlink_free:
+ devlink_free(devlink);
+ return NULL;
+}
+
+static void devlink_shd_destroy(struct devlink_shd *shd)
+{
+ struct devlink *devlink = priv_to_devlink(shd);
+
+ list_del(&shd->list);
+ devl_lock(devlink);
+ devl_unregister(devlink);
+ devl_unlock(devlink);
+ kfree(shd->id);
+ devlink_free(devlink);
+}
+
+/**
+ * devlink_shd_get - Get or create a shared devlink instance
+ * @id: Identifier string (e.g., serial number) for the shared instance
+ * @ops: Devlink operations structure
+ * @priv_size: Size of private data structure
+ * @driver: Driver associated with the shared devlink instance
+ *
+ * Get an existing shared devlink instance identified by @id, or create
+ * a new one if it doesn't exist. Return the devlink instance with a
+ * reference held. The caller must call devlink_shd_put() when done.
+ *
+ * All callers sharing the same @id must pass identical @ops, @priv_size
+ * and @driver. A mismatch triggers a warning and returns NULL.
+ *
+ * Return: Pointer to the shared devlink instance on success,
+ * NULL on failure
+ */
+struct devlink *devlink_shd_get(const char *id,
+ const struct devlink_ops *ops,
+ size_t priv_size,
+ const struct device_driver *driver)
+{
+ struct devlink *devlink;
+ struct devlink_shd *shd;
+
+ mutex_lock(&shd_mutex);
+
+ shd = devlink_shd_lookup(id);
+ if (!shd) {
+ shd = devlink_shd_create(id, ops, priv_size, driver);
+ goto unlock;
+ }
+
+ devlink = priv_to_devlink(shd);
+ if (WARN_ON_ONCE(devlink->ops != ops ||
+ shd->priv_size != priv_size ||
+ devlink->dev_driver != driver)) {
+ shd = NULL;
+ goto unlock;
+ }
+ refcount_inc(&shd->refcount);
+
+unlock:
+ mutex_unlock(&shd_mutex);
+ return shd ? priv_to_devlink(shd) : NULL;
+}
+EXPORT_SYMBOL_GPL(devlink_shd_get);
+
+/**
+ * devlink_shd_put - Release a reference on a shared devlink instance
+ * @devlink: Shared devlink instance
+ *
+ * Release a reference on a shared devlink instance obtained via
+ * devlink_shd_get().
+ */
+void devlink_shd_put(struct devlink *devlink)
+{
+ struct devlink_shd *shd;
+
+ mutex_lock(&shd_mutex);
+ shd = devlink_priv(devlink);
+ if (refcount_dec_and_test(&shd->refcount))
+ devlink_shd_destroy(shd);
+ mutex_unlock(&shd_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_shd_put);
+
+/**
+ * devlink_shd_get_priv - Get private data from shared devlink instance
+ * @devlink: Devlink instance
+ *
+ * Returns a pointer to the driver's private data structure within
+ * the shared devlink instance.
+ *
+ * Return: Pointer to private data
+ */
+void *devlink_shd_get_priv(struct devlink *devlink)
+{
+ struct devlink_shd *shd = devlink_priv(devlink);
+
+ return shd->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_shd_get_priv);
--
2.51.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox