From: Jiri Olsa <jolsa@kernel.org>
To: Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Andrii Nakryiko <andrii@kernel.org>
Cc: bpf@vger.kernel.org, linux-trace-kernel@vger.kernel.org,
Martin KaFai Lau <kafai@fb.com>,
Eduard Zingerman <eddyz87@gmail.com>,
Song Liu <songliubraving@fb.com>, Yonghong Song <yhs@fb.com>,
Menglong Dong <menglong8.dong@gmail.com>,
Steven Rostedt <rostedt@kernel.org>
Subject: [PATCHv4 bpf-next 03/25] bpf: Use mutex lock pool for bpf trampolines
Date: Tue, 24 Mar 2026 09:18:24 +0100 [thread overview]
Message-ID: <20260324081846.2334094-4-jolsa@kernel.org> (raw)
In-Reply-To: <20260324081846.2334094-1-jolsa@kernel.org>
Adding mutex lock pool that replaces bpf trampolines mutex.
For tracing_multi link coming in following changes we need to lock all
the involved trampolines during the attachment. This could mean thousands
of mutex locks, which is not convenient.
As suggested by Andrii we can replace bpf trampolines mutex with mutex
pool, where each trampoline is hash-ed to one of the locks from the pool.
It's better to lock all the pool mutexes (32 at the moment) than
thousands of them.
There is 48 (MAX_LOCK_DEPTH) lock limit allowed to be simultaneously
held by task, so we need to keep 32 mutexes (5 bits) in the pool, so
when we lock them all in following changes the lockdep won't scream.
Removing the mutex_is_locked in bpf_trampoline_put, because we removed
the mutex from bpf_trampoline.
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
include/linux/bpf.h | 2 --
kernel/bpf/trampoline.c | 76 ++++++++++++++++++++++++++++-------------
2 files changed, 52 insertions(+), 26 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 05b34a6355b0..1d900f49aff5 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1335,8 +1335,6 @@ struct bpf_trampoline {
/* hlist for trampoline_ip_table */
struct hlist_node hlist_ip;
struct ftrace_ops *fops;
- /* serializes access to fields of this trampoline */
- struct mutex mutex;
refcount_t refcnt;
u32 flags;
u64 key;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index f02254a21585..eb4ea78ff77f 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -30,6 +30,34 @@ static struct hlist_head trampoline_ip_table[TRAMPOLINE_TABLE_SIZE];
/* serializes access to trampoline tables */
static DEFINE_MUTEX(trampoline_mutex);
+/*
+ * We keep 32 trampoline locks (5 bits) in the pool, because there is
+ * 48 (MAX_LOCK_DEPTH) locks limit allowed to be simultaneously held
+ * by task. Each lock has its own lockdep key to keep it simple.
+ */
+#define TRAMPOLINE_LOCKS_BITS 5
+#define TRAMPOLINE_LOCKS_TABLE_SIZE (1 << TRAMPOLINE_LOCKS_BITS)
+
+static struct {
+ struct mutex mutex;
+ struct lock_class_key key;
+} trampoline_locks[TRAMPOLINE_LOCKS_TABLE_SIZE];
+
+static struct mutex *select_trampoline_lock(struct bpf_trampoline *tr)
+{
+ return &trampoline_locks[hash_64((u64)(uintptr_t) tr, TRAMPOLINE_LOCKS_BITS)].mutex;
+}
+
+static void trampoline_lock(struct bpf_trampoline *tr)
+{
+ mutex_lock(select_trampoline_lock(tr));
+}
+
+static void trampoline_unlock(struct bpf_trampoline *tr)
+{
+ mutex_unlock(select_trampoline_lock(tr));
+}
+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
@@ -69,9 +97,9 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
/* This is called inside register_ftrace_direct_multi(), so
- * tr->mutex is already locked.
+ * trampoline's mutex is already locked.
*/
- lockdep_assert_held_once(&tr->mutex);
+ lockdep_assert_held_once(select_trampoline_lock(tr));
/* Instead of updating the trampoline here, we propagate
* -EAGAIN to register_ftrace_direct(). Then we can
@@ -91,7 +119,7 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
}
/* The normal locking order is
- * tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
+ * select_trampoline_lock(tr) => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
*
* The following two commands are called from
*
@@ -99,12 +127,12 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
* cleanup_direct_functions_after_ipmodify
*
* In both cases, direct_mutex is already locked. Use
- * mutex_trylock(&tr->mutex) to avoid deadlock in race condition
- * (something else is making changes to this same trampoline).
+ * mutex_trylock(select_trampoline_lock(tr)) to avoid deadlock in race condition
+ * (something else holds the same pool lock).
*/
- if (!mutex_trylock(&tr->mutex)) {
- /* sleep 1 ms to make sure whatever holding tr->mutex makes
- * some progress.
+ if (!mutex_trylock(select_trampoline_lock(tr))) {
+ /* sleep 1 ms to make sure whatever holding select_trampoline_lock(tr)
+ * makes some progress.
*/
msleep(1);
return -EAGAIN;
@@ -129,7 +157,7 @@ static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, unsigned long ip,
break;
}
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
return ret;
}
#endif
@@ -359,7 +387,6 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key, unsigned long ip)
head = &trampoline_ip_table[hash_64(tr->ip, TRAMPOLINE_HASH_BITS)];
hlist_add_head(&tr->hlist_ip, head);
refcount_set(&tr->refcnt, 1);
- mutex_init(&tr->mutex);
for (i = 0; i < BPF_TRAMP_MAX; i++)
INIT_HLIST_HEAD(&tr->progs_hlist[i]);
out:
@@ -844,9 +871,9 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
{
int err;
- mutex_lock(&tr->mutex);
+ trampoline_lock(tr);
err = __bpf_trampoline_link_prog(link, tr, tgt_prog);
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
return err;
}
@@ -887,9 +914,9 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
{
int err;
- mutex_lock(&tr->mutex);
+ trampoline_lock(tr);
err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog);
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
return err;
}
@@ -999,12 +1026,12 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
if (!tr)
return -ENOMEM;
- mutex_lock(&tr->mutex);
+ trampoline_lock(tr);
shim_link = cgroup_shim_find(tr, bpf_func);
if (shim_link && !IS_ERR(bpf_link_inc_not_zero(&shim_link->link.link))) {
/* Reusing existing shim attached by the other program. */
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
bpf_trampoline_put(tr); /* bpf_trampoline_get above */
return 0;
}
@@ -1024,16 +1051,16 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
shim_link->trampoline = tr;
/* note, we're still holding tr refcnt from above */
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
return 0;
err:
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
if (shim_link)
bpf_link_put(&shim_link->link.link);
- /* have to release tr while _not_ holding its mutex */
+ /* have to release tr while _not_ holding pool mutex for trampoline */
bpf_trampoline_put(tr); /* bpf_trampoline_get above */
return err;
@@ -1054,9 +1081,9 @@ void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
if (WARN_ON_ONCE(!tr))
return;
- mutex_lock(&tr->mutex);
+ trampoline_lock(tr);
shim_link = cgroup_shim_find(tr, bpf_func);
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
if (shim_link)
bpf_link_put(&shim_link->link.link);
@@ -1074,14 +1101,14 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key,
if (!tr)
return NULL;
- mutex_lock(&tr->mutex);
+ trampoline_lock(tr);
if (tr->func.addr)
goto out;
memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel));
tr->func.addr = (void *)tgt_info->tgt_addr;
out:
- mutex_unlock(&tr->mutex);
+ trampoline_unlock(tr);
return tr;
}
@@ -1094,7 +1121,6 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
mutex_lock(&trampoline_mutex);
if (!refcount_dec_and_test(&tr->refcnt))
goto out;
- WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
for (i = 0; i < BPF_TRAMP_MAX; i++)
if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i])))
@@ -1380,6 +1406,8 @@ static int __init init_trampolines(void)
INIT_HLIST_HEAD(&trampoline_key_table[i]);
for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
INIT_HLIST_HEAD(&trampoline_ip_table[i]);
+ for (i = 0; i < TRAMPOLINE_LOCKS_TABLE_SIZE; i++)
+ __mutex_init(&trampoline_locks[i].mutex, "trampoline_lock", &trampoline_locks[i].key);
return 0;
}
late_initcall(init_trampolines);
--
2.53.0
next prev parent reply other threads:[~2026-03-24 8:19 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-24 8:18 [PATCHv4 bpf-next 00/25] bpf: tracing_multi link Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 01/25] ftrace: Add ftrace_hash_count function Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 02/25] ftrace: Make ftrace_hash_clear global Jiri Olsa
2026-03-24 8:18 ` Jiri Olsa [this message]
2026-03-24 8:18 ` [PATCHv4 bpf-next 04/25] bpf: Add struct bpf_trampoline_ops object Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 05/25] bpf: Add struct bpf_tramp_node object Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 06/25] bpf: Factor fsession link to use struct bpf_tramp_node Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 07/25] bpf: Add multi tracing attach types Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 08/25] bpf: Move sleepable verification code to btf_id_allow_sleepable Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 09/25] bpf: Add bpf_trampoline_multi_attach/detach functions Jiri Olsa
2026-03-24 8:58 ` bot+bpf-ci
2026-03-24 14:29 ` Jiri Olsa
2026-03-27 4:18 ` kernel test robot
2026-03-24 8:18 ` [PATCHv4 bpf-next 10/25] bpf: Add support for tracing multi link Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 11/25] bpf: Add support for tracing_multi link cookies Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 12/25] bpf: Add support for tracing_multi link session Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 13/25] bpf: Add support for tracing_multi link fdinfo Jiri Olsa
2026-03-25 6:43 ` Leon Hwang
2026-03-25 21:49 ` Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 14/25] libbpf: Add bpf_object_cleanup_btf function Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 15/25] libbpf: Add bpf_link_create support for tracing_multi link Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 16/25] libbpf: Add btf_type_is_traceable_func function Jiri Olsa
2026-03-24 8:58 ` bot+bpf-ci
2026-03-24 14:29 ` Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 17/25] libbpf: Add support to create tracing multi link Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 18/25] selftests/bpf: Add tracing multi skel/pattern/ids attach tests Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 19/25] selftests/bpf: Add tracing multi skel/pattern/ids module " Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 20/25] selftests/bpf: Add tracing multi intersect tests Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 21/25] selftests/bpf: Add tracing multi cookies test Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 22/25] selftests/bpf: Add tracing multi session test Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 23/25] selftests/bpf: Add tracing multi attach fails test Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 24/25] selftests/bpf: Add tracing multi attach benchmark test Jiri Olsa
2026-03-25 6:45 ` Leon Hwang
2026-03-25 15:11 ` Alexei Starovoitov
2026-03-25 21:48 ` Jiri Olsa
2026-03-25 21:48 ` Jiri Olsa
2026-03-24 8:18 ` [PATCHv4 bpf-next 25/25] selftests/bpf: Add tracing multi attach rollback tests Jiri Olsa
2026-03-25 6:45 ` Leon Hwang
2026-03-25 21:49 ` Jiri Olsa
2026-03-25 6:42 ` [PATCHv4 bpf-next 00/25] bpf: tracing_multi link Leon Hwang
2026-03-25 14:58 ` Leon Hwang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260324081846.2334094-4-jolsa@kernel.org \
--to=jolsa@kernel.org \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=kafai@fb.com \
--cc=linux-trace-kernel@vger.kernel.org \
--cc=menglong8.dong@gmail.com \
--cc=rostedt@kernel.org \
--cc=songliubraving@fb.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox