* [PATCH bpf-next v1 1/1] selftests/bpf: Trace bpf_local_storage_update to debug flaky local storage tests
@ 2026-04-17 23:36 Amery Hung
2026-04-18 21:06 ` Kumar Kartikeya Dwivedi
2026-04-19 12:10 ` patchwork-bot+netdevbpf
0 siblings, 2 replies; 4+ messages in thread
From: Amery Hung @ 2026-04-17 23:36 UTC (permalink / raw)
To: bpf
Cc: alexei.starovoitov, andrii, daniel, eddyz87, memxor, martin.lau,
mykyta.yatsenko5, ameryhung, kernel-team
task_local_storage/sys_enter_exit and cgrp_local_storage/
cgroup_iter_sleepable occasionally fail in CI, possibly because
bpf_{task,cgrp}_storage_get() returns NULL. Add a fexit probe on
bpf_local_storage_update() to capture the actual error code when this
happens. It will allow us to tell whether it is a trylock failure in
kmalloc_nolock(), a timeout/deadlock in rqspinlock, or something else.
Signed-off-by: Amery Hung <ameryhung@gmail.com>
---
.../bpf/prog_tests/cgrp_local_storage.c | 15 +++++++++++++--
.../bpf/prog_tests/task_local_storage.c | 1 +
.../selftests/bpf/progs/cgrp_ls_sleepable.c | 18 ++++++++++++++++++
.../selftests/bpf/progs/task_local_storage.c | 19 +++++++++++++++++++
4 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
index 478a77cb67e6..c4398ccf3493 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
@@ -176,7 +176,7 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id)
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
union bpf_iter_link_info linfo;
struct cgrp_ls_sleepable *skel;
- struct bpf_link *link;
+ struct bpf_link *link, *fexit_link;
int err, iter_fd;
char buf[16];
@@ -200,16 +200,27 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id)
if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
+ fexit_link = bpf_program__attach(skel->progs.fexit_update);
+ if (!ASSERT_OK_PTR(fexit_link, "attach_fexit"))
+ goto out_link;
+
iter_fd = bpf_iter_create(bpf_link__fd(link));
if (!ASSERT_GE(iter_fd, 0, "iter_create"))
- goto out_link;
+ goto out_fexit_link;
+
+ skel->bss->target_pid = sys_gettid();
/* trigger the program run */
(void)read(iter_fd, buf, sizeof(buf));
+ skel->bss->target_pid = 0;
+
+ ASSERT_EQ(skel->bss->update_err, 0, "update_err");
ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id");
close(iter_fd);
+out_fexit_link:
+ bpf_link__destroy(fexit_link);
out_link:
bpf_link__destroy(link);
out:
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
index 1b26c12f255a..5b2b56cc3a4f 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -47,6 +47,7 @@ static void test_sys_enter_exit(void)
skel->bss->target_pid = 0;
/* 2x gettid syscalls */
+ ASSERT_EQ(skel->bss->update_err, 0, "update_err");
ASSERT_EQ(skel->bss->enter_cnt, 2, "enter_cnt");
ASSERT_EQ(skel->bss->exit_cnt, 2, "exit_cnt");
ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
index a2de95f85648..37bd6b03ba01 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
@@ -4,6 +4,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
+#include "err.h"
char _license[] SEC("license") = "GPL";
@@ -16,6 +17,7 @@ struct {
__s32 target_pid;
__u64 cgroup_id;
+long update_err;
int target_hid;
bool is_cgroup1;
@@ -123,3 +125,19 @@ int yes_rcu_lock(void *ctx)
bpf_rcu_read_unlock();
return 0;
}
+
+SEC("fexit/bpf_local_storage_update")
+int BPF_PROG(fexit_update, void *owner, struct bpf_local_storage_map *smap,
+ void *value, u64 map_flags, bool swap_uptrs,
+ struct bpf_local_storage_data *ret)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ if (task->pid != target_pid)
+ return 0;
+
+ if (IS_ERR_VALUE(ret))
+ update_err = PTR_ERR(ret);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_local_storage.c b/tools/testing/selftests/bpf/progs/task_local_storage.c
index 80a0a20db88d..34fa3d6451d2 100644
--- a/tools/testing/selftests/bpf/progs/task_local_storage.c
+++ b/tools/testing/selftests/bpf/progs/task_local_storage.c
@@ -14,12 +14,15 @@ struct {
__type(value, long);
} enter_id SEC(".maps");
+#include "err.h"
+
#define MAGIC_VALUE 0xabcd1234
pid_t target_pid = 0;
int mismatch_cnt = 0;
int enter_cnt = 0;
int exit_cnt = 0;
+long update_err = 0;
SEC("tp_btf/sys_enter")
int BPF_PROG(on_enter, struct pt_regs *regs, long id)
@@ -62,3 +65,19 @@ int BPF_PROG(on_exit, struct pt_regs *regs, long id)
__sync_fetch_and_add(&mismatch_cnt, 1);
return 0;
}
+
+SEC("fexit/bpf_local_storage_update")
+int BPF_PROG(fexit_update, void *owner, struct bpf_local_storage_map *smap,
+ void *value, u64 map_flags, bool swap_uptrs,
+ struct bpf_local_storage_data *ret)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ if (task->pid != target_pid)
+ return 0;
+
+ if (IS_ERR_VALUE(ret))
+ update_err = PTR_ERR(ret);
+
+ return 0;
+}
--
2.52.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH bpf-next v1 1/1] selftests/bpf: Trace bpf_local_storage_update to debug flaky local storage tests
2026-04-17 23:36 [PATCH bpf-next v1 1/1] selftests/bpf: Trace bpf_local_storage_update to debug flaky local storage tests Amery Hung
@ 2026-04-18 21:06 ` Kumar Kartikeya Dwivedi
2026-04-19 5:35 ` Amery Hung
2026-04-19 12:10 ` patchwork-bot+netdevbpf
1 sibling, 1 reply; 4+ messages in thread
From: Kumar Kartikeya Dwivedi @ 2026-04-18 21:06 UTC (permalink / raw)
To: Amery Hung
Cc: bpf, alexei.starovoitov, andrii, daniel, eddyz87, martin.lau,
mykyta.yatsenko5, kernel-team
On Sat, 18 Apr 2026 at 00:36, Amery Hung <ameryhung@gmail.com> wrote:
>
> task_local_storage/sys_enter_exit and cgrp_local_storage/
> cgroup_iter_sleepable occasionally fail in CI possibly because
> bpf_{task,cgrp}_storage_get() returns NULL. Add a fexit probe on
> bpf_local_storage_update() to capture the actual error code when this
> happens. It will allow us to tell if it is trylock failure in
> kmalloc_nolock(), timeout/deadlock in rqspinlock or something else.
>
> Signed-off-by: Amery Hung <ameryhung@gmail.com>
> ---
I assume this is to get signal on what goes wrong when we see a
failure next time?
LGTM though.
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH bpf-next v1 1/1] selftests/bpf: Trace bpf_local_storage_update to debug flaky local storage tests
2026-04-18 21:06 ` Kumar Kartikeya Dwivedi
@ 2026-04-19 5:35 ` Amery Hung
0 siblings, 0 replies; 4+ messages in thread
From: Amery Hung @ 2026-04-19 5:35 UTC (permalink / raw)
To: Kumar Kartikeya Dwivedi
Cc: bpf, alexei.starovoitov, andrii, daniel, eddyz87, martin.lau,
mykyta.yatsenko5, kernel-team
On Sat, Apr 18, 2026 at 2:07 PM Kumar Kartikeya Dwivedi
<memxor@gmail.com> wrote:
>
> On Sat, 18 Apr 2026 at 00:36, Amery Hung <ameryhung@gmail.com> wrote:
> >
> > task_local_storage/sys_enter_exit and cgrp_local_storage/
> > cgroup_iter_sleepable occasionally fail in CI possibly because
> > bpf_{task,cgrp}_storage_get() returns NULL. Add a fexit probe on
> > bpf_local_storage_update() to capture the actual error code when this
> > happens. It will allow us to tell if it is trylock failure in
> > kmalloc_nolock(), timeout/deadlock in rqspinlock or something else.
> >
> > Signed-off-by: Amery Hung <ameryhung@gmail.com>
> > ---
>
> I assume this is to get signal on what goes wrong when we see a
> failure next time?
Right. Currently, these two tests fail occasionally in CI. The error
message is limited (shown below), but my theory is that
bpf_{task,cgrp}_storage_get() is failing. Since I could not reproduce
the failures locally, I hope this patch will give us more insight the
next time they occur in CI.
All error logs:
cgrp2_local_storage:PASS:join_cgroup /cgrp_local_storage 0 nsec
test_cgroup_iter_sleepable:PASS:skel_open 0 nsec
test_cgroup_iter_sleepable:PASS:skel_load 0 nsec
test_cgroup_iter_sleepable:PASS:attach_iter 0 nsec
test_cgroup_iter_sleepable:PASS:iter_create 0 nsec
test_cgroup_iter_sleepable:FAIL:cgroup_id unexpected cgroup_id: actual 0 != expected 3841
#80/5 cgrp_local_storage/cgroup_iter_sleepable:FAIL
#80 cgrp_local_storage:FAIL
Summary: 703/6273 PASSED, 78 SKIPPED, 1 FAILED
test_sys_enter_exit:PASS:skel_open_and_load 0 nsec
test_sys_enter_exit:PASS:skel_attach 0 nsec
test_sys_enter_exit:FAIL:enter_cnt unexpected enter_cnt: actual 1 != expected 2
test_sys_enter_exit:PASS:exit_cnt 0 nsec
test_sys_enter_exit:FAIL:mismatch_cnt unexpected mismatch_cnt: actual 2 != expected 0
#463/1 task_local_storage/sys_enter_exit:FAIL
#463 task_local_storage:FAIL
Summary: 702/6263 PASSED, 85 SKIPPED, 1 FAILED
> LGTM though.
>
> Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH bpf-next v1 1/1] selftests/bpf: Trace bpf_local_storage_update to debug flaky local storage tests
2026-04-17 23:36 [PATCH bpf-next v1 1/1] selftests/bpf: Trace bpf_local_storage_update to debug flaky local storage tests Amery Hung
2026-04-18 21:06 ` Kumar Kartikeya Dwivedi
@ 2026-04-19 12:10 ` patchwork-bot+netdevbpf
1 sibling, 0 replies; 4+ messages in thread
From: patchwork-bot+netdevbpf @ 2026-04-19 12:10 UTC (permalink / raw)
To: Amery Hung
Cc: bpf, alexei.starovoitov, andrii, daniel, eddyz87, memxor,
martin.lau, mykyta.yatsenko5, kernel-team
Hello:
This patch was applied to bpf/bpf-next.git (master)
by Kumar Kartikeya Dwivedi <memxor@gmail.com>:
On Fri, 17 Apr 2026 16:36:31 -0700 you wrote:
> task_local_storage/sys_enter_exit and cgrp_local_storage/
> cgroup_iter_sleepable occasionally fail in CI possibly because
> bpf_{task,cgrp}_storage_get() returns NULL. Add a fexit probe on
> bpf_local_storage_update() to capture the actual error code when this
> happens. It will allow us to tell if it is trylock failure in
> kmalloc_nolock(), timeout/deadlock in rqspinlock or something else.
>
> [...]
Here is the summary with links:
- [bpf-next,v1,1/1] selftests/bpf: Trace bpf_local_storage_update to debug flaky local storage tests
https://git.kernel.org/bpf/bpf-next/c/7c528b364bd8
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2026-04-19 12:10 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-17 23:36 [PATCH bpf-next v1 1/1] selftests/bpf: Trace bpf_local_storage_update to debug flaky local storage tests Amery Hung
2026-04-18 21:06 ` Kumar Kartikeya Dwivedi
2026-04-19 5:35 ` Amery Hung
2026-04-19 12:10 ` patchwork-bot+netdevbpf
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.