From: "Vlastimil Babka (SUSE)" <vbabka@kernel.org>
To: "Harry Yoo (Oracle)" <harry@kernel.org>,
Andrew Morton <akpm@linux-foundation.org>,
Hao Li <hao.li@linux.dev>, Christoph Lameter <cl@gentwo.org>,
David Rientjes <rientjes@google.com>,
Roman Gushchin <roman.gushchin@linux.dev>,
Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Puranjay Mohan <puranjay@kernel.org>,
Amery Hung <ameryhung@gmail.com>,
Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
Clark Williams <clrkwllms@kernel.org>,
Steven Rostedt <rostedt@goodmis.org>,
"Paul E. McKenney" <paulmck@kernel.org>,
Frederic Weisbecker <frederic@kernel.org>,
Neeraj Upadhyay <neeraj.upadhyay@kernel.org>,
Joel Fernandes <joelagnelf@nvidia.com>,
Josh Triplett <josh@joshtriplett.org>,
Boqun Feng <boqun@kernel.org>,
Uladzislau Rezki <urezki@gmail.com>,
Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
Lai Jiangshan <jiangshanlai@gmail.com>,
Zqiang <qiang.zhang@linux.dev>, Pedro Falcato <pfalcato@suse.de>,
Suren Baghdasaryan <surenb@google.com>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
linux-rt-devel@lists.linux.dev, rcu@vger.kernel.org,
bpf@vger.kernel.org
Subject: Re: [PATCH for-next v3 2/9] mm/slab, slub_kunit: register kprobe to trigger _nolock APIs
Date: Tue, 16 Jun 2026 08:57:44 +0200 [thread overview]
Message-ID: <a64777b0-dfdf-4cc4-bb3a-87f25ca394bf@kernel.org> (raw)
In-Reply-To: <20260615-kfree_rcu_nolock-v3-2-70a54f3775bb@kernel.org>
On 6/15/26 13:05, Harry Yoo (Oracle) wrote:
> Since kmalloc_nolock() always fails in NMI and hardirq contexts on
> PREEMPT_RT, slub_kunit cannot properly test _nolock() APIs.
>
> Register a kprobe pre-handler to invoke kmalloc_nolock() and
> kfree_nolock() in the middle of the slab allocator. However, do not
> register the handler on UP kernels [1].
>
> To attach the pre-handler while s->cpu_sheaves->lock or n->list_lock
> is held, add a wrapper function for lockdep_assert_held() that calls
> a no-op function slab_attach_kprobe_locked() on debug builds. The
> function is optimized away when neither CONFIG_PROVE_LOCKING nor
> CONFIG_DEBUG_VM is selected; in that case the symbol does not exist and
> register_kprobe() fails.
>
> The function calls barrier() to prevent the compiler from optimizing
> away its callsites. Otherwise, the compiler may conclude that the
> function has no side effects and remove its callsites.
>
> Link: https://lore.kernel.org/linux-mm/20260427-nolock-api-fix-v2-0-a6b83a92d9a4@kernel.org [1]
> Signed-off-by: Harry Yoo (Oracle) <harry@kernel.org>
Looks very useful!
Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
> ---
> lib/tests/slub_kunit.c | 82 +++++++++++++++++++++++++++++++++++++++++++-------
> mm/slub.c | 36 ++++++++++++++++------
> 2 files changed, 98 insertions(+), 20 deletions(-)
>
> diff --git a/lib/tests/slub_kunit.c b/lib/tests/slub_kunit.c
> index 11255fc8eb78..01d808cb77fa 100644
> --- a/lib/tests/slub_kunit.c
> +++ b/lib/tests/slub_kunit.c
> @@ -8,6 +8,7 @@
> #include <linux/rcupdate.h>
> #include <linux/delay.h>
> #include <linux/perf_event.h>
> +#include <linux/kprobes.h>
> #include "../mm/slab.h"
>
> static struct kunit_resource resource;
> @@ -292,7 +293,8 @@ static void test_krealloc_redzone_zeroing(struct kunit *test)
> kmem_cache_destroy(s);
> }
>
> -#ifdef CONFIG_PERF_EVENTS
> +#if defined(CONFIG_PERF_EVENTS) || (defined(CONFIG_KPROBES) && defined(CONFIG_SMP))
> +#define SLUB_KUNIT_TEST_KMALLOC_KFREE_NOLOCK
> #define NR_ITERATIONS 1000
> #define NR_OBJECTS 1000
> static void *objects[NR_OBJECTS];
> @@ -302,10 +304,16 @@ struct test_nolock_context {
> int callback_count;
> int alloc_ok;
> int alloc_fail;
> +#ifdef CONFIG_PERF_EVENTS
> struct perf_event *event;
> bool is_perf_type_hw;
> +#endif
> +#ifdef CONFIG_KPROBES
> + struct kprobe kprobe;
> +#endif
> };
>
> +#ifdef CONFIG_PERF_EVENTS
> static struct perf_event_attr hw_attr = {
> .type = PERF_TYPE_HARDWARE,
> .config = PERF_COUNT_HW_CPU_CYCLES,
> @@ -326,13 +334,10 @@ static struct perf_event_attr sw_attr = {
> .sample_freq = 100000,
> };
>
> -static void overflow_handler_test_nolock(struct perf_event *event,
> - struct perf_sample_data *data,
> - struct pt_regs *regs)
> +static void test_nolock(struct test_nolock_context *ctx)
> {
> void *objp;
> gfp_t gfp;
> - struct test_nolock_context *ctx = event->overflow_handler_context;
>
> /* __GFP_ACCOUNT to test kmalloc_nolock() in alloc_slab_obj_exts() */
> gfp = (ctx->callback_count % 2) ? 0 : __GFP_ACCOUNT;
> @@ -347,6 +352,15 @@ static void overflow_handler_test_nolock(struct perf_event *event,
> ctx->callback_count++;
> }
>
> +static void overflow_handler_test_nolock(struct perf_event *event,
> + struct perf_sample_data *data,
> + struct pt_regs *regs)
> +{
> + struct test_nolock_context *ctx = event->overflow_handler_context;
> +
> + test_nolock(ctx);
> +}
> +
> static bool enable_perf_events(struct test_nolock_context *ctx)
> {
> struct perf_event *event;
> @@ -382,17 +396,60 @@ static void disable_perf_events(struct test_nolock_context *ctx)
> perf_event_disable(ctx->event);
> perf_event_release_kernel(ctx->event);
> }
> +#else
> +static bool enable_perf_events(struct test_nolock_context *ctx) { return false; }
> +static void disable_perf_events(struct test_nolock_context *ctx) { }
> +#endif
> +
> +#if defined(CONFIG_KPROBES) && defined(CONFIG_SMP)
> +static int slab_kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs)
> +{
> + struct test_nolock_context *ctx;
> +
> + ctx = container_of(p, struct test_nolock_context, kprobe);
> + test_nolock(ctx);
> + return 0;
> +}
> +
> +static bool register_slab_kprobes(struct test_nolock_context *ctx)
> +{
> + ctx->kprobe.symbol_name = "slab_attach_kprobe_locked";
> + ctx->kprobe.pre_handler = slab_kprobe_pre_handler;
> +
> + if (register_kprobe(&ctx->kprobe))
> + return false;
> + return true;
> +}
> +
> +static void unregister_slab_kprobes(struct test_nolock_context *ctx)
> +{
> + kunit_info(ctx->test, "kprobes: callback_count: %d, alloc_ok: %d, alloc_fail: %d\n",
> + ctx->callback_count, ctx->alloc_ok, ctx->alloc_fail);
> + unregister_kprobe(&ctx->kprobe);
> +}
> +#else
> +static bool register_slab_kprobes(struct test_nolock_context *ctx) { return false; }
> +static void unregister_slab_kprobes(struct test_nolock_context *ctx) { }
> +#endif
>
> static void test_kmalloc_kfree_nolock(struct kunit *test)
> {
> int i, j;
> - struct test_nolock_context ctx = { .test = test };
> + struct test_nolock_context perf_ctx = { .test = test };
> + struct test_nolock_context kprobe_ctx = { .test = test };
> bool alloc_fail = false;
> bool perf_events_enabled;
> + bool slab_kprobes_enabled;
>
> - perf_events_enabled = enable_perf_events(&ctx);
> - if (!perf_events_enabled)
> - kunit_skip(test, "Failed to create perf event");
> + perf_events_enabled = enable_perf_events(&perf_ctx);
> + slab_kprobes_enabled = register_slab_kprobes(&kprobe_ctx);
> +
> + if (!perf_events_enabled && !slab_kprobes_enabled)
> + kunit_skip(test, "Failed to enable perf event and kprobe, skipping");
> + else if (!perf_events_enabled)
> + kunit_info(test, "Failed to create perf event");
> + if (!slab_kprobes_enabled)
> + kunit_info(test, "Failed to register kprobe pre-handler");
>
> for (i = 0; i < NR_ITERATIONS; i++) {
> for (j = 0; j < NR_OBJECTS; j++) {
> @@ -412,7 +469,10 @@ static void test_kmalloc_kfree_nolock(struct kunit *test)
> }
>
> cleanup:
> - disable_perf_events(&ctx);
> + if (perf_events_enabled)
> + disable_perf_events(&perf_ctx);
> + if (slab_kprobes_enabled)
> + unregister_slab_kprobes(&kprobe_ctx);
>
> if (alloc_fail)
> kunit_skip(test, "Allocation failed");
> @@ -444,7 +504,7 @@ static struct kunit_case test_cases[] = {
> KUNIT_CASE(test_kfree_rcu_wq_destroy),
> KUNIT_CASE(test_leak_destroy),
> KUNIT_CASE(test_krealloc_redzone_zeroing),
> -#ifdef CONFIG_PERF_EVENTS
> +#ifdef SLUB_KUNIT_TEST_KMALLOC_KFREE_NOLOCK
> KUNIT_CASE_SLOW(test_kmalloc_kfree_nolock),
> #endif
> {}
> diff --git a/mm/slub.c b/mm/slub.c
> index 813fb863254d..87ca154ccd80 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -908,6 +908,24 @@ static inline unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
> }
> #endif
>
> +/*
> + * A no-op function used to attach kprobe handlers in slub_kunit tests.
> + * The barrier is needed to prevent the compiler from optimizing out callsites.
> + */
> +#if defined(CONFIG_DEBUG_VM) || defined(CONFIG_PROVE_LOCKING)
> +static noinline void slab_attach_kprobe_locked(void)
> +{
> + barrier();
> +}
> +#else
> +static inline void slab_attach_kprobe_locked(void) { }
> +#endif
> +
> +#define slab_lockdep_assert_held(lock) do { \
> + lockdep_assert_held(lock); \
> + slab_attach_kprobe_locked(); \
> +} while (0)
> +
> #ifdef CONFIG_SLUB_DEBUG
>
> /*
> @@ -1665,7 +1683,7 @@ static void add_full(struct kmem_cache *s,
> if (!(s->flags & SLAB_STORE_USER))
> return;
>
> - lockdep_assert_held(&n->list_lock);
> + slab_lockdep_assert_held(&n->list_lock);
> list_add(&slab->slab_list, &n->full);
> }
>
> @@ -1674,7 +1692,7 @@ static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct
> if (!(s->flags & SLAB_STORE_USER))
> return;
>
> - lockdep_assert_held(&n->list_lock);
> + slab_lockdep_assert_held(&n->list_lock);
> list_del(&slab->slab_list);
> }
>
> @@ -2866,7 +2884,7 @@ static unsigned int __sheaf_flush_main_batch(struct kmem_cache *s)
> void *objects[PCS_BATCH_MAX];
> struct slab_sheaf *sheaf;
>
> - lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
> + slab_lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
>
> pcs = this_cpu_ptr(s->cpu_sheaves);
> sheaf = pcs->main;
> @@ -3545,7 +3563,7 @@ __add_partial(struct kmem_cache_node *n, struct slab *slab, enum add_mode mode)
> static inline void add_partial(struct kmem_cache_node *n,
> struct slab *slab, enum add_mode mode)
> {
> - lockdep_assert_held(&n->list_lock);
> + slab_lockdep_assert_held(&n->list_lock);
> __add_partial(n, slab, mode);
> }
>
> @@ -3559,7 +3577,7 @@ static inline void clear_node_partial_state(struct kmem_cache_node *n,
> static inline void remove_partial(struct kmem_cache_node *n,
> struct slab *slab)
> {
> - lockdep_assert_held(&n->list_lock);
> + slab_lockdep_assert_held(&n->list_lock);
> list_del(&slab->slab_list);
> clear_node_partial_state(n, slab);
> }
> @@ -3575,7 +3593,7 @@ static void *alloc_single_from_partial(struct kmem_cache *s,
> {
> void *object;
>
> - lockdep_assert_held(&n->list_lock);
> + slab_lockdep_assert_held(&n->list_lock);
>
> #ifdef CONFIG_SLUB_DEBUG
> if (s->flags & SLAB_CONSISTENCY_CHECKS) {
> @@ -4646,7 +4664,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
> struct node_barn *barn;
> bool allow_spin;
>
> - lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
> + slab_lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
>
> /* Bootstrap or debug cache, back off */
> if (unlikely(!cache_has_sheaves(s))) {
> @@ -5786,7 +5804,7 @@ static void __pcs_install_empty_sheaf(struct kmem_cache *s,
> struct slub_percpu_sheaves *pcs, struct slab_sheaf *empty,
> struct node_barn *barn)
> {
> - lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
> + slab_lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
>
> /* This is what we expect to find if nobody interrupted us. */
> if (likely(!pcs->spare)) {
> @@ -5837,7 +5855,7 @@ __pcs_replace_full_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
> bool put_fail;
>
> restart:
> - lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
> + slab_lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
>
> /* Bootstrap or debug cache, back off */
> if (unlikely(!cache_has_sheaves(s))) {
>
next prev parent reply other threads:[~2026-06-16 6:57 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-15 11:05 [PATCH for-next v3 0/9] mm/slab: introduce kfree_rcu_nolock() and improve slub_kunit coverage Harry Yoo (Oracle)
2026-06-15 11:05 ` [PATCH for-next v3 1/9] slub_kunit: fall back to SW perf events when HW PMU is not available Harry Yoo (Oracle)
2026-06-15 11:14 ` sashiko-bot
2026-06-15 12:58 ` Harry Yoo
2026-06-15 20:01 ` Alexei Starovoitov
2026-06-16 5:23 ` Harry Yoo
2026-06-15 11:05 ` [PATCH for-next v3 2/9] mm/slab, slub_kunit: register kprobe to trigger _nolock APIs Harry Yoo (Oracle)
2026-06-15 11:25 ` sashiko-bot
2026-06-15 20:04 ` Alexei Starovoitov
2026-06-16 6:57 ` Vlastimil Babka (SUSE) [this message]
2026-06-15 11:05 ` [PATCH for-next v3 3/9] mm/slab: handle the !allow_spin case in kfree_rcu_sheaf() Harry Yoo (Oracle)
2026-06-15 11:24 ` sashiko-bot
2026-06-16 7:55 ` Vlastimil Babka (SUSE)
2026-06-16 9:20 ` Vlastimil Babka (SUSE)
2026-06-15 11:05 ` [PATCH for-next v3 4/9] mm/slab: use call_rcu() in unknown context if irqs are enabled Harry Yoo (Oracle)
2026-06-15 11:25 ` sashiko-bot
2026-06-16 9:51 ` Vlastimil Babka (SUSE)
2026-06-15 11:05 ` [PATCH for-next v3 5/9] mm/slab: extend deferred free mechanism to handle rcu sheaves Harry Yoo (Oracle)
2026-06-15 11:24 ` sashiko-bot
2026-06-16 13:03 ` Vlastimil Babka (SUSE)
2026-06-15 11:06 ` [PATCH for-next v3 6/9] mm/slab: allow kfree_rcu_sheaf() on PREEMPT_RT Harry Yoo (Oracle)
2026-06-15 11:19 ` sashiko-bot
2026-06-15 11:06 ` [PATCH for-next v3 7/9] mm/slab: introduce kfree_rcu_nolock() Harry Yoo (Oracle)
2026-06-15 11:22 ` sashiko-bot
2026-06-15 11:06 ` [PATCH for-next v3 8/9] mm/slab: introduce struct kfree_rcu_head and use in kfree_rcu_nolock() Harry Yoo (Oracle)
2026-06-15 11:22 ` sashiko-bot
2026-06-15 11:06 ` [PATCH for-next v3 9/9] slub_kunit: extend the test for kfree_rcu_nolock() Harry Yoo (Oracle)
2026-06-15 11:43 ` [PATCH for-next v3 0/9] mm/slab: introduce kfree_rcu_nolock() and improve slub_kunit coverage Harry Yoo
2026-06-15 20:28 ` Alexei Starovoitov
2026-06-16 4:57 ` Harry Yoo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a64777b0-dfdf-4cc4-bb3a-87f25ca394bf@kernel.org \
--to=vbabka@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=ameryhung@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bigeasy@linutronix.de \
--cc=boqun@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=cl@gentwo.org \
--cc=clrkwllms@kernel.org \
--cc=frederic@kernel.org \
--cc=hao.li@linux.dev \
--cc=harry@kernel.org \
--cc=jiangshanlai@gmail.com \
--cc=joelagnelf@nvidia.com \
--cc=josh@joshtriplett.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-rt-devel@lists.linux.dev \
--cc=mathieu.desnoyers@efficios.com \
--cc=neeraj.upadhyay@kernel.org \
--cc=paulmck@kernel.org \
--cc=pfalcato@suse.de \
--cc=puranjay@kernel.org \
--cc=qiang.zhang@linux.dev \
--cc=rcu@vger.kernel.org \
--cc=rientjes@google.com \
--cc=roman.gushchin@linux.dev \
--cc=rostedt@goodmis.org \
--cc=surenb@google.com \
--cc=urezki@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.