public inbox for linux-mm@kvack.org
 help / color / mirror / Atom feed
* [PATCH] rhashtable: give each instance its own lockdep class
@ 2026-04-27 11:09 Christian Brauner
  2026-04-27 11:29 ` Herbert Xu
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Christian Brauner @ 2026-04-27 11:09 UTC (permalink / raw)
  To: Thomas Graf, Herbert Xu, Andrew Morton, Vlastimil Babka,
	Lorenzo Stoakes, David Hildenbrand, Suren Baghdasaryan,
	Michal Hocko
  Cc: linux-kernel, linux-mm, linux-fsdevel,
	syzbot+5af806780f38a5fe691f, Christian Brauner

syzbot reported a possible circular locking dependency between
&ht->mutex and fs_reclaim:

  CPU0 (kswapd0)                    CPU1 (kworker)
  --------------                    --------------
  fs_reclaim                        ht->mutex
    shmem_evict_inode                 rhashtable_rehash_alloc
      simple_xattrs_free                bucket_table_alloc(GFP_KERNEL)
        rhashtable_free_and_destroy       __kvmalloc_node
          mutex_lock(&ht->mutex)            might_alloc -> fs_reclaim

The two halves of the splat refer to two different events on
&ht->mutex.

The kswapd0 path is unambiguous: shmem_evict_inode at mm/shmem.c:1429
calls simple_xattrs_free(), which calls rhashtable_free_and_destroy()
on the per-inode simple_xattrs rhashtable being torn down with the
inode.

The previously-recorded ht->mutex -> fs_reclaim edge comes from
rht_deferred_worker -> rhashtable_rehash_alloc ->
bucket_table_alloc(GFP_KERNEL) -> __kvmalloc_node ->
might_alloc -> fs_reclaim. That stack stops at generic library code:
there is no subsystem-specific frame above rht_deferred_worker, so
the splat does not identify which rhashtable's worker recorded the
edge -- only that some rhashtable in the system did.

Whether or not that recording happened on the same simple_xattrs ht
that is now being destroyed, the predicted deadlock cannot occur:
rhashtable_free_and_destroy() does cancel_work_sync(&ht->run_work)
before taking ht->mutex, so the deferred worker cannot be running on
the instance being torn down. If the recording was on a different
rhashtable instance, the two ht->mutex acquisitions are on distinct
mutex objects and cannot deadlock either.

Lockdep flags a cycle regardless because mutex_init(&ht->mutex) lives
on a single source line in rhashtable_init_noprof(), so every
ht->mutex in the kernel shares one static lockdep class. Lockdep
matches by class, not by instance, and collapses all of these into
one node.

Lift the lockdep key out of rhashtable_init_noprof() and into the
caller. The user-visible rhashtable_init_noprof() /
rhltable_init_noprof() identifiers become macros that declare a
per-call-site static lock_class_key.

Reported-by: syzbot+5af806780f38a5fe691f@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/69e798fe.050a0220.24bfd3.0032.GAE@google.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/rhashtable-types.h | 22 ++++++++++++++++++----
 lib/rhashtable.c                 | 17 ++++++++++-------
 2 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h
index 015c8298bebc..841021c67d3d 100644
--- a/include/linux/rhashtable-types.h
+++ b/include/linux/rhashtable-types.h
@@ -131,12 +131,26 @@ struct rhashtable_iter {
 	bool end_of_table;
 };
 
-int rhashtable_init_noprof(struct rhashtable *ht,
-		    const struct rhashtable_params *params);
+int __rhashtable_init_noprof(struct rhashtable *ht,
+		    const struct rhashtable_params *params,
+		    struct lock_class_key *key);
+#define rhashtable_init_noprof(ht, params)				\
+({									\
+	static struct lock_class_key __key;				\
+									\
+	__rhashtable_init_noprof(ht, params, &__key);			\
+})
 #define rhashtable_init(...)	alloc_hooks(rhashtable_init_noprof(__VA_ARGS__))
 
-int rhltable_init_noprof(struct rhltable *hlt,
-		  const struct rhashtable_params *params);
+int __rhltable_init_noprof(struct rhltable *hlt,
+		  const struct rhashtable_params *params,
+		  struct lock_class_key *key);
+#define rhltable_init_noprof(hlt, params)				\
+({									\
+	static struct lock_class_key __key;				\
+									\
+	__rhltable_init_noprof(hlt, params, &__key);			\
+})
 #define rhltable_init(...)	alloc_hooks(rhltable_init_noprof(__VA_ARGS__))
 
 #endif /* _LINUX_RHASHTABLE_TYPES_H */
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 6074ed5f66f3..fb13749d824a 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -1025,8 +1025,9 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
  *	.obj_hashfn = my_hash_fn,
  * };
  */
-int rhashtable_init_noprof(struct rhashtable *ht,
-		    const struct rhashtable_params *params)
+int __rhashtable_init_noprof(struct rhashtable *ht,
+		    const struct rhashtable_params *params,
+		    struct lock_class_key *key)
 {
 	struct bucket_table *tbl;
 	size_t size;
@@ -1036,7 +1037,7 @@ int rhashtable_init_noprof(struct rhashtable *ht,
 		return -EINVAL;
 
 	memset(ht, 0, sizeof(*ht));
-	mutex_init(&ht->mutex);
+	mutex_init_with_key(&ht->mutex, key);
 	spin_lock_init(&ht->lock);
 	memcpy(&ht->p, params, sizeof(*params));
 
@@ -1087,7 +1088,7 @@ int rhashtable_init_noprof(struct rhashtable *ht,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(rhashtable_init_noprof);
+EXPORT_SYMBOL_GPL(__rhashtable_init_noprof);
 
 /**
  * rhltable_init - initialize a new hash list table
@@ -1098,15 +1099,17 @@ EXPORT_SYMBOL_GPL(rhashtable_init_noprof);
  *
  * See documentation for rhashtable_init.
  */
-int rhltable_init_noprof(struct rhltable *hlt, const struct rhashtable_params *params)
+int __rhltable_init_noprof(struct rhltable *hlt,
+			   const struct rhashtable_params *params,
+			   struct lock_class_key *key)
 {
 	int err;
 
-	err = rhashtable_init_noprof(&hlt->ht, params);
+	err = __rhashtable_init_noprof(&hlt->ht, params, key);
 	hlt->ht.rhlist = true;
 	return err;
 }
-EXPORT_SYMBOL_GPL(rhltable_init_noprof);
+EXPORT_SYMBOL_GPL(__rhltable_init_noprof);
 
 static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
 				void (*free_fn)(void *ptr, void *arg),

---
base-commit: 6596a02b207886e9e00bb0161c7fd59fea53c081
change-id: 20260427-work-rhashtable-lockdep-cb0356367073



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] rhashtable: give each instance its own lockdep class
  2026-04-27 11:09 [PATCH] rhashtable: give each instance its own lockdep class Christian Brauner
@ 2026-04-27 11:29 ` Herbert Xu
  2026-04-27 12:21   ` Christian Brauner
  2026-04-27 11:31 ` Andrew Morton
  2026-04-27 13:01 ` Michal Hocko
  2 siblings, 1 reply; 7+ messages in thread
From: Herbert Xu @ 2026-04-27 11:29 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Thomas Graf, Andrew Morton, Vlastimil Babka, Lorenzo Stoakes,
	David Hildenbrand, Suren Baghdasaryan, Michal Hocko, linux-kernel,
	linux-mm, linux-fsdevel, syzbot+5af806780f38a5fe691f,
	Mikhail Gavrilov, Christian Brauner, Jan Kara, Darrick J . Wong,
	linux-crypto

On Mon, Apr 27, 2026 at 01:09:57PM +0200, Christian Brauner wrote:
> syzbot reported a possible circular locking dependency between
> &ht->mutex and fs_reclaim:
> 
>   CPU0 (kswapd0)                    CPU1 (kworker)
>   --------------                    --------------
>   fs_reclaim                        ht->mutex
>     shmem_evict_inode                 rhashtable_rehash_alloc
>       simple_xattrs_free                bucket_table_alloc(GFP_KERNEL)
>         rhashtable_free_and_destroy       __kvmalloc_node
>           mutex_lock(&ht->mutex)            might_alloc -> fs_reclaim
> 
> The two halves of the splat refer to two different events on
> &ht->mutex.
> 
> The kswapd0 path is unambiguous: shmem_evict_inode at mm/shmem.c:1429
> calls simple_xattrs_free(), which calls rhashtable_free_and_destroy()
> on the per-inode simple_xattrs rhashtable being torn down with the
> inode.
> 
> The previously-recorded ht->mutex -> fs_reclaim edge comes from
> rht_deferred_worker -> rhashtable_rehash_alloc ->
> bucket_table_alloc(GFP_KERNEL) -> __kvmalloc_node ->
> might_alloc -> fs_reclaim. That stack stops at generic library code:
> there is no subsystem-specific frame above rht_deferred_worker, so
> the splat does not identify which rhashtable's worker recorded the
> edge -- only that some rhashtable in the system did.
> 
> Whether or not that recording happened on the same simple_xattrs ht
> that is now being destroyed, the predicted deadlock cannot occur:
> rhashtable_free_and_destroy() does cancel_work_sync(&ht->run_work)
> before taking ht->mutex, so the deferred worker cannot be running on
> the instance being torn down. If the recording was on a different
> rhashtable instance, the two ht->mutex acquisitions are on distinct
> mutex objects and cannot deadlock either.
> 
> Lockdep flags a cycle regardless because mutex_init(&ht->mutex) lives
> on a single source line in rhashtable_init_noprof(), so every
> ht->mutex in the kernel shares one static lockdep class. Lockdep
> matches by class, not by instance, and collapses all of these into
> one node.
> 
> Lift the lockdep key out of rhashtable_init_noprof() and into the
> caller. The user-visible rhashtable_init_noprof() /
> rhltable_init_noprof() identifiers become macros that declare a
> per-call-site static lock_class_key.
> 
> Reported-by: syzbot+5af806780f38a5fe691f@syzkaller.appspotmail.com
> Closes: https://lore.kernel.org/69e798fe.050a0220.24bfd3.0032.GAE@google.com
> Signed-off-by: Christian Brauner <brauner@kernel.org>
> ---
>  include/linux/rhashtable-types.h | 22 ++++++++++++++++++----
>  lib/rhashtable.c                 | 17 ++++++++++-------
>  2 files changed, 28 insertions(+), 11 deletions(-)

Thanks for the patch.

But could you please try this patch and see if it also fixes
your problem?

https://patchwork.kernel.org/project/linux-crypto/patch/20260422213349.1345098-2-mikhail.v.gavrilov@gmail.com/

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] rhashtable: give each instance its own lockdep class
  2026-04-27 11:09 [PATCH] rhashtable: give each instance its own lockdep class Christian Brauner
  2026-04-27 11:29 ` Herbert Xu
@ 2026-04-27 11:31 ` Andrew Morton
  2026-04-27 11:34   ` Herbert Xu
  2026-04-27 13:01 ` Michal Hocko
  2 siblings, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2026-04-27 11:31 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Thomas Graf, Herbert Xu, Vlastimil Babka, Lorenzo Stoakes,
	David Hildenbrand, Suren Baghdasaryan, Michal Hocko, linux-kernel,
	linux-mm, linux-fsdevel, syzbot+5af806780f38a5fe691f,
	Uladzislau Rezki (Sony)

On Mon, 27 Apr 2026 13:09:57 +0200 Christian Brauner <brauner@kernel.org> wrote:

> syzbot reported a possible circular locking dependency between
> &ht->mutex and fs_reclaim:
> 
>   CPU0 (kswapd0)                    CPU1 (kworker)
>   --------------                    --------------
>   fs_reclaim                        ht->mutex
>     shmem_evict_inode                 rhashtable_rehash_alloc
>       simple_xattrs_free                bucket_table_alloc(GFP_KERNEL)
>         rhashtable_free_and_destroy       __kvmalloc_node
>           mutex_lock(&ht->mutex)            might_alloc -> fs_reclaim
> 
> The two halves of the splat refer to two different events on
> &ht->mutex.
> 
> The kswapd0 path is unambiguous: shmem_evict_inode at mm/shmem.c:1429
> calls simple_xattrs_free(), which calls rhashtable_free_and_destroy()
> on the per-inode simple_xattrs rhashtable being torn down with the
> inode.
> 
> The previously-recorded ht->mutex -> fs_reclaim edge comes from
> rht_deferred_worker -> rhashtable_rehash_alloc ->
> bucket_table_alloc(GFP_KERNEL) -> __kvmalloc_node ->
> might_alloc -> fs_reclaim. That stack stops at generic library code:
> there is no subsystem-specific frame above rht_deferred_worker, so
> the splat does not identify which rhashtable's worker recorded the
> edge -- only that some rhashtable in the system did.
> 
> Whether or not that recording happened on the same simple_xattrs ht
> that is now being destroyed, the predicted deadlock cannot occur:
> rhashtable_free_and_destroy() does cancel_work_sync(&ht->run_work)
> before taking ht->mutex, so the deferred worker cannot be running on
> the instance being torn down. If the recording was on a different
> rhashtable instance, the two ht->mutex acquisitions are on distinct
> mutex objects and cannot deadlock either.
> 
> Lockdep flags a cycle regardless because mutex_init(&ht->mutex) lives
> on a single source line in rhashtable_init_noprof(), so every
> ht->mutex in the kernel shares one static lockdep class. Lockdep
> matches by class, not by instance, and collapses all of these into
> one node.
> 
> Lift the lockdep key out of rhashtable_init_noprof() and into the
> caller. The user-visible rhashtable_init_noprof() /
> rhltable_init_noprof() identifiers become macros that declare a
> per-call-site static lock_class_key.
> 
> Reported-by: syzbot+5af806780f38a5fe691f@syzkaller.appspotmail.com
> Closes: https://lore.kernel.org/69e798fe.050a0220.24bfd3.0032.GAE@google.com

Thanks.

Is this related to https://lore.kernel.org/202604211323.fac1b29e-lkp@intel.com

Are we able to identify a Fixes: target?

In the above-linked thread Herbert had
Fixes: c6307674ed82 ("mm: kvmalloc: add non-blocking support for vmalloc")




^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] rhashtable: give each instance its own lockdep class
  2026-04-27 11:31 ` Andrew Morton
@ 2026-04-27 11:34   ` Herbert Xu
  0 siblings, 0 replies; 7+ messages in thread
From: Herbert Xu @ 2026-04-27 11:34 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Christian Brauner, Thomas Graf, Vlastimil Babka, Lorenzo Stoakes,
	David Hildenbrand, Suren Baghdasaryan, Michal Hocko, linux-kernel,
	linux-mm, linux-fsdevel, syzbot+5af806780f38a5fe691f,
	Uladzislau Rezki (Sony)

On Mon, Apr 27, 2026 at 04:31:49AM -0700, Andrew Morton wrote:
>
> Is this related to https://lore.kernel.org/202604211323.fac1b29e-lkp@intel.com
> 
> Are we able to identify a Fixes: target?
> 
> In the above-linked thread Herbert had
> Fixes: c6307674ed82 ("mm: kvmalloc: add non-blocking support for vmalloc")

I think they're two separate issues.  The addition of non-blocking
support triggered the vfree while atomic issue, while the lockdep
splat is simply due to the use of rhashtable in fs/core.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] rhashtable: give each instance its own lockdep class
  2026-04-27 11:29 ` Herbert Xu
@ 2026-04-27 12:21   ` Christian Brauner
  2026-04-27 12:51     ` Mikhail Gavrilov
  0 siblings, 1 reply; 7+ messages in thread
From: Christian Brauner @ 2026-04-27 12:21 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Thomas Graf, Andrew Morton, Vlastimil Babka, Lorenzo Stoakes,
	David Hildenbrand, Suren Baghdasaryan, Michal Hocko, linux-kernel,
	linux-mm, linux-fsdevel, syzbot+5af806780f38a5fe691f,
	Mikhail Gavrilov, Jan Kara, Darrick J . Wong, linux-crypto

On Mon, Apr 27, 2026 at 07:29:58PM +0800, Herbert Xu wrote:
> On Mon, Apr 27, 2026 at 01:09:57PM +0200, Christian Brauner wrote:
> > syzbot reported a possible circular locking dependency between
> > &ht->mutex and fs_reclaim:
> > 
> >   CPU0 (kswapd0)                    CPU1 (kworker)
> >   --------------                    --------------
> >   fs_reclaim                        ht->mutex
> >     shmem_evict_inode                 rhashtable_rehash_alloc
> >       simple_xattrs_free                bucket_table_alloc(GFP_KERNEL)
> >         rhashtable_free_and_destroy       __kvmalloc_node
> >           mutex_lock(&ht->mutex)            might_alloc -> fs_reclaim
> > 
> > The two halves of the splat refer to two different events on
> > &ht->mutex.
> > 
> > The kswapd0 path is unambiguous: shmem_evict_inode at mm/shmem.c:1429
> > calls simple_xattrs_free(), which calls rhashtable_free_and_destroy()
> > on the per-inode simple_xattrs rhashtable being torn down with the
> > inode.
> > 
> > The previously-recorded ht->mutex -> fs_reclaim edge comes from
> > rht_deferred_worker -> rhashtable_rehash_alloc ->
> > bucket_table_alloc(GFP_KERNEL) -> __kvmalloc_node ->
> > might_alloc -> fs_reclaim. That stack stops at generic library code:
> > there is no subsystem-specific frame above rht_deferred_worker, so
> > the splat does not identify which rhashtable's worker recorded the
> > edge -- only that some rhashtable in the system did.
> > 
> > Whether or not that recording happened on the same simple_xattrs ht
> > that is now being destroyed, the predicted deadlock cannot occur:
> > rhashtable_free_and_destroy() does cancel_work_sync(&ht->run_work)
> > before taking ht->mutex, so the deferred worker cannot be running on
> > the instance being torn down. If the recording was on a different
> > rhashtable instance, the two ht->mutex acquisitions are on distinct
> > mutex objects and cannot deadlock either.
> > 
> > Lockdep flags a cycle regardless because mutex_init(&ht->mutex) lives
> > on a single source line in rhashtable_init_noprof(), so every
> > ht->mutex in the kernel shares one static lockdep class. Lockdep
> > matches by class, not by instance, and collapses all of these into
> > one node.
> > 
> > Lift the lockdep key out of rhashtable_init_noprof() and into the
> > caller. The user-visible rhashtable_init_noprof() /
> > rhltable_init_noprof() identifiers become macros that declare a
> > per-call-site static lock_class_key.
> > 
> > Reported-by: syzbot+5af806780f38a5fe691f@syzkaller.appspotmail.com
> > Closes: https://lore.kernel.org/69e798fe.050a0220.24bfd3.0032.GAE@google.com
> > Signed-off-by: Christian Brauner <brauner@kernel.org>
> > ---
> >  include/linux/rhashtable-types.h | 22 ++++++++++++++++++----
> >  lib/rhashtable.c                 | 17 ++++++++++-------
> >  2 files changed, 28 insertions(+), 11 deletions(-)
> 
> Thanks for the patch.
> 
> But could you please try this patch and see if it also fixes
> your problem?
> 
> https://patchwork.kernel.org/project/linux-crypto/patch/20260422213349.1345098-2-mikhail.v.gavrilov@gmail.com/

Possibly, I don't have a way to easily reproduce this though.
Imho, the right thing would be to have both: actual useful keyed lockdep
annotation and - if safe - dropping the mutex.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] rhashtable: give each instance its own lockdep class
  2026-04-27 12:21   ` Christian Brauner
@ 2026-04-27 12:51     ` Mikhail Gavrilov
  0 siblings, 0 replies; 7+ messages in thread
From: Mikhail Gavrilov @ 2026-04-27 12:51 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Herbert Xu, Thomas Graf, Andrew Morton, Vlastimil Babka,
	Lorenzo Stoakes, David Hildenbrand, Suren Baghdasaryan,
	Michal Hocko, linux-kernel, linux-mm, linux-fsdevel,
	syzbot+5af806780f38a5fe691f, Jan Kara, Darrick J . Wong,
	linux-crypto

On Mon, Apr 27, 2026 at 5:21 PM Christian Brauner <brauner@kernel.org> wrote:
> >
> > But could you please try this patch and see if it also fixes
> > your problem?
> >
> > https://patchwork.kernel.org/project/linux-crypto/patch/20260422213349.1345098-2-mikhail.v.gavrilov@gmail.com/
>
> Possibly, I don't have a way to easily reproduce this though.
> Imho, the right thing would be to have both: actual useful keyed lockdep
> annotation and - if safe - dropping the mutex.

Agreed -- the two changes are orthogonal: yours fixes the lockdep
class collapse, mine removes a mutex acquisition that has been
unnecessary since cancel_work_sync() was added in front of it.

On the safety of dropping the mutex: rhashtable_free_and_destroy()'s
documented contract already requires the caller to ensure no
concurrent operations, and cancel_work_sync(&ht->run_work) at the
top of the function quiesces the only in-library writer (the rehash
worker). After that, the tables are owned exclusively by this
function. The walks I switch from rht_dereference() to
rcu_dereference_raw() were already correct; the lockdep annotation
was just asking for a lock that no longer needs to be held.

I checked all in-tree callers of rhashtable_free_and_destroy() (and
of rhashtable_destroy(), which inlines the same teardown) for
correctness against the contract; none rely on the mutex for
serialization with anything other than the rehash worker.

I have no objection to either patch order. Happy to rebase mine on
top of yours, or for it to go in independently via Herbert's tree
once yours is merged.

-- 
Thanks,
Mikhail


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] rhashtable: give each instance its own lockdep class
  2026-04-27 11:09 [PATCH] rhashtable: give each instance its own lockdep class Christian Brauner
  2026-04-27 11:29 ` Herbert Xu
  2026-04-27 11:31 ` Andrew Morton
@ 2026-04-27 13:01 ` Michal Hocko
  2 siblings, 0 replies; 7+ messages in thread
From: Michal Hocko @ 2026-04-27 13:01 UTC (permalink / raw)
  To: Christian Brauner
  Cc: Thomas Graf, Herbert Xu, Andrew Morton, Vlastimil Babka,
	Lorenzo Stoakes, David Hildenbrand, Suren Baghdasaryan,
	linux-kernel, linux-mm, linux-fsdevel,
	syzbot+5af806780f38a5fe691f

On Mon 27-04-26 13:09:57, Christian Brauner wrote:
> syzbot reported a possible circular locking dependency between
> &ht->mutex and fs_reclaim:
> 
>   CPU0 (kswapd0)                    CPU1 (kworker)
>   --------------                    --------------
>   fs_reclaim                        ht->mutex
>     shmem_evict_inode                 rhashtable_rehash_alloc
>       simple_xattrs_free                bucket_table_alloc(GFP_KERNEL)
>         rhashtable_free_and_destroy       __kvmalloc_node
>           mutex_lock(&ht->mutex)            might_alloc -> fs_reclaim
> 
> The two halves of the splat refer to two different events on
> &ht->mutex.
> 
> The kswapd0 path is unambiguous: shmem_evict_inode at mm/shmem.c:1429
> calls simple_xattrs_free(), which calls rhashtable_free_and_destroy()
> on the per-inode simple_xattrs rhashtable being torn down with the
> inode.
> 
> The previously-recorded ht->mutex -> fs_reclaim edge comes from
> rht_deferred_worker -> rhashtable_rehash_alloc ->
> bucket_table_alloc(GFP_KERNEL) -> __kvmalloc_node ->
> might_alloc -> fs_reclaim. That stack stops at generic library code:
> there is no subsystem-specific frame above rht_deferred_worker, so
> the splat does not identify which rhashtable's worker recorded the
> edge -- only that some rhashtable in the system did.
> 
> Whether or not that recording happened on the same simple_xattrs ht
> that is now being destroyed, the predicted deadlock cannot occur:
> rhashtable_free_and_destroy() does cancel_work_sync(&ht->run_work)
> before taking ht->mutex, so the deferred worker cannot be running on
> the instance being torn down. If the recording was on a different
> rhashtable instance, the two ht->mutex acquisitions are on distinct
> mutex objects and cannot deadlock either.
> 
> Lockdep flags a cycle regardless because mutex_init(&ht->mutex) lives
> on a single source line in rhashtable_init_noprof(), so every
> ht->mutex in the kernel shares one static lockdep class. Lockdep
> matches by class, not by instance, and collapses all of these into
> one node.
> 
> Lift the lockdep key out of rhashtable_init_noprof() and into the
> caller. The user-visible rhashtable_init_noprof() /
> rhltable_init_noprof() identifiers become macros that declare a
> per-call-site static lock_class_key.
> 
> Reported-by: syzbot+5af806780f38a5fe691f@syzkaller.appspotmail.com
> Closes: https://lore.kernel.org/69e798fe.050a0220.24bfd3.0032.GAE@google.com
> Signed-off-by: Christian Brauner <brauner@kernel.org>

Acked-by: Michal Hocko <mhocko@suse.com>
Thanks!

> ---
>  include/linux/rhashtable-types.h | 22 ++++++++++++++++++----
>  lib/rhashtable.c                 | 17 ++++++++++-------
>  2 files changed, 28 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h
> index 015c8298bebc..841021c67d3d 100644
> --- a/include/linux/rhashtable-types.h
> +++ b/include/linux/rhashtable-types.h
> @@ -131,12 +131,26 @@ struct rhashtable_iter {
>  	bool end_of_table;
>  };
>  
> -int rhashtable_init_noprof(struct rhashtable *ht,
> -		    const struct rhashtable_params *params);
> +int __rhashtable_init_noprof(struct rhashtable *ht,
> +		    const struct rhashtable_params *params,
> +		    struct lock_class_key *key);
> +#define rhashtable_init_noprof(ht, params)				\
> +({									\
> +	static struct lock_class_key __key;				\
> +									\
> +	__rhashtable_init_noprof(ht, params, &__key);			\
> +})
>  #define rhashtable_init(...)	alloc_hooks(rhashtable_init_noprof(__VA_ARGS__))
>  
> -int rhltable_init_noprof(struct rhltable *hlt,
> -		  const struct rhashtable_params *params);
> +int __rhltable_init_noprof(struct rhltable *hlt,
> +		  const struct rhashtable_params *params,
> +		  struct lock_class_key *key);
> +#define rhltable_init_noprof(hlt, params)				\
> +({									\
> +	static struct lock_class_key __key;				\
> +									\
> +	__rhltable_init_noprof(hlt, params, &__key);			\
> +})
>  #define rhltable_init(...)	alloc_hooks(rhltable_init_noprof(__VA_ARGS__))
>  
>  #endif /* _LINUX_RHASHTABLE_TYPES_H */
> diff --git a/lib/rhashtable.c b/lib/rhashtable.c
> index 6074ed5f66f3..fb13749d824a 100644
> --- a/lib/rhashtable.c
> +++ b/lib/rhashtable.c
> @@ -1025,8 +1025,9 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
>   *	.obj_hashfn = my_hash_fn,
>   * };
>   */
> -int rhashtable_init_noprof(struct rhashtable *ht,
> -		    const struct rhashtable_params *params)
> +int __rhashtable_init_noprof(struct rhashtable *ht,
> +		    const struct rhashtable_params *params,
> +		    struct lock_class_key *key)
>  {
>  	struct bucket_table *tbl;
>  	size_t size;
> @@ -1036,7 +1037,7 @@ int rhashtable_init_noprof(struct rhashtable *ht,
>  		return -EINVAL;
>  
>  	memset(ht, 0, sizeof(*ht));
> -	mutex_init(&ht->mutex);
> +	mutex_init_with_key(&ht->mutex, key);
>  	spin_lock_init(&ht->lock);
>  	memcpy(&ht->p, params, sizeof(*params));
>  
> @@ -1087,7 +1088,7 @@ int rhashtable_init_noprof(struct rhashtable *ht,
>  
>  	return 0;
>  }
> -EXPORT_SYMBOL_GPL(rhashtable_init_noprof);
> +EXPORT_SYMBOL_GPL(__rhashtable_init_noprof);
>  
>  /**
>   * rhltable_init - initialize a new hash list table
> @@ -1098,15 +1099,17 @@ EXPORT_SYMBOL_GPL(rhashtable_init_noprof);
>   *
>   * See documentation for rhashtable_init.
>   */
> -int rhltable_init_noprof(struct rhltable *hlt, const struct rhashtable_params *params)
> +int __rhltable_init_noprof(struct rhltable *hlt,
> +			   const struct rhashtable_params *params,
> +			   struct lock_class_key *key)
>  {
>  	int err;
>  
> -	err = rhashtable_init_noprof(&hlt->ht, params);
> +	err = __rhashtable_init_noprof(&hlt->ht, params, key);
>  	hlt->ht.rhlist = true;
>  	return err;
>  }
> -EXPORT_SYMBOL_GPL(rhltable_init_noprof);
> +EXPORT_SYMBOL_GPL(__rhltable_init_noprof);
>  
>  static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
>  				void (*free_fn)(void *ptr, void *arg),
> 
> ---
> base-commit: 6596a02b207886e9e00bb0161c7fd59fea53c081
> change-id: 20260427-work-rhashtable-lockdep-cb0356367073

-- 
Michal Hocko
SUSE Labs


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2026-04-27 13:01 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-27 11:09 [PATCH] rhashtable: give each instance its own lockdep class Christian Brauner
2026-04-27 11:29 ` Herbert Xu
2026-04-27 12:21   ` Christian Brauner
2026-04-27 12:51     ` Mikhail Gavrilov
2026-04-27 11:31 ` Andrew Morton
2026-04-27 11:34   ` Herbert Xu
2026-04-27 13:01 ` Michal Hocko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox