From: Tejun Heo <tj@kernel.org>
To: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Thomas Graf <tgraf@suug.ch>,
Andrew Morton <akpm@linux-foundation.org>,
linux-crypto@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH v3] rhashtable: Bounce deferred worker kick through irq_work
Date: Mon, 20 Apr 2026 20:03:26 -1000 [thread overview]
Message-ID: <20260421060326.2836354-1-tj@kernel.org> (raw)
In-Reply-To: <4ff731fc-3791-4b96-a997-89c3bcd2d69b@kernel.org>
Inserts that push the table past 75% load call schedule_work(&ht->run_work)
to kick off an async resize. If a caller holds a raw spinlock (e.g. an
insecure_elasticity user), schedule_work() under that lock records the
dependency chain caller_lock -> pool->lock -> pi_lock -> rq->__lock.
A cycle forms if any of these locks is acquired in the reverse
direction elsewhere. sched_ext, the only current insecure_elasticity
user, hits this: it holds scx_sched_lock across rhashtable inserts of
sub-schedulers, while scx_bypass() takes rq->__lock -> scx_sched_lock.
Exercising the resize path produces:
Chain exists of:
&pool->lock --> &rq->__lock --> scx_sched_lock
Bounce the kick from the insert paths through irq_work so
schedule_work() runs from hard IRQ context with the caller's lock no
longer held. rht_deferred_worker()'s self-rearm on error stays on
schedule_work(&ht->run_work) - the worker runs in process context with
no caller lock held, and keeping the self-requeue on @run_work lets
cancel_work_sync() in rhashtable_free_and_destroy() drain it.
v3: Keep rht_deferred_worker()'s self-rearm on schedule_work(&run_work).
Routing it through irq_work in v2 broke cancel_work_sync()'s
self-requeue handling - an irq_work queued after irq_work_sync()
returned but while cancel_work_sync() was still waiting could fire
post-teardown.
v2: Bounce unconditionally instead of gating on insecure_elasticity,
as suggested by Herbert.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
Herbert, dropped your Ack on v2. v2 routed rht_deferred_worker()'s
self-rearm through irq_work, which would race with
cancel_work_sync() in rhashtable_free_and_destroy(). v3 keeps only
the insert-path kicks on irq_work; the worker's self-rearm stays on
schedule_work(&ht->run_work).
include/linux/rhashtable-types.h | 3 +++
include/linux/rhashtable.h | 3 ++-
lib/rhashtable.c | 31 ++++++++++++++++++++++++++++---
3 files changed, 33 insertions(+), 4 deletions(-)
diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h
index 72082428d6c6..fc2f596a6df1 100644
--- a/include/linux/rhashtable-types.h
+++ b/include/linux/rhashtable-types.h
@@ -12,6 +12,7 @@
#include <linux/alloc_tag.h>
#include <linux/atomic.h>
#include <linux/compiler.h>
+#include <linux/irq_work_types.h>
#include <linux/mutex.h>
#include <linux/workqueue_types.h>
@@ -77,6 +78,7 @@ struct rhashtable_params {
* @p: Configuration parameters
* @rhlist: True if this is an rhltable
* @run_work: Deferred worker to expand/shrink asynchronously
+ * @run_irq_work: Bounces the @run_work kick through hard IRQ context.
* @mutex: Mutex to protect current/future table swapping
* @lock: Spin lock to protect walker list
* @nelems: Number of elements in table
@@ -88,6 +90,7 @@ struct rhashtable {
struct rhashtable_params p;
bool rhlist;
struct work_struct run_work;
+ struct irq_work run_irq_work;
struct mutex mutex;
spinlock_t lock;
atomic_t nelems;
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 7def3f0f556b..ef5230cece36 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -20,6 +20,7 @@
#include <linux/err.h>
#include <linux/errno.h>
+#include <linux/irq_work.h>
#include <linux/jhash.h>
#include <linux/list_nulls.h>
#include <linux/workqueue.h>
@@ -847,7 +848,7 @@ static __always_inline void *__rhashtable_insert_fast(
rht_assign_unlock(tbl, bkt, obj, flags);
if (rht_grow_above_75(ht, tbl))
- schedule_work(&ht->run_work);
+ irq_work_queue(&ht->run_irq_work);
data = NULL;
out:
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index fb2b7bc137ba..7a67ef5b67b6 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -441,10 +441,33 @@ static void rht_deferred_worker(struct work_struct *work)
mutex_unlock(&ht->mutex);
+ /*
+ * Re-arm via @run_work, not @run_irq_work.
+ * rhashtable_free_and_destroy() drains async work as irq_work_sync()
+ * followed by cancel_work_sync(). If this site queued irq_work while
+ * cancel_work_sync() was waiting for us, irq_work_sync() would already
+ * have returned and the stale irq_work could fire post-teardown.
+ * cancel_work_sync() natively handles self-requeue on @run_work.
+ */
if (err)
schedule_work(&ht->run_work);
}
+/*
+ * Insert-path callers can run under a raw spinlock (e.g. an insecure_elasticity
+ * user). Calling schedule_work() under that lock records caller_lock ->
+ * pool->lock -> pi_lock -> rq->__lock, closing a locking cycle if any of
+ * these is acquired in the reverse direction elsewhere. Bounce through
+ * irq_work so the schedule_work() runs with the caller's lock no longer held.
+ */
+static void rht_deferred_irq_work(struct irq_work *irq_work)
+{
+ struct rhashtable *ht = container_of(irq_work, struct rhashtable,
+ run_irq_work);
+
+ schedule_work(&ht->run_work);
+}
+
static int rhashtable_insert_rehash(struct rhashtable *ht,
struct bucket_table *tbl)
{
@@ -477,7 +500,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht,
if (err == -EEXIST)
err = 0;
} else
- schedule_work(&ht->run_work);
+ irq_work_queue(&ht->run_irq_work);
return err;
@@ -488,7 +511,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht,
/* Schedule async rehash to retry allocation in process context. */
if (err == -ENOMEM)
- schedule_work(&ht->run_work);
+ irq_work_queue(&ht->run_irq_work);
return err;
}
@@ -630,7 +653,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
rht_unlock(tbl, bkt, flags);
if (inserted && rht_grow_above_75(ht, tbl))
- schedule_work(&ht->run_work);
+ irq_work_queue(&ht->run_irq_work);
}
} while (!IS_ERR_OR_NULL(new_tbl));
@@ -1085,6 +1108,7 @@ int rhashtable_init_noprof(struct rhashtable *ht,
RCU_INIT_POINTER(ht->tbl, tbl);
INIT_WORK(&ht->run_work, rht_deferred_worker);
+ init_irq_work(&ht->run_irq_work, rht_deferred_irq_work);
return 0;
}
@@ -1150,6 +1174,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
struct bucket_table *tbl, *next_tbl;
unsigned int i;
+ irq_work_sync(&ht->run_irq_work);
cancel_work_sync(&ht->run_work);
mutex_lock(&ht->mutex);
--
2.53.0
next prev parent reply other threads:[~2026-04-21 6:03 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-19 18:19 [RFC PATCH] rhashtable: Bounce deferred worker kick through irq_work when insecure_elasticity is set Tejun Heo
2026-04-20 8:44 ` Herbert Xu
2026-04-20 17:02 ` Tejun Heo
2026-04-20 18:12 ` [PATCH v2] rhashtable: Bounce deferred worker kick through irq_work Tejun Heo
2026-04-21 3:02 ` Herbert Xu
2026-04-21 6:03 ` Tejun Heo [this message]
2026-04-21 6:06 ` [PATCH v3] " Herbert Xu
2026-04-21 6:14 ` Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260421060326.2836354-1-tj@kernel.org \
--to=tj@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=herbert@gondor.apana.org.au \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=tgraf@suug.ch \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox