* [PATCH -next v4 0/3] rcu/nocb: Cleanup patches for next merge window
@ 2026-01-23 15:19 Joel Fernandes
2026-01-23 15:19 ` [PATCH -next v4 1/3] rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path Joel Fernandes
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Joel Fernandes @ 2026-01-23 15:19 UTC (permalink / raw)
To: linux-kernel
Cc: Paul E . McKenney, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang, Joel Fernandes
Changes from v3 to v4:
- Only updated the changelog since the last version and added tags.
- Patch 2 ("Remove dead callback overload handling") commit message updated to
improve the argument that the code is dead, per Paul's feedback.
These are a few nocb related cleanup patches for the next merge window.
nocb rcutorture scenarios passed.
The git tree with all patches can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/jfern/linux.git (tag: nocb-cleanup-v4-20260123)
Link to v3: https://lore.kernel.org/all/20260119231223.989409-1-joelagnelf@nvidia.com/
Link to v2: https://lore.kernel.org/all/20260114173154.1701056-1-joelagnelf@nvidia.com/
Link to v1: https://lore.kernel.org/all/20260101163417.1065705-1-joelagnelf@nvidia.com/
Joel Fernandes (3):
rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path
rcu/nocb: Remove dead callback overload handling
rcu/nocb: Extract nocb_defer_wakeup_cancel() helper
kernel/rcu/tree.c | 2 +-
kernel/rcu/tree.h | 3 +-
kernel/rcu/tree_nocb.h | 80 ++++++++++++++----------------------------
3 files changed, 29 insertions(+), 56 deletions(-)
base-commit: d26143bb38e2546fe6f8c9860c13a88146ce5dd6
prerequisite-patch-id: 34e2542a49252718ee5d865e7e3a7229cbb13f25
prerequisite-patch-id: 7bca160efb737a0511ad2104ab1a199753e8cb19
--
2.34.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH -next v4 1/3] rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path
2026-01-23 15:19 [PATCH -next v4 0/3] rcu/nocb: Cleanup patches for next merge window Joel Fernandes
@ 2026-01-23 15:19 ` Joel Fernandes
2026-01-23 15:19 ` [PATCH -next v4 2/3] rcu/nocb: Remove dead callback overload handling Joel Fernandes
2026-01-23 15:19 ` [PATCH -next v4 3/3] rcu/nocb: Extract nocb_defer_wakeup_cancel() helper Joel Fernandes
2 siblings, 0 replies; 4+ messages in thread
From: Joel Fernandes @ 2026-01-23 15:19 UTC (permalink / raw)
To: linux-kernel
Cc: Paul E . McKenney, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang, Joel Fernandes
The WakeOvfIsDeferred code path in __call_rcu_nocb_wake() attempts to
wake rcuog when the callback count exceeds qhimark and callbacks aren't
done with their GP (newly queued or awaiting GP). However, extensive
testing shows that this wake is always redundant or useless.
In the flooding case, rcuog is always waiting for a GP to finish. So
waking up the rcuog thread is pointless. The timer wakeup adds overhead:
rcuog simply wakes up and goes back to sleep, achieving nothing.
This path also unnecessarily adds a full memory barrier and additional
timer expiry modifications.
The root cause is that WakeOvfIsDeferred fires when
!rcu_segcblist_ready_cbs() (GP not complete), but waking rcuog cannot
accelerate GP completion.
This commit therefore removes this path.
Tested with rcutorture scenarios: TREE01, TREE05, TREE08 (all NOCB
configurations) - all pass. Also stress-tested using a kernel module
that floods call_rcu() to trigger the overload conditions; the resulting
observations confirmed the findings above.
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
kernel/rcu/tree.c | 2 +-
kernel/rcu/tree.h | 3 +--
kernel/rcu/tree_nocb.h | 49 ++++++++++++++----------------------------
3 files changed, 18 insertions(+), 36 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 293bbd9ac3f4..2921ffb19939 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3769,7 +3769,7 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
}
rcu_nocb_unlock(rdp);
if (wake_nocb)
- wake_nocb_gp(rdp, false);
+ wake_nocb_gp(rdp);
smp_store_release(&rdp->barrier_seq_snap, gseq);
}
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 2265b9c2906e..7dfc57e9adb1 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -301,7 +301,6 @@ struct rcu_data {
#define RCU_NOCB_WAKE_BYPASS 1
#define RCU_NOCB_WAKE_LAZY 2
#define RCU_NOCB_WAKE 3
-#define RCU_NOCB_WAKE_FORCE 4
#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
/* For jiffies_till_first_fqs and */
@@ -500,7 +499,7 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp);
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
static void rcu_init_one_nocb(struct rcu_node *rnp);
-static bool wake_nocb_gp(struct rcu_data *rdp, bool force);
+static bool wake_nocb_gp(struct rcu_data *rdp);
static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
unsigned long j, bool lazy);
static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index e6cd56603cad..f525e4f7985b 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -192,7 +192,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
struct rcu_data *rdp,
- bool force, unsigned long flags)
+ unsigned long flags)
__releases(rdp_gp->nocb_gp_lock)
{
bool needwake = false;
@@ -209,7 +209,7 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
timer_delete(&rdp_gp->nocb_timer);
}
- if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
+ if (READ_ONCE(rdp_gp->nocb_gp_sleep)) {
WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
needwake = true;
}
@@ -225,13 +225,13 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
/*
* Kick the GP kthread for this NOCB group.
*/
-static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
+static bool wake_nocb_gp(struct rcu_data *rdp)
{
unsigned long flags;
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
- return __wake_nocb_gp(rdp_gp, rdp, force, flags);
+ return __wake_nocb_gp(rdp_gp, rdp, flags);
}
#ifdef CONFIG_RCU_LAZY
@@ -518,10 +518,8 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
}
/*
- * Awaken the no-CBs grace-period kthread if needed, either due to it
- * legitimately being asleep or due to overload conditions.
- *
- * If warranted, also wake up the kthread servicing this CPUs queues.
+ * Awaken the no-CBs grace-period kthread if needed due to it legitimately
+ * being asleep.
*/
static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
unsigned long flags)
@@ -533,7 +531,6 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
long lazy_len;
long len;
struct task_struct *t;
- struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
// If we are being polled or there is no kthread, just leave.
t = READ_ONCE(rdp->nocb_gp_kthread);
@@ -549,22 +546,22 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
lazy_len = READ_ONCE(rdp->lazy_len);
if (was_alldone) {
rdp->qlen_last_fqs_check = len;
+ rcu_nocb_unlock(rdp);
// Only lazy CBs in bypass list
if (lazy_len && bypass_len == lazy_len) {
- rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
TPS("WakeLazy"));
} else if (!irqs_disabled_flags(flags)) {
/* ... if queue was empty ... */
- rcu_nocb_unlock(rdp);
- wake_nocb_gp(rdp, false);
+ wake_nocb_gp(rdp);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("WakeEmpty"));
} else {
- rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
TPS("WakeEmptyIsDeferred"));
}
+
+ return;
} else if (len > rdp->qlen_last_fqs_check + qhimark) {
/* ... or if many callbacks queued. */
rdp->qlen_last_fqs_check = len;
@@ -575,21 +572,10 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
rcu_advance_cbs_nowake(rdp->mynode, rdp);
rdp->nocb_gp_adv_time = j;
}
- smp_mb(); /* Enqueue before timer_pending(). */
- if ((rdp->nocb_cb_sleep ||
- !rcu_segcblist_ready_cbs(&rdp->cblist)) &&
- !timer_pending(&rdp_gp->nocb_timer)) {
- rcu_nocb_unlock(rdp);
- wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
- TPS("WakeOvfIsDeferred"));
- } else {
- rcu_nocb_unlock(rdp);
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
- }
- } else {
- rcu_nocb_unlock(rdp);
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
}
+
+ rcu_nocb_unlock(rdp);
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
}
static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
@@ -966,7 +952,6 @@ static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp_gp,
unsigned long flags)
__releases(rdp_gp->nocb_gp_lock)
{
- int ndw;
int ret;
if (!rcu_nocb_need_deferred_wakeup(rdp_gp, level)) {
@@ -974,8 +959,7 @@ static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp_gp,
return false;
}
- ndw = rdp_gp->nocb_defer_wakeup;
- ret = __wake_nocb_gp(rdp_gp, rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
+ ret = __wake_nocb_gp(rdp_gp, rdp, flags);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
return ret;
@@ -991,7 +975,6 @@ static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));
raw_spin_lock_irqsave(&rdp->nocb_gp_lock, flags);
- smp_mb__after_spinlock(); /* Timer expire before wakeup. */
do_nocb_deferred_wakeup_common(rdp, rdp, RCU_NOCB_WAKE_BYPASS, flags);
}
@@ -1272,7 +1255,7 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
}
rcu_nocb_try_flush_bypass(rdp, jiffies);
rcu_nocb_unlock_irqrestore(rdp, flags);
- wake_nocb_gp(rdp, false);
+ wake_nocb_gp(rdp);
sc->nr_to_scan -= _count;
count += _count;
if (sc->nr_to_scan <= 0)
@@ -1657,7 +1640,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
{
}
-static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
+static bool wake_nocb_gp(struct rcu_data *rdp)
{
return false;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH -next v4 2/3] rcu/nocb: Remove dead callback overload handling
2026-01-23 15:19 [PATCH -next v4 0/3] rcu/nocb: Cleanup patches for next merge window Joel Fernandes
2026-01-23 15:19 ` [PATCH -next v4 1/3] rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path Joel Fernandes
@ 2026-01-23 15:19 ` Joel Fernandes
2026-01-23 15:19 ` [PATCH -next v4 3/3] rcu/nocb: Extract nocb_defer_wakeup_cancel() helper Joel Fernandes
2 siblings, 0 replies; 4+ messages in thread
From: Joel Fernandes @ 2026-01-23 15:19 UTC (permalink / raw)
To: linux-kernel
Cc: Paul E . McKenney, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang, Joel Fernandes
During callback overload (exceeding qhimark), the NOCB code attempts
opportunistic advancement via rcu_advance_cbs_nowake(). Analysis shows
this code path is practically unreachable and serves no useful purpose.
Testing with 300,000 callback floods showed:
- 30 overload conditions triggered
- 0 advancements actually occurred
While a theoretical window exists where this code could execute (e.g.,
vCPU preemption between gp_seq update and rcu_nocb_gp_cleanup()), even
if it did, the advancement would be redundant. The rcuog kthread must
still run to wake the rcuoc callback thread - we would just be
duplicating work that rcuog will perform when it finally gets to run.
Since this path provides no meaningful benefit and extensive testing
confirms it is never useful, remove it entirely.
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
kernel/rcu/tree_nocb.h | 12 ------------
1 file changed, 12 deletions(-)
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index f525e4f7985b..64a8ff350f92 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -526,8 +526,6 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
__releases(rdp->nocb_lock)
{
long bypass_len;
- unsigned long cur_gp_seq;
- unsigned long j;
long lazy_len;
long len;
struct task_struct *t;
@@ -562,16 +560,6 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
}
return;
- } else if (len > rdp->qlen_last_fqs_check + qhimark) {
- /* ... or if many callbacks queued. */
- rdp->qlen_last_fqs_check = len;
- j = jiffies;
- if (j != rdp->nocb_gp_adv_time &&
- rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
- rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
- rcu_advance_cbs_nowake(rdp->mynode, rdp);
- rdp->nocb_gp_adv_time = j;
- }
}
rcu_nocb_unlock(rdp);
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH -next v4 3/3] rcu/nocb: Extract nocb_defer_wakeup_cancel() helper
2026-01-23 15:19 [PATCH -next v4 0/3] rcu/nocb: Cleanup patches for next merge window Joel Fernandes
2026-01-23 15:19 ` [PATCH -next v4 1/3] rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path Joel Fernandes
2026-01-23 15:19 ` [PATCH -next v4 2/3] rcu/nocb: Remove dead callback overload handling Joel Fernandes
@ 2026-01-23 15:19 ` Joel Fernandes
2 siblings, 0 replies; 4+ messages in thread
From: Joel Fernandes @ 2026-01-23 15:19 UTC (permalink / raw)
To: linux-kernel
Cc: Paul E . McKenney, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang, Joel Fernandes
The pattern of checking nocb_defer_wakeup and deleting the timer is
duplicated in __wake_nocb_gp() and nocb_gp_wait(). Extract this into a
common helper function nocb_defer_wakeup_cancel().
This removes code duplication and makes the code easier to maintain.
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
kernel/rcu/tree_nocb.h | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 64a8ff350f92..b3337c7231cc 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -190,6 +190,15 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
init_swait_queue_head(&rnp->nocb_gp_wq[1]);
}
+/* Clear any pending deferred wakeup timer (nocb_gp_lock must be held). */
+static void nocb_defer_wakeup_cancel(struct rcu_data *rdp_gp)
+{
+ if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
+ WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
+ timer_delete(&rdp_gp->nocb_timer);
+ }
+}
+
static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
struct rcu_data *rdp,
unsigned long flags)
@@ -204,10 +213,7 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
return false;
}
- if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
- WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
- timer_delete(&rdp_gp->nocb_timer);
- }
+ nocb_defer_wakeup_cancel(rdp_gp);
if (READ_ONCE(rdp_gp->nocb_gp_sleep)) {
WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
@@ -788,10 +794,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
if (rdp_toggling)
my_rdp->nocb_toggling_rdp = NULL;
- if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
- WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
- timer_delete(&my_rdp->nocb_timer);
- }
+ nocb_defer_wakeup_cancel(my_rdp);
WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
} else {
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2026-01-23 15:20 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-01-23 15:19 [PATCH -next v4 0/3] rcu/nocb: Cleanup patches for next merge window Joel Fernandes
2026-01-23 15:19 ` [PATCH -next v4 1/3] rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path Joel Fernandes
2026-01-23 15:19 ` [PATCH -next v4 2/3] rcu/nocb: Remove dead callback overload handling Joel Fernandes
2026-01-23 15:19 ` [PATCH -next v4 3/3] rcu/nocb: Extract nocb_defer_wakeup_cancel() helper Joel Fernandes
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.