* [PATCH -next v3 0/3] rcu/nocb: Cleanup patches for next merge window
@ 2026-01-19 23:12 Joel Fernandes
2026-01-19 23:12 ` [PATCH -next v3 1/3] rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path Joel Fernandes
` (3 more replies)
0 siblings, 4 replies; 18+ messages in thread
From: Joel Fernandes @ 2026-01-19 23:12 UTC (permalink / raw)
To: linux-kernel
Cc: Paul E . McKenney, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang, Joel Fernandes
These are a few nocb related cleanup patches for the next merge window.
Also Frederic please object to the second patch ("Remove dead callback overload
handling") if you would like, but I think based on our discussion I have
implemented your suggestion, so it should be good.
Changes from v2:
- Dropped patch 2 "Add warning if no rcuog wake up attempt happened during
overload" as it was not needed since we removed the dead CB overload handling.
- Replaced "Add warning to detect if overload advancement is ever useful" with
"Remove dead callback overload handling" per Frederic's feedback - instead of
adding a warning, just remove the dead code path entirely.
nocb rcutorture scenarios passed overnight testing on my system.
Link to v2: https://lore.kernel.org/all/20260114173154.1701056-1-joelagnelf@nvidia.com/
Link to v1: https://lore.kernel.org/all/20260101163417.1065705-1-joelagnelf@nvidia.com/
Joel Fernandes (3):
rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path
rcu/nocb: Remove dead callback overload handling
rcu/nocb: Extract nocb_defer_wakeup_cancel() helper
kernel/rcu/tree.c | 2 +-
kernel/rcu/tree.h | 3 +-
kernel/rcu/tree_nocb.h | 80 ++++++++++++++----------------------------
3 files changed, 29 insertions(+), 56 deletions(-)
--
2.34.1
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH -next v3 1/3] rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path
2026-01-19 23:12 [PATCH -next v3 0/3] rcu/nocb: Cleanup patches for next merge window Joel Fernandes
@ 2026-01-19 23:12 ` Joel Fernandes
2026-01-22 21:56 ` Paul E. McKenney
2026-01-19 23:12 ` [PATCH -next v3 2/3] rcu/nocb: Remove dead callback overload handling Joel Fernandes
` (2 subsequent siblings)
3 siblings, 1 reply; 18+ messages in thread
From: Joel Fernandes @ 2026-01-19 23:12 UTC (permalink / raw)
To: linux-kernel
Cc: Paul E . McKenney, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang, Joel Fernandes
The WakeOvfIsDeferred code path in __call_rcu_nocb_wake() attempts to
wake rcuog when the callback count exceeds qhimark and callbacks aren't
done with their GP (newly queued or awaiting GP). However, a lot of
testing proves this wake is always redundant or useless.
In the flooding case, rcuog is always waiting for a GP to finish, so
waking up the rcuog thread is pointless. The timer wakeup only adds
overhead: rcuog simply wakes up and goes back to sleep, achieving nothing.
This path also needlessly adds a full memory barrier and additional
timer expiry modifications.
The root cause is that WakeOvfIsDeferred fires when
!rcu_segcblist_ready_cbs() (GP not complete), but waking rcuog cannot
accelerate GP completion.
This commit therefore removes this path.
Tested with rcutorture scenarios: TREE01, TREE05, TREE08 (all NOCB
configurations) - all pass. Also stress tested using a kernel module
that floods call_rcu() to trigger the overload conditions and made the
observations confirming the findings.
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
kernel/rcu/tree.c | 2 +-
kernel/rcu/tree.h | 3 +--
kernel/rcu/tree_nocb.h | 49 ++++++++++++++----------------------------
3 files changed, 18 insertions(+), 36 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 293bbd9ac3f4..2921ffb19939 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3769,7 +3769,7 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
}
rcu_nocb_unlock(rdp);
if (wake_nocb)
- wake_nocb_gp(rdp, false);
+ wake_nocb_gp(rdp);
smp_store_release(&rdp->barrier_seq_snap, gseq);
}
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 2265b9c2906e..7dfc57e9adb1 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -301,7 +301,6 @@ struct rcu_data {
#define RCU_NOCB_WAKE_BYPASS 1
#define RCU_NOCB_WAKE_LAZY 2
#define RCU_NOCB_WAKE 3
-#define RCU_NOCB_WAKE_FORCE 4
#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
/* For jiffies_till_first_fqs and */
@@ -500,7 +499,7 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp);
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
static void rcu_init_one_nocb(struct rcu_node *rnp);
-static bool wake_nocb_gp(struct rcu_data *rdp, bool force);
+static bool wake_nocb_gp(struct rcu_data *rdp);
static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
unsigned long j, bool lazy);
static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index e6cd56603cad..f525e4f7985b 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -192,7 +192,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
struct rcu_data *rdp,
- bool force, unsigned long flags)
+ unsigned long flags)
__releases(rdp_gp->nocb_gp_lock)
{
bool needwake = false;
@@ -209,7 +209,7 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
timer_delete(&rdp_gp->nocb_timer);
}
- if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
+ if (READ_ONCE(rdp_gp->nocb_gp_sleep)) {
WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
needwake = true;
}
@@ -225,13 +225,13 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
/*
* Kick the GP kthread for this NOCB group.
*/
-static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
+static bool wake_nocb_gp(struct rcu_data *rdp)
{
unsigned long flags;
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
- return __wake_nocb_gp(rdp_gp, rdp, force, flags);
+ return __wake_nocb_gp(rdp_gp, rdp, flags);
}
#ifdef CONFIG_RCU_LAZY
@@ -518,10 +518,8 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
}
/*
- * Awaken the no-CBs grace-period kthread if needed, either due to it
- * legitimately being asleep or due to overload conditions.
- *
- * If warranted, also wake up the kthread servicing this CPUs queues.
+ * Awaken the no-CBs grace-period kthread if needed due to it legitimately
+ * being asleep.
*/
static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
unsigned long flags)
@@ -533,7 +531,6 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
long lazy_len;
long len;
struct task_struct *t;
- struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
// If we are being polled or there is no kthread, just leave.
t = READ_ONCE(rdp->nocb_gp_kthread);
@@ -549,22 +546,22 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
lazy_len = READ_ONCE(rdp->lazy_len);
if (was_alldone) {
rdp->qlen_last_fqs_check = len;
+ rcu_nocb_unlock(rdp);
// Only lazy CBs in bypass list
if (lazy_len && bypass_len == lazy_len) {
- rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
TPS("WakeLazy"));
} else if (!irqs_disabled_flags(flags)) {
/* ... if queue was empty ... */
- rcu_nocb_unlock(rdp);
- wake_nocb_gp(rdp, false);
+ wake_nocb_gp(rdp);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("WakeEmpty"));
} else {
- rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
TPS("WakeEmptyIsDeferred"));
}
+
+ return;
} else if (len > rdp->qlen_last_fqs_check + qhimark) {
/* ... or if many callbacks queued. */
rdp->qlen_last_fqs_check = len;
@@ -575,21 +572,10 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
rcu_advance_cbs_nowake(rdp->mynode, rdp);
rdp->nocb_gp_adv_time = j;
}
- smp_mb(); /* Enqueue before timer_pending(). */
- if ((rdp->nocb_cb_sleep ||
- !rcu_segcblist_ready_cbs(&rdp->cblist)) &&
- !timer_pending(&rdp_gp->nocb_timer)) {
- rcu_nocb_unlock(rdp);
- wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
- TPS("WakeOvfIsDeferred"));
- } else {
- rcu_nocb_unlock(rdp);
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
- }
- } else {
- rcu_nocb_unlock(rdp);
- trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
}
+
+ rcu_nocb_unlock(rdp);
+ trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
}
static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
@@ -966,7 +952,6 @@ static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp_gp,
unsigned long flags)
__releases(rdp_gp->nocb_gp_lock)
{
- int ndw;
int ret;
if (!rcu_nocb_need_deferred_wakeup(rdp_gp, level)) {
@@ -974,8 +959,7 @@ static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp_gp,
return false;
}
- ndw = rdp_gp->nocb_defer_wakeup;
- ret = __wake_nocb_gp(rdp_gp, rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
+ ret = __wake_nocb_gp(rdp_gp, rdp, flags);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
return ret;
@@ -991,7 +975,6 @@ static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));
raw_spin_lock_irqsave(&rdp->nocb_gp_lock, flags);
- smp_mb__after_spinlock(); /* Timer expire before wakeup. */
do_nocb_deferred_wakeup_common(rdp, rdp, RCU_NOCB_WAKE_BYPASS, flags);
}
@@ -1272,7 +1255,7 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
}
rcu_nocb_try_flush_bypass(rdp, jiffies);
rcu_nocb_unlock_irqrestore(rdp, flags);
- wake_nocb_gp(rdp, false);
+ wake_nocb_gp(rdp);
sc->nr_to_scan -= _count;
count += _count;
if (sc->nr_to_scan <= 0)
@@ -1657,7 +1640,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
{
}
-static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
+static bool wake_nocb_gp(struct rcu_data *rdp)
{
return false;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH -next v3 2/3] rcu/nocb: Remove dead callback overload handling
2026-01-19 23:12 [PATCH -next v3 0/3] rcu/nocb: Cleanup patches for next merge window Joel Fernandes
2026-01-19 23:12 ` [PATCH -next v3 1/3] rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path Joel Fernandes
@ 2026-01-19 23:12 ` Joel Fernandes
2026-01-19 23:53 ` Frederic Weisbecker
` (2 more replies)
2026-01-19 23:12 ` [PATCH -next v3 3/3] rcu/nocb: Extract nocb_defer_wakeup_cancel() helper Joel Fernandes
2026-01-21 19:09 ` [PATCH -next v3 0/3] rcu/nocb: Cleanup patches for next merge window Paul E. McKenney
3 siblings, 3 replies; 18+ messages in thread
From: Joel Fernandes @ 2026-01-19 23:12 UTC (permalink / raw)
To: linux-kernel
Cc: Paul E . McKenney, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang, Joel Fernandes
During callback overload (exceeding qhimark), the NOCB code attempts
opportunistic advancement via rcu_advance_cbs_nowake(). Analysis shows
this entire code path is dead:
- 30 overload conditions triggered with 300,000 callback flood
- 0 advancements actually occurred
- 100% of time blocked because current GP not done
The overload condition triggers when callbacks are coming in at a high
rate with GPs not completing as fast. But the advancement requires the
GP to be complete - a logical contradiction. Even if the GP did complete
in time, nocb_gp_wait() has to wake up anyway to do the advancement, so
it is pointless.
Since the advancement is dead code, the entire overload handling block
serves no purpose. Remove it entirely.
Suggested-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
kernel/rcu/tree_nocb.h | 12 ------------
1 file changed, 12 deletions(-)
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index f525e4f7985b..64a8ff350f92 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -526,8 +526,6 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
__releases(rdp->nocb_lock)
{
long bypass_len;
- unsigned long cur_gp_seq;
- unsigned long j;
long lazy_len;
long len;
struct task_struct *t;
@@ -562,16 +560,6 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
}
return;
- } else if (len > rdp->qlen_last_fqs_check + qhimark) {
- /* ... or if many callbacks queued. */
- rdp->qlen_last_fqs_check = len;
- j = jiffies;
- if (j != rdp->nocb_gp_adv_time &&
- rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
- rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
- rcu_advance_cbs_nowake(rdp->mynode, rdp);
- rdp->nocb_gp_adv_time = j;
- }
}
rcu_nocb_unlock(rdp);
--
2.34.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH -next v3 3/3] rcu/nocb: Extract nocb_defer_wakeup_cancel() helper
2026-01-19 23:12 [PATCH -next v3 0/3] rcu/nocb: Cleanup patches for next merge window Joel Fernandes
2026-01-19 23:12 ` [PATCH -next v3 1/3] rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path Joel Fernandes
2026-01-19 23:12 ` [PATCH -next v3 2/3] rcu/nocb: Remove dead callback overload handling Joel Fernandes
@ 2026-01-19 23:12 ` Joel Fernandes
2026-01-22 21:59 ` Paul E. McKenney
2026-01-21 19:09 ` [PATCH -next v3 0/3] rcu/nocb: Cleanup patches for next merge window Paul E. McKenney
3 siblings, 1 reply; 18+ messages in thread
From: Joel Fernandes @ 2026-01-19 23:12 UTC (permalink / raw)
To: linux-kernel
Cc: Paul E . McKenney, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang, Joel Fernandes
The pattern of checking nocb_defer_wakeup and deleting the timer is
duplicated in __wake_nocb_gp() and nocb_gp_wait(). Extract this into a
common helper function nocb_defer_wakeup_cancel().
This removes code duplication and makes it easier to maintain.
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
---
kernel/rcu/tree_nocb.h | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 64a8ff350f92..b3337c7231cc 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -190,6 +190,15 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
init_swait_queue_head(&rnp->nocb_gp_wq[1]);
}
+/* Clear any pending deferred wakeup timer (nocb_gp_lock must be held). */
+static void nocb_defer_wakeup_cancel(struct rcu_data *rdp_gp)
+{
+ if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
+ WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
+ timer_delete(&rdp_gp->nocb_timer);
+ }
+}
+
static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
struct rcu_data *rdp,
unsigned long flags)
@@ -204,10 +213,7 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
return false;
}
- if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
- WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
- timer_delete(&rdp_gp->nocb_timer);
- }
+ nocb_defer_wakeup_cancel(rdp_gp);
if (READ_ONCE(rdp_gp->nocb_gp_sleep)) {
WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
@@ -788,10 +794,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
if (rdp_toggling)
my_rdp->nocb_toggling_rdp = NULL;
- if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
- WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
- timer_delete(&my_rdp->nocb_timer);
- }
+ nocb_defer_wakeup_cancel(my_rdp);
WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
} else {
--
2.34.1
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH -next v3 2/3] rcu/nocb: Remove dead callback overload handling
2026-01-19 23:12 ` [PATCH -next v3 2/3] rcu/nocb: Remove dead callback overload handling Joel Fernandes
@ 2026-01-19 23:53 ` Frederic Weisbecker
2026-01-20 0:07 ` Paul E. McKenney
2026-01-22 21:55 ` Paul E. McKenney
2026-01-23 5:41 ` Paul E. McKenney
2 siblings, 1 reply; 18+ messages in thread
From: Frederic Weisbecker @ 2026-01-19 23:53 UTC (permalink / raw)
To: Joel Fernandes
Cc: linux-kernel, Paul E . McKenney, Boqun Feng, rcu, Neeraj Upadhyay,
Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang
Le Mon, Jan 19, 2026 at 06:12:22PM -0500, Joel Fernandes a écrit :
> During callback overload (exceeding qhimark), the NOCB code attempts
> opportunistic advancement via rcu_advance_cbs_nowake(). Analysis shows
> this entire code path is dead:
>
> - 30 overload conditions triggered with 300,000 callback flood
> - 0 advancements actually occurred
> - 100% of time blocked because current GP not done
>
> The overload condition triggers when callbacks are coming in at a high
> rate with GPs not completing as fast. But the advancement requires the
> GP to be complete - a logical contradiction. Even if the GP did complete
> in time, nocb_gp_wait() has to wake up anyway to do the advancement, so
> it is pointless.
>
> Since the advancement is dead code, the entire overload handling block
> serves no purpose. Remove it entirely.
>
> Suggested-by: Frederic Weisbecker <frederic@kernel.org>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Would be nice to have Paul's ack as well, in case we missed something subtle
here.
Also probably for upcoming merge window + 1, note that similar code with
similar removal opportunity resides in rcu_nocb_try_bypass().
And ->nocb_gp_adv_time could then be removed.
Thanks.
--
Frederic Weisbecker
SUSE Labs
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH -next v3 2/3] rcu/nocb: Remove dead callback overload handling
2026-01-19 23:53 ` Frederic Weisbecker
@ 2026-01-20 0:07 ` Paul E. McKenney
2026-01-20 0:59 ` joelagnelf
0 siblings, 1 reply; 18+ messages in thread
From: Paul E. McKenney @ 2026-01-20 0:07 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Joel Fernandes, linux-kernel, Boqun Feng, rcu, Neeraj Upadhyay,
Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang
On Tue, Jan 20, 2026 at 12:53:26AM +0100, Frederic Weisbecker wrote:
> Le Mon, Jan 19, 2026 at 06:12:22PM -0500, Joel Fernandes a écrit :
> > During callback overload (exceeding qhimark), the NOCB code attempts
> > opportunistic advancement via rcu_advance_cbs_nowake(). Analysis shows
> > this entire code path is dead:
> >
> > - 30 overload conditions triggered with 300,000 callback flood
> > - 0 advancements actually occurred
> > - 100% of time blocked because current GP not done
> >
> > The overload condition triggers when callbacks are coming in at a high
> > rate with GPs not completing as fast. But the advancement requires the
> > GP to be complete - a logical contradiction. Even if the GP did complete
> > in time, nocb_gp_wait() has to wake up anyway to do the advancement, so
> > it is pointless.
> >
> > Since the advancement is dead code, the entire overload handling block
> > serves no purpose. Remove it entirely.
> >
> > Suggested-by: Frederic Weisbecker <frederic@kernel.org>
> > Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
>
> Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
>
> Would be nice to have Paul's ack as well, in case we missed something subtle
> here.
Given that you are good with it, I will take a look. And test it. ;-)
> Also probably for upcoming merge window + 1, note that similar code with
> similar removal opportunity resides in rcu_nocb_try_bypass().
> And ->nocb_gp_adv_time could then be removed.
Further simplification sounds like a good thing! Just not too simple,
you understand! ;-)
Thanx, Paul
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH -next v3 2/3] rcu/nocb: Remove dead callback overload handling
2026-01-20 0:07 ` Paul E. McKenney
@ 2026-01-20 0:59 ` joelagnelf
0 siblings, 0 replies; 18+ messages in thread
From: joelagnelf @ 2026-01-20 0:59 UTC (permalink / raw)
To: paulmck
Cc: Frederic Weisbecker, linux-kernel, Boqun Feng, rcu,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang
> On Jan 19, 2026, at 7:07 PM, Paul E. McKenney <paulmck@kernel.org> wrote:
>
> On Tue, Jan 20, 2026 at 12:53:26AM +0100, Frederic Weisbecker wrote:
>> Le Mon, Jan 19, 2026 at 06:12:22PM -0500, Joel Fernandes a écrit :
>>> During callback overload (exceeding qhimark), the NOCB code attempts
>>> opportunistic advancement via rcu_advance_cbs_nowake(). Analysis shows
>>> this entire code path is dead:
>>>
>>> - 30 overload conditions triggered with 300,000 callback flood
>>> - 0 advancements actually occurred
>>> - 100% of time blocked because current GP not done
>>>
>>> The overload condition triggers when callbacks are coming in at a high
>>> rate with GPs not completing as fast. But the advancement requires the
>>> GP to be complete - a logical contradiction. Even if the GP did complete
>>> in time, nocb_gp_wait() has to wake up anyway to do the advancement, so
>>> it is pointless.
>>>
>>> Since the advancement is dead code, the entire overload handling block
>>> serves no purpose. Remove it entirely.
>>>
>>> Suggested-by: Frederic Weisbecker <frederic@kernel.org>
>>> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
>>
>> Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
>>
>> Would be nice to have Paul's ack as well, in case we missed something subtle
>> here.
>
> Given that you are good with it, I will take a look. And test it. ;-)
Sure, thanks!
>> Also probably for upcoming merge window + 1, note that similar code with
>> similar removal opportunity resides in rcu_nocb_try_bypass().
>> And ->nocb_gp_adv_time could then be removed.
>
> Further simplification sounds like a good thing! Just not too simple,
> you understand! ;-)
Yes I have some more queued in my local tree that I plan for merge window + 1. :-)
By the way, I have another recent idea: why don't we trigger nocb poll mode
automatically under overload conditions? Currently rcu_nocb_poll is only set via
the boot parameter and stays constant. My testing shows that poll mode can make
GPs complete faster during overload, so dynamically enabling it when we exceed
qhimark could be beneficial. The question then is how to turn it off
dynamically as well - perhaps when the callback count drops below qlowmark,
with some debounce logic to avoid toggling too frequently?
> Thanx, Paul
thanks,
- Joel
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH -next v3 0/3] rcu/nocb: Cleanup patches for next merge window
2026-01-19 23:12 [PATCH -next v3 0/3] rcu/nocb: Cleanup patches for next merge window Joel Fernandes
` (2 preceding siblings ...)
2026-01-19 23:12 ` [PATCH -next v3 3/3] rcu/nocb: Extract nocb_defer_wakeup_cancel() helper Joel Fernandes
@ 2026-01-21 19:09 ` Paul E. McKenney
2026-01-21 19:13 ` Joel Fernandes
3 siblings, 1 reply; 18+ messages in thread
From: Paul E. McKenney @ 2026-01-21 19:09 UTC (permalink / raw)
To: Joel Fernandes
Cc: linux-kernel, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang
On Mon, Jan 19, 2026 at 06:12:20PM -0500, Joel Fernandes wrote:
> These are a few nocb related cleanup patches for the next merge window.
>
> Also Frederic please object to the second patch ("Remove dead callback overload
> handling") if you would like, but I think based on our discussion I have
> implemented your suggestion, so it should be good.
>
> Changes from v2:
> - Dropped patch 2 "Add warning if no rcuog wake up attempt happened during
> overload" as it was not needed since we removed the dead CB overload handling.
> - Replaced "Add warning to detect if overload advancement is ever useful" with
> "Remove dead callback overload handling" per Frederic's feedback - instead of
> adding a warning, just remove the dead code path entirely.
>
> nocb rcutorture scenarios passed overnight testing on my system.
I reverted three of your earlier patches in order to apply this, only
one of which I am really confident in. Please check my -rcu tree to see
if any of the three should be added back in, and I am starting tests in
the meantime.
Thanx, Paul
> Link to v2: https://lore.kernel.org/all/20260114173154.1701056-1-joelagnelf@nvidia.com/
> Link to v1: https://lore.kernel.org/all/20260101163417.1065705-1-joelagnelf@nvidia.com/
>
> Joel Fernandes (3):
> rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path
> rcu/nocb: Remove dead callback overload handling
> rcu/nocb: Extract nocb_defer_wakeup_cancel() helper
>
> kernel/rcu/tree.c | 2 +-
> kernel/rcu/tree.h | 3 +-
> kernel/rcu/tree_nocb.h | 80 ++++++++++++++----------------------------
> 3 files changed, 29 insertions(+), 56 deletions(-)
>
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH -next v3 0/3] rcu/nocb: Cleanup patches for next merge window
2026-01-21 19:09 ` [PATCH -next v3 0/3] rcu/nocb: Cleanup patches for next merge window Paul E. McKenney
@ 2026-01-21 19:13 ` Joel Fernandes
2026-01-21 19:41 ` Paul E. McKenney
0 siblings, 1 reply; 18+ messages in thread
From: Joel Fernandes @ 2026-01-21 19:13 UTC (permalink / raw)
To: paulmck
Cc: linux-kernel, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang
On 1/21/2026 2:09 PM, Paul E. McKenney wrote:
> On Mon, Jan 19, 2026 at 06:12:20PM -0500, Joel Fernandes wrote:
>> These are a few nocb related cleanup patches for the next merge window.
>>
>> Also Frederic please object to the second patch ("Remove dead callback overload
>> handling") if you would like, but I think based on our discussion I have
>> implemented your suggestion, so it should be good.
>>
>> Changes from v2:
>> - Dropped patch 2 "Add warning if no rcuog wake up attempt happened during
>> overload" as it was not needed since we removed the dead CB overload handling.
>> - Replaced "Add warning to detect if overload advancement is ever useful" with
>> "Remove dead callback overload handling" per Frederic's feedback - instead of
>> adding a warning, just remove the dead code path entirely.
>>
>> nocb rcutorture scenarios passed overnight testing on my system.
>
> I reverted three of your earlier patches in order to apply this, only
> one of which I am really confident in. Please check my -rcu tree to see
> if any of the three should be added back in, and I am starting tests in
> the meantime.
Per your latest /dev branch
(https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git/log/?h=dev),
you have the latest 3 patches:
rcu/nocb: Extract nocb_defer_wakeup_cancel() helper [dev] Joel Fernandes 1 -8/+11
rcu/nocb: Remove dead callback overload handling Joel Fernandes 1 -12/+0
rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path Joel Fernandes 3 -36/+18
thanks,
--
Joel Fernandes
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH -next v3 0/3] rcu/nocb: Cleanup patches for next merge window
2026-01-21 19:13 ` Joel Fernandes
@ 2026-01-21 19:41 ` Paul E. McKenney
2026-01-21 19:50 ` Joel Fernandes
0 siblings, 1 reply; 18+ messages in thread
From: Paul E. McKenney @ 2026-01-21 19:41 UTC (permalink / raw)
To: Joel Fernandes
Cc: linux-kernel, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang
On Wed, Jan 21, 2026 at 02:13:56PM -0500, Joel Fernandes wrote:
>
>
> On 1/21/2026 2:09 PM, Paul E. McKenney wrote:
> > On Mon, Jan 19, 2026 at 06:12:20PM -0500, Joel Fernandes wrote:
> >> These are a few nocb related cleanup patches for the next merge window.
> >>
> >> Also Frederic please object to the second patch ("Remove dead callback overload
> >> handling") if you would like, but I think based on our discussion I have
> >> implemented your suggestion, so it should be good.
> >>
> >> Changes from v2:
> >> - Dropped patch 2 "Add warning if no rcuog wake up attempt happened during
> >> overload" as it was not needed since we removed the dead CB overload handling.
> >> - Replaced "Add warning to detect if overload advancement is ever useful" with
> >> "Remove dead callback overload handling" per Frederic's feedback - instead of
> >> adding a warning, just remove the dead code path entirely.
> >>
> >> nocb rcutorture scenarios passed overnight testing on my system.
> >
> > I reverted three of your earlier patches in order to apply this, only
> > one of which I am really confident in. Please check my -rcu tree to see
> > if any of the three should be added back in, and I am starting tests in
> > the meantime.
>
> Per your latest /dev branch
> (https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git/log/?h=dev),
> you have the latest 3 patches:
>
> rcu/nocb: Extract nocb_defer_wakeup_cancel() helper [dev] Joel Fernandes 1 -8/+11
> rcu/nocb: Remove dead callback overload handling Joel Fernandes 1 -12/+0
> rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path Joel Fernandes 3 -36/+18
Thank you for checking!
And just to double-check, all of the patches that I reverted are obsolete,
correct?
Thanx, Paul
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH -next v3 0/3] rcu/nocb: Cleanup patches for next merge window
2026-01-21 19:41 ` Paul E. McKenney
@ 2026-01-21 19:50 ` Joel Fernandes
2026-01-21 19:58 ` Paul E. McKenney
0 siblings, 1 reply; 18+ messages in thread
From: Joel Fernandes @ 2026-01-21 19:50 UTC (permalink / raw)
To: paulmck
Cc: linux-kernel, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang
On 1/21/2026 2:41 PM, Paul E. McKenney wrote:
> On Wed, Jan 21, 2026 at 02:13:56PM -0500, Joel Fernandes wrote:
>>
>>
>> On 1/21/2026 2:09 PM, Paul E. McKenney wrote:
>>> On Mon, Jan 19, 2026 at 06:12:20PM -0500, Joel Fernandes wrote:
>>>> These are a few nocb related cleanup patches for the next merge window.
>>>>
>>>> Also Frederic please object to the second patch ("Remove dead callback overload
>>>> handling") if you would like, but I think based on our discussion I have
>>>> implemented your suggestion, so it should be good.
>>>>
>>>> Changes from v2:
>>>> - Dropped patch 2 "Add warning if no rcuog wake up attempt happened during
>>>> overload" as it was not needed since we removed the dead CB overload handling.
>>>> - Replaced "Add warning to detect if overload advancement is ever useful" with
>>>> "Remove dead callback overload handling" per Frederic's feedback - instead of
>>>> adding a warning, just remove the dead code path entirely.
>>>>
>>>> nocb rcutorture scenarios passed overnight testing on my system.
>>>
>>> I reverted three of your earlier patches in order to apply this, only
>>> one of which I am really confident in. Please check my -rcu tree to see
>>> if any of the three should be added back in, and I am starting tests in
>>> the meantime.
>>
>> Per your latest /dev branch
>> (https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git/log/?h=dev),
>> you have the latest 3 patches:
>>
>> rcu/nocb: Extract nocb_defer_wakeup_cancel() helper [dev] Joel Fernandes 1 -8/+11
>> rcu/nocb: Remove dead callback overload handling Joel Fernandes 1 -12/+0
>> rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path Joel Fernandes 3 -36/+18
>
> Thank you for checking!
>
> And just to double-check, all of the patches that I reverted are obsolete,
> correct?
Yes, those patches were from v2. You have correctly applied the latest v3 version.
thanks,
--
Joel Fernandes
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH -next v3 0/3] rcu/nocb: Cleanup patches for next merge window
2026-01-21 19:50 ` Joel Fernandes
@ 2026-01-21 19:58 ` Paul E. McKenney
0 siblings, 0 replies; 18+ messages in thread
From: Paul E. McKenney @ 2026-01-21 19:58 UTC (permalink / raw)
To: Joel Fernandes
Cc: linux-kernel, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang
On Wed, Jan 21, 2026 at 02:50:54PM -0500, Joel Fernandes wrote:
>
>
> On 1/21/2026 2:41 PM, Paul E. McKenney wrote:
> > On Wed, Jan 21, 2026 at 02:13:56PM -0500, Joel Fernandes wrote:
> >>
> >>
> >> On 1/21/2026 2:09 PM, Paul E. McKenney wrote:
> >>> On Mon, Jan 19, 2026 at 06:12:20PM -0500, Joel Fernandes wrote:
> >>>> These are a few nocb related cleanup patches for the next merge window.
> >>>>
> >>>> Also Frederic please object to the second patch ("Remove dead callback overload
> >>>> handling") if you would like, but I think based on our discussion I have
> >>>> implemented your suggestion, so it should be good.
> >>>>
> >>>> Changes from v2:
> >>>> - Dropped patch 2 "Add warning if no rcuog wake up attempt happened during
> >>>> overload" as it was not needed since we removed the dead CB overload handling.
> >>>> - Replaced "Add warning to detect if overload advancement is ever useful" with
> >>>> "Remove dead callback overload handling" per Frederic's feedback - instead of
> >>>> adding a warning, just remove the dead code path entirely.
> >>>>
> >>>> nocb rcutorture scenarios passed overnight testing on my system.
> >>>
> >>> I reverted three of your earlier patches in order to apply this, only
> >>> one of which I am really confident in. Please check my -rcu tree to see
> >>> if any of the three should be added back in, and I am starting tests in
> > >>> the meantime.
> > >>> Age Commit message (Expand) Author Files Lines
> >>
> >> Per your latest /dev branch
> >> (https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git/log/?h=dev),
> >> you have the latest 3 patches:
> >>
> > >> Extract nocb_defer_wakeup_cancel() helper [dev] Joel Fernandes 1 -8/+11
> >> rcu/nocb: Remove dead callback overload handling Joel Fernandes 1 -12/+0
> >> rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path Joel Fernandes 3 -36/+18
> >
> > Thank you for checking!
> >
> > And just to double-check, all of the patches that I reverted are obsolete,
> > correct?
>
> Yes, those patches were from v2. You have correctly applied the latest v3 version.
Woo-hoo!!! ;-)
Thanx, Paul
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH -next v3 2/3] rcu/nocb: Remove dead callback overload handling
2026-01-19 23:12 ` [PATCH -next v3 2/3] rcu/nocb: Remove dead callback overload handling Joel Fernandes
2026-01-19 23:53 ` Frederic Weisbecker
@ 2026-01-22 21:55 ` Paul E. McKenney
2026-01-22 23:43 ` Joel Fernandes
2026-01-23 5:41 ` Paul E. McKenney
2 siblings, 1 reply; 18+ messages in thread
From: Paul E. McKenney @ 2026-01-22 21:55 UTC (permalink / raw)
To: Joel Fernandes
Cc: linux-kernel, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang
On Mon, Jan 19, 2026 at 06:12:22PM -0500, Joel Fernandes wrote:
> During callback overload (exceeding qhimark), the NOCB code attempts
> opportunistic advancement via rcu_advance_cbs_nowake(). Analysis shows
> this entire code path is dead:
>
> - 30 overload conditions triggered with 300,000 callback flood
> - 0 advancements actually occurred
> - 100% of time blocked because current GP not done
>
> The overload condition triggers when callbacks are coming in at a high
> rate with GPs not completing as fast. But the advancement requires the
> GP to be complete - a logical contradiction. Even if the GP did complete
> in time, nocb_gp_wait() has to wake up anyway to do the advancement, so
> it is pointless.
>
> Since the advancement is dead code, the entire overload handling block
> serves no purpose. Remove it entirely.
>
> Suggested-by: Frederic Weisbecker <frederic@kernel.org>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
> ---
> kernel/rcu/tree_nocb.h | 12 ------------
> 1 file changed, 12 deletions(-)
>
> diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
> index f525e4f7985b..64a8ff350f92 100644
> --- a/kernel/rcu/tree_nocb.h
> +++ b/kernel/rcu/tree_nocb.h
> @@ -526,8 +526,6 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
> __releases(rdp->nocb_lock)
> {
> long bypass_len;
> - unsigned long cur_gp_seq;
> - unsigned long j;
> long lazy_len;
> long len;
> struct task_struct *t;
> @@ -562,16 +560,6 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
> }
>
> return;
> - } else if (len > rdp->qlen_last_fqs_check + qhimark) {
> - /* ... or if many callbacks queued. */
> - rdp->qlen_last_fqs_check = len;
> - j = jiffies;
> - if (j != rdp->nocb_gp_adv_time &&
> - rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
This places in cur_gp_seq not the grace period for the current callback
(which would be unlikely to have finished), but rather the grace period
for the oldest callback that has not yet been marked as done. And that
callback started some time ago, and thus might well have finished.
So while this code might not have been executed in your tests, it is
definitely not a logical contradiction.
Or am I missing something subtle here?
Thanx, Paul
> - rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
> - rcu_advance_cbs_nowake(rdp->mynode, rdp);
> - rdp->nocb_gp_adv_time = j;
> - }
> }
>
> rcu_nocb_unlock(rdp);
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH -next v3 1/3] rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path
2026-01-19 23:12 ` [PATCH -next v3 1/3] rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path Joel Fernandes
@ 2026-01-22 21:56 ` Paul E. McKenney
0 siblings, 0 replies; 18+ messages in thread
From: Paul E. McKenney @ 2026-01-22 21:56 UTC (permalink / raw)
To: Joel Fernandes
Cc: linux-kernel, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang
On Mon, Jan 19, 2026 at 06:12:21PM -0500, Joel Fernandes wrote:
> The WakeOvfIsDeferred code path in __call_rcu_nocb_wake() attempts to
> wake rcuog when the callback count exceeds qhimark and callbacks aren't
> done with their GP (newly queued or awaiting GP). However, a lot of
> testing proves this wake is always redundant or useless.
>
> In the flooding case, rcuog is always waiting for a GP to finish. So
> waking up the rcuog thread is pointless. The timer wakeup adds overhead,
> rcuog simply wakes up and goes back to sleep achieving nothing.
>
> This path also adds a full memory barrier, and additional timer expiry
> modifications unnecessarily.
>
> The root cause is that WakeOvfIsDeferred fires when
> !rcu_segcblist_ready_cbs() (GP not complete), but waking rcuog cannot
> accelerate GP completion.
>
> This commit therefore removes this path.
>
> Tested with rcutorture scenarios: TREE01, TREE05, TREE08 (all NOCB
> configurations) - all pass. Also stress tested using a kernel module
> that floods call_rcu() to trigger the overload conditions and made the
> observations confirming the findings.
>
> Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Huh. I recall a timed wait that this was to help with, but I do not
see it there anymore. So...
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
> ---
> kernel/rcu/tree.c | 2 +-
> kernel/rcu/tree.h | 3 +--
> kernel/rcu/tree_nocb.h | 49 ++++++++++++++----------------------------
> 3 files changed, 18 insertions(+), 36 deletions(-)
>
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 293bbd9ac3f4..2921ffb19939 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -3769,7 +3769,7 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
> }
> rcu_nocb_unlock(rdp);
> if (wake_nocb)
> - wake_nocb_gp(rdp, false);
> + wake_nocb_gp(rdp);
> smp_store_release(&rdp->barrier_seq_snap, gseq);
> }
>
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index 2265b9c2906e..7dfc57e9adb1 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -301,7 +301,6 @@ struct rcu_data {
> #define RCU_NOCB_WAKE_BYPASS 1
> #define RCU_NOCB_WAKE_LAZY 2
> #define RCU_NOCB_WAKE 3
> -#define RCU_NOCB_WAKE_FORCE 4
>
> #define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
> /* For jiffies_till_first_fqs and */
> @@ -500,7 +499,7 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp);
> static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
> static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
> static void rcu_init_one_nocb(struct rcu_node *rnp);
> -static bool wake_nocb_gp(struct rcu_data *rdp, bool force);
> +static bool wake_nocb_gp(struct rcu_data *rdp);
> static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
> unsigned long j, bool lazy);
> static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
> diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
> index e6cd56603cad..f525e4f7985b 100644
> --- a/kernel/rcu/tree_nocb.h
> +++ b/kernel/rcu/tree_nocb.h
> @@ -192,7 +192,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
>
> static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
> struct rcu_data *rdp,
> - bool force, unsigned long flags)
> + unsigned long flags)
> __releases(rdp_gp->nocb_gp_lock)
> {
> bool needwake = false;
> @@ -209,7 +209,7 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
> timer_delete(&rdp_gp->nocb_timer);
> }
>
> - if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
> + if (READ_ONCE(rdp_gp->nocb_gp_sleep)) {
> WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
> needwake = true;
> }
> @@ -225,13 +225,13 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
> /*
> * Kick the GP kthread for this NOCB group.
> */
> -static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
> +static bool wake_nocb_gp(struct rcu_data *rdp)
> {
> unsigned long flags;
> struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
>
> raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
> - return __wake_nocb_gp(rdp_gp, rdp, force, flags);
> + return __wake_nocb_gp(rdp_gp, rdp, flags);
> }
>
> #ifdef CONFIG_RCU_LAZY
> @@ -518,10 +518,8 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
> }
>
> /*
> - * Awaken the no-CBs grace-period kthread if needed, either due to it
> - * legitimately being asleep or due to overload conditions.
> - *
> - * If warranted, also wake up the kthread servicing this CPUs queues.
> + * Awaken the no-CBs grace-period kthread if needed due to it legitimately
> + * being asleep.
> */
> static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
> unsigned long flags)
> @@ -533,7 +531,6 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
> long lazy_len;
> long len;
> struct task_struct *t;
> - struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
>
> // If we are being polled or there is no kthread, just leave.
> t = READ_ONCE(rdp->nocb_gp_kthread);
> @@ -549,22 +546,22 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
> lazy_len = READ_ONCE(rdp->lazy_len);
> if (was_alldone) {
> rdp->qlen_last_fqs_check = len;
> + rcu_nocb_unlock(rdp);
> // Only lazy CBs in bypass list
> if (lazy_len && bypass_len == lazy_len) {
> - rcu_nocb_unlock(rdp);
> wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
> TPS("WakeLazy"));
> } else if (!irqs_disabled_flags(flags)) {
> /* ... if queue was empty ... */
> - rcu_nocb_unlock(rdp);
> - wake_nocb_gp(rdp, false);
> + wake_nocb_gp(rdp);
> trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
> TPS("WakeEmpty"));
> } else {
> - rcu_nocb_unlock(rdp);
> wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
> TPS("WakeEmptyIsDeferred"));
> }
> +
> + return;
> } else if (len > rdp->qlen_last_fqs_check + qhimark) {
> /* ... or if many callbacks queued. */
> rdp->qlen_last_fqs_check = len;
> @@ -575,21 +572,10 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
> rcu_advance_cbs_nowake(rdp->mynode, rdp);
> rdp->nocb_gp_adv_time = j;
> }
> - smp_mb(); /* Enqueue before timer_pending(). */
> - if ((rdp->nocb_cb_sleep ||
> - !rcu_segcblist_ready_cbs(&rdp->cblist)) &&
> - !timer_pending(&rdp_gp->nocb_timer)) {
> - rcu_nocb_unlock(rdp);
> - wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
> - TPS("WakeOvfIsDeferred"));
> - } else {
> - rcu_nocb_unlock(rdp);
> - trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
> - }
> - } else {
> - rcu_nocb_unlock(rdp);
> - trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
> }
> +
> + rcu_nocb_unlock(rdp);
> + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
> }
>
> static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
> @@ -966,7 +952,6 @@ static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp_gp,
> unsigned long flags)
> __releases(rdp_gp->nocb_gp_lock)
> {
> - int ndw;
> int ret;
>
> if (!rcu_nocb_need_deferred_wakeup(rdp_gp, level)) {
> @@ -974,8 +959,7 @@ static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp_gp,
> return false;
> }
>
> - ndw = rdp_gp->nocb_defer_wakeup;
> - ret = __wake_nocb_gp(rdp_gp, rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
> + ret = __wake_nocb_gp(rdp_gp, rdp, flags);
> trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
>
> return ret;
> @@ -991,7 +975,6 @@ static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
> trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));
>
> raw_spin_lock_irqsave(&rdp->nocb_gp_lock, flags);
> - smp_mb__after_spinlock(); /* Timer expire before wakeup. */
> do_nocb_deferred_wakeup_common(rdp, rdp, RCU_NOCB_WAKE_BYPASS, flags);
> }
>
> @@ -1272,7 +1255,7 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
> }
> rcu_nocb_try_flush_bypass(rdp, jiffies);
> rcu_nocb_unlock_irqrestore(rdp, flags);
> - wake_nocb_gp(rdp, false);
> + wake_nocb_gp(rdp);
> sc->nr_to_scan -= _count;
> count += _count;
> if (sc->nr_to_scan <= 0)
> @@ -1657,7 +1640,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
> {
> }
>
> -static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
> +static bool wake_nocb_gp(struct rcu_data *rdp)
> {
> return false;
> }
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH -next v3 3/3] rcu/nocb: Extract nocb_defer_wakeup_cancel() helper
2026-01-19 23:12 ` [PATCH -next v3 3/3] rcu/nocb: Extract nocb_defer_wakeup_cancel() helper Joel Fernandes
@ 2026-01-22 21:59 ` Paul E. McKenney
0 siblings, 0 replies; 18+ messages in thread
From: Paul E. McKenney @ 2026-01-22 21:59 UTC (permalink / raw)
To: Joel Fernandes
Cc: linux-kernel, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang
On Mon, Jan 19, 2026 at 06:12:23PM -0500, Joel Fernandes wrote:
> The pattern of checking nocb_defer_wakeup and deleting the timer is
> duplicated in __wake_nocb_gp() and nocb_gp_wait(). Extract this into a
> common helper function nocb_defer_wakeup_cancel().
>
> This removes code duplication and makes it easier to maintain.
>
> Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Good eyes!
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
> ---
> kernel/rcu/tree_nocb.h | 19 +++++++++++--------
> 1 file changed, 11 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
> index 64a8ff350f92..b3337c7231cc 100644
> --- a/kernel/rcu/tree_nocb.h
> +++ b/kernel/rcu/tree_nocb.h
> @@ -190,6 +190,15 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
> init_swait_queue_head(&rnp->nocb_gp_wq[1]);
> }
>
> +/* Clear any pending deferred wakeup timer (nocb_gp_lock must be held). */
> +static void nocb_defer_wakeup_cancel(struct rcu_data *rdp_gp)
> +{
> + if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
> + WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
> + timer_delete(&rdp_gp->nocb_timer);
> + }
> +}
> +
> static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
> struct rcu_data *rdp,
> unsigned long flags)
> @@ -204,10 +213,7 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
> return false;
> }
>
> - if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
> - WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
> - timer_delete(&rdp_gp->nocb_timer);
> - }
> + nocb_defer_wakeup_cancel(rdp_gp);
>
> if (READ_ONCE(rdp_gp->nocb_gp_sleep)) {
> WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
> @@ -788,10 +794,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
> if (rdp_toggling)
> my_rdp->nocb_toggling_rdp = NULL;
>
> - if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
> - WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
> - timer_delete(&my_rdp->nocb_timer);
> - }
> + nocb_defer_wakeup_cancel(my_rdp);
> WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
> raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
> } else {
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH -next v3 2/3] rcu/nocb: Remove dead callback overload handling
2026-01-22 21:55 ` Paul E. McKenney
@ 2026-01-22 23:43 ` Joel Fernandes
2026-01-23 0:12 ` Paul E. McKenney
0 siblings, 1 reply; 18+ messages in thread
From: Joel Fernandes @ 2026-01-22 23:43 UTC (permalink / raw)
To: Paul E. McKenney
Cc: linux-kernel, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang
On Thu, Jan 22, 2026 at 01:55:11PM -0800, Paul E. McKenney wrote:
> On Mon, Jan 19, 2026 at 06:12:22PM -0500, Joel Fernandes wrote:
> > - } else if (len > rdp->qlen_last_fqs_check + qhimark) {
> > - /* ... or if many callbacks queued. */
> > - rdp->qlen_last_fqs_check = len;
> > - j = jiffies;
> > - if (j != rdp->nocb_gp_adv_time &&
> > - rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
>
> This places in cur_gp_seq not the grace period for the current callback
> (which would be unlikely to have finished), but rather the grace period
> for the oldest callback that has not yet been marked as done. And that
> callback started some time ago, and thus might well have finished.
>
> So while this code might not have been executed in your tests, it is
> definitely not a logical contradiction.
>
> Or am I missing something subtle here?
You're right that it's not a logical contradiction - I was imprecise.
rcu_segcblist_nextgp() returns the GP for the oldest pending callback,
which could indeed have completed.
However, the question becomes: under what scenario do we need to advance
here? If that GP completed, rcuog should have already advanced those
callbacks. The only way this code path can execute is if rcuog is starved
and not running to advance them, right?
But as Frederic pointed out, even if rcuog is starved, advancing here
doesn't help - rcuog must still run anyway to wake the callback thread.
We're just duplicating work it will do when it finally gets to run.
The extensive testing (300K callback floods, hours of rcutorture) showing
zero hits confirms this window is practically unreachable. I can update the
commit message to remove the "logical contradiction" claim and focus on the
redundancy argument instead.
Would that address your concern?
--
Joel Fernandes
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH -next v3 2/3] rcu/nocb: Remove dead callback overload handling
2026-01-22 23:43 ` Joel Fernandes
@ 2026-01-23 0:12 ` Paul E. McKenney
0 siblings, 0 replies; 18+ messages in thread
From: Paul E. McKenney @ 2026-01-23 0:12 UTC (permalink / raw)
To: Joel Fernandes
Cc: linux-kernel, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang
On Thu, Jan 22, 2026 at 06:43:31PM -0500, Joel Fernandes wrote:
> On Thu, Jan 22, 2026 at 01:55:11PM -0800, Paul E. McKenney wrote:
> > On Mon, Jan 19, 2026 at 06:12:22PM -0500, Joel Fernandes wrote:
> > > - } else if (len > rdp->qlen_last_fqs_check + qhimark) {
> > > - /* ... or if many callbacks queued. */
> > > - rdp->qlen_last_fqs_check = len;
> > > - j = jiffies;
> > > - if (j != rdp->nocb_gp_adv_time &&
> > > - rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
> >
> > This places in cur_gp_seq not the grace period for the current callback
> > (which would be unlikely to have finished), but rather the grace period
> > for the oldest callback that has not yet been marked as done. And that
> > callback started some time ago, and thus might well have finished.
> >
> > So while this code might not have been executed in your tests, it is
> > definitely not a logical contradiction.
> >
> > Or am I missing something subtle here?
>
> You're right that it's not a logical contradiction - I was imprecise.
> rcu_segcblist_nextgp() returns the GP for the oldest pending callback,
> which could indeed have completed.
>
> However, the question becomes: under what scenario do we need to advance
> here? If that GP completed, rcuog should have already advanced those
> callbacks. The only way this code path can execute is if rcuog is starved
> and not running to advance them, right?
That is one way. The other way is if the RCU grace-period kthread gets delayed
(perhaps by vCPU preemption) between the time that it updates the
leaf rcu_node structure's ->gp_seq field and the time that it invokes
rcu_nocb_gp_cleanup().
> But as Frederic pointed out, even if rcuog is starved, advancing here
> doesn't help - rcuog must still run anyway to wake the callback thread.
> We're just duplicating work it will do when it finally gets to run.
So maybe we don't want that first patch after all? ;-)
> The extensive testing (300K callback floods, hours of rcutorture) showing
> zero hits confirms this window is practically unreachable. I can update the
> commit message to remove the "logical contradiction" claim and focus on the
> redundancy argument instead.
That would definitely be good!
> Would that address your concern?
Your point about the rcuoc kthread needing to be awakened is a good one.
I am still concerned about flooding on busy systems, especially if the
busy component is an underlying hypervisor, but we might need a more
principled approach for that situation.
Thanx, Paul
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH -next v3 2/3] rcu/nocb: Remove dead callback overload handling
2026-01-19 23:12 ` [PATCH -next v3 2/3] rcu/nocb: Remove dead callback overload handling Joel Fernandes
2026-01-19 23:53 ` Frederic Weisbecker
2026-01-22 21:55 ` Paul E. McKenney
@ 2026-01-23 5:41 ` Paul E. McKenney
2 siblings, 0 replies; 18+ messages in thread
From: Paul E. McKenney @ 2026-01-23 5:41 UTC (permalink / raw)
To: Joel Fernandes
Cc: linux-kernel, Boqun Feng, rcu, Frederic Weisbecker,
Neeraj Upadhyay, Josh Triplett, Uladzislau Rezki, Steven Rostedt,
Mathieu Desnoyers, Lai Jiangshan, Zqiang
On Mon, Jan 19, 2026 at 06:12:22PM -0500, Joel Fernandes wrote:
> During callback overload (exceeding qhimark), the NOCB code attempts
> opportunistic advancement via rcu_advance_cbs_nowake(). Analysis shows
> this entire code path is dead:
>
> - 30 overload conditions triggered with 300,000 callback flood
> - 0 advancements actually occurred
> - 100% of time blocked because current GP not done
>
> The overload condition triggers when callbacks are coming in at a high
> rate with GPs not completing as fast. But the advancement requires the
> GP to be complete - a logical contradiction. Even if the GP did complete
> in time, nocb_gp_wait() has to wake up anyway to do the advancement, so
> it is pointless.
>
> Since the advancement is dead code, the entire overload handling block
> serves no purpose. Remove it entirely.
>
> Suggested-by: Frederic Weisbecker <frederic@kernel.org>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
> ---
> kernel/rcu/tree_nocb.h | 12 ------------
> 1 file changed, 12 deletions(-)
>
> diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
> index f525e4f7985b..64a8ff350f92 100644
> --- a/kernel/rcu/tree_nocb.h
> +++ b/kernel/rcu/tree_nocb.h
> @@ -526,8 +526,6 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
> __releases(rdp->nocb_lock)
> {
> long bypass_len;
> - unsigned long cur_gp_seq;
> - unsigned long j;
> long lazy_len;
> long len;
> struct task_struct *t;
> @@ -562,16 +560,6 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
> }
>
> return;
> - } else if (len > rdp->qlen_last_fqs_check + qhimark) {
> - /* ... or if many callbacks queued. */
> - rdp->qlen_last_fqs_check = len;
> - j = jiffies;
> - if (j != rdp->nocb_gp_adv_time &&
> - rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
> - rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
> - rcu_advance_cbs_nowake(rdp->mynode, rdp);
> - rdp->nocb_gp_adv_time = j;
> - }
> }
>
> rcu_nocb_unlock(rdp);
> --
> 2.34.1
>
^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2026-01-23 5:41 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2026-01-19 23:12 [PATCH -next v3 0/3] rcu/nocb: Cleanup patches for next merge window Joel Fernandes
2026-01-19 23:12 ` [PATCH -next v3 1/3] rcu/nocb: Remove unnecessary WakeOvfIsDeferred wake path Joel Fernandes
2026-01-22 21:56 ` Paul E. McKenney
2026-01-19 23:12 ` [PATCH -next v3 2/3] rcu/nocb: Remove dead callback overload handling Joel Fernandes
2026-01-19 23:53 ` Frederic Weisbecker
2026-01-20 0:07 ` Paul E. McKenney
2026-01-20 0:59 ` joelagnelf
2026-01-22 21:55 ` Paul E. McKenney
2026-01-22 23:43 ` Joel Fernandes
2026-01-23 0:12 ` Paul E. McKenney
2026-01-23 5:41 ` Paul E. McKenney
2026-01-19 23:12 ` [PATCH -next v3 3/3] rcu/nocb: Extract nocb_defer_wakeup_cancel() helper Joel Fernandes
2026-01-22 21:59 ` Paul E. McKenney
2026-01-21 19:09 ` [PATCH -next v3 0/3] rcu/nocb: Cleanup patches for next merge window Paul E. McKenney
2026-01-21 19:13 ` Joel Fernandes
2026-01-21 19:41 ` Paul E. McKenney
2026-01-21 19:50 ` Joel Fernandes
2026-01-21 19:58 ` Paul E. McKenney
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox