* [PATCH v2] rcu: Latch normal synchronize_rcu() path on flood
@ 2026-03-02 10:04 Uladzislau Rezki (Sony)
2026-03-03 20:45 ` Joel Fernandes
0 siblings, 1 reply; 9+ messages in thread
From: Uladzislau Rezki (Sony) @ 2026-03-02 10:04 UTC (permalink / raw)
To: Paul E . McKenney, Joel Fernandes
Cc: Vishal Chourasia, Shrikanth Hegde, Neeraj upadhyay, RCU, LKML,
Frederic Weisbecker, Uladzislau Rezki, Samir M
Currently, rcu_normal_wake_from_gp is only enabled by default
on small systems (<= 16 CPUs) or when a user explicitly enables
it.
Introduce an adaptive latching mechanism:
* Track the number of in-flight synchronize_rcu() requests
using a new rcu_sr_normal_count counter;
* If the count reaches/exceeds RCU_SR_NORMAL_LATCH_THR(64),
it sets the rcu_sr_normal_latched, diverting new requests
onto the wait_rcu_gp() fallback path;
* The latch is cleared only when the pending requests are fully
drained(nr == 0);
* Enables rcu_normal_wake_from_gp by default for all systems,
relying on this dynamic throttling instead of static CPU
limits.
Testing(synthetic flood workload):
* Kernel version: 6.19.0-rc6
* Number of CPUs: 1536
* 60K concurrent synchronize_rcu() calls
Perf(cycles, system-wide):
total cycles: 932020263832
rcu_sr_normal_add_req(): 2650282811 cycles(~0.28%)
Perf report excerpt:
0.01% 0.01% sync_test/... [k] rcu_sr_normal_add_req
Measured overhead of rcu_sr_normal_add_req() remained ~0.28%
of total CPU cycles in this synthetic stress test.
Tested-by: Samir M <samir@linux.ibm.com>
Suggested-by: Joel Fernandes <joelagnelf@nvidia.com>
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
.../admin-guide/kernel-parameters.txt | 10 ++---
kernel/rcu/tree.c | 41 +++++++++++++------
2 files changed, 33 insertions(+), 18 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index cb850e5290c2..d0574a02510d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5867,13 +5867,13 @@ Kernel parameters
use a call_rcu[_hurry]() path. Please note, this is for a
normal grace period.
- How to enable it:
+ How to disable it:
- echo 1 > /sys/module/rcutree/parameters/rcu_normal_wake_from_gp
- or pass a boot parameter "rcutree.rcu_normal_wake_from_gp=1"
+ echo 0 > /sys/module/rcutree/parameters/rcu_normal_wake_from_gp
+ or pass a boot parameter "rcutree.rcu_normal_wake_from_gp=0"
- Default is 1 if num_possible_cpus() <= 16 and it is not explicitly
- disabled by the boot parameter passing 0.
+ Default is 1 if it is not explicitly disabled by the boot parameter
+ passing 0.
rcuscale.gp_async= [KNL]
Measure performance of asynchronous
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 55df6d37145e..86dc88a70fd0 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1632,17 +1632,21 @@ static void rcu_sr_put_wait_head(struct llist_node *node)
atomic_set_release(&sr_wn->inuse, 0);
}
-/* Enable rcu_normal_wake_from_gp automatically on small systems. */
-#define WAKE_FROM_GP_CPU_THRESHOLD 16
-
-static int rcu_normal_wake_from_gp = -1;
+static int rcu_normal_wake_from_gp = 1;
module_param(rcu_normal_wake_from_gp, int, 0644);
static struct workqueue_struct *sync_wq;
+#define RCU_SR_NORMAL_LATCH_THR 64
+
+/* Number of in-flight synchronize_rcu() calls queued on srs_next. */
+static atomic_long_t rcu_sr_normal_count;
+static atomic_t rcu_sr_normal_latched;
+
static void rcu_sr_normal_complete(struct llist_node *node)
{
struct rcu_synchronize *rs = container_of(
(struct rcu_head *) node, struct rcu_synchronize, head);
+ long nr;
WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) &&
!poll_state_synchronize_rcu_full(&rs->oldstate),
@@ -1650,6 +1654,15 @@ static void rcu_sr_normal_complete(struct llist_node *node)
/* Finally. */
complete(&rs->completion);
+ nr = atomic_long_dec_return(&rcu_sr_normal_count);
+ WARN_ON_ONCE(nr < 0);
+
+ /*
+ * Unlatch: switch back to normal path when fully
+ * drained and if it has been latched.
+ */
+ if (nr == 0)
+ (void)atomic_cmpxchg(&rcu_sr_normal_latched, 1, 0);
}
static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work)
@@ -1795,7 +1808,14 @@ static bool rcu_sr_normal_gp_init(void)
static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
{
+ long nr;
+
llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
+ nr = atomic_long_inc_return(&rcu_sr_normal_count);
+
+ /* Latch: only when flooded and if unlatched. */
+ if (nr >= RCU_SR_NORMAL_LATCH_THR)
+ (void)atomic_cmpxchg(&rcu_sr_normal_latched, 0, 1);
}
/*
@@ -3278,14 +3298,15 @@ static void synchronize_rcu_normal(void)
{
struct rcu_synchronize rs;
+ init_rcu_head_on_stack(&rs.head);
trace_rcu_sr_normal(rcu_state.name, &rs.head, TPS("request"));
- if (READ_ONCE(rcu_normal_wake_from_gp) < 1) {
+ if (READ_ONCE(rcu_normal_wake_from_gp) < 1 ||
+ atomic_read(&rcu_sr_normal_latched)) {
wait_rcu_gp(call_rcu_hurry);
goto trace_complete_out;
}
- init_rcu_head_on_stack(&rs.head);
init_completion(&rs.completion);
/*
@@ -3302,10 +3323,10 @@ static void synchronize_rcu_normal(void)
/* Now we can wait. */
wait_for_completion(&rs.completion);
- destroy_rcu_head_on_stack(&rs.head);
trace_complete_out:
trace_rcu_sr_normal(rcu_state.name, &rs.head, TPS("complete"));
+ destroy_rcu_head_on_stack(&rs.head);
}
/**
@@ -4904,12 +4925,6 @@ void __init rcu_init(void)
sync_wq = alloc_workqueue("sync_wq", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
WARN_ON(!sync_wq);
- /* Respect if explicitly disabled via a boot parameter. */
- if (rcu_normal_wake_from_gp < 0) {
- if (num_possible_cpus() <= WAKE_FROM_GP_CPU_THRESHOLD)
- rcu_normal_wake_from_gp = 1;
- }
-
/* Fill in default value for rcutree.qovld boot parameter. */
/* -After- the rcu_node ->lock fields are initialized! */
if (qovld < 0)
--
2.47.3
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v2] rcu: Latch normal synchronize_rcu() path on flood
2026-03-02 10:04 [PATCH v2] rcu: Latch normal synchronize_rcu() path on flood Uladzislau Rezki (Sony)
@ 2026-03-03 20:45 ` Joel Fernandes
2026-03-05 10:59 ` Uladzislau Rezki
0 siblings, 1 reply; 9+ messages in thread
From: Joel Fernandes @ 2026-03-03 20:45 UTC (permalink / raw)
To: Uladzislau Rezki
Cc: Paul E.McKenney, Vishal Chourasia, Shrikanth Hegde,
Neeraj upadhyay, RCU, LKML, Frederic Weisbecker, Samir M
On Mon, 02 Mar 2026 11:04:04 +0100, Uladzislau Rezki (Sony) wrote:
> * The latch is cleared only when the pending requests are fully
> drained(nr == 0);
> +static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
> +{
> + long nr;
> +
> + llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
> + nr = atomic_long_inc_return(&rcu_sr_normal_count);
> +
> + /* Latch: only when flooded and if unlatched. */
> + if (nr >= RCU_SR_NORMAL_LATCH_THR)
> + (void)atomic_cmpxchg(&rcu_sr_normal_latched, 0, 1);
> +}
I think there is a stuck-latch race here. Once llist_add() places the
entry in srs_next, the GP kthread can pick it up and fire
rcu_sr_normal_complete() before the latching cmpxchg runs. If the last
in-flight completion drains count to zero in that window, the unlatch
cmpxchg(latched, 1, 0) fails (latched is still 0 at that moment), and
then the latching cmpxchg(latched, 0, 1) fires anyway — with count=0:
CPU 0 (add_req, count just hit 64) GP kthread
---------------------------------- ----------
llist_add() <-- entry now in srs_next
inc_return() --> nr = 64
[preempted]
rcu_sr_normal_complete() x64:
dec_return -> count: 64..1..0
count==0:
cmpxchg(latched, 1, 0)
--> FAILS (latched still 0)
[resumes]
cmpxchg(latched, 0, 1) --> latched = 1
Final state: count=0, latched=1 --> STUCK LATCH
All subsequent synchronize_rcu() callers see latched==1 and take the
fallback path (not counted). With no new SR-normal callers,
rcu_sr_normal_complete() is never reached again, so the unlatch
cmpxchg(latched, 1, 0) never fires. The latch is permanently stuck.
This requires preemption for a full GP duration between llist_add() and
the cmpxchg, which is probably more likely on PREEMPT_RT or heavily loaded
systems.
The fix: move the cmpxchg *before* llist_add(), so the entry is not
visible to the GP kthread until after the latch is already set.
That should fix it, thoughts?
thanks,
--
Joel Fernandes
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2] rcu: Latch normal synchronize_rcu() path on flood
2026-03-03 20:45 ` Joel Fernandes
@ 2026-03-05 10:59 ` Uladzislau Rezki
2026-03-09 20:32 ` Joel Fernandes
2026-03-10 14:11 ` Frederic Weisbecker
0 siblings, 2 replies; 9+ messages in thread
From: Uladzislau Rezki @ 2026-03-05 10:59 UTC (permalink / raw)
To: Joel Fernandes
Cc: Uladzislau Rezki, Paul E.McKenney, Vishal Chourasia,
Shrikanth Hegde, Neeraj upadhyay, RCU, LKML, Frederic Weisbecker,
Samir M
On Tue, Mar 03, 2026 at 03:45:58PM -0500, Joel Fernandes wrote:
> On Mon, 02 Mar 2026 11:04:04 +0100, Uladzislau Rezki (Sony) wrote:
>
> > * The latch is cleared only when the pending requests are fully
> > drained(nr == 0);
>
> > +static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
> > +{
> > + long nr;
> > +
> > + llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
> > + nr = atomic_long_inc_return(&rcu_sr_normal_count);
> > +
> > + /* Latch: only when flooded and if unlatched. */
> > + if (nr >= RCU_SR_NORMAL_LATCH_THR)
> > + (void)atomic_cmpxchg(&rcu_sr_normal_latched, 0, 1);
> > +}
>
> I think there is a stuck-latch race here. Once llist_add() places the
> entry in srs_next, the GP kthread can pick it up and fire
> rcu_sr_normal_complete() before the latching cmpxchg runs. If the last
> in-flight completion drains count to zero in that window, the unlatch
> cmpxchg(latched, 1, 0) fails (latched is still 0 at that moment), and
> then the latching cmpxchg(latched, 0, 1) fires anyway — with count=0:
>
> CPU 0 (add_req, count just hit 64) GP kthread
> ---------------------------------- ----------
> llist_add() <-- entry now in srs_next
> inc_return() --> nr = 64
> [preempted]
> rcu_sr_normal_complete() x64:
> dec_return -> count: 64..1..0
> count==0:
> cmpxchg(latched, 1, 0)
> --> FAILS (latched still 0)
> [resumes]
> cmpxchg(latched, 0, 1) --> latched = 1
>
> Final state: count=0, latched=1 --> STUCK LATCH
>
> All subsequent synchronize_rcu() callers see latched==1 and take the
> fallback path (not counted). With no new SR-normal callers,
> rcu_sr_normal_complete() is never reached again, so the unlatch
> cmpxchg(latched, 1, 0) never fires. The latch is permanently stuck.
>
> This requires preemption for a full GP duration between llist_add() and
> the cmpxchg, which is probably more likely on PREEMPT_RT or heavily loaded
> systems.
>
> The fix: move the cmpxchg *before* llist_add(), so the entry is not
> visible to the GP kthread until after the latch is already set.
>
> That should fix it, thoughts?
>
Yes and thank you!
We can improve it even more by removing atomic_cmpxchg() in
the rcu_sr_normal_add_req() function, because only one context
sees the (nr == RCU_SR_NORMAL_LATCH_THR) condition:
<snip>
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 86dc88a70fd0..72b340940e11 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1640,7 +1640,7 @@ static struct workqueue_struct *sync_wq;
/* Number of in-flight synchronize_rcu() calls queued on srs_next. */
static atomic_long_t rcu_sr_normal_count;
-static atomic_t rcu_sr_normal_latched;
+static int rcu_sr_normal_latched; /* 0/1 */
static void rcu_sr_normal_complete(struct llist_node *node)
{
@@ -1662,7 +1662,7 @@ static void rcu_sr_normal_complete(struct llist_node *node)
* drained and if it has been latched.
*/
if (nr == 0)
- (void)atomic_cmpxchg(&rcu_sr_normal_latched, 1, 0);
+ (void)cmpxchg(&rcu_sr_normal_latched, 1, 0);
}
static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work)
@@ -1808,14 +1808,22 @@ static bool rcu_sr_normal_gp_init(void)
static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
{
- long nr;
+ /*
+ * Increment before publish to avoid a complete
+ * vs enqueue race on latch.
+ */
+ long nr = atomic_long_inc_return(&rcu_sr_normal_count);
- llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
- nr = atomic_long_inc_return(&rcu_sr_normal_count);
+ /*
+ * Latch on threshold crossing. (nr == RCU_SR_NORMAL_LATCH_THR)
+ * can be true only for one context, avoiding contention on the
+ * write path.
+ */
+ if (nr == RCU_SR_NORMAL_LATCH_THR)
+ WRITE_ONCE(rcu_sr_normal_latched, 1);
- /* Latch: only when flooded and if unlatched. */
- if (nr >= RCU_SR_NORMAL_LATCH_THR)
- (void)atomic_cmpxchg(&rcu_sr_normal_latched, 0, 1);
+ /* Publish for the GP kthread/worker. */
+ llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
}
/*
@@ -3302,7 +3310,7 @@ static void synchronize_rcu_normal(void)
trace_rcu_sr_normal(rcu_state.name, &rs.head, TPS("request"));
if (READ_ONCE(rcu_normal_wake_from_gp) < 1 ||
- atomic_read(&rcu_sr_normal_latched)) {
+ READ_ONCE(rcu_sr_normal_latched)) {
wait_rcu_gp(call_rcu_hurry);
goto trace_complete_out;
}
<snip>
--
Uladzislau Rezki
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v2] rcu: Latch normal synchronize_rcu() path on flood
2026-03-05 10:59 ` Uladzislau Rezki
@ 2026-03-09 20:32 ` Joel Fernandes
2026-03-10 12:37 ` Uladzislau Rezki
2026-03-10 14:11 ` Frederic Weisbecker
1 sibling, 1 reply; 9+ messages in thread
From: Joel Fernandes @ 2026-03-09 20:32 UTC (permalink / raw)
To: Uladzislau Rezki
Cc: Paul E.McKenney, Vishal Chourasia, Shrikanth Hegde,
Neeraj upadhyay, RCU, LKML, Frederic Weisbecker, Samir M
On 3/5/2026 5:59 AM, Uladzislau Rezki wrote:
> On Tue, Mar 03, 2026 at 03:45:58PM -0500, Joel Fernandes wrote:
>> On Mon, 02 Mar 2026 11:04:04 +0100, Uladzislau Rezki (Sony) wrote:
>>
>>> * The latch is cleared only when the pending requests are fully
>>> drained(nr == 0);
>>
>>> +static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
>>> +{
>>> + long nr;
>>> +
>>> + llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
>>> + nr = atomic_long_inc_return(&rcu_sr_normal_count);
>>> +
>>> + /* Latch: only when flooded and if unlatched. */
>>> + if (nr >= RCU_SR_NORMAL_LATCH_THR)
>>> + (void)atomic_cmpxchg(&rcu_sr_normal_latched, 0, 1);
>>> +}
>>
>> I think there is a stuck-latch race here. Once llist_add() places the
>> entry in srs_next, the GP kthread can pick it up and fire
>> rcu_sr_normal_complete() before the latching cmpxchg runs. If the last
>> in-flight completion drains count to zero in that window, the unlatch
>> cmpxchg(latched, 1, 0) fails (latched is still 0 at that moment), and
>> then the latching cmpxchg(latched, 0, 1) fires anyway — with count=0:
>>
>> CPU 0 (add_req, count just hit 64) GP kthread
>> ---------------------------------- ----------
>> llist_add() <-- entry now in srs_next
>> inc_return() --> nr = 64
>> [preempted]
>> rcu_sr_normal_complete() x64:
>> dec_return -> count: 64..1..0
>> count==0:
>> cmpxchg(latched, 1, 0)
>> --> FAILS (latched still 0)
>> [resumes]
>> cmpxchg(latched, 0, 1) --> latched = 1
>>
>> Final state: count=0, latched=1 --> STUCK LATCH
>>
>> All subsequent synchronize_rcu() callers see latched==1 and take the
>> fallback path (not counted). With no new SR-normal callers,
>> rcu_sr_normal_complete() is never reached again, so the unlatch
>> cmpxchg(latched, 1, 0) never fires. The latch is permanently stuck.
>>
>> This requires preemption for a full GP duration between llist_add() and
>> the cmpxchg, which is probably more likely on PREEMPT_RT or heavily loaded
>> systems.
>>
>> The fix: move the cmpxchg *before* llist_add(), so the entry is not
>> visible to the GP kthread until after the latch is already set.
>>
>> That should fix it, thoughts?
>>
> Yes and thank you!
>
> We can improve it even more by removing atomic_cmpxchg() in
> the rcu_sr_normal_add_req() function, because only one context
> sees the (nr == RCU_SR_NORMAL_LATCH_THR) condition:
Sure, though you still need the atomic_long_inc_return.
But yes, the approach looks good. :-) Do you think we can have v3 ready for 7.1?
I would like to shoot for that if possible.
thanks,
--
Joel Fernandes
>
> <snip>
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 86dc88a70fd0..72b340940e11 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1640,7 +1640,7 @@ static struct workqueue_struct *sync_wq;
>
> /* Number of in-flight synchronize_rcu() calls queued on srs_next. */
> static atomic_long_t rcu_sr_normal_count;
> -static atomic_t rcu_sr_normal_latched;
> +static int rcu_sr_normal_latched; /* 0/1 */
>
> static void rcu_sr_normal_complete(struct llist_node *node)
> {
> @@ -1662,7 +1662,7 @@ static void rcu_sr_normal_complete(struct llist_node *node)
> * drained and if it has been latched.
> */
> if (nr == 0)
> - (void)atomic_cmpxchg(&rcu_sr_normal_latched, 1, 0);
> + (void)cmpxchg(&rcu_sr_normal_latched, 1, 0);
> }
>
> static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work)
> @@ -1808,14 +1808,22 @@ static bool rcu_sr_normal_gp_init(void)
>
> static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
> {
> - long nr;
> + /*
> + * Increment before publish to avoid a complete
> + * vs enqueue race on latch.
> + */
> + long nr = atomic_long_inc_return(&rcu_sr_normal_count);
>
> - llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
> - nr = atomic_long_inc_return(&rcu_sr_normal_count);
> + /*
> + * Latch on threshold crossing. (nr == RCU_SR_NORMAL_LATCH_THR)
> + * can be true only for one context, avoiding contention on the
> + * write path.
> + */
> + if (nr == RCU_SR_NORMAL_LATCH_THR)
> + WRITE_ONCE(rcu_sr_normal_latched, 1);
>
> - /* Latch: only when flooded and if unlatched. */
> - if (nr >= RCU_SR_NORMAL_LATCH_THR)
> - (void)atomic_cmpxchg(&rcu_sr_normal_latched, 0, 1);
> + /* Publish for the GP kthread/worker. */
> + llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
> }
>
> /*
> @@ -3302,7 +3310,7 @@ static void synchronize_rcu_normal(void)
> trace_rcu_sr_normal(rcu_state.name, &rs.head, TPS("request"));
>
> if (READ_ONCE(rcu_normal_wake_from_gp) < 1 ||
> - atomic_read(&rcu_sr_normal_latched)) {
> + READ_ONCE(rcu_sr_normal_latched)) {
> wait_rcu_gp(call_rcu_hurry);
> goto trace_complete_out;
> }
> <snip>
>
> --
> Uladzislau Rezki
--
Joel Fernandes
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2] rcu: Latch normal synchronize_rcu() path on flood
2026-03-09 20:32 ` Joel Fernandes
@ 2026-03-10 12:37 ` Uladzislau Rezki
0 siblings, 0 replies; 9+ messages in thread
From: Uladzislau Rezki @ 2026-03-10 12:37 UTC (permalink / raw)
To: Joel Fernandes
Cc: Uladzislau Rezki, Paul E.McKenney, Vishal Chourasia,
Shrikanth Hegde, Neeraj upadhyay, RCU, LKML, Frederic Weisbecker,
Samir M
On Mon, Mar 09, 2026 at 04:32:46PM -0400, Joel Fernandes wrote:
>
>
> On 3/5/2026 5:59 AM, Uladzislau Rezki wrote:
> > On Tue, Mar 03, 2026 at 03:45:58PM -0500, Joel Fernandes wrote:
> >> On Mon, 02 Mar 2026 11:04:04 +0100, Uladzislau Rezki (Sony) wrote:
> >>
> >>> * The latch is cleared only when the pending requests are fully
> >>> drained(nr == 0);
> >>
> >>> +static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
> >>> +{
> >>> + long nr;
> >>> +
> >>> + llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
> >>> + nr = atomic_long_inc_return(&rcu_sr_normal_count);
> >>> +
> >>> + /* Latch: only when flooded and if unlatched. */
> >>> + if (nr >= RCU_SR_NORMAL_LATCH_THR)
> >>> + (void)atomic_cmpxchg(&rcu_sr_normal_latched, 0, 1);
> >>> +}
> >>
> >> I think there is a stuck-latch race here. Once llist_add() places the
> >> entry in srs_next, the GP kthread can pick it up and fire
> >> rcu_sr_normal_complete() before the latching cmpxchg runs. If the last
> >> in-flight completion drains count to zero in that window, the unlatch
> >> cmpxchg(latched, 1, 0) fails (latched is still 0 at that moment), and
> >> then the latching cmpxchg(latched, 0, 1) fires anyway — with count=0:
> >>
> >> CPU 0 (add_req, count just hit 64) GP kthread
> >> ---------------------------------- ----------
> >> llist_add() <-- entry now in srs_next
> >> inc_return() --> nr = 64
> >> [preempted]
> >> rcu_sr_normal_complete() x64:
> >> dec_return -> count: 64..1..0
> >> count==0:
> >> cmpxchg(latched, 1, 0)
> >> --> FAILS (latched still 0)
> >> [resumes]
> >> cmpxchg(latched, 0, 1) --> latched = 1
> >>
> >> Final state: count=0, latched=1 --> STUCK LATCH
> >>
> >> All subsequent synchronize_rcu() callers see latched==1 and take the
> >> fallback path (not counted). With no new SR-normal callers,
> >> rcu_sr_normal_complete() is never reached again, so the unlatch
> >> cmpxchg(latched, 1, 0) never fires. The latch is permanently stuck.
> >>
> >> This requires preemption for a full GP duration between llist_add() and
> >> the cmpxchg, which is probably more likely on PREEMPT_RT or heavily loaded
> >> systems.
> >>
> >> The fix: move the cmpxchg *before* llist_add(), so the entry is not
> >> visible to the GP kthread until after the latch is already set.
> >>
> >> That should fix it, thoughts?
> >>
> > Yes and thank you!
> >
> > We can improve it even more by removing atomic_cmpxchg() in
> > the rcu_sr_normal_add_req() function, because only one context
> > sees the (nr == RCU_SR_NORMAL_LATCH_THR) condition:
> Sure, though you still need the atomic_long_inc_return.
>
Yes :)
>
> But yes, the approach looks good. :-) Do you think we can have v3 ready for 7.1?
> I would like to shoot for that if possible.
>
I will post v3 today.
--
Uladzislau Rezki
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2] rcu: Latch normal synchronize_rcu() path on flood
2026-03-05 10:59 ` Uladzislau Rezki
2026-03-09 20:32 ` Joel Fernandes
@ 2026-03-10 14:11 ` Frederic Weisbecker
2026-03-10 16:28 ` Uladzislau Rezki
1 sibling, 1 reply; 9+ messages in thread
From: Frederic Weisbecker @ 2026-03-10 14:11 UTC (permalink / raw)
To: Uladzislau Rezki
Cc: Joel Fernandes, Paul E.McKenney, Vishal Chourasia,
Shrikanth Hegde, Neeraj upadhyay, RCU, LKML, Samir M
Le Thu, Mar 05, 2026 at 11:59:15AM +0100, Uladzislau Rezki a écrit :
> On Tue, Mar 03, 2026 at 03:45:58PM -0500, Joel Fernandes wrote:
> > On Mon, 02 Mar 2026 11:04:04 +0100, Uladzislau Rezki (Sony) wrote:
> >
> > > * The latch is cleared only when the pending requests are fully
> > > drained(nr == 0);
> >
> > > +static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
> > > +{
> > > + long nr;
> > > +
> > > + llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
> > > + nr = atomic_long_inc_return(&rcu_sr_normal_count);
> > > +
> > > + /* Latch: only when flooded and if unlatched. */
> > > + if (nr >= RCU_SR_NORMAL_LATCH_THR)
> > > + (void)atomic_cmpxchg(&rcu_sr_normal_latched, 0, 1);
> > > +}
> >
> > I think there is a stuck-latch race here. Once llist_add() places the
> > entry in srs_next, the GP kthread can pick it up and fire
> > rcu_sr_normal_complete() before the latching cmpxchg runs. If the last
> > in-flight completion drains count to zero in that window, the unlatch
> > cmpxchg(latched, 1, 0) fails (latched is still 0 at that moment), and
> > then the latching cmpxchg(latched, 0, 1) fires anyway — with count=0:
> >
> > CPU 0 (add_req, count just hit 64) GP kthread
> > ---------------------------------- ----------
> > llist_add() <-- entry now in srs_next
> > inc_return() --> nr = 64
> > [preempted]
> > rcu_sr_normal_complete() x64:
> > dec_return -> count: 64..1..0
> > count==0:
> > cmpxchg(latched, 1, 0)
> > --> FAILS (latched still 0)
> > [resumes]
> > cmpxchg(latched, 0, 1) --> latched = 1
> >
> > Final state: count=0, latched=1 --> STUCK LATCH
> >
> > All subsequent synchronize_rcu() callers see latched==1 and take the
> > fallback path (not counted). With no new SR-normal callers,
> > rcu_sr_normal_complete() is never reached again, so the unlatch
> > cmpxchg(latched, 1, 0) never fires. The latch is permanently stuck.
> >
> > This requires preemption for a full GP duration between llist_add() and
> > the cmpxchg, which is probably more likely on PREEMPT_RT or heavily loaded
> > systems.
> >
> > The fix: move the cmpxchg *before* llist_add(), so the entry is not
> > visible to the GP kthread until after the latch is already set.
> >
> > That should fix it, thoughts?
> >
> Yes and thank you!
>
> We can improve it even more by removing atomic_cmpxchg() in
> the rcu_sr_normal_add_req() function, because only one context
> sees the (nr == RCU_SR_NORMAL_LATCH_THR) condition:
>
> <snip>
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 86dc88a70fd0..72b340940e11 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1640,7 +1640,7 @@ static struct workqueue_struct *sync_wq;
>
> /* Number of in-flight synchronize_rcu() calls queued on srs_next. */
> static atomic_long_t rcu_sr_normal_count;
> -static atomic_t rcu_sr_normal_latched;
> +static int rcu_sr_normal_latched; /* 0/1 */
>
> static void rcu_sr_normal_complete(struct llist_node *node)
> {
> @@ -1662,7 +1662,7 @@ static void rcu_sr_normal_complete(struct llist_node *node)
> * drained and if it has been latched.
> */
> if (nr == 0)
> - (void)atomic_cmpxchg(&rcu_sr_normal_latched, 1, 0);
> + (void)cmpxchg(&rcu_sr_normal_latched, 1, 0);
> }
>
> static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work)
> @@ -1808,14 +1808,22 @@ static bool rcu_sr_normal_gp_init(void)
>
> static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
> {
> - long nr;
> + /*
> + * Increment before publish to avoid a complete
> + * vs enqueue race on latch.
> + */
> + long nr = atomic_long_inc_return(&rcu_sr_normal_count);
>
> - llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
> - nr = atomic_long_inc_return(&rcu_sr_normal_count);
> + /*
> + * Latch on threshold crossing. (nr == RCU_SR_NORMAL_LATCH_THR)
> + * can be true only for one context, avoiding contention on the
> + * write path.
> + */
> + if (nr == RCU_SR_NORMAL_LATCH_THR)
> + WRITE_ONCE(rcu_sr_normal_latched, 1);
Isn't it still racy?
rcu_sr_normal_add_req rcu_sr_normal_complete
--------------------- ----------------------
nr = atomic_long_dec_return(&rcu_sr_normal_count);
// nr == 0
======= PREEMPTION =======
// 64 tasks doing synchronize_rcu()
rcu_sr_normal_add_req()
WRITE_ONCE(rcu_sr_normal_latched, 1);
cmpxchg(&rcu_sr_normal_latched, 1, 0);
Also more generally there is nothing that orders the WRITE_ONCE() with the
cmpxchg.
Is it possible to remove rcu_sr_normal_latched and simply deal with comparisons
between rcu_sr_normal_count and RCU_SR_NORMAL_LATCH_THR?
Thanks.
--
Frederic Weisbecker
SUSE Labs
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2] rcu: Latch normal synchronize_rcu() path on flood
2026-03-10 14:11 ` Frederic Weisbecker
@ 2026-03-10 16:28 ` Uladzislau Rezki
2026-03-10 22:24 ` Frederic Weisbecker
0 siblings, 1 reply; 9+ messages in thread
From: Uladzislau Rezki @ 2026-03-10 16:28 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Uladzislau Rezki, Joel Fernandes, Paul E.McKenney,
Vishal Chourasia, Shrikanth Hegde, Neeraj upadhyay, RCU, LKML,
Samir M
Hello, Frederic!
> Le Thu, Mar 05, 2026 at 11:59:15AM +0100, Uladzislau Rezki a écrit :
> > On Tue, Mar 03, 2026 at 03:45:58PM -0500, Joel Fernandes wrote:
> > > On Mon, 02 Mar 2026 11:04:04 +0100, Uladzislau Rezki (Sony) wrote:
> > >
> > > > * The latch is cleared only when the pending requests are fully
> > > > drained(nr == 0);
> > >
> > > > +static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
> > > > +{
> > > > + long nr;
> > > > +
> > > > + llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
> > > > + nr = atomic_long_inc_return(&rcu_sr_normal_count);
> > > > +
> > > > + /* Latch: only when flooded and if unlatched. */
> > > > + if (nr >= RCU_SR_NORMAL_LATCH_THR)
> > > > + (void)atomic_cmpxchg(&rcu_sr_normal_latched, 0, 1);
> > > > +}
> > >
> > > I think there is a stuck-latch race here. Once llist_add() places the
> > > entry in srs_next, the GP kthread can pick it up and fire
> > > rcu_sr_normal_complete() before the latching cmpxchg runs. If the last
> > > in-flight completion drains count to zero in that window, the unlatch
> > > cmpxchg(latched, 1, 0) fails (latched is still 0 at that moment), and
> > > then the latching cmpxchg(latched, 0, 1) fires anyway — with count=0:
> > >
> > > CPU 0 (add_req, count just hit 64) GP kthread
> > > ---------------------------------- ----------
> > > llist_add() <-- entry now in srs_next
> > > inc_return() --> nr = 64
> > > [preempted]
> > > rcu_sr_normal_complete() x64:
> > > dec_return -> count: 64..1..0
> > > count==0:
> > > cmpxchg(latched, 1, 0)
> > > --> FAILS (latched still 0)
> > > [resumes]
> > > cmpxchg(latched, 0, 1) --> latched = 1
> > >
> > > Final state: count=0, latched=1 --> STUCK LATCH
> > >
> > > All subsequent synchronize_rcu() callers see latched==1 and take the
> > > fallback path (not counted). With no new SR-normal callers,
> > > rcu_sr_normal_complete() is never reached again, so the unlatch
> > > cmpxchg(latched, 1, 0) never fires. The latch is permanently stuck.
> > >
> > > This requires preemption for a full GP duration between llist_add() and
> > > the cmpxchg, which is probably more likely on PREEMPT_RT or heavily loaded
> > > systems.
> > >
> > > The fix: move the cmpxchg *before* llist_add(), so the entry is not
> > > visible to the GP kthread until after the latch is already set.
> > >
> > > That should fix it, thoughts?
> > >
> > Yes and thank you!
> >
> > We can improve it even more by removing atomic_cmpxchg() in
> > the rcu_sr_normal_add_req() function, because only one context
> > sees the (nr == RCU_SR_NORMAL_LATCH_THR) condition:
> >
> > <snip>
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 86dc88a70fd0..72b340940e11 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -1640,7 +1640,7 @@ static struct workqueue_struct *sync_wq;
> >
> > /* Number of in-flight synchronize_rcu() calls queued on srs_next. */
> > static atomic_long_t rcu_sr_normal_count;
> > -static atomic_t rcu_sr_normal_latched;
> > +static int rcu_sr_normal_latched; /* 0/1 */
> >
> > static void rcu_sr_normal_complete(struct llist_node *node)
> > {
> > @@ -1662,7 +1662,7 @@ static void rcu_sr_normal_complete(struct llist_node *node)
> > * drained and if it has been latched.
> > */
> > if (nr == 0)
> > - (void)atomic_cmpxchg(&rcu_sr_normal_latched, 1, 0);
> > + (void)cmpxchg(&rcu_sr_normal_latched, 1, 0);
> > }
> >
> > static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work)
> > @@ -1808,14 +1808,22 @@ static bool rcu_sr_normal_gp_init(void)
> >
> > static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
> > {
> > - long nr;
> > + /*
> > + * Increment before publish to avoid a complete
> > + * vs enqueue race on latch.
> > + */
> > + long nr = atomic_long_inc_return(&rcu_sr_normal_count);
> >
> > - llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
> > - nr = atomic_long_inc_return(&rcu_sr_normal_count);
> > + /*
> > + * Latch on threshold crossing. (nr == RCU_SR_NORMAL_LATCH_THR)
> > + * can be true only for one context, avoiding contention on the
> > + * write path.
> > + */
> > + if (nr == RCU_SR_NORMAL_LATCH_THR)
> > + WRITE_ONCE(rcu_sr_normal_latched, 1);
>
> Isn't it still racy?
>
> rcu_sr_normal_add_req rcu_sr_normal_complete
> --------------------- ----------------------
> nr = atomic_long_dec_return(&rcu_sr_normal_count);
> // nr == 0
> ======= PREEMPTION =======
> // 64 tasks doing synchronize_rcu()
> rcu_sr_normal_add_req()
> WRITE_ONCE(rcu_sr_normal_latched, 1);
> cmpxchg(&rcu_sr_normal_latched, 1, 0);
>
>
> Also more generally there is nothing that orders the WRITE_ONCE() with the
> cmpxchg.
>
Yep, that I know. This is a rather "relaxed" mechanism rather than
a strictly ordered one. The race you described can happen, but I do
not see it as a problem because, as noted, it is a relaxed policy flag.
But WRITE_ONCE() i can replace by:
if (nr == RCU_SR_NORMAL_LATCH_THR)
cmpxchg(&rcu_sr_normal_latched, 0, 1);
>
> Is it possible to remove rcu_sr_normal_latched and simply deal with comparisons
> between rcu_sr_normal_count and RCU_SR_NORMAL_LATCH_THR?
>
It is. But the idea with the latch is a bit different than just checking
the threshold. The main goal is to detect a flood and latch the path until
__all__ users are flushed. I.e. it implements hysteresis to prevent
repeated switches around the threshold.
With your proposal behaviour becomes different.
Thoughts?
--
Uladzislau Rezki
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2] rcu: Latch normal synchronize_rcu() path on flood
2026-03-10 16:28 ` Uladzislau Rezki
@ 2026-03-10 22:24 ` Frederic Weisbecker
2026-03-11 8:45 ` Uladzislau Rezki
0 siblings, 1 reply; 9+ messages in thread
From: Frederic Weisbecker @ 2026-03-10 22:24 UTC (permalink / raw)
To: Uladzislau Rezki
Cc: Joel Fernandes, Paul E.McKenney, Vishal Chourasia,
Shrikanth Hegde, Neeraj upadhyay, RCU, LKML, Samir M
Le Tue, Mar 10, 2026 at 05:28:22PM +0100, Uladzislau Rezki a écrit :
> Hello, Frederic!
>
> > Le Thu, Mar 05, 2026 at 11:59:15AM +0100, Uladzislau Rezki a écrit :
> > > On Tue, Mar 03, 2026 at 03:45:58PM -0500, Joel Fernandes wrote:
> > > > On Mon, 02 Mar 2026 11:04:04 +0100, Uladzislau Rezki (Sony) wrote:
> > > >
> > > > > * The latch is cleared only when the pending requests are fully
> > > > > drained(nr == 0);
> > > >
> > > > > +static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
> > > > > +{
> > > > > + long nr;
> > > > > +
> > > > > + llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
> > > > > + nr = atomic_long_inc_return(&rcu_sr_normal_count);
> > > > > +
> > > > > + /* Latch: only when flooded and if unlatched. */
> > > > > + if (nr >= RCU_SR_NORMAL_LATCH_THR)
> > > > > + (void)atomic_cmpxchg(&rcu_sr_normal_latched, 0, 1);
> > > > > +}
> > > >
> > > > I think there is a stuck-latch race here. Once llist_add() places the
> > > > entry in srs_next, the GP kthread can pick it up and fire
> > > > rcu_sr_normal_complete() before the latching cmpxchg runs. If the last
> > > > in-flight completion drains count to zero in that window, the unlatch
> > > > cmpxchg(latched, 1, 0) fails (latched is still 0 at that moment), and
> > > > then the latching cmpxchg(latched, 0, 1) fires anyway — with count=0:
> > > >
> > > > CPU 0 (add_req, count just hit 64) GP kthread
> > > > ---------------------------------- ----------
> > > > llist_add() <-- entry now in srs_next
> > > > inc_return() --> nr = 64
> > > > [preempted]
> > > > rcu_sr_normal_complete() x64:
> > > > dec_return -> count: 64..1..0
> > > > count==0:
> > > > cmpxchg(latched, 1, 0)
> > > > --> FAILS (latched still 0)
> > > > [resumes]
> > > > cmpxchg(latched, 0, 1) --> latched = 1
> > > >
> > > > Final state: count=0, latched=1 --> STUCK LATCH
> > > >
> > > > All subsequent synchronize_rcu() callers see latched==1 and take the
> > > > fallback path (not counted). With no new SR-normal callers,
> > > > rcu_sr_normal_complete() is never reached again, so the unlatch
> > > > cmpxchg(latched, 1, 0) never fires. The latch is permanently stuck.
> > > >
> > > > This requires preemption for a full GP duration between llist_add() and
> > > > the cmpxchg, which is probably more likely on PREEMPT_RT or heavily loaded
> > > > systems.
> > > >
> > > > The fix: move the cmpxchg *before* llist_add(), so the entry is not
> > > > visible to the GP kthread until after the latch is already set.
> > > >
> > > > That should fix it, thoughts?
> > > >
> > > Yes and thank you!
> > >
> > > We can improve it even more by removing atomic_cmpxchg() in
> > > the rcu_sr_normal_add_req() function, because only one context
> > > sees the (nr == RCU_SR_NORMAL_LATCH_THR) condition:
> > >
> > > <snip>
> > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > index 86dc88a70fd0..72b340940e11 100644
> > > --- a/kernel/rcu/tree.c
> > > +++ b/kernel/rcu/tree.c
> > > @@ -1640,7 +1640,7 @@ static struct workqueue_struct *sync_wq;
> > >
> > > /* Number of in-flight synchronize_rcu() calls queued on srs_next. */
> > > static atomic_long_t rcu_sr_normal_count;
> > > -static atomic_t rcu_sr_normal_latched;
> > > +static int rcu_sr_normal_latched; /* 0/1 */
> > >
> > > static void rcu_sr_normal_complete(struct llist_node *node)
> > > {
> > > @@ -1662,7 +1662,7 @@ static void rcu_sr_normal_complete(struct llist_node *node)
> > > * drained and if it has been latched.
> > > */
> > > if (nr == 0)
> > > - (void)atomic_cmpxchg(&rcu_sr_normal_latched, 1, 0);
> > > + (void)cmpxchg(&rcu_sr_normal_latched, 1, 0);
> > > }
> > >
> > > static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work)
> > > @@ -1808,14 +1808,22 @@ static bool rcu_sr_normal_gp_init(void)
> > >
> > > static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
> > > {
> > > - long nr;
> > > + /*
> > > + * Increment before publish to avoid a complete
> > > + * vs enqueue race on latch.
> > > + */
> > > + long nr = atomic_long_inc_return(&rcu_sr_normal_count);
> > >
> > > - llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
> > > - nr = atomic_long_inc_return(&rcu_sr_normal_count);
> > > + /*
> > > + * Latch on threshold crossing. (nr == RCU_SR_NORMAL_LATCH_THR)
> > > + * can be true only for one context, avoiding contention on the
> > > + * write path.
> > > + */
> > > + if (nr == RCU_SR_NORMAL_LATCH_THR)
> > > + WRITE_ONCE(rcu_sr_normal_latched, 1);
> >
> > Isn't it still racy?
> >
> > rcu_sr_normal_add_req rcu_sr_normal_complete
> > --------------------- ----------------------
> > nr = atomic_long_dec_return(&rcu_sr_normal_count);
> > // nr == 0
> > ======= PREEMPTION =======
> > // 64 tasks doing synchronize_rcu()
> > rcu_sr_normal_add_req()
> > WRITE_ONCE(rcu_sr_normal_latched, 1);
> > cmpxchg(&rcu_sr_normal_latched, 1, 0);
> >
> >
> > Also more generally there is nothing that orders the WRITE_ONCE() with the
> > cmpxchg.
> >
> Yep, that I know. This is rather a "relaxed" mechanism than a
> strictly ordered one. The race you described can happen, but I do not
> see it as a problem because, as noted, it is a relaxed policy flag.
Ok, that will need a comment explaining how and why we tolerate missed
latches then.
>
> But WRITE_ONCE() i can replace by:
>
> if (nr == RCU_SR_NORMAL_LATCH_THR)
> cmpxchg(&rcu_sr_normal_latched, 0, 1);
Possibly yes, though I'm not sure that would help.
> > Is it possible to remove rcu_sr_normal_latched and simply deal with comparisons
> > between rcu_sr_normal_count and RCU_SR_NORMAL_LATCH_THR?
> >
> It is. But the idea with the latch is a bit different than just checking
> a threshold. The main goal is to detect a flood and latch the path until
> __all__ users are flushed. I.e. it implements hysteresis to prevent
> repeated switches around the threshold.
Good point!
> With your proposal, the behaviour becomes different.
>
> Thoughts?
Good thoughts! :-)
--
Frederic Weisbecker
SUSE Labs
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2] rcu: Latch normal synchronize_rcu() path on flood
2026-03-10 22:24 ` Frederic Weisbecker
@ 2026-03-11 8:45 ` Uladzislau Rezki
0 siblings, 0 replies; 9+ messages in thread
From: Uladzislau Rezki @ 2026-03-11 8:45 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Uladzislau Rezki, Joel Fernandes, Paul E.McKenney,
Vishal Chourasia, Shrikanth Hegde, Neeraj upadhyay, RCU, LKML,
Samir M
On Tue, Mar 10, 2026 at 11:24:00PM +0100, Frederic Weisbecker wrote:
> Le Tue, Mar 10, 2026 at 05:28:22PM +0100, Uladzislau Rezki a écrit :
> > Hello, Frederic!
> >
> > > Le Thu, Mar 05, 2026 at 11:59:15AM +0100, Uladzislau Rezki a écrit :
> > > > On Tue, Mar 03, 2026 at 03:45:58PM -0500, Joel Fernandes wrote:
> > > > > On Mon, 02 Mar 2026 11:04:04 +0100, Uladzislau Rezki (Sony) wrote:
> > > > >
> > > > > > * The latch is cleared only when the pending requests are fully
> > > > > > drained(nr == 0);
> > > > >
> > > > > > +static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
> > > > > > +{
> > > > > > + long nr;
> > > > > > +
> > > > > > + llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
> > > > > > + nr = atomic_long_inc_return(&rcu_sr_normal_count);
> > > > > > +
> > > > > > + /* Latch: only when flooded and if unlatched. */
> > > > > > + if (nr >= RCU_SR_NORMAL_LATCH_THR)
> > > > > > + (void)atomic_cmpxchg(&rcu_sr_normal_latched, 0, 1);
> > > > > > +}
> > > > >
> > > > > I think there is a stuck-latch race here. Once llist_add() places the
> > > > > entry in srs_next, the GP kthread can pick it up and fire
> > > > > rcu_sr_normal_complete() before the latching cmpxchg runs. If the last
> > > > > in-flight completion drains count to zero in that window, the unlatch
> > > > > cmpxchg(latched, 1, 0) fails (latched is still 0 at that moment), and
> > > > > then the latching cmpxchg(latched, 0, 1) fires anyway — with count=0:
> > > > >
> > > > > CPU 0 (add_req, count just hit 64) GP kthread
> > > > > ---------------------------------- ----------
> > > > > llist_add() <-- entry now in srs_next
> > > > > inc_return() --> nr = 64
> > > > > [preempted]
> > > > > rcu_sr_normal_complete() x64:
> > > > > dec_return -> count: 64..1..0
> > > > > count==0:
> > > > > cmpxchg(latched, 1, 0)
> > > > > --> FAILS (latched still 0)
> > > > > [resumes]
> > > > > cmpxchg(latched, 0, 1) --> latched = 1
> > > > >
> > > > > Final state: count=0, latched=1 --> STUCK LATCH
> > > > >
> > > > > All subsequent synchronize_rcu() callers see latched==1 and take the
> > > > > fallback path (not counted). With no new SR-normal callers,
> > > > > rcu_sr_normal_complete() is never reached again, so the unlatch
> > > > > cmpxchg(latched, 1, 0) never fires. The latch is permanently stuck.
> > > > >
> > > > > This requires preemption for a full GP duration between llist_add() and
> > > > > the cmpxchg, which is probably more likely on PREEMPT_RT or heavily loaded
> > > > > systems.
> > > > >
> > > > > The fix: move the cmpxchg *before* llist_add(), so the entry is not
> > > > > visible to the GP kthread until after the latch is already set.
> > > > >
> > > > > That should fix it, thoughts?
> > > > >
> > > > Yes and thank you!
> > > >
> > > > We can improve it even more by removing atomic_cmpxchg() in
> > > > the rcu_sr_normal_add_req() function, because only one context
> > > > sees the (nr == RCU_SR_NORMAL_LATCH_THR) condition:
> > > >
> > > > <snip>
> > > > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > > > index 86dc88a70fd0..72b340940e11 100644
> > > > --- a/kernel/rcu/tree.c
> > > > +++ b/kernel/rcu/tree.c
> > > > @@ -1640,7 +1640,7 @@ static struct workqueue_struct *sync_wq;
> > > >
> > > > /* Number of in-flight synchronize_rcu() calls queued on srs_next. */
> > > > static atomic_long_t rcu_sr_normal_count;
> > > > -static atomic_t rcu_sr_normal_latched;
> > > > +static int rcu_sr_normal_latched; /* 0/1 */
> > > >
> > > > static void rcu_sr_normal_complete(struct llist_node *node)
> > > > {
> > > > @@ -1662,7 +1662,7 @@ static void rcu_sr_normal_complete(struct llist_node *node)
> > > > * drained and if it has been latched.
> > > > */
> > > > if (nr == 0)
> > > > - (void)atomic_cmpxchg(&rcu_sr_normal_latched, 1, 0);
> > > > + (void)cmpxchg(&rcu_sr_normal_latched, 1, 0);
> > > > }
> > > >
> > > > static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work)
> > > > @@ -1808,14 +1808,22 @@ static bool rcu_sr_normal_gp_init(void)
> > > >
> > > > static void rcu_sr_normal_add_req(struct rcu_synchronize *rs)
> > > > {
> > > > - long nr;
> > > > + /*
> > > > + * Increment before publish to avoid a complete
> > > > + * vs enqueue race on latch.
> > > > + */
> > > > + long nr = atomic_long_inc_return(&rcu_sr_normal_count);
> > > >
> > > > - llist_add((struct llist_node *) &rs->head, &rcu_state.srs_next);
> > > > - nr = atomic_long_inc_return(&rcu_sr_normal_count);
> > > > + /*
> > > > + * Latch on threshold crossing. (nr == RCU_SR_NORMAL_LATCH_THR)
> > > > + * can be true only for one context, avoiding contention on the
> > > > + * write path.
> > > > + */
> > > > + if (nr == RCU_SR_NORMAL_LATCH_THR)
> > > > + WRITE_ONCE(rcu_sr_normal_latched, 1);
> > >
> > > Isn't it still racy?
> > >
> > > rcu_sr_normal_add_req rcu_sr_normal_complete
> > > --------------------- ----------------------
> > > nr = atomic_long_dec_return(&rcu_sr_normal_count);
> > > // nr == 0
> > > ======= PREEMPTION =======
> > > // 64 tasks doing synchronize_rcu()
> > > rcu_sr_normal_add_req()
> > > WRITE_ONCE(rcu_sr_normal_latched, 1);
> > > cmpxchg(&rcu_sr_normal_latched, 1, 0);
> > >
> > >
> > > Also more generally there is nothing that orders the WRITE_ONCE() with the
> > > cmpxchg.
> > >
> > Yep, that I know. This is rather a "relaxed" mechanism than a
> > strictly ordered one. The race you described can happen, but I do not
> > see it as a problem because, as noted, it is a relaxed policy flag.
>
> Ok, that will need a comment explaining how and why we tolerate missed
> latches then.
>
Makes sense!
> >
> > But WRITE_ONCE() i can replace by:
> >
> > if (nr == RCU_SR_NORMAL_LATCH_THR)
> > cmpxchg(&rcu_sr_normal_latched, 0, 1);
>
> Possibly yes, though I'm not sure that would help.
>
Just to align with the second place, where we drop rcu_sr_normal_latched.
> > > Is it possible to remove rcu_sr_normal_latched and simply deal with comparisons
> > > between rcu_sr_normal_count and RCU_SR_NORMAL_LATCH_THR?
> > >
> > It is. But the idea with the latch is a bit different than just checking
> > a threshold. The main goal is to detect a flood and latch the path until
> > __all__ users are flushed. I.e. it implements hysteresis to prevent
> > repeated switches around the threshold.
>
> Good point!
>
> > With your proposal, the behaviour becomes different.
> >
> > Thoughts?
>
> Good thoughts! :-)
>
Thank you Frederic :)
--
Uladzislau Rezki
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2026-03-11 8:45 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-02 10:04 [PATCH v2] rcu: Latch normal synchronize_rcu() path on flood Uladzislau Rezki (Sony)
2026-03-03 20:45 ` Joel Fernandes
2026-03-05 10:59 ` Uladzislau Rezki
2026-03-09 20:32 ` Joel Fernandes
2026-03-10 12:37 ` Uladzislau Rezki
2026-03-10 14:11 ` Frederic Weisbecker
2026-03-10 16:28 ` Uladzislau Rezki
2026-03-10 22:24 ` Frederic Weisbecker
2026-03-11 8:45 ` Uladzislau Rezki
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox