From: Andrea Righi <arighi@nvidia.com>
To: Ingo Molnar <mingo@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Juri Lelli <juri.lelli@redhat.com>,
Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>,
Steven Rostedt <rostedt@goodmis.org>,
Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
Valentin Schneider <vschneid@redhat.com>,
K Prateek Nayak <kprateek.nayak@amd.com>,
Christian Loehle <christian.loehle@arm.com>,
Koba Ko <kobak@nvidia.com>,
Felix Abecassis <fabecassis@nvidia.com>,
Balbir Singh <balbirs@nvidia.com>,
Joel Fernandes <joelagnelf@nvidia.com>,
Shrikanth Hegde <sshegde@linux.ibm.com>,
linux-kernel@vger.kernel.org
Subject: [PATCH 1/6] sched/fair: Use guard(rcu) for sched_domain RCU sections
Date: Tue, 28 Apr 2026 07:16:35 +0200 [thread overview]
Message-ID: <20260428051720.3180182-2-arighi@nvidia.com> (raw)
In-Reply-To: <20260428051720.3180182-1-arighi@nvidia.com>
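Use the scoped guard(rcu)() helper to safely access sched_domain
pointers. The sched_domain checks in nohz_balancer_kick() move into a
new helper, nohz_balancer_needs_kick(), whose body runs under the guard.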
No functional change intended. This is preparation for topology work
where sched_domain lifetimes are easier to reason about with explicit,
scope-bounded RCU critical sections.
Suggested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Andrea Righi <arighi@nvidia.com>
---
kernel/sched/fair.c | 141 ++++++++++++++++++++++----------------------
1 file changed, 71 insertions(+), 70 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 69361c63353ad..fc0828150c780 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8083,6 +8083,8 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
*/
lockdep_assert_irqs_disabled();
+ guard(rcu)();
+
if (choose_idle_cpu(target, p) &&
asym_fits_cpu(task_util, util_min, util_max, target))
return target;
@@ -12701,55 +12703,16 @@ static void kick_ilb(unsigned int flags)
}
/*
- * Current decision point for kicking the idle load balancer in the presence
- * of idle CPUs in the system.
+ * Decide whether the ILB needs a stats and/or balance kick based on
+ * sched_domain state.
*/
-static void nohz_balancer_kick(struct rq *rq)
+static bool nohz_balancer_needs_kick(struct rq *rq)
{
- unsigned long now = jiffies;
struct sched_domain_shared *sds;
struct sched_domain *sd;
int nr_busy, i, cpu = rq->cpu;
- unsigned int flags = 0;
-
- if (unlikely(rq->idle_balance))
- return;
-
- /*
- * We may be recently in ticked or tickless idle mode. At the first
- * busy tick after returning from idle, we will update the busy stats.
- */
- nohz_balance_exit_idle(rq);
-
- if (READ_ONCE(nohz.has_blocked_load) &&
- time_after(now, READ_ONCE(nohz.next_blocked)))
- flags = NOHZ_STATS_KICK;
-
- /*
- * Most of the time system is not 100% busy. i.e nohz.nr_cpus > 0
- * Skip the read if time is not due.
- *
- * If none are in tickless mode, there maybe a narrow window
- * (28 jiffies, HZ=1000) where flags maybe set and kick_ilb called.
- * But idle load balancing is not done as find_new_ilb fails.
- * That's very rare. So read nohz.nr_cpus only if time is due.
- */
- if (time_before(now, nohz.next_balance))
- goto out;
- /*
- * None are in tickless mode and hence no need for NOHZ idle load
- * balancing
- */
- if (unlikely(cpumask_empty(nohz.idle_cpus_mask)))
- return;
-
- if (rq->nr_running >= 2) {
- flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
- goto out;
- }
-
- rcu_read_lock();
+ guard(rcu)();
sd = rcu_dereference_all(rq->sd);
if (sd) {
@@ -12757,10 +12720,8 @@ static void nohz_balancer_kick(struct rq *rq)
* If there's a runnable CFS task and the current CPU has reduced
* capacity, kick the ILB to see if there's a better CPU to run on:
*/
- if (rq->cfs.h_nr_runnable >= 1 && check_cpu_capacity(rq, sd)) {
- flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
- goto unlock;
- }
+ if (rq->cfs.h_nr_runnable >= 1 && check_cpu_capacity(rq, sd))
+ return true;
}
sd = rcu_dereference_all(per_cpu(sd_asym_packing, cpu));
@@ -12774,10 +12735,8 @@ static void nohz_balancer_kick(struct rq *rq)
* preferred CPU must be idle.
*/
for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
- if (sched_asym(sd, i, cpu)) {
- flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
- goto unlock;
- }
+ if (sched_asym(sd, i, cpu))
+ return true;
}
}
@@ -12787,10 +12746,8 @@ static void nohz_balancer_kick(struct rq *rq)
* When ASYM_CPUCAPACITY; see if there's a higher capacity CPU
* to run the misfit task on.
*/
- if (check_misfit_status(rq)) {
- flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
- goto unlock;
- }
+ if (check_misfit_status(rq))
+ return true;
/*
* For asymmetric systems, we do not want to nicely balance
@@ -12799,7 +12756,7 @@ static void nohz_balancer_kick(struct rq *rq)
*
* Skip the LLC logic because it's not relevant in that case.
*/
- goto unlock;
+ return false;
}
sds = rcu_dereference_all(per_cpu(sd_llc_shared, cpu));
@@ -12814,13 +12771,61 @@ static void nohz_balancer_kick(struct rq *rq)
* like this LLC domain has tasks we could move.
*/
nr_busy = atomic_read(&sds->nr_busy_cpus);
- if (nr_busy > 1) {
- flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
- goto unlock;
- }
+ if (nr_busy > 1)
+ return true;
}
-unlock:
- rcu_read_unlock();
+
+ return false;
+}
+
+/*
+ * Current decision point for kicking the idle load balancer in the presence
+ * of idle CPUs in the system.
+ */
+static void nohz_balancer_kick(struct rq *rq)
+{
+ unsigned long now = jiffies;
+ unsigned int flags = 0;
+
+ if (unlikely(rq->idle_balance))
+ return;
+
+ /*
+ * We may be recently in ticked or tickless idle mode. At the first
+ * busy tick after returning from idle, we will update the busy stats.
+ */
+ nohz_balance_exit_idle(rq);
+
+ if (READ_ONCE(nohz.has_blocked_load) &&
+ time_after(now, READ_ONCE(nohz.next_blocked)))
+ flags = NOHZ_STATS_KICK;
+
+ /*
+ * Most of the time system is not 100% busy. i.e nohz.nr_cpus > 0
+ * Skip the read if time is not due.
+ *
+ * If none are in tickless mode, there maybe a narrow window
+ * (28 jiffies, HZ=1000) where flags maybe set and kick_ilb called.
+ * But idle load balancing is not done as find_new_ilb fails.
+ * That's very rare. So read nohz.nr_cpus only if time is due.
+ */
+ if (time_before(now, nohz.next_balance))
+ goto out;
+
+ /*
+ * None are in tickless mode and hence no need for NOHZ idle load
+ * balancing
+ */
+ if (unlikely(cpumask_empty(nohz.idle_cpus_mask)))
+ return;
+
+ if (rq->nr_running >= 2) {
+ flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
+ goto out;
+ }
+
+ if (nohz_balancer_needs_kick(rq))
+ flags |= NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
out:
if (READ_ONCE(nohz.needs_update))
flags |= NOHZ_NEXT_KICK;
@@ -12833,16 +12838,14 @@ static void set_cpu_sd_state_busy(int cpu)
{
struct sched_domain *sd;
- rcu_read_lock();
+ guard(rcu)();
sd = rcu_dereference_all(per_cpu(sd_llc, cpu));
if (!sd || !sd->nohz_idle)
- goto unlock;
+ return;
sd->nohz_idle = 0;
atomic_inc(&sd->shared->nr_busy_cpus);
-unlock:
- rcu_read_unlock();
}
void nohz_balance_exit_idle(struct rq *rq)
@@ -12862,16 +12865,14 @@ static void set_cpu_sd_state_idle(int cpu)
{
struct sched_domain *sd;
- rcu_read_lock();
+ guard(rcu)();
sd = rcu_dereference_all(per_cpu(sd_llc, cpu));
if (!sd || sd->nohz_idle)
- goto unlock;
+ return;
sd->nohz_idle = 1;
atomic_dec(&sd->shared->nr_busy_cpus);
-unlock:
- rcu_read_unlock();
}
/*
--
2.54.0
next prev parent reply other threads:[~2026-04-28 5:17 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-28 5:16 [PATCH v4 0/6] sched/fair: SMT-aware asymmetric CPU capacity Andrea Righi
2026-04-28 5:16 ` Andrea Righi [this message]
2026-04-28 8:33 ` [PATCH 1/6] sched/fair: Use guard(rcu) for sched_domain RCU sections K Prateek Nayak
2026-04-28 10:43 ` Andrea Righi
2026-04-28 11:04 ` K Prateek Nayak
2026-04-28 11:50 ` Peter Zijlstra
2026-04-28 13:16 ` Andrea Righi
2026-04-28 14:12 ` Steven Rostedt
2026-04-28 14:26 ` Andrea Righi
2026-04-28 14:29 ` Steven Rostedt
2026-04-28 5:16 ` [PATCH 2/6] sched/fair: Attach sched_domain_shared to sd_asym_cpucapacity Andrea Righi
2026-04-28 6:45 ` Shrikanth Hegde
2026-04-28 8:47 ` Andrea Righi
2026-04-28 5:16 ` [PATCH 3/6] sched/fair: Prefer fully-idle SMT cores in asym-capacity idle selection Andrea Righi
2026-04-28 5:16 ` [PATCH 4/6] sched/fair: Reject misfit pulls onto busy SMT siblings on asym-capacity Andrea Righi
2026-04-28 5:16 ` [PATCH 5/6] sched/fair: Add SIS_UTIL support to select_idle_capacity() Andrea Righi
2026-04-28 5:16 ` [PATCH 6/6] sched/topology: Remove SMT/asym capacity warning Andrea Righi
2026-04-28 5:28 ` K Prateek Nayak
2026-04-28 5:54 ` Andrea Righi
2026-04-28 6:04 ` Andrea Righi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260428051720.3180182-2-arighi@nvidia.com \
--to=arighi@nvidia.com \
--cc=balbirs@nvidia.com \
--cc=bsegall@google.com \
--cc=christian.loehle@arm.com \
--cc=dietmar.eggemann@arm.com \
--cc=fabecassis@nvidia.com \
--cc=joelagnelf@nvidia.com \
--cc=juri.lelli@redhat.com \
--cc=kobak@nvidia.com \
--cc=kprateek.nayak@amd.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mgorman@suse.de \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=sshegde@linux.ibm.com \
--cc=vincent.guittot@linaro.org \
--cc=vschneid@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox