public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] sched/fair: Optimize idle core discovery algorithm via LLC-wide bitmask
@ 2026-01-22 15:20 Qiliang Yuan
  2026-01-23 10:04 ` Peter Zijlstra
  0 siblings, 1 reply; 2+ messages in thread
From: Qiliang Yuan @ 2026-01-22 15:20 UTC (permalink / raw)
  To: mingo, peterz, juri.lelli, vincent.guittot
  Cc: dietmar.eggemann, rostedt, bsegall, mgorman, vschneid,
	linux-kernel, Qiliang Yuan, Qiliang Yuan

The current select_idle_cpu() employs a linear O(N) scan to find idle
cores, which scales poorly on modern high-core-count systems.

This patch optimizes the discovery algorithm by:
1. Adding a per-LLC 'idle_cores_mask' to sched_domain_shared for
   tracking core-level idle status.
2. Converting the linear search into a bitmask iteration over hinted
   idle cores, so the search cost scales with the number of idle cores
   rather than the LLC width (approaching O(1) in sparse scenarios).
3. Implementing a lazy-clear mechanism during the scan to maintain
   efficiency without expensive global synchronization.

This refinement shrinks the search space in the critical wakeup path,
mitigating the linear scaling overhead on large SMT machines.

Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
---
 include/linux/sched/topology.h |  1 +
 kernel/sched/fair.c            | 49 ++++++++++++++++++++++++----------
 kernel/sched/topology.c        |  2 +-
 3 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 45c0022b91ce..4223f2cd7eb8 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -68,6 +68,7 @@ struct sched_domain_shared {
 	atomic_t	nr_busy_cpus;
 	int		has_idle_cores;
 	int		nr_idle_scan;
+	unsigned long	idle_cores_mask[];
 };
 
 struct sched_domain {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e71302282671..458324d240e9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7507,8 +7507,13 @@ static inline void set_idle_cores(int cpu, int val)
 	struct sched_domain_shared *sds;
 
 	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-	if (sds)
+	if (sds) {
 		WRITE_ONCE(sds->has_idle_cores, val);
+		if (val)
+			cpumask_set_cpu(cpu, (struct cpumask *)sds->idle_cores_mask);
+		else
+			cpumask_clear((struct cpumask *)sds->idle_cores_mask);
+	}
 }
 
 static inline bool test_idle_cores(int cpu)
@@ -7678,23 +7683,39 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
 		}
 	}
 
-	for_each_cpu_wrap(cpu, cpus, target + 1) {
-		if (has_idle_core) {
-			i = select_idle_core(p, cpu, cpus, &idle_cpu);
-			if ((unsigned int)i < nr_cpumask_bits)
-				return i;
+	if (has_idle_core) {
+		sd_share = rcu_dereference(per_cpu(sd_llc_shared, target));
+		if (sd_share) {
+			for_each_cpu_wrap(cpu, (struct cpumask *)sd_share->idle_cores_mask, target + 1) {
+				if (!cpumask_test_cpu(cpu, cpus))
+					continue;
 
-		} else {
-			if (--nr <= 0)
-				return -1;
-			idle_cpu = __select_idle_cpu(cpu, p);
-			if ((unsigned int)idle_cpu < nr_cpumask_bits)
-				break;
+				i = select_idle_core(p, cpu, cpus, &idle_cpu);
+				if ((unsigned int)i < nr_cpumask_bits)
+					return i;
+
+				/* Core is no longer idle, clear the hint */
+				cpumask_clear_cpu(cpu, (struct cpumask *)sd_share->idle_cores_mask);
+			}
+
+			/* Searched all hinted cores and found none */
+			set_idle_cores(target, false);
 		}
+		/* If we found an idle CPU during core search, return it */
+		if (idle_cpu != -1)
+			return idle_cpu;
+
+		/* Fall through to any-CPU search if needed */
+		has_idle_core = false;
 	}
 
-	if (has_idle_core)
-		set_idle_cores(target, false);
+	for_each_cpu_wrap(cpu, cpus, target + 1) {
+		if (--nr <= 0)
+			return -1;
+		idle_cpu = __select_idle_cpu(cpu, p);
+		if ((unsigned int)idle_cpu < nr_cpumask_bits)
+			break;
+	}
 
 	return idle_cpu;
 }
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index cf643a5ddedd..5a3f29a26bdb 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2392,7 +2392,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
 			*per_cpu_ptr(sdd->sd, j) = sd;
 
-			sds = kzalloc_node(sizeof(struct sched_domain_shared),
+			sds = kzalloc_node(sizeof(struct sched_domain_shared) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
 			if (!sds)
 				return -ENOMEM;
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-01-23 10:04 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-01-22 15:20 [PATCH] sched/fair: Optimize idle core discovery algorithm via LLC-wide bitmask Qiliang Yuan
2026-01-23 10:04 ` Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox