The Linux Kernel Mailing List
 help / color / mirror / Atom feed
From: Tim Chen <tim.c.chen@linux.intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	K Prateek Nayak <kprateek.nayak@amd.com>,
	Vincent Guittot <vincent.guittot@linaro.org>
Cc: Chen Yu <yu.c.chen@intel.com>, Juri Lelli <juri.lelli@redhat.com>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Valentin Schneider <vschneid@redhat.com>,
	Madadi Vineeth Reddy <vineethr@linux.ibm.com>,
	Hillf Danton <hdanton@sina.com>,
	Shrikanth Hegde <sshegde@linux.ibm.com>,
	Jianyong Wu <jianyong.wu@outlook.com>,
	Yangyu Chen <cyy@cyyself.name>,
	Tingyin Duan <tingyin.duan@gmail.com>,
	Vern Hao <vernhao@tencent.com>, Vern Hao <haoxing990@gmail.com>,
	Len Brown <len.brown@intel.com>,
	Tim Chen <tim.c.chen@linux.intel.com>,
	Aubrey Li <aubrey.li@intel.com>, Zhao Liu <zhao1.liu@intel.com>,
	Chen Yu <yu.chen.surf@gmail.com>,
	Adam Li <adamli@os.amperecomputing.com>,
	Aaron Lu <ziqianlu@bytedance.com>,
	Tim Chen <tim.c.chen@intel.com>, Josh Don <joshdon@google.com>,
	Gavin Guo <gavinguo@igalia.com>,
	Qais Yousef <qyousef@layalina.io>,
	Libo Chen <libchen@purestorage.com>,
	Luo Gengkun <luogengkun2@huawei.com>,
	linux-kernel@vger.kernel.org
Subject: [Patch v4 14/16] sched/cache: Fix has_multi_llcs iff at least one partition has multiple LLCs
Date: Wed, 13 May 2026 13:39:25 -0700	[thread overview]
Message-ID: <c541af2547d54509fbfd3b3a1e8072e2e5c7ff68.1778703694.git.tim.c.chen@linux.intel.com> (raw)
In-Reply-To: <cover.1778703694.git.tim.c.chen@linux.intel.com>

From: Chen Yu <yu.c.chen@intel.com>

sched_cache_present is a global static key, but build_sched_domains()
is called per partition from the "Build new domains" loop in
partition_sched_domains_locked(). Each call unconditionally sets the
key based solely on the has_multi_llcs local variable for that partition.

The call for the last partition overwrites the key's value, even when
a previously built partition has multiple LLCs.

If partition A (multi-LLC) is built first, the key is enabled. Then
when partition B (single-LLC) is built, the key is disabled. The
multi-LLC partition A is still active but the key is now off.

Fix it the same way sched_energy_present is handled: determine whether
any partition has multiple LLCs while iterating over all partitions,
rather than deciding based on a single partition.

This bug was reported by sashiko.

Fixes: d59f4fd1d303 ("sched/cache: Enable cache aware scheduling for multi LLCs NUMA node")
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Co-developed-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---
 kernel/sched/topology.c | 69 +++++++++++++++++++++++++++++++----------
 1 file changed, 53 insertions(+), 16 deletions(-)

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 07f0a3d28253..4c5ea369d835 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -950,6 +950,7 @@ static void _sched_cache_active_set(void)
 	}
 }
 
+/* used by debugfs */
 void sched_cache_active_set(void)
 {
 	cpus_read_lock();
@@ -999,12 +1000,27 @@ void sched_update_llc_bytes(unsigned int cpu)
 unlock:
 	sched_domains_mutex_unlock();
 }
+
+static void sched_cache_set(bool has_multi_llcs)
+{
+	/*
+	 * TBD: check before writing to it. sched domain rebuild
+	 * is not in the critical path, leave as-is for now.
+	 */
+	if (has_multi_llcs)
+		static_branch_enable_cpuslocked(&sched_cache_present);
+	else
+		static_branch_disable_cpuslocked(&sched_cache_present);
+
+	_sched_cache_active_set();
+}
 #else
 static bool alloc_sd_llc(const struct cpumask *cpu_map,
 			 struct s_data *d)
 {
 	return false;
 }
+static inline void sched_cache_set(bool has_multi_llcs) { }
 #endif
 
 /*
@@ -2949,7 +2965,8 @@ void sched_domains_free_llc_id(int cpu)
  * to the individual CPUs
  */
 static int
-build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
+build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr,
+		    bool *multi_llcs)
 {
 	enum s_alloc alloc_state = sa_none;
 	bool has_multi_llcs = false;
@@ -3093,18 +3110,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 
 	ret = 0;
 error:
-#ifdef CONFIG_SCHED_CACHE
-	/*
-	 * TBD: check before writing to it. sched domain rebuild
-	 * is not in the critical path, leave as-is for now.
-	 */
-	if (!ret && has_multi_llcs)
-		static_branch_enable_cpuslocked(&sched_cache_present);
-	else
-		static_branch_disable_cpuslocked(&sched_cache_present);
-
-	_sched_cache_active_set();
-#endif
+	*multi_llcs = has_multi_llcs;
 	__free_domain_allocs(&d, alloc_state, cpu_map);
 
 	return ret;
@@ -3167,6 +3173,7 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
  */
 int __init sched_init_domains(const struct cpumask *cpu_map)
 {
+	bool multi_llcs;
 	int err;
 
 	zalloc_cpumask_var(&sched_domains_llc_id_allocmask, GFP_KERNEL);
@@ -3181,7 +3188,9 @@ int __init sched_init_domains(const struct cpumask *cpu_map)
 	if (!doms_cur)
 		doms_cur = &fallback_doms;
 	cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_TYPE_DOMAIN));
-	err = build_sched_domains(doms_cur[0], NULL);
+	err = build_sched_domains(doms_cur[0], NULL, &multi_llcs);
+	if (!err)
+		sched_cache_set(multi_llcs);
 
 	return err;
 }
@@ -3254,6 +3263,7 @@ static void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new
 				    struct sched_domain_attr *dattr_new)
 {
 	bool __maybe_unused has_eas = false;
+	bool has_multi_llcs = false, multi_llcs;
 	int i, j, n;
 	int new_topology;
 
@@ -3303,14 +3313,41 @@ static void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new
 	for (i = 0; i < ndoms_new; i++) {
 		for (j = 0; j < n && !new_topology; j++) {
 			if (cpumask_equal(doms_new[i], doms_cur[j]) &&
-			    dattrs_equal(dattr_new, i, dattr_cur, j))
+			    dattrs_equal(dattr_new, i, dattr_cur, j)) {
+				/*
+				 * Reused partition has to be taken care
+				 * of here, because there could be a corner
+				 * case that if the reused partition is skipped
+				 * and only new partition is considered, an
+				 * incorrect has_multi_llcs would be set. For
+				 * example:
+				 * If the only multi-LLC partition is reused
+				 * and a new single-LLC partition is built,
+				 * sched_cache_set(false) disables cache-aware
+				 * scheduling globally despite the reused
+				 * multi-LLC partition still being active.
+				 */
+				struct sched_domain *sd;
+				int cpu = cpumask_first(doms_cur[j]);
+
+				guard(rcu)();
+				sd = rcu_dereference(cpu_rq(cpu)->sd);
+				while (sd && sd->parent && (sd->parent->flags & SD_SHARE_LLC))
+					sd = sd->parent;
+				if (sd && (sd->flags & SD_SHARE_LLC) && sd->parent &&
+				    sd_in_multi_llcs(sd))
+					has_multi_llcs = true;
 				goto match2;
+			}
 		}
 		/* No match - add a new doms_new */
-		build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL);
+		build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL,
+				    &multi_llcs);
+		has_multi_llcs |= multi_llcs;
 match2:
 		;
 	}
+	sched_cache_set(has_multi_llcs);
 
 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
 	/* Build perf domains: */
-- 
2.32.0


  parent reply	other threads:[~2026-05-13 20:33 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-13 20:39 [Patch v4 00/16] Cache aware scheduling enhancements Tim Chen
2026-05-13 20:39 ` [Patch v4 01/16] sched/cache: Allow only 1 thread of the process to calculate the LLC occupancy Tim Chen
2026-05-13 20:39 ` [Patch v4 02/16] sched/cache: Disable cache aware scheduling for processes with high thread counts Tim Chen
2026-05-13 20:39 ` [Patch v4 03/16] sched/cache: Skip cache-aware scheduling for single-threaded processes Tim Chen
2026-05-13 20:39 ` [Patch v4 04/16] sched/cache: Calculate the LLC size and store it in sched_domain Tim Chen
2026-05-13 20:39 ` [Patch v4 05/16] sched/cache: Avoid cache-aware scheduling for memory-heavy processes Tim Chen
2026-05-13 20:39 ` [Patch v4 06/16] sched/cache: Add user control to adjust the aggressiveness of cache-aware scheduling Tim Chen
2026-05-13 20:39 ` [Patch v4 07/16] sched/cache: Fix rcu warning when accessing sd_llc domain Tim Chen
2026-05-13 20:39 ` [Patch v4 08/16] sched/cache: Fix potential NULL mm pointer access Tim Chen
2026-05-13 20:39 ` [Patch v4 09/16] sched/cache: Annotate lockless accesses to mm->sc_stat.cpu Tim Chen
2026-05-13 20:39 ` [Patch v4 10/16] sched/cache: Fix unpaired account_llc_enqueue/dequeue Tim Chen
2026-05-13 20:39 ` [Patch v4 11/16] sched/cache: Fix checking active load balance by only considering the CFS task Tim Chen
2026-05-13 20:39 ` [Patch v4 12/16] sched/cache: Fix race condition during sched domain rebuild Tim Chen
2026-05-13 20:39 ` [Patch v4 13/16] sched/cache: Fix cache aware scheduling enabling for multi LLCs system Tim Chen
2026-05-13 20:39 ` Tim Chen [this message]
2026-05-13 20:39 ` [Patch v4 15/16] sched/cache: Fix possible overflow when invalidating the preferred CPU Tim Chen
2026-05-13 20:39 ` [Patch v4 16/16] sched/cache: Fix stale preferred_llc for a new task Tim Chen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c541af2547d54509fbfd3b3a1e8072e2e5c7ff68.1778703694.git.tim.c.chen@linux.intel.com \
    --to=tim.c.chen@linux.intel.com \
    --cc=adamli@os.amperecomputing.com \
    --cc=aubrey.li@intel.com \
    --cc=bsegall@google.com \
    --cc=cyy@cyyself.name \
    --cc=dietmar.eggemann@arm.com \
    --cc=gavinguo@igalia.com \
    --cc=haoxing990@gmail.com \
    --cc=hdanton@sina.com \
    --cc=jianyong.wu@outlook.com \
    --cc=joshdon@google.com \
    --cc=juri.lelli@redhat.com \
    --cc=kprateek.nayak@amd.com \
    --cc=len.brown@intel.com \
    --cc=libchen@purestorage.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luogengkun2@huawei.com \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=qyousef@layalina.io \
    --cc=rostedt@goodmis.org \
    --cc=sshegde@linux.ibm.com \
    --cc=tim.c.chen@intel.com \
    --cc=tingyin.duan@gmail.com \
    --cc=vernhao@tencent.com \
    --cc=vincent.guittot@linaro.org \
    --cc=vineethr@linux.ibm.com \
    --cc=vschneid@redhat.com \
    --cc=yu.c.chen@intel.com \
    --cc=yu.chen.surf@gmail.com \
    --cc=zhao1.liu@intel.com \
    --cc=ziqianlu@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox