The Linux Kernel Mailing List
From: Tim Chen <tim.c.chen@linux.intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	K Prateek Nayak <kprateek.nayak@amd.com>,
	Vincent Guittot <vincent.guittot@linaro.org>
Cc: Chen Yu <yu.c.chen@intel.com>, Juri Lelli <juri.lelli@redhat.com>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Valentin Schneider <vschneid@redhat.com>,
	Madadi Vineeth Reddy <vineethr@linux.ibm.com>,
	Hillf Danton <hdanton@sina.com>,
	Shrikanth Hegde <sshegde@linux.ibm.com>,
	Jianyong Wu <jianyong.wu@outlook.com>,
	Yangyu Chen <cyy@cyyself.name>,
	Tingyin Duan <tingyin.duan@gmail.com>,
	Vern Hao <vernhao@tencent.com>, Vern Hao <haoxing990@gmail.com>,
	Len Brown <len.brown@intel.com>,
	Tim Chen <tim.c.chen@linux.intel.com>,
	Aubrey Li <aubrey.li@intel.com>, Zhao Liu <zhao1.liu@intel.com>,
	Chen Yu <yu.chen.surf@gmail.com>,
	Adam Li <adamli@os.amperecomputing.com>,
	Aaron Lu <ziqianlu@bytedance.com>,
	Tim Chen <tim.c.chen@intel.com>, Josh Don <joshdon@google.com>,
	Gavin Guo <gavinguo@igalia.com>,
	Qais Yousef <qyousef@layalina.io>,
	Libo Chen <libchen@purestorage.com>,
	Luo Gengkun <luogengkun2@huawei.com>,
	linux-kernel@vger.kernel.org
Subject: [Patch v4 12/16] sched/cache: Fix race condition during sched domain rebuild
Date: Wed, 13 May 2026 13:39:23 -0700
Message-ID: <9afddf439687f04bb56b46625bd9f153eb8abad5.1778703694.git.tim.c.chen@linux.intel.com>
In-Reply-To: <cover.1778703694.git.tim.c.chen@linux.intel.com>

From: Chen Yu <yu.c.chen@intel.com>

sched_cache_active_set_unlocked() calls sched_cache_active_set(),
which checks for hardware support without holding any locks:
static void sched_cache_active_set(bool locked)
{
        /* hardware does not support */
        if (!static_branch_likely(&sched_cache_present)) {
                _sched_cache_active_set(false, locked);
                return;
        }
    ...
If build_sched_domains() runs concurrently during a CPU hotplug
operation, it can disable sched_cache_present while holding
sched_domains_mutex and the CPU hotplug lock. If a debugfs write
thread evaluates sched_cache_present as true just before that and
then blocks or gets preempted, it can go on to enable
sched_cache_active after hardware support has been marked absent.
Close the race by acquiring cpus_read_lock() and
sched_domains_mutex_lock() when the user changes sched_cache_active
via debugfs.
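
The race can be illustrated by the following interleaving (a sketch
of one possible sequence based on the description above, not a
captured trace):

    debugfs write                      CPU hotplug
    (sched_cache_enable_write)         (build_sched_domains)
    --------------------------         ---------------------
    sched_cache_active_set_unlocked()
      sees sched_cache_present == true
      <blocks or is preempted>
                                       static_branch_disable_cpuslocked(
                                           &sched_cache_present)
                                       sched_cache_active_set_locked()
                                         -> sched_cache_active disabled
    static_branch_enable(&sched_cache_active)
      -> sched_cache_active left enabled although hardware
         support has just been marked absent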

This bug was reported by sashiko.

Fixes: 067a31358143 ("sched/cache: Allow the user space to turn on and off cache aware scheduling")
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Co-developed-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---
 kernel/sched/debug.c    |  4 +++-
 kernel/sched/sched.h    |  2 +-
 kernel/sched/topology.c | 42 +++++++++++++++--------------------------
 3 files changed, 19 insertions(+), 29 deletions(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index fe569539e888..ed3a0d65da0c 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -224,7 +224,9 @@ sched_cache_enable_write(struct file *filp, const char __user *ubuf,
 
 	sysctl_sched_cache_user = val;
 
-	sched_cache_active_set_unlocked();
+	sched_cache_active_set();
+
+	*ppos += cnt;
 
 	return cnt;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 27409399137c..45a3b77f46aa 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -4083,7 +4083,7 @@ static inline bool sched_cache_enabled(void)
 	return static_branch_unlikely(&sched_cache_active);
 }
 
-extern void sched_cache_active_set_unlocked(void);
+extern void sched_cache_active_set(void);
 
 #endif
 
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 7248a7279abe..cff5a0ecd64d 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -917,30 +917,19 @@ static bool alloc_sd_llc(const struct cpumask *cpu_map,
 	return false;
 }
 
-static void _sched_cache_active_set(bool enable, bool locked)
-{
-	if (enable) {
-		if (locked)
-			static_branch_enable_cpuslocked(&sched_cache_active);
-		else
-			static_branch_enable(&sched_cache_active);
-	} else {
-		if (locked)
-			static_branch_disable_cpuslocked(&sched_cache_active);
-		else
-			static_branch_disable(&sched_cache_active);
-	}
-}
-
 /*
  * Enable/disable cache aware scheduling according to
  * user input and the presence of hardware support.
+ * Expected to be protected by cpus_read_lock() and
+ * sched_domains_mutex_lock()
  */
-static void sched_cache_active_set(bool locked)
+static void _sched_cache_active_set(void)
 {
 	/* hardware does not support */
 	if (!static_branch_likely(&sched_cache_present)) {
-		_sched_cache_active_set(false, locked);
+		static_branch_disable_cpuslocked(&sched_cache_active);
+		if (sched_debug())
+			pr_info("%s: cache aware scheduling not supported on this platform\n", __func__);
 		return;
 	}
 
@@ -951,24 +940,23 @@ static void sched_cache_active_set(bool locked)
 	 * for now.
 	 */
 	if (sysctl_sched_cache_user) {
-		_sched_cache_active_set(true, locked);
+		static_branch_enable_cpuslocked(&sched_cache_active);
 		if (sched_debug())
 			pr_info("%s: enabling cache aware scheduling\n", __func__);
 	} else {
-		_sched_cache_active_set(false, locked);
+		static_branch_disable_cpuslocked(&sched_cache_active);
 		if (sched_debug())
 			pr_info("%s: disabling cache aware scheduling\n", __func__);
 	}
 }
 
-static void sched_cache_active_set_locked(void)
-{
-	return sched_cache_active_set(true);
-}
-
-void sched_cache_active_set_unlocked(void)
+void sched_cache_active_set(void)
 {
-	return sched_cache_active_set(false);
+	cpus_read_lock();
+	sched_domains_mutex_lock();
+	_sched_cache_active_set();
+	sched_domains_mutex_unlock();
+	cpus_read_unlock();
 }
 
 /*
@@ -3082,7 +3070,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 	else
 		static_branch_disable_cpuslocked(&sched_cache_present);
 
-	sched_cache_active_set_locked();
+	_sched_cache_active_set();
 #endif
 	__free_domain_allocs(&d, alloc_state, cpu_map);
 
-- 
2.32.0


Thread overview: 17+ messages
2026-05-13 20:39 [Patch v4 00/16] Cache aware scheduling enhancements Tim Chen
2026-05-13 20:39 ` [Patch v4 01/16] sched/cache: Allow only 1 thread of the process to calculate the LLC occupancy Tim Chen
2026-05-13 20:39 ` [Patch v4 02/16] sched/cache: Disable cache aware scheduling for processes with high thread counts Tim Chen
2026-05-13 20:39 ` [Patch v4 03/16] sched/cache: Skip cache-aware scheduling for single-threaded processes Tim Chen
2026-05-13 20:39 ` [Patch v4 04/16] sched/cache: Calculate the LLC size and store it in sched_domain Tim Chen
2026-05-13 20:39 ` [Patch v4 05/16] sched/cache: Avoid cache-aware scheduling for memory-heavy processes Tim Chen
2026-05-13 20:39 ` [Patch v4 06/16] sched/cache: Add user control to adjust the aggressiveness of cache-aware scheduling Tim Chen
2026-05-13 20:39 ` [Patch v4 07/16] sched/cache: Fix rcu warning when accessing sd_llc domain Tim Chen
2026-05-13 20:39 ` [Patch v4 08/16] sched/cache: Fix potential NULL mm pointer access Tim Chen
2026-05-13 20:39 ` [Patch v4 09/16] sched/cache: Annotate lockless accesses to mm->sc_stat.cpu Tim Chen
2026-05-13 20:39 ` [Patch v4 10/16] sched/cache: Fix unpaired account_llc_enqueue/dequeue Tim Chen
2026-05-13 20:39 ` [Patch v4 11/16] sched/cache: Fix checking active load balance by only considering the CFS task Tim Chen
2026-05-13 20:39 ` [Patch v4 12/16] sched/cache: Fix race condition during sched domain rebuild Tim Chen [this message]
2026-05-13 20:39 ` [Patch v4 13/16] sched/cache: Fix cache aware scheduling enabling for multi LLCs system Tim Chen
2026-05-13 20:39 ` [Patch v4 14/16] sched/cache: Fix has_multi_llcs iff at least one partition has multiple LLCs Tim Chen
2026-05-13 20:39 ` [Patch v4 15/16] sched/cache: Fix possible overflow when invalidating the preferred CPU Tim Chen
2026-05-13 20:39 ` [Patch v4 16/16] sched/cache: Fix stale preferred_llc for a new task Tim Chen
