From: Frederic Weisbecker <frederic@kernel.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: "Frederic Weisbecker" <frederic@kernel.org>,
"Michal Koutný" <mkoutny@suse.com>,
"Ingo Molnar" <mingo@redhat.com>,
"Johannes Weiner" <hannes@cmpxchg.org>,
"Marco Crivellari" <marco.crivellari@suse.com>,
"Michal Hocko" <mhocko@suse.com>,
"Peter Zijlstra" <peterz@infradead.org>,
"Tejun Heo" <tj@kernel.org>,
"Thomas Gleixner" <tglx@linutronix.de>,
"Vlastimil Babka" <vbabka@suse.cz>,
"Waiman Long" <longman@redhat.com>,
cgroups@vger.kernel.org
Subject: [PATCH 15/27] cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset
Date: Fri, 20 Jun 2025 17:22:56 +0200 [thread overview]
Message-ID: <20250620152308.27492-16-frederic@kernel.org> (raw)
In-Reply-To: <20250620152308.27492-1-frederic@kernel.org>
Until now, HK_TYPE_DOMAIN has only included boot-defined isolated CPUs
passed through the isolcpus= boot option. Users who also want to know
about the runtime-defined isolated CPUs set through cpuset must use
different APIs: cpuset_cpu_is_isolated(), cpu_is_isolated(), etc.
There are many drawbacks to that approach:
1) Most interested subsystems want to know about all isolated CPUs, not
just those defined at boot time.
2) cpuset_cpu_is_isolated() / cpu_is_isolated() are not synchronized with
concurrent cpuset changes.
3) Further cpuset modifications are not propagated to subsystems.
Solve 1) and 2) by centralizing all isolated CPUs within the
HK_TYPE_DOMAIN housekeeping cpumask under the housekeeping lock.
Subsystems can rely on the housekeeping lock or RCU to synchronize
against concurrent changes.
The propagation mentioned in 3) will be handled in further patches.
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
include/linux/sched/isolation.h | 5 ++-
kernel/cgroup/cpuset.c | 2 +
kernel/sched/isolation.c | 71 ++++++++++++++++++++++++++++++---
kernel/sched/sched.h | 1 +
4 files changed, 72 insertions(+), 7 deletions(-)
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index 731506d312d2..f1b309f18511 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -36,7 +36,7 @@ extern bool housekeeping_test_cpu(int cpu, enum hk_type type);
static inline bool housekeeping_cpu(int cpu, enum hk_type type)
{
- if (housekeeping_flags & BIT(type))
+ if (READ_ONCE(housekeeping_flags) & BIT(type))
return housekeeping_test_cpu(cpu, type);
else
return true;
@@ -45,6 +45,8 @@ static inline bool housekeeping_cpu(int cpu, enum hk_type type)
extern void housekeeping_lock(void);
extern void housekeeping_unlock(void);
+extern int housekeeping_update(struct cpumask *mask, enum hk_type type);
+
extern void __init housekeeping_init(void);
#else
@@ -79,6 +81,7 @@ static inline bool housekeeping_cpu(int cpu, enum hk_type type)
static inline void housekeeping_lock(void) { }
static inline void housekeeping_unlock(void) { }
+static inline int housekeeping_update(struct cpumask *mask, enum hk_type type) { return 0; }
static inline void housekeeping_init(void) { }
#endif /* CONFIG_CPU_ISOLATION */
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 8221b6a7da46..5f169a56f06c 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1351,6 +1351,8 @@ static void update_unbound_workqueue_cpumask(bool isolcpus_updated)
ret = workqueue_unbound_exclude_cpumask(isolated_cpus);
WARN_ON_ONCE(ret < 0);
+ ret = housekeeping_update(isolated_cpus, HK_TYPE_DOMAIN);
+ WARN_ON_ONCE(ret < 0);
}
/**
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 75505668dcb9..7814d60be87e 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -23,7 +23,7 @@ DEFINE_STATIC_PERCPU_RWSEM(housekeeping_pcpu_lock);
bool housekeeping_enabled(enum hk_type type)
{
- return !!(housekeeping_flags & BIT(type));
+ return !!(READ_ONCE(housekeeping_flags) & BIT(type));
}
EXPORT_SYMBOL_GPL(housekeeping_enabled);
@@ -37,12 +37,39 @@ void housekeeping_unlock(void)
percpu_up_read(&housekeeping_pcpu_lock);
}
+static bool housekeeping_dereference_check(enum hk_type type)
+{
+ if (type == HK_TYPE_DOMAIN) {
+ if (system_state == SYSTEM_BOOTING)
+ return true;
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_write_held())
+ return true;
+ if (percpu_rwsem_is_held(&housekeeping_pcpu_lock))
+ return true;
+ if (IS_ENABLED(CONFIG_CPUSETS) && lockdep_is_cpuset_held())
+ return true;
+
+ return false;
+ }
+
+ return true;
+}
+
+static inline struct cpumask *__housekeeping_cpumask(enum hk_type type)
+{
+ return rcu_dereference_check(housekeeping_cpumasks[type],
+ housekeeping_dereference_check(type));
+}
+
const struct cpumask *housekeeping_cpumask(enum hk_type type)
{
- if (housekeeping_flags & BIT(type)) {
- return rcu_dereference_check(housekeeping_cpumasks[type], 1);
- }
- return cpu_possible_mask;
+ const struct cpumask *mask = NULL;
+
+ if (READ_ONCE(housekeeping_flags) & BIT(type))
+ mask = __housekeeping_cpumask(type);
+ if (!mask)
+ mask = cpu_possible_mask;
+ return mask;
}
EXPORT_SYMBOL_GPL(housekeeping_cpumask);
@@ -80,12 +107,44 @@ EXPORT_SYMBOL_GPL(housekeeping_affine);
bool housekeeping_test_cpu(int cpu, enum hk_type type)
{
- if (housekeeping_flags & BIT(type))
+ if (READ_ONCE(housekeeping_flags) & BIT(type))
return cpumask_test_cpu(cpu, housekeeping_cpumask(type));
return true;
}
EXPORT_SYMBOL_GPL(housekeeping_test_cpu);
+int housekeeping_update(struct cpumask *mask, enum hk_type type)
+{
+ struct cpumask *trial, *old = NULL;
+
+ if (type != HK_TYPE_DOMAIN)
+ return -ENOTSUPP;
+
+ trial = kmalloc(sizeof(*trial), GFP_KERNEL);
+ if (!trial)
+ return -ENOMEM;
+
+ cpumask_andnot(trial, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT), mask);
+ if (!cpumask_intersects(trial, cpu_online_mask)) {
+ kfree(trial);
+ return -EINVAL;
+ }
+
+ percpu_down_write(&housekeeping_pcpu_lock);
+ if (housekeeping_flags & BIT(type))
+ old = __housekeeping_cpumask(type);
+ else
+ WRITE_ONCE(housekeeping_flags, housekeeping_flags | BIT(type));
+ rcu_assign_pointer(housekeeping_cpumasks[type], trial);
+ percpu_up_write(&housekeeping_pcpu_lock);
+
+ synchronize_rcu();
+
+ kfree(old);
+
+ return 0;
+}
+
void __init housekeeping_init(void)
{
enum hk_type type;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 407e7f5ad929..04094567cad4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -30,6 +30,7 @@
#include <linux/context_tracking.h>
#include <linux/cpufreq.h>
#include <linux/cpumask_api.h>
+#include <linux/cpuset.h>
#include <linux/ctype.h>
#include <linux/file.h>
#include <linux/fs_api.h>
--
2.48.1
next prev parent reply other threads:[~2025-06-20 15:23 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-20 15:22 [PATCH 00/27] cpuset/isolation: Honour kthreads preferred affinity Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 01/27] sched/isolation: Remove housekeeping static key Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 02/27] sched/isolation: Introduce housekeeping per-cpu rwsem Frederic Weisbecker
2025-06-23 17:34 ` Waiman Long
2025-06-23 17:39 ` Tejun Heo
2025-06-23 17:57 ` Waiman Long
2025-06-23 18:03 ` Tejun Heo
2025-06-25 14:30 ` Frederic Weisbecker
2025-06-25 12:18 ` Phil Auld
2025-06-25 14:34 ` Frederic Weisbecker
2025-06-25 15:50 ` Phil Auld
2025-06-27 0:11 ` Waiman Long
2025-06-27 0:48 ` Phil Auld
2025-06-30 12:59 ` Thomas Gleixner
2025-06-25 14:18 ` Frederic Weisbecker
2025-06-26 23:58 ` Waiman Long
2025-06-20 15:22 ` [PATCH 03/27] PCI: Protect against concurrent change of housekeeping cpumask Frederic Weisbecker
2025-06-20 16:17 ` Bjorn Helgaas
2025-06-26 14:51 ` Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 04/27] cpu: Protect against concurrent isolated cpuset change Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 05/27] memcg: Prepare to protect " Frederic Weisbecker
2025-06-20 19:19 ` Shakeel Butt
2025-06-20 15:22 ` [PATCH 06/27] mm: vmstat: " Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 07/27] sched/isolation: Save boot defined domain flags Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 08/27] cpuset: Convert boot_hk_cpus to use HK_TYPE_DOMAIN_BOOT Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 09/27] driver core: cpu: Convert /sys/devices/system/cpu/isolated " Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 10/27] net: Keep ignoring isolated cpuset change Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 11/27] block: Protect against concurrent " Frederic Weisbecker
2025-06-20 15:59 ` Bart Van Assche
2025-06-26 15:03 ` Frederic Weisbecker
2025-06-23 5:46 ` Christoph Hellwig
2025-06-26 15:33 ` Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 12/27] cpu: Provide lockdep check for CPU hotplug lock write-held Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 13/27] cpuset: Provide lockdep check for cpuset lock held Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 14/27] sched/isolation: Convert housekeeping cpumasks to rcu pointers Frederic Weisbecker
2025-06-20 15:22 ` Frederic Weisbecker [this message]
2025-06-20 15:22 ` [PATCH 16/27] sched/isolation: Flush memcg workqueues on cpuset isolated partition change Frederic Weisbecker
2025-06-20 19:30 ` Shakeel Butt
2025-06-20 15:22 ` [PATCH 17/27] sched/isolation: Flush vmstat " Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 18/27] cpuset: Propagate cpuset isolation update to workqueue through housekeeping Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 19/27] cpuset: Remove cpuset_cpu_is_isolated() Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 20/27] sched/isolation: Remove HK_TYPE_TICK test from cpu_is_isolated() Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 21/27] kthread: Refine naming of affinity related fields Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 22/27] kthread: Include unbound kthreads in the managed affinity list Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 23/27] kthread: Include kthreadd to " Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 24/27] kthread: Rely on HK_TYPE_DOMAIN for preferred affinity management Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 25/27] sched: Switch the fallback task allowed cpumask to HK_TYPE_DOMAIN Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 26/27] kthread: Honour kthreads preferred affinity after cpuset changes Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 27/27] kthread: Comment on the purpose and placement of kthread_affine_node() call Frederic Weisbecker
2025-06-20 16:08 ` [PATCH 00/27] cpuset/isolation: Honour kthreads preferred affinity Bjorn Helgaas
2025-06-26 14:57 ` Frederic Weisbecker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250620152308.27492-16-frederic@kernel.org \
--to=frederic@kernel.org \
--cc=cgroups@vger.kernel.org \
--cc=hannes@cmpxchg.org \
--cc=linux-kernel@vger.kernel.org \
--cc=longman@redhat.com \
--cc=marco.crivellari@suse.com \
--cc=mhocko@suse.com \
--cc=mingo@redhat.com \
--cc=mkoutny@suse.com \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.