From: Frederic Weisbecker <frederic@kernel.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>,
Ingo Molnar <mingo@redhat.com>,
Marco Crivellari <marco.crivellari@suse.com>,
Michal Hocko <mhocko@suse.com>,
Peter Zijlstra <peterz@infradead.org>, Tejun Heo <tj@kernel.org>,
Thomas Gleixner <tglx@linutronix.de>,
Vlastimil Babka <vbabka@suse.cz>,
Waiman Long <longman@redhat.com>
Subject: [PATCH 04/27] cpu: Protect against concurrent isolated cpuset change
Date: Fri, 20 Jun 2025 17:22:45 +0200 [thread overview]
Message-ID: <20250620152308.27492-5-frederic@kernel.org> (raw)
In-Reply-To: <20250620152308.27492-1-frederic@kernel.org>
_cpu_down() is called through work_on_cpu() on a target contained
within the HK_TYPE_DOMAIN cpumask.
But that cpumask will soon also integrate the cpuset isolated
partitions and some synchronization is needed to make sure that
work_on_cpu() neither starts nor keeps running on an isolated CPU.
Unfortunately housekeeping_lock() can't be held before the call to
work_on_cpu() because _cpu_down() subsequently acquires cpu_hotplug_lock.
This would be a lock inversion:
    cpu_down()                                    cpuset
    ----------                                    ------
    percpu_down_read(&housekeeping_pcpu_lock);    percpu_down_read(&cpu_hotplug_lock);
    percpu_down_write(&cpu_hotplug_lock);         percpu_down_write(&housekeeping_pcpu_lock);
To solve this situation, write-lock the cpu_hotplug_lock around the call
to work_on_cpu(). This will prevent cpuset from modifying the
housekeeping cpumask and therefore synchronize against HK_TYPE_DOMAIN
cpumask changes.
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
kernel/cpu.c | 44 ++++++++++++++++++++++++++++++--------------
1 file changed, 30 insertions(+), 14 deletions(-)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a59e009e0be4..069fce6c7eae 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1398,8 +1398,8 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
}
/* Requires cpu_add_remove_lock to be held */
-static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
- enum cpuhp_state target)
+static int __ref _cpu_down_locked(unsigned int cpu, int tasks_frozen,
+ enum cpuhp_state target)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
int prev_state, ret = 0;
@@ -1410,8 +1410,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
if (!cpu_present(cpu))
return -EINVAL;
- cpus_write_lock();
-
cpuhp_tasks_frozen = tasks_frozen;
prev_state = cpuhp_set_state(cpu, st, target);
@@ -1427,14 +1425,14 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
* return the error code..
*/
if (ret)
- goto out;
+ return ret;
/*
* We might have stopped still in the range of the AP hotplug
* thread. Nothing to do anymore.
*/
if (st->state > CPUHP_TEARDOWN_CPU)
- goto out;
+ return ret;
st->target = target;
}
@@ -1452,9 +1450,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
}
}
-out:
- cpus_write_unlock();
- arch_smt_update();
return ret;
}
@@ -1463,16 +1458,17 @@ struct cpu_down_work {
enum cpuhp_state target;
};
-static long __cpu_down_maps_locked(void *arg)
+static long __cpu_down_locked_work(void *arg)
{
struct cpu_down_work *work = arg;
- return _cpu_down(work->cpu, 0, work->target);
+ return _cpu_down_locked(work->cpu, 0, work->target);
}
static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
{
struct cpu_down_work work = { .cpu = cpu, .target = target, };
+ int err;
/*
* If the platform does not support hotplug, report it explicitly to
@@ -1483,17 +1479,24 @@ static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
if (cpu_hotplug_disabled)
return -EBUSY;
+ err = -EBUSY;
+
/*
* Ensure that the control task does not run on the to be offlined
* CPU to prevent a deadlock against cfs_b->period_timer.
* Also keep at least one housekeeping cpu onlined to avoid generating
- * an empty sched_domain span.
+ * an empty sched_domain span. Hotplug must be locked already to prevent
+ * cpusets from concurrently changing the housekeeping mask.
*/
+ cpus_write_lock();
for_each_cpu_and(cpu, cpu_online_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)) {
if (cpu != work.cpu)
- return work_on_cpu(cpu, __cpu_down_maps_locked, &work);
+ err = work_on_cpu(cpu, __cpu_down_locked_work, &work);
}
- return -EBUSY;
+ cpus_write_unlock();
+ arch_smt_update();
+
+ return err;
}
static int cpu_down(unsigned int cpu, enum cpuhp_state target)
@@ -1896,6 +1899,19 @@ void __init bringup_nonboot_cpus(unsigned int max_cpus)
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;
+static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
+ enum cpuhp_state target)
+{
+ int err;
+
+ cpus_write_lock();
+ err = _cpu_down_locked(cpu, tasks_frozen, target);
+ cpus_write_unlock();
+ arch_smt_update();
+
+ return err;
+}
+
int freeze_secondary_cpus(int primary)
{
int cpu, error = 0;
--
2.48.1
next prev parent reply other threads:[~2025-06-20 15:23 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-20 15:22 [PATCH 00/27] cpuset/isolation: Honour kthreads preferred affinity Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 01/27] sched/isolation: Remove housekeeping static key Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 02/27] sched/isolation: Introduce housekeeping per-cpu rwsem Frederic Weisbecker
2025-06-23 17:34 ` Waiman Long
2025-06-23 17:39 ` Tejun Heo
2025-06-23 17:57 ` Waiman Long
2025-06-23 18:03 ` Tejun Heo
2025-06-25 14:30 ` Frederic Weisbecker
2025-06-25 12:18 ` Phil Auld
2025-06-25 14:34 ` Frederic Weisbecker
2025-06-25 15:50 ` Phil Auld
2025-06-27 0:11 ` Waiman Long
2025-06-27 0:48 ` Phil Auld
2025-06-30 12:59 ` Thomas Gleixner
2025-06-25 14:18 ` Frederic Weisbecker
2025-06-26 23:58 ` Waiman Long
2025-06-20 15:22 ` [PATCH 03/27] PCI: Protect against concurrent change of housekeeping cpumask Frederic Weisbecker
2025-06-20 16:17 ` Bjorn Helgaas
2025-06-26 14:51 ` Frederic Weisbecker
2025-06-20 15:22 ` Frederic Weisbecker [this message]
2025-06-20 15:22 ` [PATCH 05/27] memcg: Prepare to protect against concurrent isolated cpuset change Frederic Weisbecker
2025-06-20 19:19 ` Shakeel Butt
2025-06-20 15:22 ` [PATCH 06/27] mm: vmstat: " Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 07/27] sched/isolation: Save boot defined domain flags Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 08/27] cpuset: Convert boot_hk_cpus to use HK_TYPE_DOMAIN_BOOT Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 09/27] driver core: cpu: Convert /sys/devices/system/cpu/isolated " Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 10/27] net: Keep ignoring isolated cpuset change Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 11/27] block: Protect against concurrent " Frederic Weisbecker
2025-06-20 15:59 ` Bart Van Assche
2025-06-26 15:03 ` Frederic Weisbecker
2025-06-23 5:46 ` Christoph Hellwig
2025-06-26 15:33 ` Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 12/27] cpu: Provide lockdep check for CPU hotplug lock write-held Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 13/27] cpuset: Provide lockdep check for cpuset lock held Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 14/27] sched/isolation: Convert housekeeping cpumasks to rcu pointers Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 15/27] cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 16/27] sched/isolation: Flush memcg workqueues on cpuset isolated partition change Frederic Weisbecker
2025-06-20 19:30 ` Shakeel Butt
2025-06-20 15:22 ` [PATCH 17/27] sched/isolation: Flush vmstat " Frederic Weisbecker
2025-06-20 15:22 ` [PATCH 18/27] cpuset: Propagate cpuset isolation update to workqueue through housekeeping Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 19/27] cpuset: Remove cpuset_cpu_is_isolated() Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 20/27] sched/isolation: Remove HK_TYPE_TICK test from cpu_is_isolated() Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 21/27] kthread: Refine naming of affinity related fields Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 22/27] kthread: Include unbound kthreads in the managed affinity list Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 23/27] kthread: Include kthreadd to " Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 24/27] kthread: Rely on HK_TYPE_DOMAIN for preferred affinity management Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 25/27] sched: Switch the fallback task allowed cpumask to HK_TYPE_DOMAIN Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 26/27] kthread: Honour kthreads preferred affinity after cpuset changes Frederic Weisbecker
2025-06-20 15:23 ` [PATCH 27/27] kthread: Comment on the purpose and placement of kthread_affine_node() call Frederic Weisbecker
2025-06-20 16:08 ` [PATCH 00/27] cpuset/isolation: Honour kthreads preferred affinity Bjorn Helgaas
2025-06-26 14:57 ` Frederic Weisbecker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250620152308.27492-5-frederic@kernel.org \
--to=frederic@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=longman@redhat.com \
--cc=marco.crivellari@suse.com \
--cc=mhocko@suse.com \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.