All of lore.kernel.org
 help / color / mirror / Atom feed
From: Waiman Long <longman@redhat.com>
To: "Chen Ridong" <chenridong@huaweicloud.com>,
	"Tejun Heo" <tj@kernel.org>,
	"Johannes Weiner" <hannes@cmpxchg.org>,
	"Michal Koutný" <mkoutny@suse.com>,
	"Ingo Molnar" <mingo@redhat.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Juri Lelli" <juri.lelli@redhat.com>,
	"Vincent Guittot" <vincent.guittot@linaro.org>,
	"Steven Rostedt" <rostedt@goodmis.org>,
	"Ben Segall" <bsegall@google.com>, "Mel Gorman" <mgorman@suse.de>,
	"Valentin Schneider" <vschneid@redhat.com>,
	"Anna-Maria Behnsen" <anna-maria@linutronix.de>,
	"Frederic Weisbecker" <frederic@kernel.org>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Shuah Khan" <shuah@kernel.org>
Cc: cgroups@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-kselftest@vger.kernel.org, Waiman Long <longman@redhat.com>
Subject: [PATCH/for-next 1/2] cgroup/cpuset: Defer housekeeping_update() call from CPU hotplug to task_work
Date: Tue, 27 Jan 2026 23:42:50 -0500	[thread overview]
Message-ID: <20260128044251.1229702-2-longman@redhat.com> (raw)
In-Reply-To: <20260128044251.1229702-1-longman@redhat.com>

The update_isolation_cpumasks() function can be called either directly
from regular cpuset control file write with cpuset_full_lock() called
or via the CPU hotplug path with cpus_write_lock and cpuset_mutex held.

As we are going to enable dynamic update to the nohz_full housekeeping
cpumask (HK_TYPE_KERNEL_NOISE) soon with the help of CPU hotplug,
allowing the CPU hotplug path to call into housekeeping_update()
directly from update_isolation_cpumasks() will cause deadlock. So we
have to defer any call to housekeeping_update() after the CPU hotplug
operation has finished. This can be done via the task_work_add(...,
TWA_RESUME) API where the actual housekeeping_update() call, if needed,
will happen right before exiting back to userspace.

Since the HK_TYPE_DOMAIN housekeeping cpumask should now track the
changes in "cpuset.cpus.isolated", add a check in test_cpuset_prs.sh to
confirm that the CPU hotplug deferral, if needed, is working as expected.

Signed-off-by: Waiman Long <longman@redhat.com>
---
 kernel/cgroup/cpuset.c                        | 49 ++++++++++++++++++-
 .../selftests/cgroup/test_cpuset_prs.sh       |  9 ++++
 2 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 7b7d12ab1006..98c7cb732206 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -84,6 +84,10 @@ static cpumask_var_t	isolated_cpus;
  */
 static bool isolated_cpus_updating;
 
+/* Both cpuset_mutex and cpus_read_locked acquired */
+static bool cpuset_full_locked;
+static bool isolation_task_work_queued;
+
 /*
  * A flag to force sched domain rebuild at the end of an operation.
  * It can be set in
@@ -285,10 +289,12 @@ void cpuset_full_lock(void)
 {
 	cpus_read_lock();
 	mutex_lock(&cpuset_mutex);
+	cpuset_full_locked = true;
 }
 
 void cpuset_full_unlock(void)
 {
+	cpuset_full_locked = false;
 	mutex_unlock(&cpuset_mutex);
 	cpus_read_unlock();
 }
@@ -1285,25 +1291,64 @@ static bool prstate_housekeeping_conflict(int prstate, struct cpumask *new_cpus)
 	return false;
 }
 
+static void __update_isolation_cpumasks(bool twork);
+static void isolation_task_work_fn(struct callback_head *cb)
+{
+	cpuset_full_lock();	/* cpus_read_lock() + cpuset_mutex */
+	__update_isolation_cpumasks(true);	/* true: called from task_work */
+	cpuset_full_unlock();	/* release both locks taken above */
+}
+
 /*
- * update_isolation_cpumasks - Update external isolation related CPU masks
+ * __update_isolation_cpumasks - Update external isolation related CPU masks
+ * @twork: set if called from isolation_task_work_fn()
  *
  * The following external CPU masks will be updated if necessary:
  * - workqueue unbound cpumask
  */
-static void update_isolation_cpumasks(void)
+static void __update_isolation_cpumasks(bool twork)
 {
 	int ret;
 
+	if (twork)
+		isolation_task_work_queued = false;
+
 	if (!isolated_cpus_updating)
 		return;
 
+	/*
+	 * This function can be reached either directly from regular cpuset
+	 * control file write (cpuset_full_locked) or via hotplug
+	 * (cpus_write_lock && cpuset_mutex held). In the latter case, we
+	 * defer the housekeeping_update() call to a task_work to avoid
+	 * the possibility of deadlock. The task_work will be run right
+	 * before exiting back to userspace.
+	 */
+	if (!cpuset_full_locked) {
+		static struct callback_head twork_cb;
+
+		if (!isolation_task_work_queued) {
+			init_task_work(&twork_cb, isolation_task_work_fn);
+			if (!task_work_add(current, &twork_cb, TWA_RESUME))
+				isolation_task_work_queued = true;
+			else
+				/* Current task shouldn't be exiting */
+				WARN_ON_ONCE(1);
+		}
+		return;
+	}
+
 	ret = housekeeping_update(isolated_cpus);
 	WARN_ON_ONCE(ret < 0);
 
 	isolated_cpus_updating = false;
 }
 
+static inline void update_isolation_cpumasks(void)
+{
+	__update_isolation_cpumasks(false);
+}
+
 /**
  * rm_siblings_excl_cpus - Remove exclusive CPUs that are used by sibling cpusets
  * @parent: Parent cpuset containing all siblings
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index 5dff3ad53867..af4a2532cb3e 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -773,6 +773,7 @@ check_isolcpus()
 	EXPECTED_ISOLCPUS=$1
 	ISCPUS=${CGROUP2}/cpuset.cpus.isolated
 	ISOLCPUS=$(cat $ISCPUS)
+	HKICPUS=$(cat /sys/devices/system/cpu/isolated)
 	LASTISOLCPU=
 	SCHED_DOMAINS=/sys/kernel/debug/sched/domains
 	if [[ $EXPECTED_ISOLCPUS = . ]]
@@ -810,6 +811,14 @@ check_isolcpus()
 	ISOLCPUS=
 	EXPECTED_ISOLCPUS=$EXPECTED_SDOMAIN
 
+	#
+	# The inverse of HK_TYPE_DOMAIN cpumask in $HKICPUS should match $ISOLCPUS
+	#
+	[[ "$ISOLCPUS" != "$HKICPUS" ]] && {
+		echo "Housekeeping isolated CPUs mismatch - $HKICPUS"
+		return 1
+	}
+
 	#
 	# Use the sched domain in debugfs to check isolated CPUs, if available
 	#
-- 
2.52.0


  reply	other threads:[~2026-01-28  4:43 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-28  4:42 [PATCH/for-next 0/2] cgroup/cpuset: Fix partition related locking issues Waiman Long
2026-01-28  4:42 ` Waiman Long [this message]
2026-01-28 17:44   ` [PATCH/for-next 1/2] cgroup/cpuset: Defer housekeeping_update() call from CPU hotplug to task_work Tejun Heo
2026-01-28 18:08     ` Waiman Long
2026-01-29  4:03   ` Chen Ridong
2026-01-29  7:15     ` Chen Ridong
2026-01-30  1:37       ` Waiman Long
2026-01-30  1:39         ` Waiman Long
2026-01-28  4:42 ` [PATCH/for-next 2/2] cgroup/cpuset: Introduce a new top level isolcpus_update_mutex Waiman Long
2026-01-29  8:01   ` Chen Ridong
2026-01-29  8:20     ` Chen Ridong
2026-01-29 20:57       ` Waiman Long
2026-01-30  1:16         ` Chen Ridong
2026-01-29 21:16     ` Waiman Long
2026-01-30  0:56       ` Chen Ridong
2026-01-30  1:35         ` Waiman Long
2026-01-30  1:42           ` Chen Ridong
2026-01-30  3:53             ` Waiman Long
2026-01-30  6:07               ` Chen Ridong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260128044251.1229702-2-longman@redhat.com \
    --to=longman@redhat.com \
    --cc=anna-maria@linutronix.de \
    --cc=bsegall@google.com \
    --cc=cgroups@vger.kernel.org \
    --cc=chenridong@huaweicloud.com \
    --cc=frederic@kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=mkoutny@suse.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=shuah@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=vincent.guittot@linaro.org \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.