All of lore.kernel.org
 help / color / mirror / Atom feed
From: Waiman Long <longman@redhat.com>
To: "Ridong Chen" <ridong.chen@linux.dev>,
	"Tejun Heo" <tj@kernel.org>,
	"Johannes Weiner" <hannes@cmpxchg.org>,
	"Michal Koutný" <mkoutny@suse.com>,
	"Shuah Khan" <shuah@kernel.org>,
	"Juri Lelli" <juri.lelli@redhat.com>
Cc: cgroups@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-kselftest@vger.kernel.org,
	Aaron Tomlin <atomlin@atomlin.com>,
	Guopeng Zhang <guopeng.zhang@linux.dev>,
	Waiman Long <longman@redhat.com>
Subject: [PATCH-next v9 11/11] selftests/cgroup: Add test for cpuset affinity on controller disable
Date: Mon, 29 Jun 2026 23:33:44 -0400	[thread overview]
Message-ID: <20260630033344.352702-12-longman@redhat.com> (raw)
In-Reply-To: <20260630033344.352702-1-longman@redhat.com>

From: Michal Koutný <mkoutny@suse.com>

Add a new selftest that exposes a bug in cpuset_attach() where thread
CPU affinity is not properly updated when the cpuset controller is
disabled in a threaded cgroup hierarchy.

The test creates a threaded cgroup hierarchy with two child cgroups
(A and B) having different cpuset.cpus constraints:
- Parent: cpuset.cpus=0-1
- Child A: cpuset.cpus=0-1
- Child B: cpuset.cpus=1 (restricted to CPU 1 only)

A multithreaded process is created with threads placed in different
cgroups. When the cpuset controller is disabled on the parent, thread
affinities should be updated to match the parent's cpuset.

Expected behavior:
- thread_a affinity: {0-1} before and after (unchanged)
- thread_b affinity: {1} before, {0-1} after (expanded)

Current buggy behavior:
- thread_b affinity remains {1} after controller disable

Assisted-by: Claude:claude-sonnet-4-5
Signed-off-by: Michal Koutný <mkoutny@suse.com>
Acked-by: Waiman Long <longman@redhat.com>
---
 tools/testing/selftests/cgroup/test_cpuset.c | 243 +++++++++++++++++++
 1 file changed, 243 insertions(+)

diff --git a/tools/testing/selftests/cgroup/test_cpuset.c b/tools/testing/selftests/cgroup/test_cpuset.c
index c5cf8b56ceb8..8b4c4a9dd78b 100644
--- a/tools/testing/selftests/cgroup/test_cpuset.c
+++ b/tools/testing/selftests/cgroup/test_cpuset.c
@@ -1,7 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0
 
+#define _GNU_SOURCE
+#include <assert.h>
 #include <linux/limits.h>
+#include <pthread.h>
+#include <sched.h>
 #include <signal.h>
+#include <sys/syscall.h>
+#include <unistd.h>
 
 #include "kselftest.h"
 #include "cgroup_util.h"
@@ -232,6 +238,242 @@ static int test_cpuset_perms_subtree(const char *root)
 	return ret;
 }
 
+/*
+ * Store the CPU affinity mask of the calling thread in @mask.
+ *
+ * The mask is cleared before the query. The return value is whatever
+ * sched_getaffinity() returned.
+ */
+static int get_cpu_affinity(cpu_set_t *mask)
+{
+	int rc;
+
+	CPU_ZERO(mask);
+	rc = sched_getaffinity(0, sizeof(cpu_set_t), mask);
+
+	return rc;
+}
+
+/*
+ * Check whether @dst contains exactly the CPUs whose bits are set in @mask.
+ *
+ * @dst:  CPU set to examine.
+ * @mask: bitmask of expected CPUs; bit N stands for CPU N.
+ *
+ * Returns non-zero when the two sets are equal, 0 otherwise.
+ */
+static int cpu_set_equal(cpu_set_t *dst, unsigned long mask)
+{
+	/* sizeof() counts bytes; every bit of @mask has to be examined. */
+	const unsigned int nr_bits = 8 * sizeof(mask);
+	cpu_set_t expected;
+
+	CPU_ZERO(&expected);
+	assert(nr_bits <= CPU_SETSIZE);
+
+	for (unsigned int cpu = 0; cpu < nr_bits; ++cpu)
+		if ((1UL << cpu) & mask)
+			CPU_SET(cpu, &expected);
+
+	return CPU_EQUAL(&expected, dst);
+}
+
+/*
+ * Progress of the affinity test, shared between the main thread and the
+ * worker threads. All waits compare phases with '<', so the declaration
+ * order below is significant.
+ */
+enum test_phase {
+	/* Initial value, set by the test before any worker is created. */
+	AFFINITY_SETUP,
+	/* ready_phase handed to thread A. */
+	AFFINITY_THREAD_A_READY,
+	/* ready_phase handed to thread B; the main thread waits for it. */
+	AFFINITY_THREADS_READY,
+	/* Set by the main thread after "-cpuset"; workers wait for it. */
+	AFFINITY_CONTROLLER_DISABLED,
+	/* Set on the abort path so that waiting workers are released. */
+	AFFINITY_COMPLETE,
+	/* Set by a worker that failed; being last, it ends every '<' wait. */
+	AFFINITY_ERROR
+};
+
+/* Per-worker parameters passed to affinity_thread_fn(). */
+struct thread_args {
+	/* Cgroup the worker moves itself into before sampling its affinity. */
+	const char *cgroup;
+	/* Out: affinity sampled right after entering @cgroup. */
+	cpu_set_t *affinity_before;
+	/* Out: affinity sampled once AFFINITY_CONTROLLER_DISABLED is reached. */
+	cpu_set_t *affinity_after;
+	/* Phase the worker raises test_phase to after the first sample. */
+	enum test_phase ready_phase;
+};
+
+/*
+ * Handshake state of the affinity test. While worker threads may be running,
+ * test_phase is only accessed with test_mutex held, and every change is
+ * announced with a broadcast on test_cond.
+ */
+static pthread_mutex_t test_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t test_cond = PTHREAD_COND_INITIALIZER;
+static enum test_phase test_phase;
+
+/*
+ * Worker thread body: enter args->cgroup, record the affinity seen there,
+ * then wait for the main thread before recording the affinity a second time.
+ *
+ * @arg: struct thread_args describing this worker.
+ *
+ * Always returns NULL; a failure is reported by setting test_phase to
+ * AFFINITY_ERROR.
+ */
+static void *affinity_thread_fn(void *arg)
+{
+	struct thread_args *args = arg;
+
+	if (cg_enter_current_thread(args->cgroup))
+		goto fail;
+
+	if (get_cpu_affinity(args->affinity_before) != 0)
+		goto fail;
+
+	pthread_mutex_lock(&test_mutex);
+	/*
+	 * Publish the ready phases strictly in order: wait until the phase
+	 * right before ours has been reached. Otherwise the worker owning
+	 * AFFINITY_THREADS_READY could announce it while the other worker has
+	 * not stored its "before" affinity yet, and the main thread would go
+	 * on to read an unset cpu_set_t. AFFINITY_COMPLETE and AFFINITY_ERROR
+	 * compare greater than every ready phase, so an abort elsewhere also
+	 * terminates this wait.
+	 */
+	while (test_phase + 1 < args->ready_phase)
+		pthread_cond_wait(&test_cond, &test_mutex);
+	if (test_phase < args->ready_phase)
+		test_phase = args->ready_phase;
+	pthread_cond_broadcast(&test_cond);
+
+	/* Hold still until the controller was disabled (or the test aborted). */
+	while (test_phase < AFFINITY_CONTROLLER_DISABLED)
+		pthread_cond_wait(&test_cond, &test_mutex);
+	pthread_mutex_unlock(&test_mutex);
+
+	if (get_cpu_affinity(args->affinity_after) != 0)
+		goto fail;
+
+	return NULL;
+
+fail:
+	/* Wake everybody: AFFINITY_ERROR satisfies all '<' phase waits. */
+	pthread_mutex_lock(&test_mutex);
+	test_phase = AFFINITY_ERROR;
+	pthread_cond_broadcast(&test_cond);
+	pthread_mutex_unlock(&test_mutex);
+	return NULL;
+}
+
+/*
+ * Test that disabling cpuset controller properly updates thread affinity.
+ *
+ * This test exposes a bug in cpuset_attach() where threads in child cgroups
+ * don't get their affinity updated when the cpuset controller is disabled.
+ *
+ * Setup:
+ * - Create parent cgroup with cpuset.cpus=0-1
+ * - Create child A with cpuset.cpus=0-1
+ * - Create child B with cpuset.cpus=1
+ * - Place multithreaded process: group leader + thread_a in A, thread_b in B
+ * - Disable cpuset controller on parent
+ *
+ * Expected: thread_b's affinity should expand from {1} to {0-1}
+ * Buggy: thread_b's affinity remains {1}
+ *
+ * @root: cgroup under which the temporary "cpuset_affinity_test" hierarchy
+ *        is created and to which the process is moved back on exit.
+ *
+ * Returns KSFT_PASS when both threads report the expected affinities,
+ * KSFT_SKIP when "+cpuset" cannot be written to the parent's
+ * cgroup.subtree_control, and KSFT_FAIL on any other failure.
+ */
+static int test_cpuset_affinity_on_controller_disable(const char *root)
+{
+	char *parent = NULL, *child_a = NULL, *child_b = NULL;
+	pthread_t thread_a, thread_b;
+	/* Track which threads exist so the abort path only joins those. */
+	int thread_a_created = 0, thread_b_created = 0;
+	cpu_set_t affinity_a_before, affinity_a_after;
+	cpu_set_t affinity_b_before, affinity_b_after;
+	int ret = KSFT_FAIL;
+
+	/* Build the hierarchy: parent, parent/A and parent/B, all threaded. */
+	parent = cg_name(root, "cpuset_affinity_test");
+	if (!parent)
+		goto cleanup;
+	if (cg_create(parent))
+		goto cleanup;
+	if (cg_write(parent, "cgroup.type", "threaded"))
+		goto cleanup;
+
+	child_a = cg_name(parent, "A");
+	if (!child_a)
+		goto cleanup;
+	if (cg_create(child_a))
+		goto cleanup;
+	if (cg_write(child_a, "cgroup.type", "threaded"))
+		goto cleanup;
+
+	child_b = cg_name(parent, "B");
+	if (!child_b)
+		goto cleanup;
+	if (cg_create(child_b))
+		goto cleanup;
+	if (cg_write(child_b, "cgroup.type", "threaded"))
+		goto cleanup;
+
+	/* Now enable cpuset controller in parent */
+	if (cg_write(parent, "cgroup.subtree_control", "+cpuset")) {
+		/* Failing to enable the controller is a skip, not a failure. */
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	/* Set CPU affinity constraints */
+	if (cg_write(parent, "cpuset.cpus", "0-1"))
+		goto cleanup;
+	if (cg_write(child_a, "cpuset.cpus", "0-1"))
+		goto cleanup;
+	if (cg_write(child_b, "cpuset.cpus", "1"))
+		goto cleanup;
+
+	/* Move group leader (main thread) to child A */
+	if (cg_enter_current(child_a))
+		goto cleanup;
+
+	/* Create threads - they will move themselves to their respective cgroups */
+	test_phase = AFFINITY_SETUP;
+
+	struct thread_args args_a = {
+		.cgroup = child_a,
+		.affinity_before = &affinity_a_before,
+		.affinity_after = &affinity_a_after,
+		.ready_phase = AFFINITY_THREAD_A_READY,
+	};
+	if (pthread_create(&thread_a, NULL, affinity_thread_fn, &args_a))
+		goto cleanup;
+	thread_a_created = 1;
+
+	struct thread_args args_b = {
+		.cgroup = child_b,
+		.affinity_before = &affinity_b_before,
+		.affinity_after = &affinity_b_after,
+		.ready_phase = AFFINITY_THREADS_READY,
+	};
+	/* thread_a is already running here, so it has to be released and joined. */
+	if (pthread_create(&thread_b, NULL, affinity_thread_fn, &args_b))
+		goto cleanup_threads;
+	thread_b_created = 1;
+
+	/* Wait for the workers' ready handshake (an error also ends the wait). */
+	pthread_mutex_lock(&test_mutex);
+	while (test_phase < AFFINITY_THREADS_READY)
+		pthread_cond_wait(&test_cond, &test_mutex);
+
+	/* If a thread failed during setup, bail out */
+	if (test_phase == AFFINITY_ERROR) {
+		pthread_mutex_unlock(&test_mutex);
+		goto cleanup_threads;
+	}
+	pthread_mutex_unlock(&test_mutex);
+
+	/* Mask 0x3 stands for CPUs {0,1}, mask 0x2 for CPU {1}. */
+	if (!cpu_set_equal(&affinity_a_before, 0x3)) {
+		ksft_print_msg("FAIL: thread_a initial affinity incorrect\n");
+		goto cleanup_threads;
+	}
+
+	if (!cpu_set_equal(&affinity_b_before, 0x2)) {
+		ksft_print_msg("FAIL: thread_b initial affinity incorrect\n");
+		goto cleanup_threads;
+	}
+
+	/* Disable cpuset controller - this should trigger affinity update */
+	if (cg_write(parent, "cgroup.subtree_control", "-cpuset"))
+		goto cleanup_threads;
+
+	/* Signal threads to save their final affinity and exit */
+	pthread_mutex_lock(&test_mutex);
+	test_phase = AFFINITY_CONTROLLER_DISABLED;
+	pthread_cond_broadcast(&test_cond);
+	pthread_mutex_unlock(&test_mutex);
+
+	pthread_join(thread_a, NULL);
+	pthread_join(thread_b, NULL);
+
+	/*
+	 * Verify thread affinities AFTER disabling controller. Both threads
+	 * are joined by now, hence the jumps to "cleanup" rather than
+	 * "cleanup_threads" below.
+	 */
+	if (!cpu_set_equal(&affinity_a_after, 0x3)) {
+		ksft_print_msg("FAIL: thread_a final affinity incorrect\n");
+		goto cleanup;
+	}
+
+	if (!cpu_set_equal(&affinity_b_after, 0x3)) {
+		ksft_print_msg("FAIL: thread_b affinity did not expand to {0-1}\n");
+		goto cleanup;
+	}
+
+	ret = KSFT_PASS;
+	goto cleanup;
+
+cleanup_threads:
+	/*
+	 * Abort path with workers possibly still waiting: AFFINITY_COMPLETE
+	 * is ordered after AFFINITY_CONTROLLER_DISABLED, so setting it lets
+	 * every waiting worker leave its wait loop and terminate.
+	 */
+	pthread_mutex_lock(&test_mutex);
+	test_phase = AFFINITY_COMPLETE;
+	pthread_cond_broadcast(&test_cond);
+	pthread_mutex_unlock(&test_mutex);
+
+	if (thread_a_created)
+		pthread_join(thread_a, NULL);
+	if (thread_b_created)
+		pthread_join(thread_b, NULL);
+
+cleanup:
+	/* Move back to root before cleanup */
+	cg_enter_current(root);
+
+	/* Children are destroyed before their parent. */
+	cg_destroy(child_b);
+	free(child_b);
+	cg_destroy(child_a);
+	free(child_a);
+	cg_destroy(parent);
+	free(parent);
+
+	return ret;
+}
+
 
 #define T(x) { x, #x }
 struct cpuset_test {
@@ -241,6 +483,7 @@ struct cpuset_test {
 	T(test_cpuset_perms_object_allow),
 	T(test_cpuset_perms_object_deny),
 	T(test_cpuset_perms_subtree),
+	T(test_cpuset_affinity_on_controller_disable),
 };
 #undef T
 
-- 
2.54.0


      parent reply	other threads:[~2026-06-30  3:35 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-30  3:33 [PATCH-next v9 00/11] cgroup/cpuset: Support multiple source/destination cpusets for cpuset_*attach() Waiman Long
2026-06-30  3:33 ` [PATCH-next v9 01/11] cgroup/cpuset: Make nr_deadline_tasks an atomic_t Waiman Long
2026-06-30 14:01   ` Juri Lelli
2026-06-30 17:56     ` Waiman Long
2026-07-01  9:00       ` Juri Lelli
2026-07-01  1:19   ` Ridong Chen
2026-06-30  3:33 ` [PATCH-next v9 02/11] cgroup/cpuset: Fix node inconsistencies between cpuset_update_tasks_nodemask() and cpuset_attach() Waiman Long
2026-06-30  3:33 ` [PATCH-next v9 03/11] cgroup/cpuset: Prevent race between task attach and cpuset state change Waiman Long
2026-07-01  1:41   ` Ridong Chen
2026-07-01 20:19     ` Waiman Long
2026-06-30  3:33 ` [PATCH-next v9 04/11] cgroup/cpuset: Put all task attach related variables into attach_ctx Waiman Long
2026-06-30  3:33 ` [PATCH-next v9 05/11] cgroup/cpuset: Add a cpuset_reserve_dl_bw() helper Waiman Long
2026-06-30  3:33 ` [PATCH-next v9 06/11] cgroup/cpuset: Expand the scope of cpuset_can_attach_check() Waiman Long
2026-06-30  3:33 ` [PATCH-next v9 07/11] cgroup/cpuset: Make attach_ctx.old_cs track task group leader Waiman Long
2026-06-30  3:33 ` [PATCH-next v9 08/11] cgroup/cpuset: Move mpol_rebind_mm/cpuset_migrate_mm() calls inside cpuset_attach_task() Waiman Long
2026-07-01  2:14   ` Ridong Chen
2026-07-01 20:30     ` Waiman Long
2026-06-30  3:33 ` [PATCH-next v9 09/11] cgroup/cpuset: Support multiple source cpusets for cpuset_*attach() Waiman Long
2026-07-01  2:35   ` Ridong Chen
2026-07-01 20:44     ` Waiman Long
2026-06-30  3:33 ` [PATCH-next v9 10/11] cgroup/cpuset: Support multiple destination " Waiman Long
2026-07-01  2:51   ` Ridong Chen
2026-07-01 21:16     ` Waiman Long
2026-06-30  3:33 ` Waiman Long [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260630033344.352702-12-longman@redhat.com \
    --to=longman@redhat.com \
    --cc=atomlin@atomlin.com \
    --cc=cgroups@vger.kernel.org \
    --cc=guopeng.zhang@linux.dev \
    --cc=hannes@cmpxchg.org \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=mkoutny@suse.com \
    --cc=ridong.chen@linux.dev \
    --cc=shuah@kernel.org \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.