All of lore.kernel.org
 help / color / mirror / Atom feed
From: Aaron Tomlin <atomlin@atomlin.com>
To: longman@redhat.com, tj@kernel.org, hannes@cmpxchg.org, mkoutny@suse.com
Cc: chenridong@huaweicloud.com, neelx@suse.com,
	cgroups@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH] cpuset: Fix multi-source deadline task accounting and bandwidth bypass
Date: Mon, 11 May 2026 21:03:41 -0400	[thread overview]
Message-ID: <20260512010341.101419-1-atomlin@atomlin.com> (raw)

During a batch migration where threads in a taskset originate from
multiple source cpusets (e.g., via cgroup.procs), cpuset_can_attach()
and cpuset_attach() currently evaluate the source cpuset exactly once
by caching the first task's oldcs.

This creates two distinct critical flaws for SCHED_DEADLINE tasks:

    1.  nr_deadline_tasks is decremented solely on the first task's
        source cpuset (oldcs). If tasks originated from other cpusets,
        the counts of those cpusets are never decremented and are
        permanently leaked, while the count of the first cpuset is
        over-decremented and permanently underflows.

    2.  cpumask_intersects() is evaluated strictly against the first
        task's source cpuset. This allows tasks originating from
        entirely isolated root domains to silently bypass the
        dl_bw_alloc() admission control.

Refactor the deadline accounting to evaluate task_cs(task) on a
per-task basis during the cgroup_taskset_for_each() loops. To achieve
accurate accounting before the core cgroup migration actually executes,
shift the permanent nr_deadline_tasks increments/decrements into
cpuset_can_attach(). If the migration aborts, the counts are reverted
either by an internal rollback loop in cpuset_can_attach() or by the
cpuset_cancel_attach() callback.

Signed-off-by: Aaron Tomlin <atomlin@atomlin.com>
---
 kernel/cgroup/cpuset.c | 53 +++++++++++++++++++++++++++++++-----------
 1 file changed, 39 insertions(+), 14 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index e3a081a07c6d..36f1d28f8ade 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -3034,32 +3034,36 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
 		if (setsched_check) {
 			ret = security_task_setscheduler(task);
 			if (ret)
-				goto out_unlock;
+				goto out_unlock_reset;
 		}
 
 		if (dl_task(task)) {
+			struct cpuset *old_cs = task_cs(task);
+
 			cs->nr_migrate_dl_tasks++;
-			cs->sum_migrate_dl_bw += task->dl.dl_bw;
+			old_cs->nr_deadline_tasks--;
+			cs->nr_deadline_tasks++;
+
+			if (!cpumask_intersects(old_cs->effective_cpus,
+						cs->effective_cpus))
+				cs->sum_migrate_dl_bw += task->dl.dl_bw;
 		}
 	}
 
 	if (!cs->nr_migrate_dl_tasks)
 		goto out_success;
 
-	if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) {
+	if (cs->sum_migrate_dl_bw) {
 		int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus);
 
 		if (unlikely(cpu >= nr_cpu_ids)) {
-			reset_migrate_dl_data(cs);
 			ret = -EINVAL;
-			goto out_unlock;
+			goto out_unlock_reset;
 		}
 
 		ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw);
-		if (ret) {
-			reset_migrate_dl_data(cs);
-			goto out_unlock;
-		}
+		if (ret)
+			goto out_unlock_reset;
 
 		cs->dl_bw_cpu = cpu;
 	}
@@ -3070,6 +3074,22 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
 	 * changes which zero cpus/mems_allowed.
 	 */
 	cs->attach_in_progress++;
+	goto out_unlock;
+
+out_unlock_reset:
+	if (cs->nr_migrate_dl_tasks) {
+		struct task_struct *t;
+
+		cgroup_taskset_for_each(t, css, tset) {
+			if (t == task)
+				break;
+			if (dl_task(t)) {
+				task_cs(t)->nr_deadline_tasks++;
+				cs->nr_deadline_tasks--;
+			}
+		}
+		reset_migrate_dl_data(cs);
+	}
 out_unlock:
 	mutex_unlock(&cpuset_mutex);
 	return ret;
@@ -3079,6 +3099,7 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
 {
 	struct cgroup_subsys_state *css;
 	struct cpuset *cs;
+	struct task_struct *task;
 
 	cgroup_taskset_first(tset, &css);
 	cs = css_cs(css);
@@ -3089,8 +3110,15 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
 	if (cs->dl_bw_cpu >= 0)
 		dl_bw_free(cs->dl_bw_cpu, cs->sum_migrate_dl_bw);
 
-	if (cs->nr_migrate_dl_tasks)
+	if (cs->nr_migrate_dl_tasks) {
+		cgroup_taskset_for_each(task, css, tset) {
+			if (dl_task(task)) {
+				task_cs(task)->nr_deadline_tasks++;
+				cs->nr_deadline_tasks--;
+			}
+		}
 		reset_migrate_dl_data(cs);
+	}
 
 	mutex_unlock(&cpuset_mutex);
 }
@@ -3195,11 +3223,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)
 		schedule_flush_migrate_mm();
 	cs->old_mems_allowed = cpuset_attach_nodemask_to;
 
-	if (cs->nr_migrate_dl_tasks) {
-		cs->nr_deadline_tasks += cs->nr_migrate_dl_tasks;
-		oldcs->nr_deadline_tasks -= cs->nr_migrate_dl_tasks;
+	if (cs->nr_migrate_dl_tasks)
 		reset_migrate_dl_data(cs);
-	}
 
 	dec_attach_in_progress_locked(cs);
 
-- 
2.51.0


             reply	other threads:[~2026-05-12  1:03 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-12  1:03 Aaron Tomlin [this message]
2026-05-13 16:22 ` [PATCH] cpuset: Fix multi-source deadline task accounting and bandwidth bypass Dietmar Eggemann
2026-05-13 23:09   ` Aaron Tomlin
2026-05-13 23:19   ` Waiman Long
2026-05-13 23:39     ` Aaron Tomlin
2026-05-14  4:26       ` Waiman Long

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260512010341.101419-1-atomlin@atomlin.com \
    --to=atomlin@atomlin.com \
    --cc=cgroups@vger.kernel.org \
    --cc=chenridong@huaweicloud.com \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=mkoutny@suse.com \
    --cc=neelx@suse.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.