Linux cgroups development
 help / color / mirror / Atom feed
* [PATCH] cgroup/cpuset: make DL attach bandwidth reservation root-domain aware
@ 2026-04-21  8:34 Guopeng Zhang
  2026-04-22  2:14 ` Guopeng Zhang
  2026-04-24 14:15 ` Waiman Long
  0 siblings, 2 replies; 7+ messages in thread
From: Guopeng Zhang @ 2026-04-21  8:34 UTC (permalink / raw)
  To: longman, tj, juri.lelli, chenridong, mkoutny
  Cc: hannes, mingo, peterz, vincent.guittot, dietmar.eggemann, rostedt,
	bsegall, mgorman, vschneid, kprateek.nayak, linux-kernel, cgroups,
	Guopeng Zhang

cpuset_can_attach() currently sums the bandwidth of all migrating
SCHED_DEADLINE tasks and reserves destination bandwidth whenever the
old and new cpuset effective CPU masks do not overlap.

That condition is stronger than what the scheduler uses when migrating
a deadline task. set_cpus_allowed_dl() only subtracts bandwidth from
the source side when moving the task requires a DL bandwidth move
between root domains.

As a result, moving a deadline task between disjoint member cpusets that
still belong to the same root domain can reserve destination bandwidth
even though no matching source-side subtraction happens. Successful
back-and-forth migrations between such cpusets can monotonically
increase dl_bw->total_bw.

Fix this by extracting the source root-domain test already used by
set_cpus_allowed_dl() into a shared helper and making cpuset DL bandwidth
preallocation use that same condition. Count all migrating deadline
tasks for cpuset task accounting, but only accumulate sum_migrate_dl_bw
for tasks that actually need a DL bandwidth move. Reserve and rollback
bandwidth only for that subset.

This keeps successful attach accounting aligned with
set_cpus_allowed_dl() and avoids double-accounting within a single
root domain.

Fixes: 2ef269ef1ac0 ("cgroup/cpuset: Free DL BW in case can_attach() fails")
Signed-off-by: Guopeng Zhang <zhangguopeng@kylinos.cn>
---
 include/linux/sched/deadline.h  |  9 +++++++++
 kernel/cgroup/cpuset-internal.h |  1 +
 kernel/cgroup/cpuset.c          | 34 ++++++++++++++++-----------------
 kernel/sched/deadline.c         | 14 +++++++++++---
 4 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index 1198138cb839..273538200a44 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -33,6 +33,15 @@ struct root_domain;
 extern void dl_add_task_root_domain(struct task_struct *p);
 extern void dl_clear_root_domain(struct root_domain *rd);
 extern void dl_clear_root_domain_cpu(int cpu);
+/*
+ * Return whether moving DL task @p to @new_mask requires moving DL
+ * bandwidth accounting between root domains. This helper is specific to
+ * DL bandwidth move accounting semantics and is shared by
+ * cpuset_can_attach() and set_cpus_allowed_dl() so both paths use the
+ * same source root-domain test.
+ */
+extern bool dl_task_needs_bw_move(struct task_struct *p,
+				  const struct cpumask *new_mask);
 
 extern u64 dl_cookie;
 extern bool dl_bw_visited(int cpu, u64 cookie);
diff --git a/kernel/cgroup/cpuset-internal.h b/kernel/cgroup/cpuset-internal.h
index bb4e692bea30..f7aaf01f7cd5 100644
--- a/kernel/cgroup/cpuset-internal.h
+++ b/kernel/cgroup/cpuset-internal.h
@@ -167,6 +167,7 @@ struct cpuset {
 	 */
 	int nr_deadline_tasks;
 	int nr_migrate_dl_tasks;
+	/* DL bandwidth that needs destination reservation for this attach. */
 	u64 sum_migrate_dl_bw;
 	/*
 	 * CPU used for temporary DL bandwidth allocation during attach;
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index e3a081a07c6d..761098b45f23 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2993,7 +2993,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
 	struct cpuset *cs, *oldcs;
 	struct task_struct *task;
 	bool setsched_check;
-	int ret;
+	int cpu, ret;
 
 	/* used later by cpuset_attach() */
 	cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
@@ -3039,31 +3039,31 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
 
 		if (dl_task(task)) {
 			cs->nr_migrate_dl_tasks++;
-			cs->sum_migrate_dl_bw += task->dl.dl_bw;
+
+			if (dl_task_needs_bw_move(task, cs->effective_cpus))
+				cs->sum_migrate_dl_bw += task->dl.dl_bw;
 		}
 	}
 
-	if (!cs->nr_migrate_dl_tasks)
+	if (!cs->sum_migrate_dl_bw)
 		goto out_success;
 
-	if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) {
-		int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus);
+	cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus);
 
-		if (unlikely(cpu >= nr_cpu_ids)) {
-			reset_migrate_dl_data(cs);
-			ret = -EINVAL;
-			goto out_unlock;
-		}
-
-		ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw);
-		if (ret) {
-			reset_migrate_dl_data(cs);
-			goto out_unlock;
-		}
+	if (unlikely(cpu >= nr_cpu_ids)) {
+		reset_migrate_dl_data(cs);
+		ret = -EINVAL;
+		goto out_unlock;
+	}
 
-		cs->dl_bw_cpu = cpu;
+	ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw);
+	if (ret) {
+		reset_migrate_dl_data(cs);
+		goto out_unlock;
 	}
 
+	cs->dl_bw_cpu = cpu;
+
 out_success:
 	/*
 	 * Mark attach is in progress.  This makes validate_change() fail
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index edca7849b165..5ddfa0d30bf6 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -3107,20 +3107,18 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p)
 static void set_cpus_allowed_dl(struct task_struct *p,
 				struct affinity_context *ctx)
 {
-	struct root_domain *src_rd;
 	struct rq *rq;
 
 	WARN_ON_ONCE(!dl_task(p));
 
 	rq = task_rq(p);
-	src_rd = rq->rd;
 	/*
 	 * Migrating a SCHED_DEADLINE task between exclusive
 	 * cpusets (different root_domains) entails a bandwidth
 	 * update. We already made space for us in the destination
 	 * domain (see cpuset_can_attach()).
 	 */
-	if (!cpumask_intersects(src_rd->span, ctx->new_mask)) {
+	if (dl_task_needs_bw_move(p, ctx->new_mask)) {
 		struct dl_bw *src_dl_b;
 
 		src_dl_b = dl_bw_of(cpu_of(rq));
@@ -3137,6 +3135,16 @@ static void set_cpus_allowed_dl(struct task_struct *p,
 	set_cpus_allowed_common(p, ctx);
 }
 
+bool dl_task_needs_bw_move(struct task_struct *p,
+			   const struct cpumask *new_mask)
+{
+	if (!dl_task(p))
+		return false;
+
+	guard(rcu)();
+	return !cpumask_intersects(task_rq(p)->rd->span, new_mask);
+}
+
 /* Assumes rq->lock is held */
 static void rq_online_dl(struct rq *rq)
 {
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2026-04-30 12:40 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-21  8:34 [PATCH] cgroup/cpuset: make DL attach bandwidth reservation root-domain aware Guopeng Zhang
2026-04-22  2:14 ` Guopeng Zhang
2026-04-24 14:15 ` Waiman Long
2026-04-26 13:48   ` Guopeng Zhang
2026-04-27 13:47     ` Waiman Long
2026-04-30  8:53   ` Guopeng Zhang
2026-04-30 12:39     ` Waiman Long

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox