[RFC PATCH v6 12/25] sched/rt: Add {alloc/unregister/free}_rt_sched_group

Linux cgroups development
 help / color / mirror / Atom feed

From: Yuri Andriaccio <yurand2000@gmail.com>
To: "Ingo Molnar" <mingo@redhat.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Juri Lelli" <juri.lelli@redhat.com>,
	"Vincent Guittot" <vincent.guittot@linaro.org>,
	"Dietmar Eggemann" <dietmar.eggemann@arm.com>,
	"Steven Rostedt" <rostedt@goodmis.org>,
	"Ben Segall" <bsegall@google.com>, "Mel Gorman" <mgorman@suse.de>,
	"Valentin Schneider" <vschneid@redhat.com>,
	"Tejun Heo" <tj@kernel.org>,
	"Johannes Weiner" <hannes@cmpxchg.org>,
	"Michal Koutný" <mkoutny@suse.com>
Cc: cgroups@vger.kernel.org, linux-kernel@vger.kernel.org,
	Luca Abeni <luca.abeni@santannapisa.it>,
	Yuri Andriaccio <yuri.andriaccio@santannapisa.it>
Subject: [RFC PATCH v6 12/25] sched/rt: Add {alloc/unregister/free}_rt_sched_group
Date: Mon,  8 Jun 2026 14:15:31 +0200	[thread overview]
Message-ID: <20260608121546.69910-13-yurand2000@gmail.com> (raw)
In-Reply-To: <20260608121546.69910-1-yurand2000@gmail.com>

Add allocation and deallocation code for rt-cgroups.

Declare the dl_server-specific pick function (a skeleton only, with no
implementation yet) that the deadline servers call when trying to
schedule.

Initialize a cgroup's active context to that of its parent.

Co-developed-by: Alessio Balsini <a.balsini@sssup.it>
Signed-off-by: Alessio Balsini <a.balsini@sssup.it>
Co-developed-by: Andrea Parri <parri.andrea@gmail.com>
Signed-off-by: Andrea Parri <parri.andrea@gmail.com>
Co-developed-by: luca abeni <luca.abeni@santannapisa.it>
Signed-off-by: luca abeni <luca.abeni@santannapisa.it>
Signed-off-by: Yuri Andriaccio <yurand2000@gmail.com>
---
 kernel/sched/rt.c | 156 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 154 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index dbba7a57d6f1..a6adf21772a6 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -120,24 +120,176 @@ struct dl_bandwidth *dl_bandwidth_write(struct task_group *tg)

 void unregister_rt_sched_group(struct task_group *tg)
 {
+	int i;
+
+	if (!rt_group_sched_enabled())
+		return;
+
+	if (!tg->dl_se || !tg->rt_rq)
+		return;

+	for_each_possible_cpu(i) {
+		if (!tg->dl_se[i] || !tg->rt_rq[i])
+			continue;
+
+		if (tg->dl_se[i]->dl_runtime)
+			dl_init_tg(tg->dl_se[i], 0, tg->dl_se[i]->dl_period);
+	}
 }

 void free_rt_sched_group(struct task_group *tg)
 {
+	int i;
+	unsigned long flags;
+
 	if (!rt_group_sched_enabled())
 		return;
+
+	if (!tg->dl_se || !tg->rt_rq)
+		return;
+
+	for_each_possible_cpu(i) {
+		if (!tg->dl_se[i] || !tg->rt_rq[i])
+			continue;
+
+		/*
+		 * Shutdown the dl_server and free it
+		 *
+		 * Since the dl timer is going to be cancelled,
+		 * we risk to never decrease the running bw...
+		 * Fix this issue by changing the group runtime
+		 * to 0 immediately before freeing it.
+		 */
+		if (tg->dl_se[i]->dl_runtime)
+			dl_init_tg(tg->dl_se[i], 0, tg->dl_se[i]->dl_period);
+
+		raw_spin_rq_lock_irqsave(cpu_rq(i), flags);
+		hrtimer_cancel(&tg->dl_se[i]->dl_timer);
+		raw_spin_rq_unlock_irqrestore(cpu_rq(i), flags);
+		kfree(tg->dl_se[i]);
+
+		/* Free the local per-cpu runqueue */
+		kfree(rq_of_rt_rq(tg->rt_rq[i]));
+	}
+
+	kfree(tg->rt_rq);
+	kfree(tg->dl_se);
 }

+static inline void __rt_rq_free(struct rt_rq **rt_rq)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		if (rt_rq[i])	/* unset slot: rq_of_rt_rq(NULL) is presumably not NULL */
+			kfree(rq_of_rt_rq(rt_rq[i]));
+	}
+	kfree(rt_rq);
+}
+
+DEFINE_FREE(rt_rq_free, struct rt_rq **, if (_T) __rt_rq_free(_T))
+
+/* Free each per-CPU entity in @dl_se, then the pointer array itself. */
+static inline void __dl_se_free(struct sched_dl_entity **dl_se)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		kfree(dl_se[cpu]);
+
+	kfree(dl_se);
+}
+
+DEFINE_FREE(dl_se_free, struct sched_dl_entity **, if (_T) __dl_se_free(_T))
+
+/* Allocate the per-CPU rt_rq/dl_se arrays of @tg; returns 1 on success, else 0. */
+static int __alloc_rt_sched_group_data(struct task_group *tg)
+{
+	int i;
+
+	/* Scope-based cleanup releases these on every early return below. */
+	struct rt_rq **tg_rt_rq __free(rt_rq_free) =
+		kcalloc(nr_cpu_ids, sizeof(*tg_rt_rq), GFP_KERNEL);
+	if (!tg_rt_rq)
+		return 0;
+
+	struct sched_dl_entity **tg_dl_se __free(dl_se_free) =
+		kcalloc(nr_cpu_ids, sizeof(*tg_dl_se), GFP_KERNEL);
+	if (!tg_dl_se)
+		return 0;
+
+	for_each_possible_cpu(i) {
+		struct rq *s_rq __free(kfree) =
+			kzalloc_node(sizeof(*s_rq), GFP_KERNEL, cpu_to_node(i));
+		if (!s_rq)
+			return 0;
+
+		struct sched_dl_entity *dl_se __free(kfree) =
+			kzalloc_node(sizeof(*dl_se), GFP_KERNEL, cpu_to_node(i));
+		if (!dl_se)
+			return 0;
+
+		/* Ownership moves to the arrays; disarm the per-object cleanup. */
+		tg_rt_rq[i] = &no_free_ptr(s_rq)->rt;
+		tg_dl_se[i] = no_free_ptr(dl_se);
+	}
+
+	tg->rt_rq = no_free_ptr(tg_rt_rq);
+	tg->dl_se = no_free_ptr(tg_dl_se);
+
+	return 1;
+}
+
+static struct task_struct *rt_server_pick(struct sched_dl_entity *dl_se, struct rq_flags *rf);
+
 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
+	struct sched_dl_entity *dl_se;
+	struct rq *s_rq;
+	int i;
+
 	if (!rt_group_sched_enabled())
 		return 1;

+	/* Allocate all necessary resources beforehand */
+	if (!__alloc_rt_sched_group_data(tg))
+		return 0;
+
+	/* Initialize the allocated resources now. */
+	scoped_guard(raw_spinlock_irq, dl_bw_lock_of_tg(parent)) {
+		init_dl_bandwidth(&tg->dl_bandwidth, 0, RUNTIME_INF,
+				  dl_bandwidth_read(parent)->active_context);
+	}
+
+	for_each_possible_cpu(i) {
+		s_rq = rq_of_rt_rq(tg->rt_rq[i]);
+		dl_se = tg->dl_se[i];
+
+		init_rt_rq(&s_rq->rt);
+		s_rq->cpu = i;
+		s_rq->rt.tg = tg;
+
+		init_dl_entity(dl_se);
+		dl_se->dl_runtime = 0;
+		dl_se->dl_deadline = 0;
+		dl_se->dl_period = 0;
+		dl_se->runtime = 0;
+		dl_se->deadline = 0;
+		dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+		dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
+		dl_se->dl_server = 1;
+		dl_server_init(dl_se, &cpu_rq(i)->dl, s_rq, rt_server_pick);
+	}
+
 	return 1;
 }

-#else /* !CONFIG_RT_GROUP_SCHED: */
+static struct task_struct *rt_server_pick(struct sched_dl_entity *dl_se, struct rq_flags *rf)
+{
+	return NULL;	/* skeleton only: no task is picked yet */
+}
+
+#else /* !CONFIG_RT_GROUP_SCHED */

 void unregister_rt_sched_group(struct task_group *tg) { }

@@ -147,7 +299,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	return 1;
 }
-#endif /* !CONFIG_RT_GROUP_SCHED */
+#endif /* CONFIG_RT_GROUP_SCHED */

 static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
 {
--
2.54.0

next prev parent reply	other threads:[~2026-06-08 12:16 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-08 12:15 [RFC PATCH v6 00/25] Hierarchical Constant Bandwidth Server Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 01/25] sched/deadline: Fix replenishment logic for non-deferred servers Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 02/25] sched/rt: Update default bandwidth for real-time tasks to ONE Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 03/25] sched/deadline: Do not access dl_se->rq directly Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 04/25] sched/deadline: Distinguish between dl_rq and my_q Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 05/25] sched/rt: Pass an rt_rq instead of an rq where needed Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 06/25] sched/rt: Move functions from rt.c to sched.h Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 07/25] sched/rt: Disable RT_GROUP_SCHED Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 08/25] sched/rt: Remove unnecessary runqueue pointer in struct rt_rq Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 09/25] sched/rt: Introduce HCBS specific structs in task_group Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 10/25] sched/core: Initialize HCBS specific structures Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 11/25] sched/deadline: Add dl_init_tg Yuri Andriaccio
2026-06-08 12:15 ` Yuri Andriaccio [this message]
2026-06-11  8:42   ` [RFC PATCH v6 12/25] sched/rt: Add {alloc/unregister/free}_rt_sched_group Juri Lelli
2026-06-08 12:15 ` [RFC PATCH v6 13/25] sched/deadline: Account rt-cgroups bandwidth in deadline tasks schedulability tests Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 14/25] sched/rt: Implement dl-server operations for rt-cgroups Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 15/25] sched/rt: Update task event callbacks for HCBS scheduling Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 16/25] sched/rt: Remove support for cgroups-v1 Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 17/25] sched/rt: Update rt-cgroup schedulability checks Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 18/25] sched/rt: Update task's RT runqueue when switching scheduling class Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 19/25] sched/rt: Remove old RT_GROUP_SCHED data structures Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 20/25] sched/rt: Add HCBS migration code to related functions Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 21/25] sched/rt: Hook HCBS migration functions Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 22/25] sched/core: Execute enqueued balance callbacks when changing allowed CPUs Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 23/25] sched/rt: Try pull task on empty server pick Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 24/25] sched/core: Execute enqueued balance callbacks after migrate_disable_switch Yuri Andriaccio
2026-06-08 12:15 ` [RFC PATCH v6 25/25] Documentation: Update documentation for real-time cgroups Yuri Andriaccio
2026-06-09 15:46 ` [RFC PATCH v6 00/25] Hierarchical Constant Bandwidth Server Juri Lelli
2026-06-09 16:23   ` Yuri Andriaccio
2026-06-10  9:21     ` Juri Lelli
2026-06-15 20:38 ` Tejun Heo

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:dbba7a57d6f dfblob:a6adf21772a )
 OR (
bs:"[RFC PATCH v6 12/25] sched/rt: Add {alloc/unregister/free}_rt_sched_group" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260608121546.69910-13-yurand2000@gmail.com \
    --to=yurand2000@gmail.com \
    --cc=bsegall@google.com \
    --cc=cgroups@vger.kernel.org \
    --cc=dietmar.eggemann@arm.com \
    --cc=hannes@cmpxchg.org \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luca.abeni@santannapisa.it \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=mkoutny@suse.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=tj@kernel.org \
    --cc=vincent.guittot@linaro.org \
    --cc=vschneid@redhat.com \
    --cc=yuri.andriaccio@santannapisa.it \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox