public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Yuri Andriaccio <yurand2000@gmail.com>
To: Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Valentin Schneider <vschneid@redhat.com>
Cc: linux-kernel@vger.kernel.org,
	Luca Abeni <luca.abeni@santannapisa.it>,
	Yuri Andriaccio <yuri.andriaccio@santannapisa.it>
Subject: [RFC PATCH v5 11/29] sched/rt: Add {alloc/unregister/free}_rt_sched_group
Date: Thu, 30 Apr 2026 23:38:15 +0200	[thread overview]
Message-ID: <20260430213835.62217-12-yurand2000@gmail.com> (raw)
In-Reply-To: <20260430213835.62217-1-yurand2000@gmail.com>

From: luca abeni <luca.abeni@santannapisa.it>

Add allocation and deallocation code for rt-cgroups.

Declare dl_server specific functions (skeleton only, no implementation
yet), which the deadline servers need to call when trying to schedule.

Co-developed-by: Alessio Balsini <a.balsini@sssup.it>
Signed-off-by: Alessio Balsini <a.balsini@sssup.it>
Co-developed-by: Andrea Parri <parri.andrea@gmail.com>
Signed-off-by: Andrea Parri <parri.andrea@gmail.com>
Co-developed-by: Yuri Andriaccio <yurand2000@gmail.com>
Signed-off-by: Yuri Andriaccio <yurand2000@gmail.com>
Signed-off-by: luca abeni <luca.abeni@santannapisa.it>
---
 kernel/sched/rt.c | 151 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 149 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 741fac9f57ac..3d7f2b2ebe60 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -88,24 +88,171 @@ void init_rt_rq(struct rt_rq *rt_rq)

 void unregister_rt_sched_group(struct task_group *tg)
 {
+	int i;
+
+	if (!rt_group_sched_enabled())
+		return;
+
+	if (!tg->dl_se || !tg->rt_rq)
+		return;

+	for_each_possible_cpu(i) {
+		if (!tg->dl_se[i] || !tg->rt_rq[i])
+			continue;
+
+		if (tg->dl_se[i]->dl_runtime)
+			dl_init_tg(tg->dl_se[i], 0, tg->dl_se[i]->dl_period);
+	}
 }

 void free_rt_sched_group(struct task_group *tg)
 {
+	int i;
+	unsigned long flags;
+
 	if (!rt_group_sched_enabled())
 		return;
+
+	if (!tg->dl_se || !tg->rt_rq)
+		return;
+
+	for_each_possible_cpu(i) {
+		if (!tg->dl_se[i] || !tg->rt_rq[i])
+			continue;
+
+		/*
+		 * Shutdown the dl_server and free it
+		 *
+		 * Since the dl timer is going to be cancelled,
+		 * we risk never decreasing the running bw...
+		 * Fix this issue by changing the group runtime
+		 * to 0 immediately before freeing it.
+		 */
+		if (tg->dl_se[i]->dl_runtime)
+			dl_init_tg(tg->dl_se[i], 0, tg->dl_se[i]->dl_period);
+
+		raw_spin_rq_lock_irqsave(cpu_rq(i), flags);
+		hrtimer_cancel(&tg->dl_se[i]->dl_timer);
+		raw_spin_rq_unlock_irqrestore(cpu_rq(i), flags);
+		kfree(tg->dl_se[i]);
+
+		/* Free the local per-cpu runqueue */
+		kfree(served_rq_of_rt_rq(tg->rt_rq[i]));
+	}
+
+	kfree(tg->rt_rq);
+	kfree(tg->dl_se);
+}
+
+static struct task_struct *rt_server_pick(struct sched_dl_entity *dl_se, struct rq_flags *rf)
+{
+	return NULL;
+}
+
+static inline void __rt_rq_free(struct rt_rq **rt_rq)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		kfree(served_rq_of_rt_rq(rt_rq[i]));
+	}
+
+	kfree(rt_rq);
+}
+
+DEFINE_FREE(rt_rq_free, struct rt_rq **, if (_T) __rt_rq_free(_T))
+
+static inline void __dl_se_free(struct sched_dl_entity **dl_se)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		kfree(dl_se[i]);
+	}
+
+	kfree(dl_se);
+}
+
+DEFINE_FREE(dl_se_free, struct sched_dl_entity **, if (_T) __dl_se_free(_T))
+
+static int __alloc_rt_sched_group_data(struct task_group *tg) {
+	/* Instantiate automatic cleanup in the event of allocation failure */
+	struct rt_rq **tg_rt_rq __free(rt_rq_free) = NULL;
+	struct sched_dl_entity **tg_dl_se __free(dl_se_free) = NULL;
+	struct sched_dl_entity *dl_se __free(kfree) = NULL;
+	struct rq *s_rq __free(kfree) = NULL;
+	int i;
+
+	tg_rt_rq = kcalloc(nr_cpu_ids, sizeof(struct rt_rq *), GFP_KERNEL);
+	if (!tg_rt_rq)
+		return 0;
+
+	tg_dl_se = kcalloc(nr_cpu_ids,
+			   sizeof(struct sched_dl_entity *), GFP_KERNEL);
+	if (!tg_dl_se)
+		return 0;
+
+	for_each_possible_cpu(i) {
+		s_rq = kzalloc_node(sizeof(struct rq),
+				    GFP_KERNEL, cpu_to_node(i));
+		if (!s_rq)
+			return 0;
+
+		dl_se = kzalloc_node(sizeof(struct sched_dl_entity),
+				     GFP_KERNEL, cpu_to_node(i));
+		if (!dl_se)
+			return 0;
+
+		tg_rt_rq[i] = &no_free_ptr(s_rq)->rt;
+		tg_dl_se[i] = no_free_ptr(dl_se);
+	}
+
+	tg->rt_rq = no_free_ptr(tg_rt_rq);
+	tg->dl_se = no_free_ptr(tg_dl_se);
+
+	return 1;
 }

 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
+	struct sched_dl_entity *dl_se;
+	struct rq *s_rq;
+	int i;
+
 	if (!rt_group_sched_enabled())
 		return 1;

+	/* Allocate all necessary resources beforehand */
+	if (!__alloc_rt_sched_group_data(tg))
+		return 0;
+
+	/* Initialize the allocated resources now. */
+	init_dl_bandwidth(&tg->dl_bandwidth, 0, 0);
+
+	for_each_possible_cpu(i) {
+		s_rq = served_rq_of_rt_rq(tg->rt_rq[i]);
+		dl_se = tg->dl_se[i];
+
+		init_rt_rq(&s_rq->rt);
+		s_rq->cpu = i;
+		s_rq->rt.tg = tg;
+
+		init_dl_entity(dl_se);
+		dl_se->dl_runtime = tg->dl_bandwidth.dl_runtime;
+		dl_se->dl_deadline = tg->dl_bandwidth.dl_period;
+		dl_se->dl_period = tg->dl_bandwidth.dl_period;
+		dl_se->runtime = 0;
+		dl_se->deadline = 0;
+		dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+		dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
+		dl_se->dl_server = 1;
+		dl_server_init(dl_se, &cpu_rq(i)->dl, s_rq, rt_server_pick);
+	}
+
 	return 1;
 }

-#else /* !CONFIG_RT_GROUP_SCHED: */
+#else /* !CONFIG_RT_GROUP_SCHED */

 void unregister_rt_sched_group(struct task_group *tg) { }

@@ -115,7 +262,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	return 1;
 }
-#endif /* !CONFIG_RT_GROUP_SCHED */
+#endif /* CONFIG_RT_GROUP_SCHED */

 static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
 {
--
2.53.0


  parent reply	other threads:[~2026-04-30 21:38 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-30 21:38 [RFC PATCH v5 00/29] Hierarchical Constant Bandwidth Server Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 01/29] sched/deadline: Fix replenishment logic for non-deferred servers Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 02/29] sched/deadline: Do not access dl_se->rq directly Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 03/29] sched/deadline: Distinguish between dl_rq and my_q Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 04/29] sched/rt: Pass an rt_rq instead of an rq where needed Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 05/29] sched/rt: Move functions from rt.c to sched.h Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 06/29] sched/rt: Disable RT_GROUP_SCHED Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 07/29] sched/rt: Remove unnecessary runqueue pointer in struct rt_rq Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 08/29] sched/rt: Introduce HCBS specific structs in task_group Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 09/29] sched/core: Initialize HCBS specific structures Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 10/29] sched/deadline: Add dl_init_tg Yuri Andriaccio
2026-04-30 21:38 ` Yuri Andriaccio [this message]
2026-04-30 21:38 ` [RFC PATCH v5 12/29] sched/deadline: Account rt-cgroups bandwidth in deadline tasks schedulability tests Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 13/29] sched/rt: Implement dl-server operations for rt-cgroups Yuri Andriaccio
2026-05-05 13:04   ` Peter Zijlstra
2026-04-30 21:38 ` [RFC PATCH v5 14/29] sched/rt: Update task event callbacks for HCBS scheduling Yuri Andriaccio
2026-05-05 13:16   ` Peter Zijlstra
2026-04-30 21:38 ` [RFC PATCH v5 15/29] sched/rt: Update rt-cgroup schedulability checks Yuri Andriaccio
2026-05-05 14:36   ` Peter Zijlstra
2026-04-30 21:38 ` [RFC PATCH v5 16/29] sched/rt: Allow zeroing the runtime of the root control group Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 17/29] sched/rt: Remove old RT_GROUP_SCHED data structures Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 18/29] sched/core: Cgroup v2 support Yuri Andriaccio
2026-05-05 14:59   ` Peter Zijlstra
2026-04-30 21:38 ` [RFC PATCH v5 19/29] sched/rt: Remove support for cgroups-v1 Yuri Andriaccio
2026-05-05 15:01   ` Peter Zijlstra
2026-04-30 21:38 ` [RFC PATCH v5 20/29] sched/deadline: Allow deeper hierarchies of RT cgroups Yuri Andriaccio
2026-05-05 15:15   ` Peter Zijlstra
2026-05-05 19:56     ` Tejun Heo
2026-04-30 21:38 ` [RFC PATCH v5 21/29] sched/rt: Update default bandwidth for real-time tasks to ONE Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 22/29] sched/rt: Add rt-cgroup migration functions Yuri Andriaccio
2026-05-05 15:20   ` Peter Zijlstra
2026-05-05 15:24   ` Peter Zijlstra
2026-04-30 21:38 ` [RFC PATCH v5 23/29] sched/rt: Hook HCBS " Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 24/29] sched/core: Execute enqueued balance callbacks when changing allowed CPUs Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 25/29] sched/rt: Try pull task on empty server pick Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 26/29] sched/core: Execute enqueued balance callbacks after migrate_disable_switch Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 27/29] Documentation: Update documentation for real-time cgroups Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 28/29] sched/rt: Add debug BUG_ONs for pre-migration code Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 29/29] sched/rt: Add debug BUG_ONs in migration code Yuri Andriaccio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260430213835.62217-12-yurand2000@gmail.com \
    --to=yurand2000@gmail.com \
    --cc=bsegall@google.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luca.abeni@santannapisa.it \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=vincent.guittot@linaro.org \
    --cc=vschneid@redhat.com \
    --cc=yuri.andriaccio@santannapisa.it \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox