public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Yuri Andriaccio <yurand2000@gmail.com>
To: Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Juri Lelli <juri.lelli@redhat.com>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Dietmar Eggemann <dietmar.eggemann@arm.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Valentin Schneider <vschneid@redhat.com>
Cc: linux-kernel@vger.kernel.org,
	Luca Abeni <luca.abeni@santannapisa.it>,
	Yuri Andriaccio <yuri.andriaccio@santannapisa.it>
Subject: [RFC PATCH v5 13/29] sched/rt: Implement dl-server operations for rt-cgroups
Date: Thu, 30 Apr 2026 23:38:17 +0200	[thread overview]
Message-ID: <20260430213835.62217-14-yurand2000@gmail.com> (raw)
In-Reply-To: <20260430213835.62217-1-yurand2000@gmail.com>

Implement rt_server_pick, the callback that deadline servers use to
pick a task to schedule.
  rt_server_pick(): pick the next runnable rt task and tell the
  scheduler that it is going to be scheduled next.

Let enqueue_task_rt function start the attached deadline server when the
first task is enqueued on a specific rq/server.
  The server is not symmetrically stopped in dequeue_task_rt as it is
  stopped when server_pick_task returns NULL (see deadline.c).

Change update_curr_rt to perform a deadline server update if the
updated task is served by a non-root group.

Update inc/dec_dl_tasks to account the number of active tasks in the
local runqueue for rt-cgroups servers, as their local runqueue is
different from the global runqueue, and thus when a rt-group server is
activated/deactivated, the number of served tasks must be
added/removed. This uses nr_running to be compatible with future
dl-server interfaces. Account also the deadline server so that it is
picked for shutdown when its runqueue is empty (future patches will
try to pull tasks before stopping).

Update inc/dec_rt_prio_smp to change a rq's cpupri only if the rt_rq
is the global runqueue, since cgroups are scheduled via their
dl-server priority.

Update inc/dec_rt_tasks to account for waking/sleeping tasks on the
global runqueue, when the task runs on the root cgroup, or its local
dl server is active. The accounting is not done when servers are
throttled, as they will add/sub the number of tasks running when they
get enqueued/dequeued. For rt cgroups, account for the number of active
tasks in the nr_running field of the local runqueue (add/sub_nr_running),
as this number is used when a dl server is enqueued/dequeued.

Update set_task_rq to record the dl_rq, tracking which deadline
server manages a task.

Update set_task_rq to not use the parent field anymore, as it is
unused by this patchset's code. Remove the unused parent field from
sched_rt_entity.

Co-developed-by: Alessio Balsini <a.balsini@sssup.it>
Signed-off-by: Alessio Balsini <a.balsini@sssup.it>
Co-developed-by: Andrea Parri <parri.andrea@gmail.com>
Signed-off-by: Andrea Parri <parri.andrea@gmail.com>
Co-developed-by: luca abeni <luca.abeni@santannapisa.it>
Signed-off-by: luca abeni <luca.abeni@santannapisa.it>
Signed-off-by: Yuri Andriaccio <yurand2000@gmail.com>
---
 include/linux/sched.h   |  1 -
 kernel/sched/deadline.c |  8 ++++++
 kernel/sched/rt.c       | 60 ++++++++++++++++++++++++++++++++++++++---
 kernel/sched/sched.h    |  8 +++++-
 4 files changed, 71 insertions(+), 6 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index eb8b57f689b5..ea2e74598b93 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -630,7 +630,6 @@ struct sched_rt_entity {

 	struct sched_rt_entity		*back;
 #ifdef CONFIG_RT_GROUP_SCHED
-	struct sched_rt_entity		*parent;
 	/* rq on which this entity is (to be) queued: */
 	struct rt_rq			*rt_rq;
 	/* rq "owned" by this entity/group: */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 084af1d375b5..c82810732106 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2093,6 +2093,10 @@ void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)

 	if (!dl_server(dl_se))
 		add_nr_running(rq_of_dl_rq(dl_rq), 1);
+	else if (rq_of_dl_se(dl_se) != dl_se->my_q) {
+		WARN_ON(dl_se->my_q->rt.rt_nr_running != dl_se->my_q->nr_running);
+		add_nr_running(rq_of_dl_rq(dl_rq), dl_se->my_q->nr_running + 1);
+	}

 	inc_dl_deadline(dl_rq, deadline);
 }
@@ -2105,6 +2109,10 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)

 	if (!dl_server(dl_se))
 		sub_nr_running(rq_of_dl_rq(dl_rq), 1);
+	else if (rq_of_dl_se(dl_se) != dl_se->my_q) {
+		WARN_ON(dl_se->my_q->rt.rt_nr_running != dl_se->my_q->nr_running);
+		sub_nr_running(rq_of_dl_rq(dl_rq), dl_se->my_q->nr_running - 1);
+	}

 	dec_dl_deadline(dl_rq, dl_se->deadline);
 }
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3d7f2b2ebe60..defb812b0e48 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -144,9 +144,22 @@ void free_rt_sched_group(struct task_group *tg)
 	kfree(tg->dl_se);
 }

+static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq);
+static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first);
+
 static struct task_struct *rt_server_pick(struct sched_dl_entity *dl_se, struct rq_flags *rf)
 {
-	return NULL;
+	struct rt_rq *rt_rq = &dl_se->my_q->rt;
+	struct rq *rq = rq_of_rt_rq(rt_rq);
+	struct task_struct *p;
+
+	if (!sched_rt_runnable(dl_se->my_q))
+		return NULL;
+
+	p = rt_task_of(pick_next_rt_entity(rt_rq));
+	set_next_task_rt(rq, p, true);
+
+	return p;
 }

 static inline void __rt_rq_free(struct rt_rq **rt_rq)
@@ -462,6 +475,7 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 static void update_curr_rt(struct rq *rq)
 {
 	struct task_struct *donor = rq->donor;
+	struct rt_rq *rt_rq;
 	s64 delta_exec;

 	if (donor->sched_class != &rt_sched_class)
@@ -471,8 +485,18 @@ static void update_curr_rt(struct rq *rq)
 	if (unlikely(delta_exec <= 0))
 		return;

-	if (!rt_bandwidth_enabled())
+	if (!rt_group_sched_enabled())
 		return;
+
+	if (!dl_bandwidth_enabled())
+		return;
+
+	rt_rq = rt_rq_of_se(&donor->rt);
+	if (is_dl_group(rt_rq)) {
+		struct sched_dl_entity *dl_se = dl_group_of(rt_rq);
+
+		dl_server_update(dl_se, delta_exec);
+	}
 }

 static void
@@ -483,7 +507,7 @@ inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 	/*
 	 * Change rq's cpupri only if rt_rq is the top queue.
 	 */
-	if (IS_ENABLED(CONFIG_RT_GROUP_SCHED) && &rq->rt != rt_rq)
+	if (IS_ENABLED(CONFIG_RT_GROUP_SCHED) && is_dl_group(rt_rq))
 		return;

 	if (rq->online && prio < prev_prio)
@@ -498,7 +522,7 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 	/*
 	 * Change rq's cpupri only if rt_rq is the top queue.
 	 */
-	if (IS_ENABLED(CONFIG_RT_GROUP_SCHED) && &rq->rt != rt_rq)
+	if (IS_ENABLED(CONFIG_RT_GROUP_SCHED) && is_dl_group(rt_rq))
 		return;

 	if (rq->online && rt_rq->highest_prio.curr != prev_prio)
@@ -561,6 +585,16 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	rt_rq->rr_nr_running += is_rr_task(rt_se);

 	inc_rt_prio(rt_rq, rt_se_prio(rt_se));
+
+	if (rt_group_sched_enabled() && is_dl_group(rt_rq)) {
+		struct sched_dl_entity *dl_se = dl_group_of(rt_rq);
+
+		if (!dl_se->dl_throttled)
+			add_nr_running(rq_of_rt_rq(rt_rq), 1);
+		add_nr_running(served_rq_of_rt_rq(rt_rq), 1);
+	} else {
+		add_nr_running(rq_of_rt_rq(rt_rq), 1);
+	}
 }

 static inline
@@ -571,6 +605,16 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	rt_rq->rr_nr_running -= is_rr_task(rt_se);

 	dec_rt_prio(rt_rq, rt_se_prio(rt_se));
+
+	if (rt_group_sched_enabled() && is_dl_group(rt_rq)) {
+		struct sched_dl_entity *dl_se = dl_group_of(rt_rq);
+
+		if (!dl_se->dl_throttled)
+			sub_nr_running(rq_of_rt_rq(rt_rq), 1);
+		sub_nr_running(served_rq_of_rt_rq(rt_rq), 1);
+	} else {
+		sub_nr_running(rq_of_rt_rq(rt_rq), 1);
+	}
 }

 /*
@@ -752,6 +796,14 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 	check_schedstat_required();
 	update_stats_wait_start_rt(rt_rq_of_se(rt_se), rt_se);

+	/* Task arriving in an idle group of tasks. */
+	if (rt_group_sched_enabled() &&
+	    is_dl_group(rt_rq) && rt_rq->rt_nr_running == 0) {
+		struct sched_dl_entity *dl_se = dl_group_of(rt_rq);
+
+		dl_server_start(dl_se);
+	}
+
 	enqueue_rt_entity(rt_se, flags);

 	if (task_is_blocked(p))
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ca69d2132061..d949babfe16a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2292,7 +2292,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 	if (!rt_group_sched_enabled())
 		tg = &root_task_group;
 	p->rt.rt_rq  = tg->rt_rq[cpu];
-	p->rt.parent = tg->rt_se[cpu];
+	p->dl.dl_rq  = &cpu_rq(cpu)->dl;
 #endif /* CONFIG_RT_GROUP_SCHED */
 }

@@ -2954,6 +2954,9 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
 	unsigned prev_nr = rq->nr_running;

 	rq->nr_running = prev_nr + count;
+	if (rq != cpu_rq(rq->cpu))
+		return;
+
 	if (trace_sched_update_nr_running_tp_enabled()) {
 		call_trace_sched_update_nr_running(rq, count);
 	}
@@ -2967,6 +2970,9 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
 static inline void sub_nr_running(struct rq *rq, unsigned count)
 {
 	rq->nr_running -= count;
+	if (rq != cpu_rq(rq->cpu))
+		return;
+
 	if (trace_sched_update_nr_running_tp_enabled()) {
 		call_trace_sched_update_nr_running(rq, -count);
 	}
--
2.53.0


  parent reply	other threads:[~2026-04-30 21:39 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-30 21:38 [RFC PATCH v5 00/29] Hierarchical Constant Bandwidth Server Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 01/29] sched/deadline: Fix replenishment logic for non-deferred servers Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 02/29] sched/deadline: Do not access dl_se->rq directly Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 03/29] sched/deadline: Distinguish between dl_rq and my_q Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 04/29] sched/rt: Pass an rt_rq instead of an rq where needed Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 05/29] sched/rt: Move functions from rt.c to sched.h Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 06/29] sched/rt: Disable RT_GROUP_SCHED Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 07/29] sched/rt: Remove unnecessary runqueue pointer in struct rt_rq Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 08/29] sched/rt: Introduce HCBS specific structs in task_group Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 09/29] sched/core: Initialize HCBS specific structures Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 10/29] sched/deadline: Add dl_init_tg Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 11/29] sched/rt: Add {alloc/unregister/free}_rt_sched_group Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 12/29] sched/deadline: Account rt-cgroups bandwidth in deadline tasks schedulability tests Yuri Andriaccio
2026-04-30 21:38 ` Yuri Andriaccio [this message]
2026-05-05 13:04   ` [RFC PATCH v5 13/29] sched/rt: Implement dl-server operations for rt-cgroups Peter Zijlstra
2026-04-30 21:38 ` [RFC PATCH v5 14/29] sched/rt: Update task event callbacks for HCBS scheduling Yuri Andriaccio
2026-05-05 13:16   ` Peter Zijlstra
2026-04-30 21:38 ` [RFC PATCH v5 15/29] sched/rt: Update rt-cgroup schedulability checks Yuri Andriaccio
2026-05-05 14:36   ` Peter Zijlstra
2026-04-30 21:38 ` [RFC PATCH v5 16/29] sched/rt: Allow zeroing the runtime of the root control group Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 17/29] sched/rt: Remove old RT_GROUP_SCHED data structures Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 18/29] sched/core: Cgroup v2 support Yuri Andriaccio
2026-05-05 14:59   ` Peter Zijlstra
2026-04-30 21:38 ` [RFC PATCH v5 19/29] sched/rt: Remove support for cgroups-v1 Yuri Andriaccio
2026-05-05 15:01   ` Peter Zijlstra
2026-04-30 21:38 ` [RFC PATCH v5 20/29] sched/deadline: Allow deeper hierarchies of RT cgroups Yuri Andriaccio
2026-05-05 15:15   ` Peter Zijlstra
2026-05-05 19:56     ` Tejun Heo
2026-04-30 21:38 ` [RFC PATCH v5 21/29] sched/rt: Update default bandwidth for real-time tasks to ONE Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 22/29] sched/rt: Add rt-cgroup migration functions Yuri Andriaccio
2026-05-05 15:20   ` Peter Zijlstra
2026-05-05 15:24   ` Peter Zijlstra
2026-04-30 21:38 ` [RFC PATCH v5 23/29] sched/rt: Hook HCBS " Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 24/29] sched/core: Execute enqueued balance callbacks when changing allowed CPUs Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 25/29] sched/rt: Try pull task on empty server pick Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 26/29] sched/core: Execute enqueued balance callbacks after migrate_disable_switch Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 27/29] Documentation: Update documentation for real-time cgroups Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 28/29] sched/rt: Add debug BUG_ONs for pre-migration code Yuri Andriaccio
2026-04-30 21:38 ` [RFC PATCH v5 29/29] sched/rt: Add debug BUG_ONs in migration code Yuri Andriaccio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260430213835.62217-14-yurand2000@gmail.com \
    --to=yurand2000@gmail.com \
    --cc=bsegall@google.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luca.abeni@santannapisa.it \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=vincent.guittot@linaro.org \
    --cc=vschneid@redhat.com \
    --cc=yuri.andriaccio@santannapisa.it \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox