From: Andrea Righi <arighi@nvidia.com>
To: Ingo Molnar <mingo@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Juri Lelli <juri.lelli@redhat.com>,
Vincent Guittot <vincent.guittot@linaro.org>,
Dietmar Eggemann <dietmar.eggemann@arm.com>,
Steven Rostedt <rostedt@goodmis.org>,
Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
Valentin Schneider <vschneid@redhat.com>,
Joel Fernandes <joelagnelf@nvidia.com>, Tejun Heo <tj@kernel.org>,
David Vernet <void@manifault.com>,
Changwoo Min <changwoo@igalia.com>, Shuah Khan <shuah@kernel.org>
Cc: sched-ext@lists.linux.dev, bpf@vger.kernel.org,
linux-kernel@vger.kernel.org
Subject: [PATCH 12/14] sched_ext: Selectively enable ext and fair DL servers
Date: Fri, 17 Oct 2025 11:25:59 +0200 [thread overview]
Message-ID: <20251017093214.70029-13-arighi@nvidia.com> (raw)
In-Reply-To: <20251017093214.70029-1-arighi@nvidia.com>
Enable or disable the appropriate DL servers (ext and fair) depending on
whether an scx scheduler is started in full or partial mode:
- in full mode, disable the fair DL server and enable the ext DL server
on all online CPUs,
- in partial mode (%SCX_OPS_SWITCH_PARTIAL), keep both fair and ext DL
servers active to support tasks in both scheduling classes.
Additionally, handle CPU hotplug events by selectively enabling or
disabling the relevant DL servers on the CPU that is going
offline/online. This ensures correct bandwidth reservation even when
CPUs are brought online or offline.
Co-developed-by: Joel Fernandes <joelagnelf@nvidia.com>
Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
Signed-off-by: Andrea Righi <arighi@nvidia.com>
---
kernel/sched/ext.c | 97 +++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 87 insertions(+), 10 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index bc2aaa3236fd4..c5f3c39972b6b 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2545,6 +2545,57 @@ static void set_cpus_allowed_scx(struct task_struct *p,
p, (struct cpumask *)p->cpus_ptr);
}
+static void dl_server_on(struct rq *rq, bool switch_all)
+{
+ struct rq_flags rf;
+ int err;
+
+ rq_lock_irqsave(rq, &rf);
+ update_rq_clock(rq);
+
+ if (switch_all) {
+ /*
+ * If all fair tasks are moved to the scx scheduler, we
+ * don't need the fair DL servers anymore, so remove it.
+ *
+ * When the current scx scheduler is unloaded, the fair DL
+ * server will be re-initialized.
+ */
+ if (dl_server_active(&rq->fair_server))
+ dl_server_stop(&rq->fair_server);
+ dl_server_remove_params(&rq->fair_server);
+ }
+
+ err = dl_server_init_params(&rq->ext_server);
+ WARN_ON_ONCE(err);
+
+ rq_unlock_irqrestore(rq, &rf);
+}
+
+static void dl_server_off(struct rq *rq, bool switch_all)
+{
+ struct rq_flags rf;
+ int err;
+
+ rq_lock_irqsave(rq, &rf);
+ update_rq_clock(rq);
+
+ if (dl_server_active(&rq->ext_server))
+ dl_server_stop(&rq->ext_server);
+ dl_server_remove_params(&rq->ext_server);
+
+ if (switch_all) {
+ /*
+ * Re-initialize the fair DL server if it was previously disabled
+ * because all fair tasks had been moved to the ext class.
+ */
+ err = dl_server_init_params(&rq->fair_server);
+ WARN_ON_ONCE(err);
+ }
+
+ rq_unlock_irqrestore(rq, &rf);
+}
+
static void handle_hotplug(struct rq *rq, bool online)
{
struct scx_sched *sch = scx_root;
@@ -2560,9 +2611,20 @@ static void handle_hotplug(struct rq *rq, bool online)
if (unlikely(!sch))
return;
- if (scx_enabled())
+ if (scx_enabled()) {
+ bool is_switching_all = READ_ONCE(scx_switching_all);
+
scx_idle_update_selcpu_topology(&sch->ops);
+ /*
+ * Update ext and fair DL servers on hotplug events.
+ */
+ if (online)
+ dl_server_on(rq, is_switching_all);
+ else
+ dl_server_off(rq, is_switching_all);
+ }
+
if (online && SCX_HAS_OP(sch, cpu_online))
SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cpu_online, NULL, cpu);
else if (!online && SCX_HAS_OP(sch, cpu_offline))
@@ -3921,6 +3983,7 @@ static void scx_disable_workfn(struct kthread_work *work)
struct scx_exit_info *ei = sch->exit_info;
struct scx_task_iter sti;
struct task_struct *p;
+ bool is_switching_all = READ_ONCE(scx_switching_all);
int kind, cpu;
kind = atomic_read(&sch->exit_kind);
@@ -3976,6 +4039,22 @@ static void scx_disable_workfn(struct kthread_work *work)
scx_init_task_enabled = false;
+ for_each_online_cpu(cpu) {
+ struct rq *rq = cpu_rq(cpu);
+
+ /*
+ * Invalidate all the rq clocks to prevent getting outdated
+ * rq clocks from a previous scx scheduler.
+ */
+ scx_rq_clock_invalidate(rq);
+
+ /*
+ * We are unloading the sched_ext scheduler, we do not need its
+ * DL server bandwidth anymore, remove it for all CPUs.
+ */
+ dl_server_off(rq, is_switching_all);
+ }
+
scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
@@ -3997,15 +4076,6 @@ static void scx_disable_workfn(struct kthread_work *work)
scx_task_iter_stop(&sti);
percpu_up_write(&scx_fork_rwsem);
- /*
- * Invalidate all the rq clocks to prevent getting outdated
- * rq clocks from a previous scx scheduler.
- */
- for_each_possible_cpu(cpu) {
- struct rq *rq = cpu_rq(cpu);
- scx_rq_clock_invalidate(rq);
- }
-
/* no task is on scx, turn off all the switches and flush in-progress calls */
static_branch_disable(&__scx_enabled);
bitmap_zero(sch->has_op, SCX_OPI_END);
@@ -4778,6 +4848,13 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
put_task_struct(p);
}
scx_task_iter_stop(&sti);
+
+ /*
+ * Enable the ext DL server on all online CPUs.
+ */
+ for_each_online_cpu(cpu)
+ dl_server_on(cpu_rq(cpu), !(ops->flags & SCX_OPS_SWITCH_PARTIAL));
+
percpu_up_write(&scx_fork_rwsem);
scx_bypass(false);
--
2.51.0
next prev parent reply other threads:[~2025-10-17 9:34 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-17 9:25 [PATCHSET v9 sched_ext/for-6.19] Add a deadline server for sched_ext tasks Andrea Righi
2025-10-17 9:25 ` [PATCH 01/14] sched/debug: Fix updating of ppos on server write ops Andrea Righi
2025-10-20 8:36 ` Juri Lelli
2025-10-17 9:25 ` [PATCH 02/14] sched/debug: Stop and start server based on if it was active Andrea Righi
2025-10-20 9:12 ` Juri Lelli
2025-10-20 9:27 ` Juri Lelli
2025-10-17 9:25 ` [PATCH 03/14] sched/deadline: Clear the defer params Andrea Righi
2025-10-17 9:25 ` [PATCH 04/14] sched/deadline: Return EBUSY if dl_bw_cpus is zero Andrea Righi
2025-10-20 9:49 ` Juri Lelli
2025-10-20 13:38 ` Andrea Righi
2025-10-20 14:03 ` Andrea Righi
2025-10-20 14:12 ` Juri Lelli
2025-10-17 9:25 ` [PATCH 05/14] sched: Add a server arg to dl_server_update_idle_time() Andrea Righi
2025-10-20 9:54 ` Juri Lelli
2025-10-20 12:49 ` Peter Zijlstra
2025-10-17 9:25 ` [PATCH 06/14] sched_ext: Add a DL server for sched_ext tasks Andrea Righi
2025-10-17 15:40 ` Tejun Heo
2025-10-17 19:00 ` Andrea Righi
2025-10-17 15:47 ` Tejun Heo
2025-10-17 18:58 ` Andrea Righi
2025-10-17 19:04 ` Tejun Heo
2025-10-17 19:06 ` Andrea Righi
2025-10-20 11:58 ` Juri Lelli
2025-10-20 13:50 ` Andrea Righi
2025-10-20 14:09 ` Juri Lelli
2025-10-17 9:25 ` [PATCH 07/14] sched/debug: Add support to change sched_ext server params Andrea Righi
2025-10-20 12:45 ` Juri Lelli
2025-10-21 6:23 ` Andrea Righi
2025-10-17 9:25 ` [PATCH 08/14] sched/deadline: Add support to remove DL server's bandwidth contribution Andrea Righi
2025-10-20 13:46 ` Juri Lelli
2025-10-17 9:25 ` [PATCH 09/14] sched/deadline: Account ext server bandwidth Andrea Righi
2025-10-17 9:25 ` [PATCH 10/14] sched/deadline: Allow to initialize DL server when needed Andrea Righi
2025-10-17 9:25 ` [PATCH 11/14] sched/deadline: Fix DL server crash in inactive_timer callback Andrea Righi
2025-10-17 9:25 ` Andrea Righi [this message]
2025-10-17 9:26 ` [PATCH 13/14] selftests/sched_ext: Add test for sched_ext dl_server Andrea Righi
2025-10-19 19:04 ` Emil Tsalapatis
2025-10-20 13:22 ` Andrea Righi
2025-10-20 13:44 ` Andrea Righi
2025-10-20 13:26 ` Christian Loehle
2025-10-20 13:55 ` Andrea Righi
2025-10-20 14:00 ` Andrea Righi
2025-10-20 14:21 ` Christian Loehle
2025-10-23 15:01 ` Christian Loehle
2025-10-23 15:11 ` Andrea Righi
2025-10-17 9:26 ` [PATCH 14/14] selftests/sched_ext: Add test for DL server total_bw consistency Andrea Righi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251017093214.70029-13-arighi@nvidia.com \
--to=arighi@nvidia.com \
--cc=bpf@vger.kernel.org \
--cc=bsegall@google.com \
--cc=changwoo@igalia.com \
--cc=dietmar.eggemann@arm.com \
--cc=joelagnelf@nvidia.com \
--cc=juri.lelli@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mgorman@suse.de \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=sched-ext@lists.linux.dev \
--cc=shuah@kernel.org \
--cc=tj@kernel.org \
--cc=vincent.guittot@linaro.org \
--cc=void@manifault.com \
--cc=vschneid@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox