From: Tejun Heo <tj@kernel.org>
To: linux-kernel@vger.kernel.org, sched-ext@lists.linux.dev
Cc: void@manifault.com, arighi@nvidia.com, changwoo@igalia.com,
emil@etsalapatis.com, hannes@cmpxchg.org, mkoutny@suse.com,
cgroups@vger.kernel.org, Tejun Heo <tj@kernel.org>
Subject: [PATCH 20/34] sched_ext: Factor out scx_dispatch_sched()
Date: Tue, 24 Feb 2026 19:00:55 -1000
Message-ID: <20260225050109.1070059-21-tj@kernel.org>
In-Reply-To: <20260225050109.1070059-1-tj@kernel.org>
In preparation for multiple scheduler support, factor out
scx_dispatch_sched() from balance_one(). The new function boundary makes
caching $prev_on_scx and $prev_on_rq in local variables less useful. Open
code the $prev_on_scx test in balance_one() and the $prev_on_rq test in
both balance_one() and scx_dispatch_sched().
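
A simplified sketch of the resulting flow (illustrative only; see the
diff below for the actual code):

	/* returns true if a task was found to run for @sch */
	static bool scx_dispatch_sched(struct scx_sched *sch, struct rq *rq,
				       struct task_struct *prev);

	static int balance_one(struct rq *rq, struct task_struct *prev)
	{
		struct scx_sched *sch = scx_root;

		/* cpu_acquire notification; keep a queued @prev w/ slice left */
		...
		if (rq->scx.local_dsq.nr)
			goto has_tasks;

		if (scx_dispatch_sched(sch, rq, prev))
			goto has_tasks;

		/* nothing to run; maybe keep @prev depending on SCX_OPS_ENQ_LAST */
		...
	}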
No functional changes.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
kernel/sched/ext.c | 123 ++++++++++++++++++++++++---------------------
1 file changed, 65 insertions(+), 58 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 2aab3ccbd3e3..99ef2a1cc3ac 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2384,67 +2384,22 @@ static inline void maybe_queue_balance_callback(struct rq *rq)
rq->scx.flags &= ~SCX_RQ_BAL_CB_PENDING;
}
-static int balance_one(struct rq *rq, struct task_struct *prev)
+static bool scx_dispatch_sched(struct scx_sched *sch, struct rq *rq,
+ struct task_struct *prev)
{
- struct scx_sched *sch = scx_root;
struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
bool prev_on_scx = prev->sched_class == &ext_sched_class;
- bool prev_on_rq = prev->scx.flags & SCX_TASK_QUEUED;
int nr_loops = SCX_DSP_MAX_LOOPS;
s32 cpu = cpu_of(rq);
- lockdep_assert_rq_held(rq);
- rq->scx.flags |= SCX_RQ_IN_BALANCE;
- rq->scx.flags &= ~SCX_RQ_BAL_KEEP;
-
- if ((sch->ops.flags & SCX_OPS_HAS_CPU_PREEMPT) &&
- unlikely(rq->scx.cpu_released)) {
- /*
- * If the previous sched_class for the current CPU was not SCX,
- * notify the BPF scheduler that it again has control of the
- * core. This callback complements ->cpu_release(), which is
- * emitted in switch_class().
- */
- if (SCX_HAS_OP(sch, cpu_acquire))
- SCX_CALL_OP(sch, SCX_KF_REST, cpu_acquire, rq, cpu, NULL);
- rq->scx.cpu_released = false;
- }
-
- if (prev_on_scx) {
- update_curr_scx(rq);
-
- /*
- * If @prev is runnable & has slice left, it has priority and
- * fetching more just increases latency for the fetched tasks.
- * Tell pick_task_scx() to keep running @prev. If the BPF
- * scheduler wants to handle this explicitly, it should
- * implement ->cpu_release().
- *
- * See scx_disable_workfn() for the explanation on the bypassing
- * test.
- */
- if (prev_on_rq && prev->scx.slice && !scx_bypassing(sch, cpu)) {
- rq->scx.flags |= SCX_RQ_BAL_KEEP;
- goto has_tasks;
- }
- }
-
- /* if there already are tasks to run, nothing to do */
- if (rq->scx.local_dsq.nr)
- goto has_tasks;
-
if (consume_global_dsq(sch, rq))
- goto has_tasks;
+ return true;
- if (scx_bypassing(sch, cpu)) {
- if (consume_dispatch_q(sch, rq, bypass_dsq(sch, cpu)))
- goto has_tasks;
- else
- goto no_tasks;
- }
+ if (scx_bypassing(sch, cpu))
+ return consume_dispatch_q(sch, rq, bypass_dsq(sch, cpu));
if (unlikely(!SCX_HAS_OP(sch, dispatch)) || !scx_rq_online(rq))
- goto no_tasks;
+ return false;
dspc->rq = rq;
@@ -2463,14 +2418,14 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
flush_dispatch_buf(sch, rq);
- if (prev_on_rq && prev->scx.slice) {
+ if ((prev->scx.flags & SCX_TASK_QUEUED) && prev->scx.slice) {
rq->scx.flags |= SCX_RQ_BAL_KEEP;
- goto has_tasks;
+ return true;
}
if (rq->scx.local_dsq.nr)
- goto has_tasks;
+ return true;
if (consume_global_dsq(sch, rq))
- goto has_tasks;
+ return true;
/*
* ops.dispatch() can trap us in this loop by repeatedly
@@ -2479,7 +2434,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
* balance(), we want to complete this scheduling cycle and then
* start a new one. IOW, we want to call resched_curr() on the
* next, most likely idle, task, not the current one. Use
- * scx_kick_cpu() for deferred kicking.
+ * __scx_bpf_kick_cpu() for deferred kicking.
*/
if (unlikely(!--nr_loops)) {
scx_kick_cpu(sch, cpu, 0);
@@ -2487,12 +2442,64 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
}
} while (dspc->nr_tasks);
-no_tasks:
+ return false;
+}
+
+static int balance_one(struct rq *rq, struct task_struct *prev)
+{
+ struct scx_sched *sch = scx_root;
+ s32 cpu = cpu_of(rq);
+
+ lockdep_assert_rq_held(rq);
+ rq->scx.flags |= SCX_RQ_IN_BALANCE;
+ rq->scx.flags &= ~SCX_RQ_BAL_KEEP;
+
+ if ((sch->ops.flags & SCX_OPS_HAS_CPU_PREEMPT) &&
+ unlikely(rq->scx.cpu_released)) {
+ /*
+ * If the previous sched_class for the current CPU was not SCX,
+ * notify the BPF scheduler that it again has control of the
+ * core. This callback complements ->cpu_release(), which is
+ * emitted in switch_class().
+ */
+ if (SCX_HAS_OP(sch, cpu_acquire))
+ SCX_CALL_OP(sch, SCX_KF_REST, cpu_acquire, rq, cpu, NULL);
+ rq->scx.cpu_released = false;
+ }
+
+ if (prev->sched_class == &ext_sched_class) {
+ update_curr_scx(rq);
+
+ /*
+ * If @prev is runnable & has slice left, it has priority and
+ * fetching more just increases latency for the fetched tasks.
+ * Tell pick_task_scx() to keep running @prev. If the BPF
+ * scheduler wants to handle this explicitly, it should
+ * implement ->cpu_release().
+ *
+ * See scx_disable_workfn() for the explanation on the bypassing
+ * test.
+ */
+ if ((prev->scx.flags & SCX_TASK_QUEUED) && prev->scx.slice &&
+ !scx_bypassing(sch, cpu)) {
+ rq->scx.flags |= SCX_RQ_BAL_KEEP;
+ goto has_tasks;
+ }
+ }
+
+ /* if there already are tasks to run, nothing to do */
+ if (rq->scx.local_dsq.nr)
+ goto has_tasks;
+
+ /* dispatch @sch */
+ if (scx_dispatch_sched(sch, rq, prev))
+ goto has_tasks;
+
/*
* Didn't find another task to run. Keep running @prev unless
* %SCX_OPS_ENQ_LAST is in effect.
*/
- if (prev_on_rq &&
+ if ((prev->scx.flags & SCX_TASK_QUEUED) &&
(!(sch->ops.flags & SCX_OPS_ENQ_LAST) || scx_bypassing(sch, cpu))) {
rq->scx.flags |= SCX_RQ_BAL_KEEP;
__scx_add_event(sch, SCX_EV_DISPATCH_KEEP_LAST, 1);
--
2.53.0