From: Peter Zijlstra <peterz@infradead.org>
To: Tejun Heo <tj@kernel.org>
Cc: David Vernet <void@manifault.com>,
Andrea Righi <andrea.righi@linux.dev>,
Changwoo Min <changwoo@igalia.com>,
linux-kernel@vger.kernel.org, sched-ext@lists.linux.dev,
Wen-Fang Liu <liuwenfang@honor.com>
Subject: Re: [PATCH 3/3] sched_ext: Allow scx_bpf_reenqueue_local() to be called from anywhere
Date: Tue, 28 Oct 2025 12:01:53 +0100 [thread overview]
Message-ID: <20251028110153.GZ4067720@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <aP-3QsygWJRn6Z2u@slm.duckdns.org>
On Mon, Oct 27, 2025 at 08:17:38AM -1000, Tejun Heo wrote:
> Hello,
>
> On Mon, Oct 27, 2025 at 07:10:28PM +0100, Peter Zijlstra wrote:
> ...
> > Just for my elucidation and such.. This is when ttwu() happens and the
> > CPU is idle and you dispatch directly to it, expecting it to then go run
> > that task. After which another wakeup/balance movement happens which
> > places/moves a task from a higher priority class to that CPU, such that
> > your initial (ext) task doesn't get to run after all. Right?
>
> Yes, that's the scenario that I was thinking.
So I've been pondering this a bit, and came up with the below. I'm not
quite happy with it; I meant to share that new queue_mask variable, but
this came out.
---
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2175,10 +2175,14 @@ void wakeup_preempt(struct rq *rq, struc
{
struct task_struct *donor = rq->donor;
- if (p->sched_class == donor->sched_class)
- donor->sched_class->wakeup_preempt(rq, p, flags);
- else if (sched_class_above(p->sched_class, donor->sched_class))
+ if (p->sched_class == rq->next_class) {
+ rq->next_class->wakeup_preempt(rq, p, flags);
+
+ } else if (sched_class_above(p->sched_class, rq->next_class)) {
+ rq->next_class->wakeup_preempt(rq, p, flags);
resched_curr(rq);
+ rq->next_class = p->sched_class;
+ }
/*
* A queue event has occurred, and we're going to schedule. In
@@ -6814,6 +6818,7 @@ static void __sched notrace __schedule(i
clear_tsk_need_resched(prev);
clear_preempt_need_resched();
keep_resched:
+ rq->next_class = next->sched_class;
rq->last_seen_need_resched_ns = 0;
is_switch = prev != next;
@@ -8653,6 +8658,8 @@ void __init sched_init(void)
rq->rt.rt_runtime = global_rt_runtime();
init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
#endif
+ rq->next_class = &idle_sched_class;
+
rq->sd = NULL;
rq->rd = NULL;
rq->cpu_capacity = SCHED_CAPACITY_SCALE;
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2289,9 +2289,16 @@ static int balance_dl(struct rq *rq, str
* Only called when both the current and waking task are -deadline
* tasks.
*/
-static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p,
- int flags)
+static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p, int flags)
{
+ /*
+ * Can only get preempted by stop-class, and those should be
+ * few and short lived, doesn't really make sense to push
+ * anything away for that.
+ */
+ if (p->sched_class != &dl_sched_class)
+ return;
+
if (dl_entity_preempt(&p->dl, &rq->donor->dl)) {
resched_curr(rq);
return;
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2966,7 +2966,12 @@ static void switched_from_scx(struct rq
scx_disable_task(p);
}
-static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p,int wake_flags) {}
+static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p, int wake_flags)
+{
+ if (p->sched_class != &ext_sched_class)
+ switch_class(rq, p);
+}
+
static void switched_to_scx(struct rq *rq, struct task_struct *p) {}
int scx_check_setscheduler(struct task_struct *p, int policy)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8729,7 +8729,7 @@ static void set_next_buddy(struct sched_
/*
* Preempt the current task with a newly woken task if needed:
*/
-static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int wake_flags)
+static void wakeup_preempt_fair(struct rq *rq, struct task_struct *p, int wake_flags)
{
struct task_struct *donor = rq->donor;
struct sched_entity *se = &donor->se, *pse = &p->se;
@@ -8737,6 +8737,12 @@ static void check_preempt_wakeup_fair(st
int cse_is_idle, pse_is_idle;
bool do_preempt_short = false;
+ /*
+ * XXX Getting preempted by higher class, try and find idle CPU?
+ */
+ if (p->sched_class != &fair_sched_class)
+ return;
+
if (unlikely(se == pse))
return;
@@ -13640,7 +13646,7 @@ DEFINE_SCHED_CLASS(fair) = {
.yield_task = yield_task_fair,
.yield_to_task = yield_to_task_fair,
- .wakeup_preempt = check_preempt_wakeup_fair,
+ .wakeup_preempt = wakeup_preempt_fair,
.pick_task = pick_task_fair,
.pick_next_task = pick_next_task_fair,
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1615,6 +1615,12 @@ static void wakeup_preempt_rt(struct rq
{
struct task_struct *donor = rq->donor;
+ /*
+ * XXX If we're preempted by DL, queue a push?
+ */
+ if (p->sched_class != &rt_sched_class)
+ return;
+
if (p->prio < donor->prio) {
resched_curr(rq);
return;
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1179,6 +1179,7 @@ struct rq {
struct sched_dl_entity *dl_server;
struct task_struct *idle;
struct task_struct *stop;
+ const struct sched_class *next_class;
unsigned long next_balance;
struct mm_struct *prev_mm;
next prev parent reply other threads:[~2025-10-28 11:02 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-25 0:18 [PATCHSET sched_ext/for-6.19] sched_ext: Deprecate ops.cpu_acquire/release() Tejun Heo
2025-10-25 0:18 ` [PATCH 1/3] sched_ext: Split schedule_deferred() into locked and unlocked variants Tejun Heo
2025-10-25 23:17 ` Emil Tsalapatis
2025-10-25 0:18 ` [PATCH 2/3] sched_ext: Factor out reenq_local() from scx_bpf_reenqueue_local() Tejun Heo
2025-10-25 23:19 ` Emil Tsalapatis
2025-10-25 0:18 ` [PATCH 3/3] sched_ext: Allow scx_bpf_reenqueue_local() to be called from anywhere Tejun Heo
2025-10-25 23:21 ` Emil Tsalapatis
2025-10-27 9:18 ` Peter Zijlstra
2025-10-27 16:00 ` Tejun Heo
2025-10-27 17:49 ` Peter Zijlstra
2025-10-27 18:05 ` Tejun Heo
2025-10-27 18:07 ` Peter Zijlstra
2025-10-27 18:10 ` Peter Zijlstra
2025-10-27 18:17 ` Tejun Heo
2025-10-28 11:01 ` Peter Zijlstra [this message]
2025-10-28 17:07 ` Tejun Heo
2025-10-27 18:19 ` [PATCH v2 " Tejun Heo
2025-10-29 10:45 ` Peter Zijlstra
2025-10-29 15:11 ` Tejun Heo
2025-10-29 15:49 ` [PATCH v3 " Tejun Heo
2025-11-27 10:39 ` Kuba Piecuch
2025-12-02 23:05 ` Tejun Heo
2025-12-11 14:24 ` Kuba Piecuch
2025-12-11 16:17 ` Tejun Heo
2025-12-11 16:20 ` Tejun Heo
2025-12-13 1:16 ` Andrea Righi
2025-12-13 1:18 ` Tejun Heo
2025-10-29 15:31 ` [PATCHSET sched_ext/for-6.19] sched_ext: Deprecate ops.cpu_acquire/release() Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251028110153.GZ4067720@noisy.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=andrea.righi@linux.dev \
--cc=changwoo@igalia.com \
--cc=linux-kernel@vger.kernel.org \
--cc=liuwenfang@honor.com \
--cc=sched-ext@lists.linux.dev \
--cc=tj@kernel.org \
--cc=void@manifault.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.