From: Sasha Levin <sashal@kernel.org>
To: stable@vger.kernel.org
Cc: Tejun Heo <tj@kernel.org>, Sasha Levin <sashal@kernel.org>
Subject: [PATCH 6.12.y] sched_ext: Fix starvation of scx_enable() under fair-class saturation
Date: Tue, 17 Mar 2026 20:10:24 -0400 [thread overview]
Message-ID: <20260318001024.379990-1-sashal@kernel.org> (raw)
In-Reply-To: <2026031758-flagship-decimeter-9b5e@gregkh>
From: Tejun Heo <tj@kernel.org>
[ Upstream commit b06ccbabe2506fd70b9167a644978b049150224a ]
During scx_enable(), the READY -> ENABLED task switching loop changes the
calling thread's sched_class from fair to ext. Since fair has higher
priority than ext, saturating fair-class workloads can indefinitely starve
the enable thread, hanging the system. This was introduced when the enable
path switched from preempt_disable() to scx_bypass() which doesn't protect
against fair-class starvation. Note that the original preempt_disable()
protection wasn't complete either - in partial switch modes, the calling
thread could still be starved after preempt_enable() as it may have been
switched to ext class.
Fix it by offloading the enable body to a dedicated system-wide RT
(SCHED_FIFO) kthread which cannot be starved by either fair or ext class
tasks. scx_enable() lazily creates the kthread on first use and passes the
ops pointer through a struct scx_enable_cmd containing the kthread_work,
then synchronously waits for completion.
The workfn runs on a different kthread from sch->helper (which runs
disable_work), so it can safely flush disable_work on the error path
without deadlock.
Fixes: 8c2090c504e9 ("sched_ext: Initialize in bypass mode")
Cc: stable@vger.kernel.org # v6.12+
Signed-off-by: Tejun Heo <tj@kernel.org>
[ adapted per-scheduler scx_sched struct references to globals ]
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
kernel/sched/ext.c | 64 ++++++++++++++++++++++++++++++++++++++--------
1 file changed, 54 insertions(+), 10 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 7e79f39c7bcf6..f269128559a27 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -5150,19 +5150,29 @@ static int validate_ops(const struct sched_ext_ops *ops)
return 0;
}
-static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
+/*
+ * scx_ops_enable() is offloaded to a dedicated system-wide RT kthread to avoid
+ * starvation. During the READY -> ENABLED task switching loop, the calling
+ * thread's sched_class gets switched from fair to ext. As fair has higher
+ * priority than ext, the calling thread can be indefinitely starved under
+ * fair-class saturation, leading to a system hang.
+ */
+struct scx_enable_cmd {
+ struct kthread_work work;
+ struct sched_ext_ops *ops;
+ int ret;
+};
+
+static void scx_ops_enable_workfn(struct kthread_work *work)
{
+ struct scx_enable_cmd *cmd =
+ container_of(work, struct scx_enable_cmd, work);
+ struct sched_ext_ops *ops = cmd->ops;
struct scx_task_iter sti;
struct task_struct *p;
unsigned long timeout;
int i, cpu, node, ret;
- if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
- cpu_possible_mask)) {
- pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n");
- return -EINVAL;
- }
-
mutex_lock(&scx_ops_enable_mutex);
if (!scx_ops_helper) {
@@ -5429,7 +5439,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
atomic_long_inc(&scx_enable_seq);
- return 0;
+ cmd->ret = 0;
+ return;
err_del:
kobject_del(scx_root_kobj);
@@ -5442,7 +5453,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
}
err_unlock:
mutex_unlock(&scx_ops_enable_mutex);
- return ret;
+ cmd->ret = ret;
+ return;
err_disable_unlock_all:
scx_cgroup_unlock();
@@ -5461,7 +5473,39 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
*/
scx_ops_error("scx_ops_enable() failed (%d)", ret);
kthread_flush_work(&scx_ops_disable_work);
- return 0;
+ cmd->ret = 0;
+}
+
+static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
+{
+ static struct kthread_worker *helper;
+ static DEFINE_MUTEX(helper_mutex);
+ struct scx_enable_cmd cmd;
+
+ if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
+ cpu_possible_mask)) {
+ pr_err("sched_ext: Not compatible with \"isolcpus=\" domain isolation\n");
+ return -EINVAL;
+ }
+
+ if (!READ_ONCE(helper)) {
+ mutex_lock(&helper_mutex);
+ if (!helper) {
+ helper = scx_create_rt_helper("scx_ops_enable_helper");
+ if (!helper) {
+ mutex_unlock(&helper_mutex);
+ return -ENOMEM;
+ }
+ }
+ mutex_unlock(&helper_mutex);
+ }
+
+ kthread_init_work(&cmd.work, scx_ops_enable_workfn);
+ cmd.ops = ops;
+
+ kthread_queue_work(READ_ONCE(helper), &cmd.work);
+ kthread_flush_work(&cmd.work);
+ return cmd.ret;
}
--
2.51.0
prev parent reply other threads:[~2026-03-18 0:10 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-17 11:35 FAILED: patch "[PATCH] sched_ext: Fix starvation of scx_enable() under fair-class" failed to apply to 6.12-stable tree gregkh
2026-03-18 0:10 ` Sasha Levin [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260318001024.379990-1-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=stable@vger.kernel.org \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox