From: Tejun Heo <tj@kernel.org>
To: David Vernet <void@manifault.com>,
Andrea Righi <arighi@nvidia.com>,
Changwoo Min <changwoo@igalia.com>
Cc: sched-ext@lists.linux.dev, linux-kernel@vger.kernel.org,
Emil Tsalapatis <emil@etsalapatis.com>,
Chris Mason <clm@meta.com>, Ryan Newton <newton@meta.com>,
Tejun Heo <tj@kernel.org>,
stable@vger.kernel.org
Subject: [PATCH 11/13] sched_ext: Make bypass LB cpumasks per-scheduler
Date: Fri, 24 Apr 2026 10:44:16 -1000 [thread overview]
Message-ID: <20260424204418.3809733-12-tj@kernel.org> (raw)
In-Reply-To: <20260424204418.3809733-1-tj@kernel.org>
scx_bypass_lb_{donee,resched}_cpumask were file-scope statics shared by all
scheduler instances. With CONFIG_EXT_SUB_SCHED, multiple sched instances
each arm their own bypass_lb_timer; concurrent bypass_lb_node() calls RMW
the global cpumasks with no lock, corrupting donee/resched decisions.
Move the cpumasks into struct scx_sched, allocate them alongside the timer
in scx_alloc_and_add_sched(), free them in scx_sched_free_rcu_work().
Fixes: 95d1df610cdc ("sched_ext: Implement load balancer for bypass mode")
Cc: stable@vger.kernel.org # v6.19+
Reported-by: Chris Mason <clm@meta.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
kernel/sched/ext.c | 33 +++++++++++++++++++--------------
kernel/sched/ext_internal.h | 2 ++
2 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index da188af21b3d..0980531c0e6c 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -53,8 +53,6 @@ DEFINE_STATIC_KEY_FALSE(__scx_enabled);
DEFINE_STATIC_PERCPU_RWSEM(scx_fork_rwsem);
static atomic_t scx_enable_state_var = ATOMIC_INIT(SCX_DISABLED);
static DEFINE_RAW_SPINLOCK(scx_bypass_lock);
-static cpumask_var_t scx_bypass_lb_donee_cpumask;
-static cpumask_var_t scx_bypass_lb_resched_cpumask;
static bool scx_init_task_enabled;
static bool scx_switching_all;
DEFINE_STATIC_KEY_FALSE(__scx_switched_all);
@@ -4747,6 +4745,8 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
irq_work_sync(&sch->disable_irq_work);
kthread_destroy_worker(sch->helper);
timer_shutdown_sync(&sch->bypass_lb_timer);
+ free_cpumask_var(sch->bypass_lb_donee_cpumask);
+ free_cpumask_var(sch->bypass_lb_resched_cpumask);
#ifdef CONFIG_EXT_SUB_SCHED
kfree(sch->cgrp_path);
@@ -5102,8 +5102,8 @@ static u32 bypass_lb_cpu(struct scx_sched *sch, s32 donor,
static void bypass_lb_node(struct scx_sched *sch, int node)
{
const struct cpumask *node_mask = cpumask_of_node(node);
- struct cpumask *donee_mask = scx_bypass_lb_donee_cpumask;
- struct cpumask *resched_mask = scx_bypass_lb_resched_cpumask;
+ struct cpumask *donee_mask = sch->bypass_lb_donee_cpumask;
+ struct cpumask *resched_mask = sch->bypass_lb_resched_cpumask;
u32 nr_tasks = 0, nr_cpus = 0, nr_balanced = 0;
u32 nr_target, nr_donor_target;
u32 before_min = U32_MAX, before_max = 0;
@@ -6499,6 +6499,15 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
init_irq_work(&sch->disable_irq_work, scx_disable_irq_workfn);
kthread_init_work(&sch->disable_work, scx_disable_workfn);
timer_setup(&sch->bypass_lb_timer, scx_bypass_lb_timerfn, 0);
+
+ if (!alloc_cpumask_var(&sch->bypass_lb_donee_cpumask, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_stop_helper;
+ }
+ if (!alloc_cpumask_var(&sch->bypass_lb_resched_cpumask, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_free_lb_cpumask;
+ }
sch->ops = *ops;
rcu_assign_pointer(ops->priv, sch);
@@ -6508,14 +6517,14 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
char *buf = kzalloc(PATH_MAX, GFP_KERNEL);
if (!buf) {
ret = -ENOMEM;
- goto err_stop_helper;
+ goto err_free_lb_resched;
}
cgroup_path(cgrp, buf, PATH_MAX);
sch->cgrp_path = kstrdup(buf, GFP_KERNEL);
kfree(buf);
if (!sch->cgrp_path) {
ret = -ENOMEM;
- goto err_stop_helper;
+ goto err_free_lb_resched;
}
sch->cgrp = cgrp;
@@ -6550,10 +6559,12 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops,
#endif /* CONFIG_EXT_SUB_SCHED */
return sch;
-#ifdef CONFIG_EXT_SUB_SCHED
+err_free_lb_resched:
+ free_cpumask_var(sch->bypass_lb_resched_cpumask);
+err_free_lb_cpumask:
+ free_cpumask_var(sch->bypass_lb_donee_cpumask);
err_stop_helper:
kthread_destroy_worker(sch->helper);
-#endif
err_free_pcpu:
for_each_possible_cpu(cpu) {
if (cpu == bypass_fail_cpu)
@@ -9740,12 +9751,6 @@ static int __init scx_init(void)
return ret;
}
- if (!alloc_cpumask_var(&scx_bypass_lb_donee_cpumask, GFP_KERNEL) ||
- !alloc_cpumask_var(&scx_bypass_lb_resched_cpumask, GFP_KERNEL)) {
- pr_err("sched_ext: Failed to allocate cpumasks\n");
- return -ENOMEM;
- }
-
return 0;
}
__initcall(scx_init);
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index 62ce4eaf6a3f..a075732d4430 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -1075,6 +1075,8 @@ struct scx_sched {
struct irq_work disable_irq_work;
struct kthread_work disable_work;
struct timer_list bypass_lb_timer;
+ cpumask_var_t bypass_lb_donee_cpumask;
+ cpumask_var_t bypass_lb_resched_cpumask;
struct rcu_work rcu_work;
/* all ancestors including self */
--
2.53.0
next prev parent reply other threads:[~2026-04-24 20:44 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-24 20:44 [PATCHSET sched_ext/for-7.1-fixes] sched_ext: Assorted fixes Tejun Heo
2026-04-24 20:44 ` [PATCH 01/13] sched_ext: Unregister sub_kset on scheduler disable Tejun Heo
2026-04-24 20:44 ` [PATCH 02/13] sched_ext: Guard scx_dsq_move() against NULL kit->dsq after failed iter_new Tejun Heo
2026-04-24 20:44 ` [PATCH 03/13] sched_ext: Skip tasks with stale task_rq in bypass_lb_cpu() Tejun Heo
2026-04-24 20:44 ` [PATCH 04/13] sched_ext: Don't disable tasks in scx_sub_enable_workfn() abort path Tejun Heo
2026-04-24 20:44 ` [PATCH 05/13] sched_ext: Read scx_root under scx_cgroup_ops_rwsem in cgroup setters Tejun Heo
2026-04-24 20:44 ` [PATCH 06/13] sched_ext: Resolve caller's scheduler in scx_bpf_destroy_dsq() / scx_bpf_dsq_nr_queued() Tejun Heo
2026-04-24 20:44 ` [PATCH 07/13] sched_ext: Use dsq->first_task instead of list_empty() in dispatch_enqueue() FIFO-tail Tejun Heo
2026-04-24 20:44 ` [PATCH 08/13] sched_ext: Save and restore scx_locked_rq across SCX_CALL_OP Tejun Heo
2026-04-24 20:44 ` [PATCH 09/13] sched_ext: Pass held rq to SCX_CALL_OP() for dump_cpu/dump_task Tejun Heo
2026-04-24 20:44 ` [PATCH 10/13] sched_ext: Pass held rq to SCX_CALL_OP() for core_sched_before Tejun Heo
2026-04-24 20:44 ` Tejun Heo [this message]
2026-04-24 20:44 ` [PATCH 12/13] sched_ext: Align cgroup #ifdef guards with SUB_SCHED vs GROUP_SCHED Tejun Heo
2026-04-24 20:44 ` [PATCH 13/13] sched_ext: Refuse cross-task select_cpu_from_kfunc calls Tejun Heo
2026-04-24 21:46 ` Andrea Righi
2026-04-25 0:19 ` [PATCH v2 " Tejun Heo
2026-04-25 6:50 ` Andrea Righi
2026-04-24 21:08 ` [PATCH 14/13] sched_ext: Reject NULL-sch callers in scx_bpf_task_set_slice/dsq_vtime Tejun Heo
2026-04-24 21:08 ` [PATCH 15/13] sched_ext: Release cpus_read_lock on scx_link_sched() failure in root enable Tejun Heo
2026-04-24 22:00 ` Andrea Righi
2026-04-25 0:19 ` [PATCH v2 " Tejun Heo
2026-04-25 6:51 ` Andrea Righi
2026-04-24 22:10 ` [PATCHSET sched_ext/for-7.1-fixes] sched_ext: Assorted fixes Andrea Righi
2026-04-25 0:39 ` Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260424204418.3809733-12-tj@kernel.org \
--to=tj@kernel.org \
--cc=arighi@nvidia.com \
--cc=changwoo@igalia.com \
--cc=clm@meta.com \
--cc=emil@etsalapatis.com \
--cc=linux-kernel@vger.kernel.org \
--cc=newton@meta.com \
--cc=sched-ext@lists.linux.dev \
--cc=stable@vger.kernel.org \
--cc=void@manifault.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.