From: Tejun Heo <tj@kernel.org>
To: linux-kernel@vger.kernel.org, sched-ext@lists.linux.dev
Cc: void@manifault.com, arighi@nvidia.com, changwoo@igalia.com,
emil@etsalapatis.com, hannes@cmpxchg.org, mkoutny@suse.com,
cgroups@vger.kernel.org, Tejun Heo <tj@kernel.org>
Subject: [PATCH 08/34] sched_ext: Introduce scx_task_sched[_rcu]()
Date: Tue, 24 Feb 2026 19:01:26 -1000 [thread overview]
Message-ID: <20260225050152.1070601-9-tj@kernel.org> (raw)
In-Reply-To: <20260225050152.1070601-1-tj@kernel.org>
In preparation for multiple scheduler support, add p->scx.sched which points
to the scx_sched instance that the task is scheduled by, which is currently
always scx_root. Add scx_task_sched[_rcu]() accessors which return the
associated scx_sched of the specified task and replace the raw scx_root
dereferences with them where applicable. scx_task_on_sched() is also added to
test whether a given task is on the specified sched.
As scx_root is still the only scheduler, this shouldn't introduce
user-visible behavior changes.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
include/linux/sched/ext.h | 7 +++++
kernel/sched/ext.c | 63 +++++++++++++++++++++++--------------
kernel/sched/ext_internal.h | 59 ++++++++++++++++++++++++++++++++++
3 files changed, 105 insertions(+), 24 deletions(-)
diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index fa4349b319e6..3213e31c7979 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -165,6 +165,13 @@ struct scx_sched;
* for a task to be scheduled by SCX.
*/
struct sched_ext_entity {
+#ifdef CONFIG_CGROUPS
+ /*
+ * Associated scx_sched. Updated either during fork or while holding
+ * both p->pi_lock and rq lock.
+ */
+ struct scx_sched __rcu *sched;
+#endif
struct scx_dispatch_q *dsq;
atomic_long_t ops_state;
u64 ddsp_dsq_id;
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index b2b41a33c3a3..0afbc21f5f76 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -19,7 +19,7 @@ static DEFINE_RAW_SPINLOCK(scx_sched_lock);
* are used as temporary markers to indicate that the dereferences need to be
* updated to point to the associated scheduler instances rather than scx_root.
*/
-static struct scx_sched __rcu *scx_root;
+struct scx_sched __rcu *scx_root;
/*
* All scheds, writers must hold both scx_enable_mutex and scx_sched_lock.
@@ -304,9 +304,15 @@ static struct scx_sched *scx_next_descendant_pre(struct scx_sched *pos,
return NULL;
}
+
+static void scx_set_task_sched(struct task_struct *p, struct scx_sched *sch)
+{
+ rcu_assign_pointer(p->scx.sched, sch);
+}
#else /* CONFIG_EXT_SUB_SCHED */
static struct scx_sched *scx_parent(struct scx_sched *sch) { return NULL; }
static struct scx_sched *scx_next_descendant_pre(struct scx_sched *pos, struct scx_sched *root) { return pos ? NULL : root; }
+static void scx_set_task_sched(struct task_struct *p, struct scx_sched *sch) {}
#endif /* CONFIG_EXT_SUB_SCHED */
/**
@@ -1538,7 +1544,7 @@ static bool scx_rq_online(struct rq *rq)
static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
int sticky_cpu)
{
- struct scx_sched *sch = scx_root;
+ struct scx_sched *sch = scx_task_sched(p);
struct task_struct **ddsp_taskp;
struct scx_dispatch_q *dsq;
unsigned long qseq;
@@ -1668,7 +1674,7 @@ static void clr_task_runnable(struct task_struct *p, bool reset_runnable_at)
static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags)
{
- struct scx_sched *sch = scx_root;
+ struct scx_sched *sch = scx_task_sched(p);
int sticky_cpu = p->scx.sticky_cpu;
if (enq_flags & ENQUEUE_WAKEUP)
@@ -1719,7 +1725,7 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags
static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags)
{
- struct scx_sched *sch = scx_root;
+ struct scx_sched *sch = scx_task_sched(p);
unsigned long opss;
u64 op_deq_flags = deq_flags;
@@ -1790,7 +1796,7 @@ static void ops_dequeue(struct rq *rq, struct task_struct *p, u64 deq_flags)
static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags)
{
- struct scx_sched *sch = scx_root;
+ struct scx_sched *sch = scx_task_sched(p);
if (!(p->scx.flags & SCX_TASK_QUEUED)) {
WARN_ON_ONCE(task_runnable(p));
@@ -1834,8 +1840,8 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
static void yield_task_scx(struct rq *rq)
{
- struct scx_sched *sch = scx_root;
struct task_struct *p = rq->donor;
+ struct scx_sched *sch = scx_task_sched(p);
if (SCX_HAS_OP(sch, yield))
SCX_CALL_OP_2TASKS_RET(sch, SCX_KF_REST, yield, rq, p, NULL);
@@ -1845,10 +1851,10 @@ static void yield_task_scx(struct rq *rq)
static bool yield_to_task_scx(struct rq *rq, struct task_struct *to)
{
- struct scx_sched *sch = scx_root;
struct task_struct *from = rq->donor;
+ struct scx_sched *sch = scx_task_sched(from);
- if (SCX_HAS_OP(sch, yield))
+ if (SCX_HAS_OP(sch, yield) && sch == scx_task_sched(to))
return SCX_CALL_OP_2TASKS_RET(sch, SCX_KF_REST, yield, rq,
from, to);
else
@@ -2513,7 +2519,7 @@ static void process_ddsp_deferred_locals(struct rq *rq)
*/
while ((p = list_first_entry_or_null(&rq->scx.ddsp_deferred_locals,
struct task_struct, scx.dsq_list.node))) {
- struct scx_sched *sch = scx_root;
+ struct scx_sched *sch = scx_task_sched(p);
struct scx_dispatch_q *dsq;
list_del_init(&p->scx.dsq_list.node);
@@ -2527,7 +2533,7 @@ static void process_ddsp_deferred_locals(struct rq *rq)
static void set_next_task_scx(struct rq *rq, struct task_struct *p, bool first)
{
- struct scx_sched *sch = scx_root;
+ struct scx_sched *sch = scx_task_sched(p);
if (p->scx.flags & SCX_TASK_QUEUED) {
/*
@@ -2624,7 +2630,7 @@ static void switch_class(struct rq *rq, struct task_struct *next)
static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
struct task_struct *next)
{
- struct scx_sched *sch = scx_root;
+ struct scx_sched *sch = scx_task_sched(p);
/* see kick_cpus_irq_workfn() */
smp_store_release(&rq->scx.kick_sync, rq->scx.kick_sync + 1);
@@ -2718,14 +2724,14 @@ do_pick_task_scx(struct rq *rq, struct rq_flags *rf, bool force_scx)
if (keep_prev) {
p = prev;
if (!p->scx.slice)
- refill_task_slice_dfl(rcu_dereference_sched(scx_root), p);
+ refill_task_slice_dfl(scx_task_sched(p), p);
} else {
p = first_local_task(rq);
if (!p)
return NULL;
if (unlikely(!p->scx.slice)) {
- struct scx_sched *sch = rcu_dereference_sched(scx_root);
+ struct scx_sched *sch = scx_task_sched(p);
if (!scx_rq_bypassing(rq) && !sch->warned_zero_slice) {
printk_deferred(KERN_WARNING "sched_ext: %s[%d] has zero slice in %s()\n",
@@ -2813,7 +2819,7 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flags)
{
- struct scx_sched *sch = scx_root;
+ struct scx_sched *sch = scx_task_sched(p);
bool rq_bypass;
/*
@@ -2874,7 +2880,7 @@ static void task_woken_scx(struct rq *rq, struct task_struct *p)
static void set_cpus_allowed_scx(struct task_struct *p,
struct affinity_context *ac)
{
- struct scx_sched *sch = scx_root;
+ struct scx_sched *sch = scx_task_sched(p);
set_cpus_allowed_common(p, ac);
@@ -3018,7 +3024,7 @@ void scx_tick(struct rq *rq)
static void task_tick_scx(struct rq *rq, struct task_struct *curr, int queued)
{
- struct scx_sched *sch = scx_root;
+ struct scx_sched *sch = scx_task_sched(curr);
update_curr_scx(rq);
@@ -3208,11 +3214,12 @@ static void scx_disable_task(struct task_struct *p)
static void scx_exit_task(struct task_struct *p)
{
- struct scx_sched *sch = scx_root;
+ struct scx_sched *sch = scx_task_sched(p);
struct scx_exit_task_args args = {
.cancelled = false,
};
+ lockdep_assert_held(&p->pi_lock);
lockdep_assert_rq_held(task_rq(p));
switch (scx_get_task_state(p)) {
@@ -3234,6 +3241,7 @@ static void scx_exit_task(struct task_struct *p)
if (SCX_HAS_OP(sch, exit_task))
SCX_CALL_OP_TASK(sch, SCX_KF_REST, exit_task, task_rq(p),
p, &args);
+ scx_set_task_sched(p, NULL);
scx_set_task_state(p, SCX_TASK_NONE);
}
@@ -3263,12 +3271,18 @@ void scx_pre_fork(struct task_struct *p)
int scx_fork(struct task_struct *p, struct kernel_clone_args *kargs)
{
+ s32 ret;
+
percpu_rwsem_assert_held(&scx_fork_rwsem);
- if (scx_init_task_enabled)
- return scx_init_task(p, task_group(p), true);
- else
- return 0;
+ if (scx_init_task_enabled) {
+ ret = scx_init_task(p, task_group(p), true);
+ if (!ret)
+ scx_set_task_sched(p, scx_root);
+ return ret;
+ }
+
+ return 0;
}
void scx_post_fork(struct task_struct *p)
@@ -3373,7 +3387,7 @@ void sched_ext_dead(struct task_struct *p)
static void reweight_task_scx(struct rq *rq, struct task_struct *p,
const struct load_weight *lw)
{
- struct scx_sched *sch = scx_root;
+ struct scx_sched *sch = scx_task_sched(p);
lockdep_assert_rq_held(task_rq(p));
@@ -3392,7 +3406,7 @@ static void prio_changed_scx(struct rq *rq, struct task_struct *p, u64 oldprio)
static void switching_to_scx(struct rq *rq, struct task_struct *p)
{
- struct scx_sched *sch = scx_root;
+ struct scx_sched *sch = scx_task_sched(p);
if (task_dead_and_done(p))
return;
@@ -4043,7 +4057,7 @@ bool scx_allow_ttwu_queue(const struct task_struct *p)
if (!scx_enabled())
return true;
- sch = rcu_dereference_sched(scx_root);
+ sch = scx_task_sched(p);
if (unlikely(!sch))
return true;
@@ -5554,6 +5568,7 @@ static s32 scx_root_enable(struct sched_ext_ops *ops, struct bpf_link *link)
goto err_disable_unlock_all;
}
+ scx_set_task_sched(p, sch);
scx_set_task_state(p, SCX_TASK_READY);
put_task_struct(p);
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index 6b5be9bf3af5..f7bc352eed64 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -1141,6 +1141,7 @@ enum scx_ops_state {
#define SCX_OPSS_STATE_MASK ((1LU << SCX_OPSS_QSEQ_SHIFT) - 1)
#define SCX_OPSS_QSEQ_MASK (~SCX_OPSS_STATE_MASK)
+extern struct scx_sched __rcu *scx_root;
DECLARE_PER_CPU(struct rq *, scx_locked_rq_state);
/*
@@ -1161,3 +1162,61 @@ static inline bool scx_rq_bypassing(struct rq *rq)
{
return unlikely(rq->scx.flags & SCX_RQ_BYPASSING);
}
+
+#ifdef CONFIG_EXT_SUB_SCHED
+/**
+ * scx_task_sched - Find scx_sched scheduling a task
+ * @p: task of interest
+ *
+ * Return @p's scheduler instance. Must be called with @p's pi_lock or rq lock
+ * held.
+ */
+static inline struct scx_sched *scx_task_sched(const struct task_struct *p)
+{
+ return rcu_dereference_protected(p->scx.sched,
+ lockdep_is_held(&p->pi_lock) ||
+ lockdep_is_held(__rq_lockp(task_rq(p))));
+}
+
+/**
+ * scx_task_sched_rcu - Find scx_sched scheduling a task
+ * @p: task of interest
+ *
+ * Return @p's scheduler instance. The returned scx_sched is RCU protected.
+ */
+static inline struct scx_sched *scx_task_sched_rcu(const struct task_struct *p)
+{
+ return rcu_dereference_all(p->scx.sched);
+}
+
+/**
+ * scx_task_on_sched - Is a task on the specified sched?
+ * @sch: sched to test against
+ * @p: task of interest
+ *
+ * Returns %true if @p is on @sch, %false otherwise.
+ */
+static inline bool scx_task_on_sched(struct scx_sched *sch,
+ const struct task_struct *p)
+{
+ return rcu_access_pointer(p->scx.sched) == sch;
+}
+#else /* CONFIG_EXT_SUB_SCHED */
+static inline struct scx_sched *scx_task_sched(const struct task_struct *p)
+{
+ return rcu_dereference_protected(scx_root,
+ lockdep_is_held(&p->pi_lock) ||
+ lockdep_is_held(__rq_lockp(task_rq(p))));
+}
+
+static inline struct scx_sched *scx_task_sched_rcu(const struct task_struct *p)
+{
+ return rcu_dereference_all(scx_root);
+}
+
+static inline bool scx_task_on_sched(struct scx_sched *sch,
+ const struct task_struct *p)
+{
+ return true;
+}
+#endif /* CONFIG_EXT_SUB_SCHED */
--
2.53.0
next prev parent reply other threads:[~2026-02-25 5:02 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-25 5:01 [PATCHSET v2 sched_ext/for-7.1] sched_ext: Implement cgroup sub-scheduler support Tejun Heo
2026-02-25 5:01 ` [PATCH 01/34] sched_ext: Implement cgroup subtree iteration for scx_task_iter Tejun Heo
2026-02-25 5:01 ` [PATCH 02/34] sched_ext: Add @kargs to scx_fork() Tejun Heo
2026-02-25 5:01 ` [PATCH 03/34] sched/core: Swap the order between sched_post_fork() and cgroup_post_fork() Tejun Heo
2026-02-25 5:01 ` [PATCH 04/34] cgroup: Expose some cgroup helpers Tejun Heo
2026-02-25 5:01 ` [PATCH 05/34] sched_ext: Update p->scx.disallow warning in scx_init_task() Tejun Heo
2026-02-25 5:01 ` [PATCH 06/34] sched_ext: Reorganize enable/disable path for multi-scheduler support Tejun Heo
2026-02-25 5:01 ` [PATCH 07/34] sched_ext: Introduce cgroup sub-sched support Tejun Heo
2026-02-25 5:01 ` Tejun Heo [this message]
2026-02-25 5:01 ` [PATCH 09/34] sched_ext: Introduce scx_prog_sched() Tejun Heo
2026-02-25 5:01 ` [PATCH 10/34] sched_ext: Enforce scheduling authority in dispatch and select_cpu operations Tejun Heo
2026-02-25 5:01 ` [PATCH 11/34] sched_ext: Enforce scheduler ownership when updating slice and dsq_vtime Tejun Heo
2026-02-25 5:01 ` [PATCH 12/34] sched_ext: scx_dsq_move() should validate the task belongs to the right scheduler Tejun Heo
2026-02-25 5:01 ` [PATCH 13/34] sched_ext: Refactor task init/exit helpers Tejun Heo
2026-02-25 5:01 ` [PATCH 14/34] sched_ext: Make scx_prio_less() handle multiple schedulers Tejun Heo
2026-02-25 5:01 ` [PATCH 15/34] sched_ext: Move default slice to per-scheduler field Tejun Heo
2026-02-25 5:01 ` [PATCH 16/34] sched_ext: Move aborting flag " Tejun Heo
2026-02-25 5:01 ` [PATCH 17/34] sched_ext: Move bypass_dsq into scx_sched_pcpu Tejun Heo
2026-02-25 5:01 ` [PATCH 18/34] sched_ext: Move bypass state into scx_sched Tejun Heo
2026-02-25 5:01 ` [PATCH 19/34] sched_ext: Prepare bypass mode for hierarchical operation Tejun Heo
2026-02-25 5:01 ` [PATCH 20/34] sched_ext: Factor out scx_dispatch_sched() Tejun Heo
2026-02-25 5:01 ` [PATCH 21/34] sched_ext: When calling ops.dispatch() @prev must be on the same scx_sched Tejun Heo
2026-02-25 5:01 ` [PATCH 22/34] sched_ext: Separate bypass dispatch enabling from bypass depth tracking Tejun Heo
2026-02-25 5:01 ` [PATCH 23/34] sched_ext: Implement hierarchical bypass mode Tejun Heo
2026-02-25 5:01 ` [PATCH 24/34] sched_ext: Dispatch from all scx_sched instances Tejun Heo
2026-02-25 5:01 ` [PATCH 25/34] sched_ext: Move scx_dsp_ctx and scx_dsp_max_batch into scx_sched Tejun Heo
2026-02-25 5:01 ` [PATCH 26/34] sched_ext: Make watchdog sub-sched aware Tejun Heo
2026-02-25 5:01 ` [PATCH 27/34] sched_ext: Convert scx_dump_state() spinlock to raw spinlock Tejun Heo
2026-02-25 5:01 ` [PATCH 28/34] sched_ext: Support dumping multiple schedulers and add scheduler identification Tejun Heo
2026-02-25 5:01 ` [PATCH 29/34] sched_ext: Implement cgroup sub-sched enabling and disabling Tejun Heo
2026-02-25 5:01 ` [PATCH 30/34] sched_ext: Add scx_sched back pointer to scx_sched_pcpu Tejun Heo
2026-02-25 5:01 ` [PATCH 31/34] sched_ext: Make scx_bpf_reenqueue_local() sub-sched aware Tejun Heo
2026-02-25 5:01 ` [PATCH 32/34] sched_ext: Factor out scx_link_sched() and scx_unlink_sched() Tejun Heo
2026-02-25 5:01 ` [PATCH 33/34] sched_ext: Add rhashtable lookup for sub-schedulers Tejun Heo
2026-02-25 5:01 ` [PATCH 34/34] sched_ext: Add basic building blocks for nested sub-scheduler dispatching Tejun Heo
2026-02-25 5:18 ` [PATCHSET v2 sched_ext/for-7.1] sched_ext: Implement cgroup sub-scheduler support Tejun Heo
-- strict thread matches above, loose matches on Subject: below --
2026-03-04 22:00 [PATCHSET v3 " Tejun Heo
2026-03-04 22:00 ` [PATCH 08/34] sched_ext: Introduce scx_task_sched[_rcu]() Tejun Heo
2026-02-25 5:00 [PATCHSET v2 sched_ext/for-7.1] sched_ext: Implement cgroup sub-scheduler support Tejun Heo
2026-02-25 5:00 ` [PATCH 08/34] sched_ext: Introduce scx_task_sched[_rcu]() Tejun Heo
2026-01-21 23:11 [PATCHSET v1 sched_ext/for-6.20] sched_ext: Implement cgroup sub-scheduler support Tejun Heo
2026-01-21 23:11 ` [PATCH 08/34] sched_ext: Introduce scx_task_sched[_rcu]() Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260225050152.1070601-9-tj@kernel.org \
--to=tj@kernel.org \
--cc=arighi@nvidia.com \
--cc=cgroups@vger.kernel.org \
--cc=changwoo@igalia.com \
--cc=emil@etsalapatis.com \
--cc=hannes@cmpxchg.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mkoutny@suse.com \
--cc=sched-ext@lists.linux.dev \
--cc=void@manifault.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.