* [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it
@ 2026-04-24 1:32 Tejun Heo
0 siblings, 0 replies; 3+ messages in thread
From: Tejun Heo @ 2026-04-24 1:32 UTC (permalink / raw)
To: David Vernet, Andrea Righi, Changwoo Min
Cc: sched-ext, emil, linux-kernel, Cheng-Yang Chou, Zhao Mengmeng,
Tejun Heo
Rename the static ext.c helper and declare it in ext_internal.h so
ext_idle.c and the upcoming cid code can call it directly instead of
relying on build_policy.c textual inclusion.
Pure rename and visibility change.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
---
kernel/sched/ext.c | 22 +++++++++++-----------
kernel/sched/ext_idle.c | 6 +++---
kernel/sched/ext_internal.h | 2 ++
3 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 0ba12449f0c6..1d6613dc4d3b 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1055,7 +1055,7 @@ static inline bool __cpu_valid(s32 cpu)
}
/**
- * ops_cpu_valid - Verify a cpu number, to be used on ops input args
+ * scx_cpu_valid - Verify a cpu number, to be used on ops input args
* @sch: scx_sched to abort on error
* @cpu: cpu number which came from a BPF ops
* @where: extra information reported on error
@@ -1064,7 +1064,7 @@ static inline bool __cpu_valid(s32 cpu)
* Verify that it is in range and one of the possible cpus. If invalid, trigger
* an ops error.
*/
-static bool ops_cpu_valid(struct scx_sched *sch, s32 cpu, const char *where)
+bool scx_cpu_valid(struct scx_sched *sch, s32 cpu, const char *where)
{
if (__cpu_valid(cpu)) {
return true;
@@ -1677,7 +1677,7 @@ static struct scx_dispatch_q *find_dsq_for_dispatch(struct scx_sched *sch,
if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) {
s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
- if (!ops_cpu_valid(sch, cpu, "in SCX_DSQ_LOCAL_ON dispatch verdict"))
+ if (!scx_cpu_valid(sch, cpu, "in SCX_DSQ_LOCAL_ON dispatch verdict"))
return find_global_dsq(sch, tcpu);
return &cpu_rq(cpu)->scx.local_dsq;
@@ -3260,7 +3260,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
this_rq()->scx.in_select_cpu = false;
p->scx.selected_cpu = cpu;
*ddsp_taskp = NULL;
- if (ops_cpu_valid(sch, cpu, "from ops.select_cpu()"))
+ if (scx_cpu_valid(sch, cpu, "from ops.select_cpu()"))
return cpu;
else
return prev_cpu;
@@ -8679,7 +8679,7 @@ static void scx_kick_cpu(struct scx_sched *sch, s32 cpu, u64 flags)
struct rq *this_rq;
unsigned long irq_flags;
- if (!ops_cpu_valid(sch, cpu, NULL))
+ if (!scx_cpu_valid(sch, cpu, NULL))
return;
local_irq_save(irq_flags);
@@ -8775,7 +8775,7 @@ __bpf_kfunc s32 scx_bpf_dsq_nr_queued(u64 dsq_id)
} else if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) {
s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
- if (ops_cpu_valid(sch, cpu, NULL)) {
+ if (scx_cpu_valid(sch, cpu, NULL)) {
ret = READ_ONCE(cpu_rq(cpu)->scx.local_dsq.nr);
goto out;
}
@@ -9164,7 +9164,7 @@ __bpf_kfunc u32 scx_bpf_cpuperf_cap(s32 cpu, const struct bpf_prog_aux *aux)
guard(rcu)();
sch = scx_prog_sched(aux);
- if (likely(sch) && ops_cpu_valid(sch, cpu, NULL))
+ if (likely(sch) && scx_cpu_valid(sch, cpu, NULL))
return arch_scale_cpu_capacity(cpu);
else
return SCX_CPUPERF_ONE;
@@ -9192,7 +9192,7 @@ __bpf_kfunc u32 scx_bpf_cpuperf_cur(s32 cpu, const struct bpf_prog_aux *aux)
guard(rcu)();
sch = scx_prog_sched(aux);
- if (likely(sch) && ops_cpu_valid(sch, cpu, NULL))
+ if (likely(sch) && scx_cpu_valid(sch, cpu, NULL))
return arch_scale_freq_capacity(cpu);
else
return SCX_CPUPERF_ONE;
@@ -9228,7 +9228,7 @@ __bpf_kfunc void scx_bpf_cpuperf_set(s32 cpu, u32 perf, const struct bpf_prog_au
return;
}
- if (ops_cpu_valid(sch, cpu, NULL)) {
+ if (scx_cpu_valid(sch, cpu, NULL)) {
struct rq *rq = cpu_rq(cpu), *locked_rq = scx_locked_rq();
struct rq_flags rf;
@@ -9341,7 +9341,7 @@ __bpf_kfunc struct rq *scx_bpf_cpu_rq(s32 cpu, const struct bpf_prog_aux *aux)
if (unlikely(!sch))
return NULL;
- if (!ops_cpu_valid(sch, cpu, NULL))
+ if (!scx_cpu_valid(sch, cpu, NULL))
return NULL;
if (!sch->warned_deprecated_rq) {
@@ -9398,7 +9398,7 @@ __bpf_kfunc struct task_struct *scx_bpf_cpu_curr(s32 cpu, const struct bpf_prog_
if (unlikely(!sch))
return NULL;
- if (!ops_cpu_valid(sch, cpu, NULL))
+ if (!scx_cpu_valid(sch, cpu, NULL))
return NULL;
return rcu_dereference(cpu_rq(cpu)->curr);
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index c43d62d90e40..11d11ea6ca6b 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -917,7 +917,7 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
bool we_locked = false;
s32 cpu;
- if (!ops_cpu_valid(sch, prev_cpu, NULL))
+ if (!scx_cpu_valid(sch, prev_cpu, NULL))
return -EINVAL;
if (!check_builtin_idle_enabled(sch))
@@ -975,7 +975,7 @@ __bpf_kfunc s32 scx_bpf_cpu_node(s32 cpu, const struct bpf_prog_aux *aux)
guard(rcu)();
sch = scx_prog_sched(aux);
- if (unlikely(!sch) || !ops_cpu_valid(sch, cpu, NULL))
+ if (unlikely(!sch) || !scx_cpu_valid(sch, cpu, NULL))
return NUMA_NO_NODE;
return cpu_to_node(cpu);
}
@@ -1257,7 +1257,7 @@ __bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu, const struct bpf_prog_
if (!check_builtin_idle_enabled(sch))
return false;
- if (!ops_cpu_valid(sch, cpu, NULL))
+ if (!scx_cpu_valid(sch, cpu, NULL))
return false;
return scx_idle_test_and_clear_cpu(cpu);
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index c35098668fb1..5e3b79963d41 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -1350,6 +1350,8 @@ DECLARE_PER_CPU(struct rq *, scx_locked_rq_state);
int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id);
+bool scx_cpu_valid(struct scx_sched *sch, s32 cpu, const char *where);
+
/*
* Return the rq currently locked from an scx callback, or NULL if no rq is
* locked.
--
2.53.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCHSET v2 REPOST sched_ext/for-7.2] sched_ext: Topological CPU IDs and cid-form struct_ops
@ 2026-04-24 17:27 Tejun Heo
2026-04-24 17:27 ` [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it Tejun Heo
0 siblings, 1 reply; 3+ messages in thread
From: Tejun Heo @ 2026-04-24 17:27 UTC (permalink / raw)
To: David Vernet, Andrea Righi, Changwoo Min
Cc: sched-ext, emil, linux-kernel, Cheng-Yang Chou, Zhao Mengmeng,
Tejun Heo
Hello,
Reposting v2 because the original send was not properly threaded -
each patch went out as a standalone top-level message. Content is
unchanged from the original v2.
Original v2: https://lore.kernel.org/r/20260424013220.2923402-1-tj@kernel.org
v2 of https://lore.kernel.org/r/20260421071945.3110084-1-tj@kernel.org
v2:
- Add ext_types.h as the first patch, for early subsystem-wide type defs.
- cid: publish the cid tables with WRITE_ONCE / read with READ_ONCE;
document the visibility contract.
- cid-kfuncs: NULL-guard scx_bpf_this_cid / scx_bpf_task_cid for
TRACING/SYSCALL callers before any SCX sched has been enabled.
- cid-struct-ops: use struct_size() for the set_cmask_scratch percpu
alloc; cluster __scx_is_cid_type disable with __scx_enabled disable
in scx_root_disable().
- cid-kfunc-filter: sync per-entry kfunc flags with each kfunc's
primary BTF_ID_FLAGS() declaration (Zhao). pahole intersects flags
across occurrences; omitting them drops the flags globally - the
visible symptom was KF_IMPLICIT_ARGS getting cleared on
scx_bpf_kick_cpu, leaking bpf_prog_aux into vmlinux.h.
- cmask: narrow to the helpers this series actually uses;
cmask_copy_from_kernel contract and runtime guard.
This patchset introduces topological CPU IDs (cids) - dense,
topology-ordered cpu identifiers - and an alternative cid-form struct_ops
type that lets BPF schedulers operate in cid space directly.
Key pieces:
- cid space: scx_cid_init() walks nodes * LLCs * cores * threads and packs
a dense cid mapping. The mapping can be overridden via
scx_bpf_cid_override(). See "Topological CPU IDs" in ext_cid.h for the
model.
- cmask: a base-windowed bitmap over cid space. Kernel and BPF helpers with
identical semantics. Used by scx_qmap for per-task affinity and idle-cid
tracking; meant to be the substrate for sub-sched cid allocation.
- bpf_sched_ext_ops_cid: a parallel struct_ops type whose callbacks take
cids/cmasks instead of cpus/cpumasks. Kernel translates at the boundary
via scx_cpu_arg() / scx_cpu_ret(); the two struct types share offsets up
through @priv (verified by BUILD_BUG_ON) so the union view in scx_sched
works without function-pointer casts. Sub-sched support is tied to
cid-form: validate_ops() rejects cpu-form sub-scheds and cpu-form roots
that expose sub_attach / sub_detach.
- cid-form kfuncs: scx_bpf_kick_cid, scx_bpf_cidperf_{cap,cur,set},
scx_bpf_cid_curr, scx_bpf_task_cid, scx_bpf_this_cid,
scx_bpf_nr_{cids,online_cids}, scx_bpf_cid_to_cpu, scx_bpf_cpu_to_cid.
A cid-form program may not call cpu-only kfuncs (enforced at verifier
load via scx_kfunc_context_filter); the reverse is intentionally
permissive to ease migration.
- scx_qmap port: scx_qmap is converted to cid-form. It uses the cmask-based
idle picker, per-task cid-space cpus_allowed, and cid-form kfuncs
throughout. Sub-sched dispatching via scx_bpf_sub_dispatch() continues to
work.
v2 re-tested on the 16-cpu QEMU: cid-form scx_qmap, cpu-form scx_simple,
cid<->cpu cycling, scx_qmap under stress-ng, hotplug auto-restart, and
sub-sched (root scx_qmap + cgroup-scoped scx_qmap child). Clean.
Based on sched_ext/for-7.2 (c2929bc21dce).
0001-sched_ext-Add-ext_types.h-for-early-subsystem-wide-d.patch
0002-sched_ext-Rename-ops_cpu_valid-to-scx_cpu_valid-and-.patch
0003-sched_ext-Move-scx_exit-scx_error-and-friends-to-ext.patch
0004-sched_ext-Shift-scx_kick_cpu-validity-check-to-scx_b.patch
0005-sched_ext-Relocate-cpu_acquire-cpu_release-to-end-of.patch
0006-sched_ext-Make-scx_enable-take-scx_enable_cmd.patch
0007-sched_ext-Add-topological-CPU-IDs-cids.patch
0008-sched_ext-Add-scx_bpf_cid_override-kfunc.patch
0009-tools-sched_ext-Add-struct_size-helpers-to-common.bp.patch
0010-sched_ext-Add-cmask-a-base-windowed-bitmap-over-cid-.patch
0011-sched_ext-Add-cid-form-kfunc-wrappers-alongside-cpu-.patch
0012-sched_ext-Add-bpf_sched_ext_ops_cid-struct_ops-type.patch
0013-sched_ext-Forbid-cpu-form-kfuncs-from-cid-form-sched.patch
0014-tools-sched_ext-scx_qmap-Restart-on-hotplug-instead-.patch
0015-tools-sched_ext-scx_qmap-Add-cmask-based-idle-tracki.patch
0016-tools-sched_ext-scx_qmap-Port-to-cid-form-struct_ops.patch
0017-sched_ext-Require-cid-form-struct_ops-for-sub-sched-.patch
Git tree: git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git scx-cid-v2
kernel/sched/build_policy.c | 2 +
kernel/sched/ext.c | 650 +++++++++++++++++++++++++----
kernel/sched/ext_cid.c | 417 ++++++++++++++++++++
kernel/sched/ext_cid.h | 164 ++++++++
kernel/sched/ext_idle.c | 8 +-
kernel/sched/ext_internal.h | 203 +++++++---
kernel/sched/ext_types.h | 104 +++++
tools/sched_ext/include/scx/cid.bpf.h | 597 ++++++++++++++++++++++++++++
tools/sched_ext/include/scx/common.bpf.h | 23 ++
tools/sched_ext/include/scx/compat.bpf.h | 24 ++
tools/sched_ext/scx_qmap.bpf.c | 306 ++++++++-------
tools/sched_ext/scx_qmap.c | 25 +-
tools/sched_ext/scx_qmap.h | 2 +-
13 files changed, 2240 insertions(+), 285 deletions(-)
--
tejun
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it
2026-04-24 17:27 [PATCHSET v2 REPOST sched_ext/for-7.2] sched_ext: Topological CPU IDs and cid-form struct_ops Tejun Heo
@ 2026-04-24 17:27 ` Tejun Heo
0 siblings, 0 replies; 3+ messages in thread
From: Tejun Heo @ 2026-04-24 17:27 UTC (permalink / raw)
To: David Vernet, Andrea Righi, Changwoo Min
Cc: sched-ext, emil, linux-kernel, Cheng-Yang Chou, Zhao Mengmeng,
Tejun Heo
Rename the static ext.c helper and declare it in ext_internal.h so
ext_idle.c and the upcoming cid code can call it directly instead of
relying on build_policy.c textual inclusion.
Pure rename and visibility change.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
---
kernel/sched/ext.c | 22 +++++++++++-----------
kernel/sched/ext_idle.c | 6 +++---
kernel/sched/ext_internal.h | 2 ++
3 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 0ba12449f0c6..1d6613dc4d3b 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1055,7 +1055,7 @@ static inline bool __cpu_valid(s32 cpu)
}
/**
- * ops_cpu_valid - Verify a cpu number, to be used on ops input args
+ * scx_cpu_valid - Verify a cpu number, to be used on ops input args
* @sch: scx_sched to abort on error
* @cpu: cpu number which came from a BPF ops
* @where: extra information reported on error
@@ -1064,7 +1064,7 @@ static inline bool __cpu_valid(s32 cpu)
* Verify that it is in range and one of the possible cpus. If invalid, trigger
* an ops error.
*/
-static bool ops_cpu_valid(struct scx_sched *sch, s32 cpu, const char *where)
+bool scx_cpu_valid(struct scx_sched *sch, s32 cpu, const char *where)
{
if (__cpu_valid(cpu)) {
return true;
@@ -1677,7 +1677,7 @@ static struct scx_dispatch_q *find_dsq_for_dispatch(struct scx_sched *sch,
if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) {
s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
- if (!ops_cpu_valid(sch, cpu, "in SCX_DSQ_LOCAL_ON dispatch verdict"))
+ if (!scx_cpu_valid(sch, cpu, "in SCX_DSQ_LOCAL_ON dispatch verdict"))
return find_global_dsq(sch, tcpu);
return &cpu_rq(cpu)->scx.local_dsq;
@@ -3260,7 +3260,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
this_rq()->scx.in_select_cpu = false;
p->scx.selected_cpu = cpu;
*ddsp_taskp = NULL;
- if (ops_cpu_valid(sch, cpu, "from ops.select_cpu()"))
+ if (scx_cpu_valid(sch, cpu, "from ops.select_cpu()"))
return cpu;
else
return prev_cpu;
@@ -8679,7 +8679,7 @@ static void scx_kick_cpu(struct scx_sched *sch, s32 cpu, u64 flags)
struct rq *this_rq;
unsigned long irq_flags;
- if (!ops_cpu_valid(sch, cpu, NULL))
+ if (!scx_cpu_valid(sch, cpu, NULL))
return;
local_irq_save(irq_flags);
@@ -8775,7 +8775,7 @@ __bpf_kfunc s32 scx_bpf_dsq_nr_queued(u64 dsq_id)
} else if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) {
s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
- if (ops_cpu_valid(sch, cpu, NULL)) {
+ if (scx_cpu_valid(sch, cpu, NULL)) {
ret = READ_ONCE(cpu_rq(cpu)->scx.local_dsq.nr);
goto out;
}
@@ -9164,7 +9164,7 @@ __bpf_kfunc u32 scx_bpf_cpuperf_cap(s32 cpu, const struct bpf_prog_aux *aux)
guard(rcu)();
sch = scx_prog_sched(aux);
- if (likely(sch) && ops_cpu_valid(sch, cpu, NULL))
+ if (likely(sch) && scx_cpu_valid(sch, cpu, NULL))
return arch_scale_cpu_capacity(cpu);
else
return SCX_CPUPERF_ONE;
@@ -9192,7 +9192,7 @@ __bpf_kfunc u32 scx_bpf_cpuperf_cur(s32 cpu, const struct bpf_prog_aux *aux)
guard(rcu)();
sch = scx_prog_sched(aux);
- if (likely(sch) && ops_cpu_valid(sch, cpu, NULL))
+ if (likely(sch) && scx_cpu_valid(sch, cpu, NULL))
return arch_scale_freq_capacity(cpu);
else
return SCX_CPUPERF_ONE;
@@ -9228,7 +9228,7 @@ __bpf_kfunc void scx_bpf_cpuperf_set(s32 cpu, u32 perf, const struct bpf_prog_au
return;
}
- if (ops_cpu_valid(sch, cpu, NULL)) {
+ if (scx_cpu_valid(sch, cpu, NULL)) {
struct rq *rq = cpu_rq(cpu), *locked_rq = scx_locked_rq();
struct rq_flags rf;
@@ -9341,7 +9341,7 @@ __bpf_kfunc struct rq *scx_bpf_cpu_rq(s32 cpu, const struct bpf_prog_aux *aux)
if (unlikely(!sch))
return NULL;
- if (!ops_cpu_valid(sch, cpu, NULL))
+ if (!scx_cpu_valid(sch, cpu, NULL))
return NULL;
if (!sch->warned_deprecated_rq) {
@@ -9398,7 +9398,7 @@ __bpf_kfunc struct task_struct *scx_bpf_cpu_curr(s32 cpu, const struct bpf_prog_
if (unlikely(!sch))
return NULL;
- if (!ops_cpu_valid(sch, cpu, NULL))
+ if (!scx_cpu_valid(sch, cpu, NULL))
return NULL;
return rcu_dereference(cpu_rq(cpu)->curr);
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index c43d62d90e40..11d11ea6ca6b 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -917,7 +917,7 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
bool we_locked = false;
s32 cpu;
- if (!ops_cpu_valid(sch, prev_cpu, NULL))
+ if (!scx_cpu_valid(sch, prev_cpu, NULL))
return -EINVAL;
if (!check_builtin_idle_enabled(sch))
@@ -975,7 +975,7 @@ __bpf_kfunc s32 scx_bpf_cpu_node(s32 cpu, const struct bpf_prog_aux *aux)
guard(rcu)();
sch = scx_prog_sched(aux);
- if (unlikely(!sch) || !ops_cpu_valid(sch, cpu, NULL))
+ if (unlikely(!sch) || !scx_cpu_valid(sch, cpu, NULL))
return NUMA_NO_NODE;
return cpu_to_node(cpu);
}
@@ -1257,7 +1257,7 @@ __bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu, const struct bpf_prog_
if (!check_builtin_idle_enabled(sch))
return false;
- if (!ops_cpu_valid(sch, cpu, NULL))
+ if (!scx_cpu_valid(sch, cpu, NULL))
return false;
return scx_idle_test_and_clear_cpu(cpu);
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index c35098668fb1..5e3b79963d41 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -1350,6 +1350,8 @@ DECLARE_PER_CPU(struct rq *, scx_locked_rq_state);
int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id);
+bool scx_cpu_valid(struct scx_sched *sch, s32 cpu, const char *where);
+
/*
* Return the rq currently locked from an scx callback, or NULL if no rq is
* locked.
--
2.53.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCHSET v3 sched_ext/for-7.2] sched_ext: Topological CPU IDs and cid-form struct_ops
@ 2026-04-28 20:35 Tejun Heo
2026-04-28 20:35 ` [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it Tejun Heo
0 siblings, 1 reply; 3+ messages in thread
From: Tejun Heo @ 2026-04-28 20:35 UTC (permalink / raw)
To: David Vernet, Andrea Righi, Changwoo Min
Cc: sched-ext, Emil Tsalapatis, linux-kernel, Tejun Heo
Hello,
v3 (all from the Sashiko AI review at
https://sashiko.dev/#/patchset/20260424172721.3458520-1-tj%40kernel.org):
- cid: drop leaked cpus_read_lock() on scx_cid_init() failure;
BUILD_BUG_ON tightened to NR_CPUS<=8192 to match the BPF cmask
helpers' CMASK_MAX_WORDS coverage.
- bpf-struct-size: use offsetof() in struct_size() to match the
kernel <linux/overflow.h> macro semantics (no inflation from
trailing struct padding).
- cmask: cmask_copy_from_kernel() validates src->base==0 via
probe-read; nr_bits check is bit-level rather than rounded-up
word-count.
- cid-qmap-idle: qmap_init() refuses to load when scx_bpf_nr_cids()
exceeds SCX_QMAP_MAX_CPUS; the task_ctx flex array would otherwise
overflow into the next slab entry.
v2: https://lore.kernel.org/r/20260424172721.3458520-1-tj@kernel.org
v1: https://lore.kernel.org/r/20260421071945.3110084-1-tj@kernel.org
This patchset introduces topological CPU IDs (cids) - dense,
topology-ordered cpu identifiers - and an alternative cid-form struct_ops
type that lets BPF schedulers operate in cid space directly.
Key pieces:
- cid space: scx_cid_init() walks nodes * LLCs * cores * threads and packs
a dense cid mapping. The mapping can be overridden via
scx_bpf_cid_override(). See "Topological CPU IDs" in ext_cid.h for the
model.
- cmask: a base-windowed bitmap over cid space. Kernel and BPF helpers with
identical semantics. Used by scx_qmap for per-task affinity and idle-cid
tracking; meant to be the substrate for sub-sched cid allocation.
- bpf_sched_ext_ops_cid: a parallel struct_ops type whose callbacks take
cids/cmasks instead of cpus/cpumasks. Kernel translates at the boundary
via scx_cpu_arg() / scx_cpu_ret(); the two struct types share offsets up
through @priv (verified by BUILD_BUG_ON) so the union view in scx_sched
works without function-pointer casts. Sub-sched support is tied to
cid-form: validate_ops() rejects cpu-form sub-scheds and cpu-form roots
that expose sub_attach / sub_detach.
- cid-form kfuncs: scx_bpf_kick_cid, scx_bpf_cidperf_{cap,cur,set},
scx_bpf_cid_curr, scx_bpf_task_cid, scx_bpf_this_cid,
scx_bpf_nr_{cids,online_cids}, scx_bpf_cid_to_cpu, scx_bpf_cpu_to_cid.
A cid-form program may not call cpu-only kfuncs (enforced at verifier
load via scx_kfunc_context_filter); the reverse is intentionally
permissive to ease migration.
- scx_qmap port: scx_qmap is converted to cid-form. It uses the cmask-based
idle picker, per-task cid-space cpus_allowed, and cid-form kfuncs
throughout. Sub-sched dispatching via scx_bpf_sub_dispatch() continues to
work.
v3 re-tested on the 16-cpu QEMU: cid-form scx_qmap under stress-ng plus
reload cycles, hotplug auto-restart, and sub-sched (root scx_qmap +
cgroup-scoped scx_qmap child). Clean.
Based on sched_ext/for-7.2 (4939721aad2e).
0001-sched_ext-Add-ext_types.h-for-early-subsystem-wide-d.patch
0002-sched_ext-Rename-ops_cpu_valid-to-scx_cpu_valid-and-.patch
0003-sched_ext-Move-scx_exit-scx_error-and-friends-to-ext.patch
0004-sched_ext-Shift-scx_kick_cpu-validity-check-to-scx_b.patch
0005-sched_ext-Relocate-cpu_acquire-cpu_release-to-end-of.patch
0006-sched_ext-Make-scx_enable-take-scx_enable_cmd.patch
0007-sched_ext-Add-topological-CPU-IDs-cids.patch
0008-sched_ext-Add-scx_bpf_cid_override-kfunc.patch
0009-tools-sched_ext-Add-struct_size-helpers-to-common.bp.patch
0010-sched_ext-Add-cmask-a-base-windowed-bitmap-over-cid-.patch
0011-sched_ext-Add-cid-form-kfunc-wrappers-alongside-cpu-.patch
0012-sched_ext-Add-bpf_sched_ext_ops_cid-struct_ops-type.patch
0013-sched_ext-Forbid-cpu-form-kfuncs-from-cid-form-sched.patch
0014-tools-sched_ext-scx_qmap-Restart-on-hotplug-instead-.patch
0015-tools-sched_ext-scx_qmap-Add-cmask-based-idle-tracki.patch
0016-tools-sched_ext-scx_qmap-Port-to-cid-form-struct_ops.patch
0017-sched_ext-Require-cid-form-struct_ops-for-sub-sched-.patch
Git tree: git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git scx-cid-v3
kernel/sched/build_policy.c | 3 +
kernel/sched/ext.c | 651 ++++++++++++++++++++++++++----
kernel/sched/ext_cid.c | 409 +++++++++++++++++++
kernel/sched/ext_cid.h | 164 ++++++++
kernel/sched/ext_idle.c | 8 +-
kernel/sched/ext_internal.h | 205 +++++++---
kernel/sched/ext_types.h | 104 +++++
tools/sched_ext/include/scx/cid.bpf.h | 667 +++++++++++++++++++++++++++++++
tools/sched_ext/include/scx/common.bpf.h | 23 ++
tools/sched_ext/include/scx/compat.bpf.h | 24 ++
tools/sched_ext/scx_qmap.bpf.c | 346 +++++++++-------
tools/sched_ext/scx_qmap.c | 70 +++-
tools/sched_ext/scx_qmap.h | 2 +-
13 files changed, 2391 insertions(+), 285 deletions(-)
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it
2026-04-28 20:35 [PATCHSET v3 sched_ext/for-7.2] sched_ext: Topological CPU IDs and cid-form struct_ops Tejun Heo
@ 2026-04-28 20:35 ` Tejun Heo
0 siblings, 0 replies; 3+ messages in thread
From: Tejun Heo @ 2026-04-28 20:35 UTC (permalink / raw)
To: David Vernet, Andrea Righi, Changwoo Min
Cc: sched-ext, Emil Tsalapatis, linux-kernel, Tejun Heo,
Cheng-Yang Chou
Rename the static ext.c helper and declare it in ext_internal.h so
ext_idle.c and the upcoming cid code can call it directly instead of
relying on build_policy.c textual inclusion.
Pure rename and visibility change.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
---
kernel/sched/ext.c | 22 +++++++++++-----------
kernel/sched/ext_idle.c | 6 +++---
kernel/sched/ext_internal.h | 2 ++
3 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 11893f00be06..980231c547ec 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1062,7 +1062,7 @@ static inline bool __cpu_valid(s32 cpu)
}
/**
- * ops_cpu_valid - Verify a cpu number, to be used on ops input args
+ * scx_cpu_valid - Verify a cpu number, to be used on ops input args
* @sch: scx_sched to abort on error
* @cpu: cpu number which came from a BPF ops
* @where: extra information reported on error
@@ -1071,7 +1071,7 @@ static inline bool __cpu_valid(s32 cpu)
* Verify that it is in range and one of the possible cpus. If invalid, trigger
* an ops error.
*/
-static bool ops_cpu_valid(struct scx_sched *sch, s32 cpu, const char *where)
+bool scx_cpu_valid(struct scx_sched *sch, s32 cpu, const char *where)
{
if (__cpu_valid(cpu)) {
return true;
@@ -1686,7 +1686,7 @@ static struct scx_dispatch_q *find_dsq_for_dispatch(struct scx_sched *sch,
if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) {
s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
- if (!ops_cpu_valid(sch, cpu, "in SCX_DSQ_LOCAL_ON dispatch verdict"))
+ if (!scx_cpu_valid(sch, cpu, "in SCX_DSQ_LOCAL_ON dispatch verdict"))
return find_global_dsq(sch, tcpu);
return &cpu_rq(cpu)->scx.local_dsq;
@@ -3269,7 +3269,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
this_rq()->scx.in_select_cpu = false;
p->scx.selected_cpu = cpu;
*ddsp_taskp = NULL;
- if (ops_cpu_valid(sch, cpu, "from ops.select_cpu()"))
+ if (scx_cpu_valid(sch, cpu, "from ops.select_cpu()"))
return cpu;
else
return prev_cpu;
@@ -8791,7 +8791,7 @@ static void scx_kick_cpu(struct scx_sched *sch, s32 cpu, u64 flags)
struct rq *this_rq;
unsigned long irq_flags;
- if (!ops_cpu_valid(sch, cpu, NULL))
+ if (!scx_cpu_valid(sch, cpu, NULL))
return;
local_irq_save(irq_flags);
@@ -8888,7 +8888,7 @@ __bpf_kfunc s32 scx_bpf_dsq_nr_queued(u64 dsq_id, const struct bpf_prog_aux *aux
} else if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) {
s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
- if (ops_cpu_valid(sch, cpu, NULL)) {
+ if (scx_cpu_valid(sch, cpu, NULL)) {
ret = READ_ONCE(cpu_rq(cpu)->scx.local_dsq.nr);
goto out;
}
@@ -9277,7 +9277,7 @@ __bpf_kfunc u32 scx_bpf_cpuperf_cap(s32 cpu, const struct bpf_prog_aux *aux)
guard(rcu)();
sch = scx_prog_sched(aux);
- if (likely(sch) && ops_cpu_valid(sch, cpu, NULL))
+ if (likely(sch) && scx_cpu_valid(sch, cpu, NULL))
return arch_scale_cpu_capacity(cpu);
else
return SCX_CPUPERF_ONE;
@@ -9305,7 +9305,7 @@ __bpf_kfunc u32 scx_bpf_cpuperf_cur(s32 cpu, const struct bpf_prog_aux *aux)
guard(rcu)();
sch = scx_prog_sched(aux);
- if (likely(sch) && ops_cpu_valid(sch, cpu, NULL))
+ if (likely(sch) && scx_cpu_valid(sch, cpu, NULL))
return arch_scale_freq_capacity(cpu);
else
return SCX_CPUPERF_ONE;
@@ -9341,7 +9341,7 @@ __bpf_kfunc void scx_bpf_cpuperf_set(s32 cpu, u32 perf, const struct bpf_prog_au
return;
}
- if (ops_cpu_valid(sch, cpu, NULL)) {
+ if (scx_cpu_valid(sch, cpu, NULL)) {
struct rq *rq = cpu_rq(cpu), *locked_rq = scx_locked_rq();
struct rq_flags rf;
@@ -9454,7 +9454,7 @@ __bpf_kfunc struct rq *scx_bpf_cpu_rq(s32 cpu, const struct bpf_prog_aux *aux)
if (unlikely(!sch))
return NULL;
- if (!ops_cpu_valid(sch, cpu, NULL))
+ if (!scx_cpu_valid(sch, cpu, NULL))
return NULL;
if (!sch->warned_deprecated_rq) {
@@ -9511,7 +9511,7 @@ __bpf_kfunc struct task_struct *scx_bpf_cpu_curr(s32 cpu, const struct bpf_prog_
if (unlikely(!sch))
return NULL;
- if (!ops_cpu_valid(sch, cpu, NULL))
+ if (!scx_cpu_valid(sch, cpu, NULL))
return NULL;
return rcu_dereference(cpu_rq(cpu)->curr);
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index f0f4d9500997..860c4634f60e 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -916,7 +916,7 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
bool we_locked = false;
s32 cpu;
- if (!ops_cpu_valid(sch, prev_cpu, NULL))
+ if (!scx_cpu_valid(sch, prev_cpu, NULL))
return -EINVAL;
if (!check_builtin_idle_enabled(sch))
@@ -989,7 +989,7 @@ __bpf_kfunc s32 scx_bpf_cpu_node(s32 cpu, const struct bpf_prog_aux *aux)
guard(rcu)();
sch = scx_prog_sched(aux);
- if (unlikely(!sch) || !ops_cpu_valid(sch, cpu, NULL))
+ if (unlikely(!sch) || !scx_cpu_valid(sch, cpu, NULL))
return NUMA_NO_NODE;
return cpu_to_node(cpu);
}
@@ -1271,7 +1271,7 @@ __bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu, const struct bpf_prog_
if (!check_builtin_idle_enabled(sch))
return false;
- if (!ops_cpu_valid(sch, cpu, NULL))
+ if (!scx_cpu_valid(sch, cpu, NULL))
return false;
return scx_idle_test_and_clear_cpu(cpu);
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index 1b2ea6fa9fd6..f59cd58b8175 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -1352,6 +1352,8 @@ DECLARE_PER_CPU(struct rq *, scx_locked_rq_state);
int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id);
+bool scx_cpu_valid(struct scx_sched *sch, s32 cpu, const char *where);
+
/*
* Return the rq currently locked from an scx callback, or NULL if no rq is
* locked.
--
2.54.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-04-28 20:35 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2026-04-24 1:32 [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it Tejun Heo
-- strict thread matches above, loose matches on Subject: below --
2026-04-24 17:27 [PATCHSET v2 REPOST sched_ext/for-7.2] sched_ext: Topological CPU IDs and cid-form struct_ops Tejun Heo
2026-04-24 17:27 ` [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it Tejun Heo
2026-04-28 20:35 [PATCHSET v3 sched_ext/for-7.2] sched_ext: Topological CPU IDs and cid-form struct_ops Tejun Heo
2026-04-28 20:35 ` [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it Tejun Heo
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.