* [PATCH 08/17] sched_ext: Add scx_bpf_cid_override() kfunc
@ 2026-04-24 1:32 Tejun Heo
0 siblings, 0 replies; 2+ messages in thread
From: Tejun Heo @ 2026-04-24 1:32 UTC (permalink / raw)
To: David Vernet, Andrea Righi, Changwoo Min
Cc: sched-ext, emil, linux-kernel, Cheng-Yang Chou, Zhao Mengmeng,
Tejun Heo
The auto-probed cid mapping reflects the kernel's view of topology
(node -> LLC -> core), but a BPF scheduler may want a different layout -
to align cid slices with its own partitioning, or to work around how the
kernel reports a particular machine.
Add scx_bpf_cid_override(), callable from ops.init() of the root
scheduler. It validates the caller-supplied cpu->cid array and replaces
the in-place mapping; topo info is invalidated. A compat.bpf.h wrapper
silently no-ops on kernels that lack the kfunc.
A new SCX_KF_ALLOW_INIT bit in the kfunc context filter restricts the
kfunc to ops.init() at verifier load time.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
---
kernel/sched/ext.c | 16 +++--
kernel/sched/ext_cid.c | 75 +++++++++++++++++++++++-
kernel/sched/ext_cid.h | 1 +
tools/sched_ext/include/scx/compat.bpf.h | 12 ++++
4 files changed, 97 insertions(+), 7 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index e05d35e8c261..271399b9faa4 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -9641,10 +9641,11 @@ static const struct btf_kfunc_id_set scx_kfunc_set_any = {
*/
enum scx_kf_allow_flags {
SCX_KF_ALLOW_UNLOCKED = 1 << 0,
- SCX_KF_ALLOW_CPU_RELEASE = 1 << 1,
- SCX_KF_ALLOW_DISPATCH = 1 << 2,
- SCX_KF_ALLOW_ENQUEUE = 1 << 3,
- SCX_KF_ALLOW_SELECT_CPU = 1 << 4,
+ SCX_KF_ALLOW_INIT = 1 << 1,
+ SCX_KF_ALLOW_CPU_RELEASE = 1 << 2,
+ SCX_KF_ALLOW_DISPATCH = 1 << 3,
+ SCX_KF_ALLOW_ENQUEUE = 1 << 4,
+ SCX_KF_ALLOW_SELECT_CPU = 1 << 5,
};
/*
@@ -9672,7 +9673,7 @@ static const u32 scx_kf_allow_flags[] = {
[SCX_OP_IDX(sub_detach)] = SCX_KF_ALLOW_UNLOCKED,
[SCX_OP_IDX(cpu_online)] = SCX_KF_ALLOW_UNLOCKED,
[SCX_OP_IDX(cpu_offline)] = SCX_KF_ALLOW_UNLOCKED,
- [SCX_OP_IDX(init)] = SCX_KF_ALLOW_UNLOCKED,
+ [SCX_OP_IDX(init)] = SCX_KF_ALLOW_UNLOCKED | SCX_KF_ALLOW_INIT,
[SCX_OP_IDX(exit)] = SCX_KF_ALLOW_UNLOCKED,
};
@@ -9687,6 +9688,7 @@ static const u32 scx_kf_allow_flags[] = {
int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
bool in_unlocked = btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id);
+ bool in_init = btf_id_set8_contains(&scx_kfunc_ids_init, kfunc_id);
bool in_select_cpu = btf_id_set8_contains(&scx_kfunc_ids_select_cpu, kfunc_id);
bool in_enqueue = btf_id_set8_contains(&scx_kfunc_ids_enqueue_dispatch, kfunc_id);
bool in_dispatch = btf_id_set8_contains(&scx_kfunc_ids_dispatch, kfunc_id);
@@ -9696,7 +9698,7 @@ int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id)
u32 moff, flags;
/* Not an SCX kfunc - allow. */
- if (!(in_unlocked || in_select_cpu || in_enqueue || in_dispatch ||
+ if (!(in_unlocked || in_init || in_select_cpu || in_enqueue || in_dispatch ||
in_cpu_release || in_idle || in_any))
return 0;
@@ -9732,6 +9734,8 @@ int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id)
if ((flags & SCX_KF_ALLOW_UNLOCKED) && in_unlocked)
return 0;
+ if ((flags & SCX_KF_ALLOW_INIT) && in_init)
+ return 0;
if ((flags & SCX_KF_ALLOW_CPU_RELEASE) && in_cpu_release)
return 0;
if ((flags & SCX_KF_ALLOW_DISPATCH) && in_dispatch)
diff --git a/kernel/sched/ext_cid.c b/kernel/sched/ext_cid.c
index 26b705b6e20d..4c356e31394c 100644
--- a/kernel/sched/ext_cid.c
+++ b/kernel/sched/ext_cid.c
@@ -212,6 +212,68 @@ s32 scx_cid_init(struct scx_sched *sch)
__bpf_kfunc_start_defs();
+/**
+ * scx_bpf_cid_override - Install an explicit cpu->cid mapping
+ * @cpu_to_cid: array of nr_cpu_ids s32 entries (cid for each cpu)
+ * @cpu_to_cid__sz: must be nr_cpu_ids * sizeof(s32) bytes
+ * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs
+ *
+ * May only be called from ops.init() of the root scheduler. Replace the
+ * topology-probed cid mapping with the caller-provided one. Each possible cpu
+ * must map to a unique cid in [0, num_possible_cpus()). Topo info is cleared.
+ * On invalid input, trigger scx_error() to abort the scheduler.
+ */
+__bpf_kfunc void scx_bpf_cid_override(const s32 *cpu_to_cid, u32 cpu_to_cid__sz,
+ const struct bpf_prog_aux *aux)
+{
+ cpumask_var_t seen __free(free_cpumask_var) = CPUMASK_VAR_NULL;
+ struct scx_sched *sch;
+ bool alloced;
+ s32 cpu, cid;
+
+ /* GFP_KERNEL alloc must happen before the rcu read section */
+ alloced = zalloc_cpumask_var(&seen, GFP_KERNEL);
+
+ guard(rcu)();
+
+ sch = scx_prog_sched(aux);
+ if (unlikely(!sch))
+ return;
+
+ if (!alloced) {
+ scx_error(sch, "scx_bpf_cid_override: failed to allocate cpumask");
+ return;
+ }
+
+ if (scx_parent(sch)) {
+ scx_error(sch, "scx_bpf_cid_override() only allowed from root sched");
+ return;
+ }
+
+ if (cpu_to_cid__sz != nr_cpu_ids * sizeof(s32)) {
+ scx_error(sch, "scx_bpf_cid_override: expected %zu bytes, got %u",
+ nr_cpu_ids * sizeof(s32), cpu_to_cid__sz);
+ return;
+ }
+
+ for_each_possible_cpu(cpu) {
+ s32 c = cpu_to_cid[cpu];
+
+ if (!cid_valid(sch, c))
+ return;
+ if (cpumask_test_and_set_cpu(c, seen)) {
+ scx_error(sch, "cid %d assigned to multiple cpus", c);
+ return;
+ }
+ scx_cpu_to_cid_tbl[cpu] = c;
+ scx_cid_to_cpu_tbl[c] = cpu;
+ }
+
+ /* Invalidate stale topo info - the override carries no topology. */
+ for (cid = 0; cid < num_possible_cpus(); cid++)
+ scx_cid_topo[cid] = SCX_CID_TOPO_NEG;
+}
+
/**
* scx_bpf_cid_to_cpu - Return the raw CPU id for @cid
* @cid: cid to look up
@@ -284,6 +346,16 @@ __bpf_kfunc void scx_bpf_cid_topo(s32 cid, struct scx_cid_topo *out__uninit,
__bpf_kfunc_end_defs();
+BTF_KFUNCS_START(scx_kfunc_ids_init)
+BTF_ID_FLAGS(func, scx_bpf_cid_override, KF_IMPLICIT_ARGS | KF_SLEEPABLE)
+BTF_KFUNCS_END(scx_kfunc_ids_init)
+
+static const struct btf_kfunc_id_set scx_kfunc_set_init = {
+ .owner = THIS_MODULE,
+ .set = &scx_kfunc_ids_init,
+ .filter = scx_kfunc_context_filter,
+};
+
BTF_KFUNCS_START(scx_kfunc_ids_cid)
BTF_ID_FLAGS(func, scx_bpf_cid_to_cpu, KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, scx_bpf_cpu_to_cid, KF_IMPLICIT_ARGS)
@@ -297,7 +369,8 @@ static const struct btf_kfunc_id_set scx_kfunc_set_cid = {
int scx_cid_kfunc_init(void)
{
- return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_cid) ?:
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_init) ?:
+ register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_cid) ?:
register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &scx_kfunc_set_cid) ?:
register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &scx_kfunc_set_cid);
}
diff --git a/kernel/sched/ext_cid.h b/kernel/sched/ext_cid.h
index 1dbe8262ccdd..52edb66b53fd 100644
--- a/kernel/sched/ext_cid.h
+++ b/kernel/sched/ext_cid.h
@@ -49,6 +49,7 @@ struct scx_sched;
extern s16 *scx_cid_to_cpu_tbl;
extern s16 *scx_cpu_to_cid_tbl;
extern struct scx_cid_topo *scx_cid_topo;
+extern struct btf_id_set8 scx_kfunc_ids_init;
s32 scx_cid_init(struct scx_sched *sch);
int scx_cid_kfunc_init(void);
diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h
index 2808003eef04..6b9d054c3e4f 100644
--- a/tools/sched_ext/include/scx/compat.bpf.h
+++ b/tools/sched_ext/include/scx/compat.bpf.h
@@ -121,6 +121,18 @@ static inline bool scx_bpf_sub_dispatch(u64 cgroup_id)
return false;
}
+/*
+ * v7.2: scx_bpf_cid_override() for explicit cpu->cid mapping. Ignore if
+ * missing.
+ */
+void scx_bpf_cid_override___compat(const s32 *cpu_to_cid, u32 cpu_to_cid__sz) __ksym __weak;
+
+static inline void scx_bpf_cid_override(const s32 *cpu_to_cid, u32 cpu_to_cid__sz)
+{
+ if (bpf_ksym_exists(scx_bpf_cid_override___compat))
+ return scx_bpf_cid_override___compat(cpu_to_cid, cpu_to_cid__sz);
+}
+
/**
* __COMPAT_is_enq_cpu_selected - Test if SCX_ENQ_CPU_SELECTED is on
* in a compatible way. We will preserve this __COMPAT helper until v6.16.
--
2.53.0
^ permalink raw reply related [flat|nested] 2+ messages in thread* [PATCHSET v2 REPOST sched_ext/for-7.2] sched_ext: Topological CPU IDs and cid-form struct_ops
@ 2026-04-24 17:27 Tejun Heo
2026-04-24 17:27 ` [PATCH 08/17] sched_ext: Add scx_bpf_cid_override() kfunc Tejun Heo
0 siblings, 1 reply; 2+ messages in thread
From: Tejun Heo @ 2026-04-24 17:27 UTC (permalink / raw)
To: David Vernet, Andrea Righi, Changwoo Min
Cc: sched-ext, emil, linux-kernel, Cheng-Yang Chou, Zhao Mengmeng,
Tejun Heo
Hello,
Reposting v2 because the original send was not properly threaded -
each patch went out as a standalone top-level message. Content is
unchanged from the original v2.
Original v2: https://lore.kernel.org/r/20260424013220.2923402-1-tj@kernel.org
v2 of https://lore.kernel.org/r/20260421071945.3110084-1-tj@kernel.org
v2:
- Add ext-types.h first patch for early subsystem-wide type defs.
- cid: publish the cid tables with WRITE_ONCE / read with READ_ONCE;
document the visibility contract.
- cid-kfuncs: NULL-guard scx_bpf_this_cid / scx_bpf_task_cid for
TRACING/SYSCALL callers before any SCX sched has enabled.
- cid-struct-ops: use struct_size() for the set_cmask_scratch percpu
alloc; cluster __scx_is_cid_type disable with __scx_enabled disable
in scx_root_disable().
- cid-kfunc-filter: sync per-entry kfunc flags with each kfunc's
primary BTF_ID_FLAGS() declaration (Zhao). pahole intersects flags
across occurrences; omitting them drops the flags globally - the
visible symptom was KF_IMPLICIT_ARGS getting cleared on
scx_bpf_kick_cpu, leaking bpf_prog_aux into vmlinux.h.
- cmask: narrow to the helpers this series actually uses;
cmask_copy_from_kernel contract and runtime guard.
This patchset introduces topological CPU IDs (cids) - dense,
topology-ordered cpu identifiers - and an alternative cid-form struct_ops
type that lets BPF schedulers operate in cid space directly.
Key pieces:
- cid space: scx_cid_init() walks nodes * LLCs * cores * threads and packs
a dense cid mapping. The mapping can be overridden via
scx_bpf_cid_override(). See "Topological CPU IDs" in ext_cid.h for the
model.
- cmask: a base-windowed bitmap over cid space. Kernel and BPF helpers with
identical semantics. Used by scx_qmap for per-task affinity and idle-cid
tracking; meant to be the substrate for sub-sched cid allocation.
- bpf_sched_ext_ops_cid: a parallel struct_ops type whose callbacks take
cids/cmasks instead of cpus/cpumasks. Kernel translates at the boundary
via scx_cpu_arg() / scx_cpu_ret(); the two struct types share offsets up
through @priv (verified by BUILD_BUG_ON) so the union view in scx_sched
works without function-pointer casts. Sub-sched support is tied to
cid-form: validate_ops() rejects cpu-form sub-scheds and cpu-form roots
that expose sub_attach / sub_detach.
- cid-form kfuncs: scx_bpf_kick_cid, scx_bpf_cidperf_{cap,cur,set},
scx_bpf_cid_curr, scx_bpf_task_cid, scx_bpf_this_cid,
scx_bpf_nr_{cids,online_cids}, scx_bpf_cid_to_cpu, scx_bpf_cpu_to_cid.
A cid-form program may not call cpu-only kfuncs (enforced at verifier
load via scx_kfunc_context_filter); the reverse is intentionally
permissive to ease migration.
- scx_qmap port: scx_qmap is converted to cid-form. It uses the cmask-based
idle picker, per-task cid-space cpus_allowed, and cid-form kfuncs
throughout. Sub-sched dispatching via scx_bpf_sub_dispatch() continues to
work.
v2 re-tested on the 16-cpu QEMU: cid-form scx_qmap, cpu-form scx_simple,
cid<->cpu cycling, scx_qmap under stress-ng, hotplug auto-restart, and
sub-sched (root scx_qmap + cgroup-scoped scx_qmap child). Clean.
Based on sched_ext/for-7.2 (c2929bc21dce).
0001-sched_ext-Add-ext_types.h-for-early-subsystem-wide-d.patch
0002-sched_ext-Rename-ops_cpu_valid-to-scx_cpu_valid-and-.patch
0003-sched_ext-Move-scx_exit-scx_error-and-friends-to-ext.patch
0004-sched_ext-Shift-scx_kick_cpu-validity-check-to-scx_b.patch
0005-sched_ext-Relocate-cpu_acquire-cpu_release-to-end-of.patch
0006-sched_ext-Make-scx_enable-take-scx_enable_cmd.patch
0007-sched_ext-Add-topological-CPU-IDs-cids.patch
0008-sched_ext-Add-scx_bpf_cid_override-kfunc.patch
0009-tools-sched_ext-Add-struct_size-helpers-to-common.bp.patch
0010-sched_ext-Add-cmask-a-base-windowed-bitmap-over-cid-.patch
0011-sched_ext-Add-cid-form-kfunc-wrappers-alongside-cpu-.patch
0012-sched_ext-Add-bpf_sched_ext_ops_cid-struct_ops-type.patch
0013-sched_ext-Forbid-cpu-form-kfuncs-from-cid-form-sched.patch
0014-tools-sched_ext-scx_qmap-Restart-on-hotplug-instead-.patch
0015-tools-sched_ext-scx_qmap-Add-cmask-based-idle-tracki.patch
0016-tools-sched_ext-scx_qmap-Port-to-cid-form-struct_ops.patch
0017-sched_ext-Require-cid-form-struct_ops-for-sub-sched-.patch
Git tree: git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git scx-cid-v2
kernel/sched/build_policy.c | 2 +
kernel/sched/ext.c | 650 +++++++++++++++++++++++++----
kernel/sched/ext_cid.c | 417 ++++++++++++++++++++
kernel/sched/ext_cid.h | 164 ++++++++
kernel/sched/ext_idle.c | 8 +-
kernel/sched/ext_internal.h | 203 +++++++---
kernel/sched/ext_types.h | 104 +++++
tools/sched_ext/include/scx/cid.bpf.h | 597 ++++++++++++++++++++++++++++
tools/sched_ext/include/scx/common.bpf.h | 23 ++
tools/sched_ext/include/scx/compat.bpf.h | 24 ++
tools/sched_ext/scx_qmap.bpf.c | 306 ++++++++-------
tools/sched_ext/scx_qmap.c | 25 +-
tools/sched_ext/scx_qmap.h | 2 +-
13 files changed, 2240 insertions(+), 285 deletions(-)
--
tejun
^ permalink raw reply [flat|nested] 2+ messages in thread* [PATCH 08/17] sched_ext: Add scx_bpf_cid_override() kfunc
2026-04-24 17:27 [PATCHSET v2 REPOST sched_ext/for-7.2] sched_ext: Topological CPU IDs and cid-form struct_ops Tejun Heo
@ 2026-04-24 17:27 ` Tejun Heo
0 siblings, 0 replies; 2+ messages in thread
From: Tejun Heo @ 2026-04-24 17:27 UTC (permalink / raw)
To: David Vernet, Andrea Righi, Changwoo Min
Cc: sched-ext, emil, linux-kernel, Cheng-Yang Chou, Zhao Mengmeng,
Tejun Heo
The auto-probed cid mapping reflects the kernel's view of topology
(node -> LLC -> core), but a BPF scheduler may want a different layout -
to align cid slices with its own partitioning, or to work around how the
kernel reports a particular machine.
Add scx_bpf_cid_override(), callable from ops.init() of the root
scheduler. It validates the caller-supplied cpu->cid array and replaces
the in-place mapping; topo info is invalidated. A compat.bpf.h wrapper
silently no-ops on kernels that lack the kfunc.
A new SCX_KF_ALLOW_INIT bit in the kfunc context filter restricts the
kfunc to ops.init() at verifier load time.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
---
kernel/sched/ext.c | 16 +++--
kernel/sched/ext_cid.c | 75 +++++++++++++++++++++++-
kernel/sched/ext_cid.h | 1 +
tools/sched_ext/include/scx/compat.bpf.h | 12 ++++
4 files changed, 97 insertions(+), 7 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index e05d35e8c261..271399b9faa4 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -9641,10 +9641,11 @@ static const struct btf_kfunc_id_set scx_kfunc_set_any = {
*/
enum scx_kf_allow_flags {
SCX_KF_ALLOW_UNLOCKED = 1 << 0,
- SCX_KF_ALLOW_CPU_RELEASE = 1 << 1,
- SCX_KF_ALLOW_DISPATCH = 1 << 2,
- SCX_KF_ALLOW_ENQUEUE = 1 << 3,
- SCX_KF_ALLOW_SELECT_CPU = 1 << 4,
+ SCX_KF_ALLOW_INIT = 1 << 1,
+ SCX_KF_ALLOW_CPU_RELEASE = 1 << 2,
+ SCX_KF_ALLOW_DISPATCH = 1 << 3,
+ SCX_KF_ALLOW_ENQUEUE = 1 << 4,
+ SCX_KF_ALLOW_SELECT_CPU = 1 << 5,
};
/*
@@ -9672,7 +9673,7 @@ static const u32 scx_kf_allow_flags[] = {
[SCX_OP_IDX(sub_detach)] = SCX_KF_ALLOW_UNLOCKED,
[SCX_OP_IDX(cpu_online)] = SCX_KF_ALLOW_UNLOCKED,
[SCX_OP_IDX(cpu_offline)] = SCX_KF_ALLOW_UNLOCKED,
- [SCX_OP_IDX(init)] = SCX_KF_ALLOW_UNLOCKED,
+ [SCX_OP_IDX(init)] = SCX_KF_ALLOW_UNLOCKED | SCX_KF_ALLOW_INIT,
[SCX_OP_IDX(exit)] = SCX_KF_ALLOW_UNLOCKED,
};
@@ -9687,6 +9688,7 @@ static const u32 scx_kf_allow_flags[] = {
int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id)
{
bool in_unlocked = btf_id_set8_contains(&scx_kfunc_ids_unlocked, kfunc_id);
+ bool in_init = btf_id_set8_contains(&scx_kfunc_ids_init, kfunc_id);
bool in_select_cpu = btf_id_set8_contains(&scx_kfunc_ids_select_cpu, kfunc_id);
bool in_enqueue = btf_id_set8_contains(&scx_kfunc_ids_enqueue_dispatch, kfunc_id);
bool in_dispatch = btf_id_set8_contains(&scx_kfunc_ids_dispatch, kfunc_id);
@@ -9696,7 +9698,7 @@ int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id)
u32 moff, flags;
/* Not an SCX kfunc - allow. */
- if (!(in_unlocked || in_select_cpu || in_enqueue || in_dispatch ||
+ if (!(in_unlocked || in_init || in_select_cpu || in_enqueue || in_dispatch ||
in_cpu_release || in_idle || in_any))
return 0;
@@ -9732,6 +9734,8 @@ int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id)
if ((flags & SCX_KF_ALLOW_UNLOCKED) && in_unlocked)
return 0;
+ if ((flags & SCX_KF_ALLOW_INIT) && in_init)
+ return 0;
if ((flags & SCX_KF_ALLOW_CPU_RELEASE) && in_cpu_release)
return 0;
if ((flags & SCX_KF_ALLOW_DISPATCH) && in_dispatch)
diff --git a/kernel/sched/ext_cid.c b/kernel/sched/ext_cid.c
index 26b705b6e20d..4c356e31394c 100644
--- a/kernel/sched/ext_cid.c
+++ b/kernel/sched/ext_cid.c
@@ -212,6 +212,68 @@ s32 scx_cid_init(struct scx_sched *sch)
__bpf_kfunc_start_defs();
+/**
+ * scx_bpf_cid_override - Install an explicit cpu->cid mapping
+ * @cpu_to_cid: array of nr_cpu_ids s32 entries (cid for each cpu)
+ * @cpu_to_cid__sz: must be nr_cpu_ids * sizeof(s32) bytes
+ * @aux: implicit BPF argument to access bpf_prog_aux hidden from BPF progs
+ *
+ * May only be called from ops.init() of the root scheduler. Replace the
+ * topology-probed cid mapping with the caller-provided one. Each possible cpu
+ * must map to a unique cid in [0, num_possible_cpus()). Topo info is cleared.
+ * On invalid input, trigger scx_error() to abort the scheduler.
+ */
+__bpf_kfunc void scx_bpf_cid_override(const s32 *cpu_to_cid, u32 cpu_to_cid__sz,
+ const struct bpf_prog_aux *aux)
+{
+ cpumask_var_t seen __free(free_cpumask_var) = CPUMASK_VAR_NULL;
+ struct scx_sched *sch;
+ bool alloced;
+ s32 cpu, cid;
+
+ /* GFP_KERNEL alloc must happen before the rcu read section */
+ alloced = zalloc_cpumask_var(&seen, GFP_KERNEL);
+
+ guard(rcu)();
+
+ sch = scx_prog_sched(aux);
+ if (unlikely(!sch))
+ return;
+
+ if (!alloced) {
+ scx_error(sch, "scx_bpf_cid_override: failed to allocate cpumask");
+ return;
+ }
+
+ if (scx_parent(sch)) {
+ scx_error(sch, "scx_bpf_cid_override() only allowed from root sched");
+ return;
+ }
+
+ if (cpu_to_cid__sz != nr_cpu_ids * sizeof(s32)) {
+ scx_error(sch, "scx_bpf_cid_override: expected %zu bytes, got %u",
+ nr_cpu_ids * sizeof(s32), cpu_to_cid__sz);
+ return;
+ }
+
+ for_each_possible_cpu(cpu) {
+ s32 c = cpu_to_cid[cpu];
+
+ if (!cid_valid(sch, c))
+ return;
+ if (cpumask_test_and_set_cpu(c, seen)) {
+ scx_error(sch, "cid %d assigned to multiple cpus", c);
+ return;
+ }
+ scx_cpu_to_cid_tbl[cpu] = c;
+ scx_cid_to_cpu_tbl[c] = cpu;
+ }
+
+ /* Invalidate stale topo info - the override carries no topology. */
+ for (cid = 0; cid < num_possible_cpus(); cid++)
+ scx_cid_topo[cid] = SCX_CID_TOPO_NEG;
+}
+
/**
* scx_bpf_cid_to_cpu - Return the raw CPU id for @cid
* @cid: cid to look up
@@ -284,6 +346,16 @@ __bpf_kfunc void scx_bpf_cid_topo(s32 cid, struct scx_cid_topo *out__uninit,
__bpf_kfunc_end_defs();
+BTF_KFUNCS_START(scx_kfunc_ids_init)
+BTF_ID_FLAGS(func, scx_bpf_cid_override, KF_IMPLICIT_ARGS | KF_SLEEPABLE)
+BTF_KFUNCS_END(scx_kfunc_ids_init)
+
+static const struct btf_kfunc_id_set scx_kfunc_set_init = {
+ .owner = THIS_MODULE,
+ .set = &scx_kfunc_ids_init,
+ .filter = scx_kfunc_context_filter,
+};
+
BTF_KFUNCS_START(scx_kfunc_ids_cid)
BTF_ID_FLAGS(func, scx_bpf_cid_to_cpu, KF_IMPLICIT_ARGS)
BTF_ID_FLAGS(func, scx_bpf_cpu_to_cid, KF_IMPLICIT_ARGS)
@@ -297,7 +369,8 @@ static const struct btf_kfunc_id_set scx_kfunc_set_cid = {
int scx_cid_kfunc_init(void)
{
- return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_cid) ?:
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_init) ?:
+ register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &scx_kfunc_set_cid) ?:
register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &scx_kfunc_set_cid) ?:
register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &scx_kfunc_set_cid);
}
diff --git a/kernel/sched/ext_cid.h b/kernel/sched/ext_cid.h
index 1dbe8262ccdd..52edb66b53fd 100644
--- a/kernel/sched/ext_cid.h
+++ b/kernel/sched/ext_cid.h
@@ -49,6 +49,7 @@ struct scx_sched;
extern s16 *scx_cid_to_cpu_tbl;
extern s16 *scx_cpu_to_cid_tbl;
extern struct scx_cid_topo *scx_cid_topo;
+extern struct btf_id_set8 scx_kfunc_ids_init;
s32 scx_cid_init(struct scx_sched *sch);
int scx_cid_kfunc_init(void);
diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h
index 2808003eef04..6b9d054c3e4f 100644
--- a/tools/sched_ext/include/scx/compat.bpf.h
+++ b/tools/sched_ext/include/scx/compat.bpf.h
@@ -121,6 +121,18 @@ static inline bool scx_bpf_sub_dispatch(u64 cgroup_id)
return false;
}
+/*
+ * v7.2: scx_bpf_cid_override() for explicit cpu->cid mapping. Ignore if
+ * missing.
+ */
+void scx_bpf_cid_override___compat(const s32 *cpu_to_cid, u32 cpu_to_cid__sz) __ksym __weak;
+
+static inline void scx_bpf_cid_override(const s32 *cpu_to_cid, u32 cpu_to_cid__sz)
+{
+ if (bpf_ksym_exists(scx_bpf_cid_override___compat))
+ return scx_bpf_cid_override___compat(cpu_to_cid, cpu_to_cid__sz);
+}
+
/**
* __COMPAT_is_enq_cpu_selected - Test if SCX_ENQ_CPU_SELECTED is on
* in a compatible way. We will preserve this __COMPAT helper until v6.16.
--
2.53.0
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2026-04-24 17:27 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-24 1:32 [PATCH 08/17] sched_ext: Add scx_bpf_cid_override() kfunc Tejun Heo
-- strict thread matches above, loose matches on Subject: below --
2026-04-24 17:27 [PATCHSET v2 REPOST sched_ext/for-7.2] sched_ext: Topological CPU IDs and cid-form struct_ops Tejun Heo
2026-04-24 17:27 ` [PATCH 08/17] sched_ext: Add scx_bpf_cid_override() kfunc Tejun Heo
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox