All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it
@ 2026-04-24  1:32 Tejun Heo
  0 siblings, 0 replies; 4+ messages in thread
From: Tejun Heo @ 2026-04-24  1:32 UTC (permalink / raw)
  To: David Vernet, Andrea Righi, Changwoo Min
  Cc: sched-ext, emil, linux-kernel, Cheng-Yang Chou, Zhao Mengmeng,
	Tejun Heo

Rename the static ext.c helper and declare it in ext_internal.h so
ext_idle.c and the upcoming cid code can call it directly instead of
relying on build_policy.c textual inclusion.

Pure rename and visibility change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
---
 kernel/sched/ext.c          | 22 +++++++++++-----------
 kernel/sched/ext_idle.c     |  6 +++---
 kernel/sched/ext_internal.h |  2 ++
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 0ba12449f0c6..1d6613dc4d3b 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1055,7 +1055,7 @@ static inline bool __cpu_valid(s32 cpu)
 }
 
 /**
- * ops_cpu_valid - Verify a cpu number, to be used on ops input args
+ * scx_cpu_valid - Verify a cpu number, to be used on ops input args
  * @sch: scx_sched to abort on error
  * @cpu: cpu number which came from a BPF ops
  * @where: extra information reported on error
@@ -1064,7 +1064,7 @@ static inline bool __cpu_valid(s32 cpu)
  * Verify that it is in range and one of the possible cpus. If invalid, trigger
  * an ops error.
  */
-static bool ops_cpu_valid(struct scx_sched *sch, s32 cpu, const char *where)
+bool scx_cpu_valid(struct scx_sched *sch, s32 cpu, const char *where)
 {
 	if (__cpu_valid(cpu)) {
 		return true;
@@ -1677,7 +1677,7 @@ static struct scx_dispatch_q *find_dsq_for_dispatch(struct scx_sched *sch,
 	if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) {
 		s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
 
-		if (!ops_cpu_valid(sch, cpu, "in SCX_DSQ_LOCAL_ON dispatch verdict"))
+		if (!scx_cpu_valid(sch, cpu, "in SCX_DSQ_LOCAL_ON dispatch verdict"))
 			return find_global_dsq(sch, tcpu);
 
 		return &cpu_rq(cpu)->scx.local_dsq;
@@ -3260,7 +3260,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
 		this_rq()->scx.in_select_cpu = false;
 		p->scx.selected_cpu = cpu;
 		*ddsp_taskp = NULL;
-		if (ops_cpu_valid(sch, cpu, "from ops.select_cpu()"))
+		if (scx_cpu_valid(sch, cpu, "from ops.select_cpu()"))
 			return cpu;
 		else
 			return prev_cpu;
@@ -8679,7 +8679,7 @@ static void scx_kick_cpu(struct scx_sched *sch, s32 cpu, u64 flags)
 	struct rq *this_rq;
 	unsigned long irq_flags;
 
-	if (!ops_cpu_valid(sch, cpu, NULL))
+	if (!scx_cpu_valid(sch, cpu, NULL))
 		return;
 
 	local_irq_save(irq_flags);
@@ -8775,7 +8775,7 @@ __bpf_kfunc s32 scx_bpf_dsq_nr_queued(u64 dsq_id)
 	} else if ((dsq_id & SCX_DSQ_LOCAL_ON) == SCX_DSQ_LOCAL_ON) {
 		s32 cpu = dsq_id & SCX_DSQ_LOCAL_CPU_MASK;
 
-		if (ops_cpu_valid(sch, cpu, NULL)) {
+		if (scx_cpu_valid(sch, cpu, NULL)) {
 			ret = READ_ONCE(cpu_rq(cpu)->scx.local_dsq.nr);
 			goto out;
 		}
@@ -9164,7 +9164,7 @@ __bpf_kfunc u32 scx_bpf_cpuperf_cap(s32 cpu, const struct bpf_prog_aux *aux)
 	guard(rcu)();
 
 	sch = scx_prog_sched(aux);
-	if (likely(sch) && ops_cpu_valid(sch, cpu, NULL))
+	if (likely(sch) && scx_cpu_valid(sch, cpu, NULL))
 		return arch_scale_cpu_capacity(cpu);
 	else
 		return SCX_CPUPERF_ONE;
@@ -9192,7 +9192,7 @@ __bpf_kfunc u32 scx_bpf_cpuperf_cur(s32 cpu, const struct bpf_prog_aux *aux)
 	guard(rcu)();
 
 	sch = scx_prog_sched(aux);
-	if (likely(sch) && ops_cpu_valid(sch, cpu, NULL))
+	if (likely(sch) && scx_cpu_valid(sch, cpu, NULL))
 		return arch_scale_freq_capacity(cpu);
 	else
 		return SCX_CPUPERF_ONE;
@@ -9228,7 +9228,7 @@ __bpf_kfunc void scx_bpf_cpuperf_set(s32 cpu, u32 perf, const struct bpf_prog_au
 		return;
 	}
 
-	if (ops_cpu_valid(sch, cpu, NULL)) {
+	if (scx_cpu_valid(sch, cpu, NULL)) {
 		struct rq *rq = cpu_rq(cpu), *locked_rq = scx_locked_rq();
 		struct rq_flags rf;
 
@@ -9341,7 +9341,7 @@ __bpf_kfunc struct rq *scx_bpf_cpu_rq(s32 cpu, const struct bpf_prog_aux *aux)
 	if (unlikely(!sch))
 		return NULL;
 
-	if (!ops_cpu_valid(sch, cpu, NULL))
+	if (!scx_cpu_valid(sch, cpu, NULL))
 		return NULL;
 
 	if (!sch->warned_deprecated_rq) {
@@ -9398,7 +9398,7 @@ __bpf_kfunc struct task_struct *scx_bpf_cpu_curr(s32 cpu, const struct bpf_prog_
 	if (unlikely(!sch))
 		return NULL;
 
-	if (!ops_cpu_valid(sch, cpu, NULL))
+	if (!scx_cpu_valid(sch, cpu, NULL))
 		return NULL;
 
 	return rcu_dereference(cpu_rq(cpu)->curr);
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index c43d62d90e40..11d11ea6ca6b 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -917,7 +917,7 @@ static s32 select_cpu_from_kfunc(struct scx_sched *sch, struct task_struct *p,
 	bool we_locked = false;
 	s32 cpu;
 
-	if (!ops_cpu_valid(sch, prev_cpu, NULL))
+	if (!scx_cpu_valid(sch, prev_cpu, NULL))
 		return -EINVAL;
 
 	if (!check_builtin_idle_enabled(sch))
@@ -975,7 +975,7 @@ __bpf_kfunc s32 scx_bpf_cpu_node(s32 cpu, const struct bpf_prog_aux *aux)
 	guard(rcu)();
 
 	sch = scx_prog_sched(aux);
-	if (unlikely(!sch) || !ops_cpu_valid(sch, cpu, NULL))
+	if (unlikely(!sch) || !scx_cpu_valid(sch, cpu, NULL))
 		return NUMA_NO_NODE;
 	return cpu_to_node(cpu);
 }
@@ -1257,7 +1257,7 @@ __bpf_kfunc bool scx_bpf_test_and_clear_cpu_idle(s32 cpu, const struct bpf_prog_
 	if (!check_builtin_idle_enabled(sch))
 		return false;
 
-	if (!ops_cpu_valid(sch, cpu, NULL))
+	if (!scx_cpu_valid(sch, cpu, NULL))
 		return false;
 
 	return scx_idle_test_and_clear_cpu(cpu);
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index c35098668fb1..5e3b79963d41 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -1350,6 +1350,8 @@ DECLARE_PER_CPU(struct rq *, scx_locked_rq_state);
 
 int scx_kfunc_context_filter(const struct bpf_prog *prog, u32 kfunc_id);
 
+bool scx_cpu_valid(struct scx_sched *sch, s32 cpu, const char *where);
+
 /*
  * Return the rq currently locked from an scx callback, or NULL if no rq is
  * locked.
-- 
2.53.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread
* [PATCHSET v2 REPOST sched_ext/for-7.2] sched_ext: Topological CPU IDs and cid-form struct_ops
@ 2026-04-24 17:27 Tejun Heo
  2026-04-24 17:27 ` [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it Tejun Heo
  0 siblings, 1 reply; 4+ messages in thread
From: Tejun Heo @ 2026-04-24 17:27 UTC (permalink / raw)
  To: David Vernet, Andrea Righi, Changwoo Min
  Cc: sched-ext, emil, linux-kernel, Cheng-Yang Chou, Zhao Mengmeng,
	Tejun Heo

Hello,

Reposting v2 because the original send was not properly threaded -
each patch went out as a standalone top-level message. Content is
unchanged from the original v2.

Original v2: https://lore.kernel.org/r/20260424013220.2923402-1-tj@kernel.org

v2 of https://lore.kernel.org/r/20260421071945.3110084-1-tj@kernel.org

v2:
- Add ext_types.h as the first patch for early subsystem-wide type defs.
- cid: publish the cid tables with WRITE_ONCE / read with READ_ONCE;
  document the visibility contract.
- cid-kfuncs: NULL-guard scx_bpf_this_cid / scx_bpf_task_cid for
  TRACING/SYSCALL callers before any SCX sched has enabled.
- cid-struct-ops: use struct_size() for the set_cmask_scratch percpu
  alloc; cluster __scx_is_cid_type disable with __scx_enabled disable
  in scx_root_disable().
- cid-kfunc-filter: sync per-entry kfunc flags with each kfunc's
  primary BTF_ID_FLAGS() declaration (Zhao). pahole intersects flags
  across occurrences; omitting them drops the flags globally - the
  visible symptom was KF_IMPLICIT_ARGS getting cleared on
  scx_bpf_kick_cpu, leaking bpf_prog_aux into vmlinux.h.
- cmask: narrow to the helpers this series actually uses;
  cmask_copy_from_kernel contract and runtime guard.

This patchset introduces topological CPU IDs (cids) - dense,
topology-ordered cpu identifiers - and an alternative cid-form struct_ops
type that lets BPF schedulers operate in cid space directly.

Key pieces:

- cid space: scx_cid_init() walks nodes * LLCs * cores * threads and packs
  a dense cid mapping. The mapping can be overridden via
  scx_bpf_cid_override(). See "Topological CPU IDs" in ext_cid.h for the
  model.

- cmask: a base-windowed bitmap over cid space. Kernel and BPF helpers with
  identical semantics. Used by scx_qmap for per-task affinity and idle-cid
  tracking; meant to be the substrate for sub-sched cid allocation.

- bpf_sched_ext_ops_cid: a parallel struct_ops type whose callbacks take
  cids/cmasks instead of cpus/cpumasks. Kernel translates at the boundary
  via scx_cpu_arg() / scx_cpu_ret(); the two struct types share offsets up
  through @priv (verified by BUILD_BUG_ON) so the union view in scx_sched
  works without function-pointer casts. Sub-sched support is tied to
  cid-form: validate_ops() rejects cpu-form sub-scheds and cpu-form roots
  that expose sub_attach / sub_detach.

- cid-form kfuncs: scx_bpf_kick_cid, scx_bpf_cidperf_{cap,cur,set},
  scx_bpf_cid_curr, scx_bpf_task_cid, scx_bpf_this_cid,
  scx_bpf_nr_{cids,online_cids}, scx_bpf_cid_to_cpu, scx_bpf_cpu_to_cid.
  A cid-form program may not call cpu-only kfuncs (enforced at verifier
  load via scx_kfunc_context_filter); the reverse is intentionally
  permissive to ease migration.

- scx_qmap port: scx_qmap is converted to cid-form. It uses the cmask-based
  idle picker, per-task cid-space cpus_allowed, and cid-form kfuncs
  throughout. Sub-sched dispatching via scx_bpf_sub_dispatch() continues to
  work.

v2 re-tested on the 16-cpu QEMU: cid-form scx_qmap, cpu-form scx_simple,
cid<->cpu cycling, scx_qmap under stress-ng, hotplug auto-restart, and
sub-sched (root scx_qmap + cgroup-scoped scx_qmap child). Clean.

Based on sched_ext/for-7.2 (c2929bc21dce).

 0001-sched_ext-Add-ext_types.h-for-early-subsystem-wide-d.patch
 0002-sched_ext-Rename-ops_cpu_valid-to-scx_cpu_valid-and-.patch
 0003-sched_ext-Move-scx_exit-scx_error-and-friends-to-ext.patch
 0004-sched_ext-Shift-scx_kick_cpu-validity-check-to-scx_b.patch
 0005-sched_ext-Relocate-cpu_acquire-cpu_release-to-end-of.patch
 0006-sched_ext-Make-scx_enable-take-scx_enable_cmd.patch
 0007-sched_ext-Add-topological-CPU-IDs-cids.patch
 0008-sched_ext-Add-scx_bpf_cid_override-kfunc.patch
 0009-tools-sched_ext-Add-struct_size-helpers-to-common.bp.patch
 0010-sched_ext-Add-cmask-a-base-windowed-bitmap-over-cid-.patch
 0011-sched_ext-Add-cid-form-kfunc-wrappers-alongside-cpu-.patch
 0012-sched_ext-Add-bpf_sched_ext_ops_cid-struct_ops-type.patch
 0013-sched_ext-Forbid-cpu-form-kfuncs-from-cid-form-sched.patch
 0014-tools-sched_ext-scx_qmap-Restart-on-hotplug-instead-.patch
 0015-tools-sched_ext-scx_qmap-Add-cmask-based-idle-tracki.patch
 0016-tools-sched_ext-scx_qmap-Port-to-cid-form-struct_ops.patch
 0017-sched_ext-Require-cid-form-struct_ops-for-sub-sched-.patch

Git tree: git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git scx-cid-v2

 kernel/sched/build_policy.c              |   2 +
 kernel/sched/ext.c                       | 650 +++++++++++++++++++++++++----
 kernel/sched/ext_cid.c                   | 417 ++++++++++++++++++++
 kernel/sched/ext_cid.h                   | 164 ++++++++
 kernel/sched/ext_idle.c                  |   8 +-
 kernel/sched/ext_internal.h              | 203 +++++++---
 kernel/sched/ext_types.h                 | 104 +++++
 tools/sched_ext/include/scx/cid.bpf.h    | 597 ++++++++++++++++++++++++++++
 tools/sched_ext/include/scx/common.bpf.h |  23 ++
 tools/sched_ext/include/scx/compat.bpf.h |  24 ++
 tools/sched_ext/scx_qmap.bpf.c           | 306 ++++++++-------
 tools/sched_ext/scx_qmap.c               |  25 +-
 tools/sched_ext/scx_qmap.h               |   2 +-
 13 files changed, 2240 insertions(+), 285 deletions(-)

--
tejun

^ permalink raw reply	[flat|nested] 4+ messages in thread
* [PATCHSET v3 sched_ext/for-7.2] sched_ext: Topological CPU IDs and cid-form struct_ops
@ 2026-04-28 20:35 Tejun Heo
  2026-04-28 20:35 ` [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it Tejun Heo
  0 siblings, 1 reply; 4+ messages in thread
From: Tejun Heo @ 2026-04-28 20:35 UTC (permalink / raw)
  To: David Vernet, Andrea Righi, Changwoo Min
  Cc: sched-ext, Emil Tsalapatis, linux-kernel, Tejun Heo

Hello,

v3 (all from the Sashiko AI review at
https://sashiko.dev/#/patchset/20260424172721.3458520-1-tj%40kernel.org):

- cid: drop leaked cpus_read_lock() on scx_cid_init() failure;
  BUILD_BUG_ON tightened to NR_CPUS<=8192 to match the BPF cmask
  helpers' CMASK_MAX_WORDS coverage.
- bpf-struct-size: use offsetof() in struct_size() to match the
  kernel <linux/overflow.h> macro semantics (no inflation from
  trailing struct padding).
- cmask: cmask_copy_from_kernel() validates src->base==0 via
  probe-read; nr_bits check is bit-level rather than rounded-up
  word-count.
- cid-qmap-idle: qmap_init() refuses to load when scx_bpf_nr_cids()
  exceeds SCX_QMAP_MAX_CPUS; the task_ctx flex array would otherwise
  overflow into the next slab entry.

v2: https://lore.kernel.org/r/20260424172721.3458520-1-tj@kernel.org
v1: https://lore.kernel.org/r/20260421071945.3110084-1-tj@kernel.org

This patchset introduces topological CPU IDs (cids) - dense,
topology-ordered cpu identifiers - and an alternative cid-form struct_ops
type that lets BPF schedulers operate in cid space directly.

Key pieces:

- cid space: scx_cid_init() walks nodes * LLCs * cores * threads and packs
  a dense cid mapping. The mapping can be overridden via
  scx_bpf_cid_override(). See "Topological CPU IDs" in ext_cid.h for the
  model.

- cmask: a base-windowed bitmap over cid space. Kernel and BPF helpers with
  identical semantics. Used by scx_qmap for per-task affinity and idle-cid
  tracking; meant to be the substrate for sub-sched cid allocation.

- bpf_sched_ext_ops_cid: a parallel struct_ops type whose callbacks take
  cids/cmasks instead of cpus/cpumasks. Kernel translates at the boundary
  via scx_cpu_arg() / scx_cpu_ret(); the two struct types share offsets up
  through @priv (verified by BUILD_BUG_ON) so the union view in scx_sched
  works without function-pointer casts. Sub-sched support is tied to
  cid-form: validate_ops() rejects cpu-form sub-scheds and cpu-form roots
  that expose sub_attach / sub_detach.

- cid-form kfuncs: scx_bpf_kick_cid, scx_bpf_cidperf_{cap,cur,set},
  scx_bpf_cid_curr, scx_bpf_task_cid, scx_bpf_this_cid,
  scx_bpf_nr_{cids,online_cids}, scx_bpf_cid_to_cpu, scx_bpf_cpu_to_cid.
  A cid-form program may not call cpu-only kfuncs (enforced at verifier
  load via scx_kfunc_context_filter); the reverse is intentionally
  permissive to ease migration.

- scx_qmap port: scx_qmap is converted to cid-form. It uses the cmask-based
  idle picker, per-task cid-space cpus_allowed, and cid-form kfuncs
  throughout. Sub-sched dispatching via scx_bpf_sub_dispatch() continues to
  work.

v3 re-tested on the 16-cpu QEMU: cid-form scx_qmap under stress-ng plus
reload cycles, hotplug auto-restart, and sub-sched (root scx_qmap +
cgroup-scoped scx_qmap child). Clean.

Based on sched_ext/for-7.2 (4939721aad2e).

 0001-sched_ext-Add-ext_types.h-for-early-subsystem-wide-d.patch
 0002-sched_ext-Rename-ops_cpu_valid-to-scx_cpu_valid-and-.patch
 0003-sched_ext-Move-scx_exit-scx_error-and-friends-to-ext.patch
 0004-sched_ext-Shift-scx_kick_cpu-validity-check-to-scx_b.patch
 0005-sched_ext-Relocate-cpu_acquire-cpu_release-to-end-of.patch
 0006-sched_ext-Make-scx_enable-take-scx_enable_cmd.patch
 0007-sched_ext-Add-topological-CPU-IDs-cids.patch
 0008-sched_ext-Add-scx_bpf_cid_override-kfunc.patch
 0009-tools-sched_ext-Add-struct_size-helpers-to-common.bp.patch
 0010-sched_ext-Add-cmask-a-base-windowed-bitmap-over-cid-.patch
 0011-sched_ext-Add-cid-form-kfunc-wrappers-alongside-cpu-.patch
 0012-sched_ext-Add-bpf_sched_ext_ops_cid-struct_ops-type.patch
 0013-sched_ext-Forbid-cpu-form-kfuncs-from-cid-form-sched.patch
 0014-tools-sched_ext-scx_qmap-Restart-on-hotplug-instead-.patch
 0015-tools-sched_ext-scx_qmap-Add-cmask-based-idle-tracki.patch
 0016-tools-sched_ext-scx_qmap-Port-to-cid-form-struct_ops.patch
 0017-sched_ext-Require-cid-form-struct_ops-for-sub-sched-.patch

Git tree: git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git scx-cid-v3

 kernel/sched/build_policy.c              |   3 +
 kernel/sched/ext.c                       | 651 ++++++++++++++++++++++++++----
 kernel/sched/ext_cid.c                   | 409 +++++++++++++++++++
 kernel/sched/ext_cid.h                   | 164 ++++++++
 kernel/sched/ext_idle.c                  |   8 +-
 kernel/sched/ext_internal.h              | 205 +++++++---
 kernel/sched/ext_types.h                 | 104 +++++
 tools/sched_ext/include/scx/cid.bpf.h    | 667 +++++++++++++++++++++++++++++++
 tools/sched_ext/include/scx/common.bpf.h |  23 ++
 tools/sched_ext/include/scx/compat.bpf.h |  24 ++
 tools/sched_ext/scx_qmap.bpf.c           | 346 +++++++++-------
 tools/sched_ext/scx_qmap.c               |  70 +++-
 tools/sched_ext/scx_qmap.h               |   2 +-
 13 files changed, 2391 insertions(+), 285 deletions(-)

Thanks.

--
tejun

^ permalink raw reply	[flat|nested] 4+ messages in thread
* [PATCHSET v4 sched_ext/for-7.2] sched_ext: Topological CPU IDs and cid-form struct_ops
@ 2026-04-29 18:21 Tejun Heo
  2026-04-29 18:21 ` [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it Tejun Heo
  0 siblings, 1 reply; 4+ messages in thread
From: Tejun Heo @ 2026-04-29 18:21 UTC (permalink / raw)
  To: David Vernet, Andrea Righi, Changwoo Min
  Cc: Emil Tsalapatis, sched-ext, linux-kernel, Tejun Heo

Hello,

v4:
- cmask: bump CMASK_CAS_TRIES to (1U << 23) so abort fires only after
  seconds of real spinning, not on plausible contention. The kfunc
  slow-path Changwoo suggested would let BPF loops keep banging a
  contended cacheline indefinitely - on multi-socket SPRs that path
  can stall the machine into hard lockups, so failing hard is the
  right behavior. A follow-up patch will add a kfunc to bail the BPF
  CAS loops immediately when sch->aborting is set. Switch
  __builtin_ctzll() to the ctzll() wrapper for clang compat.
- cid-qmap-port: cid-shard handling was wired against a future kfunc
  signature that didn't make it into v3, leaving the snapshot broken.
  Drop the shard test plumbing for v4, match the 2-arg
  scx_bpf_cid_override(), bound nr_cpu_ids for the verifier, and
  rename mode 3 from bad-mono to bad-range. (Changwoo, Andrea)
- Rebased over the exit_cpu plumbing in for-7.2:
    scx-error-header: scx_exit() and scx_verror() are macros now;
    move both plus the underlying __scx_exit() / scx_vexit()
    declarations to ext_internal.h.
    cid-struct-ops: dump_cpu callsite shifted into scx_dump_cpu()
    helper; the scx_cpu_arg() wrap moved with it.

v3: https://lore.kernel.org/r/20260428203545.181052-1-tj@kernel.org
v2: https://lore.kernel.org/r/20260424172721.3458520-1-tj@kernel.org
v1: https://lore.kernel.org/r/20260421071945.3110084-1-tj@kernel.org

This patchset introduces topological CPU IDs (cids) - dense,
topology-ordered cpu identifiers - and an alternative cid-form struct_ops
type that lets BPF schedulers operate in cid space directly.

Key pieces:

- cid space: scx_cid_init() walks nodes * LLCs * cores * threads and packs
  a dense cid mapping. The mapping can be overridden via
  scx_bpf_cid_override(). See "Topological CPU IDs" in ext_cid.h for the
  model.

- cmask: a base-windowed bitmap over cid space. Kernel and BPF helpers with
  identical semantics. Used by scx_qmap for per-task affinity and idle-cid
  tracking; meant to be the substrate for sub-sched cid allocation.

- bpf_sched_ext_ops_cid: a parallel struct_ops type whose callbacks take
  cids/cmasks instead of cpus/cpumasks. Kernel translates at the boundary
  via scx_cpu_arg() / scx_cpu_ret(); the two struct types share offsets up
  through @priv (verified by BUILD_BUG_ON) so the union view in scx_sched
  works without function-pointer casts. Sub-sched support is tied to
  cid-form: validate_ops() rejects cpu-form sub-scheds and cpu-form roots
  that expose sub_attach / sub_detach.

- cid-form kfuncs: scx_bpf_kick_cid, scx_bpf_cidperf_{cap,cur,set},
  scx_bpf_cid_curr, scx_bpf_task_cid, scx_bpf_this_cid,
  scx_bpf_nr_{cids,online_cids}, scx_bpf_cid_to_cpu, scx_bpf_cpu_to_cid.
  A cid-form program may not call cpu-only kfuncs (enforced at verifier
  load via scx_kfunc_context_filter); the reverse is intentionally
  permissive to ease migration.

- scx_qmap port: scx_qmap is converted to cid-form. It uses the cmask-based
  idle picker, per-task cid-space cpus_allowed, and cid-form kfuncs
  throughout. Sub-sched dispatching via scx_bpf_sub_dispatch() continues to
  work.

v4 re-tested on the 16-cpu QEMU VM with the v3 cut (only the 17 cid
patches applied): basic load + stress, cid-override modes
(shuffle/bad-dup/bad-range), and three enable/disable cycles all clean.
No BUG/WARNING/panic in the dump.

Based on sched_ext/for-7.2 (ee8391ba1164).

  0001-sched_ext-Add-ext_types.h-for-early-subsystem-wide-d.patch
  0002-sched_ext-Rename-ops_cpu_valid-to-scx_cpu_valid-and-.patch
  0003-sched_ext-Move-scx_exit-scx_error-and-friends-to-ext.patch
  0004-sched_ext-Shift-scx_kick_cpu-validity-check-to-scx_b.patch
  0005-sched_ext-Relocate-cpu_acquire-cpu_release-to-end-of.patch
  0006-sched_ext-Make-scx_enable-take-scx_enable_cmd.patch
  0007-sched_ext-Add-topological-CPU-IDs-cids.patch
  0008-sched_ext-Add-scx_bpf_cid_override-kfunc.patch
  0009-tools-sched_ext-Add-struct_size-helpers-to-common.bp.patch
  0010-sched_ext-Add-cmask-a-base-windowed-bitmap-over-cid-.patch
  0011-sched_ext-Add-cid-form-kfunc-wrappers-alongside-cpu-.patch
  0012-sched_ext-Add-bpf_sched_ext_ops_cid-struct_ops-type.patch
  0013-sched_ext-Forbid-cpu-form-kfuncs-from-cid-form-sched.patch
  0014-tools-sched_ext-scx_qmap-Restart-on-hotplug-instead-.patch
  0015-tools-sched_ext-scx_qmap-Add-cmask-based-idle-tracki.patch
  0016-tools-sched_ext-scx_qmap-Port-to-cid-form-struct_ops.patch
  0017-sched_ext-Require-cid-form-struct_ops-for-sub-sched-.patch

Git tree: git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git scx-cid-v4

 kernel/sched/build_policy.c              |   3 +
 kernel/sched/ext.c                       | 660 ++++++++++++++++++++++++++----
 kernel/sched/ext_cid.c                   | 409 +++++++++++++++++++
 kernel/sched/ext_cid.h                   | 164 ++++++++
 kernel/sched/ext_idle.c                  |   8 +-
 kernel/sched/ext_internal.h              | 209 +++++++---
 kernel/sched/ext_types.h                 | 104 +++++
 tools/sched_ext/include/scx/cid.bpf.h    | 666 +++++++++++++++++++++++++++++++
 tools/sched_ext/include/scx/common.bpf.h |  23 ++
 tools/sched_ext/include/scx/compat.bpf.h |  24 ++
 tools/sched_ext/scx_qmap.bpf.c           | 350 +++++++++-------
 tools/sched_ext/scx_qmap.c               |  57 ++-
 tools/sched_ext/scx_qmap.h               |   2 +-
 13 files changed, 2387 insertions(+), 292 deletions(-)

Thanks.

--
tejun

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-04-29 18:21 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-04-24  1:32 [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it Tejun Heo
  -- strict thread matches above, loose matches on Subject: below --
2026-04-24 17:27 [PATCHSET v2 REPOST sched_ext/for-7.2] sched_ext: Topological CPU IDs and cid-form struct_ops Tejun Heo
2026-04-24 17:27 ` [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it Tejun Heo
2026-04-28 20:35 [PATCHSET v3 sched_ext/for-7.2] sched_ext: Topological CPU IDs and cid-form struct_ops Tejun Heo
2026-04-28 20:35 ` [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it Tejun Heo
2026-04-29 18:21 [PATCHSET v4 sched_ext/for-7.2] sched_ext: Topological CPU IDs and cid-form struct_ops Tejun Heo
2026-04-29 18:21 ` [PATCH 02/17] sched_ext: Rename ops_cpu_valid() to scx_cpu_valid() and expose it Tejun Heo

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.