All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH sched_ext/for-7.2] sched_ext: Add scx_arena_to_kaddr() / scx_kaddr_to_arena()
@ 2026-06-04 10:02 Tejun Heo
  2026-06-05 19:18 ` Tejun Heo
  0 siblings, 1 reply; 2+ messages in thread
From: Tejun Heo @ 2026-06-04 10:02 UTC (permalink / raw)
  To: David Vernet, Andrea Righi, Changwoo Min
  Cc: Emil Tsalapatis, sched-ext, linux-kernel, Tejun Heo

Translating between a BPF-arena pointer and its kernel-side address is just
an add or subtract of the arena's kern_vm start. More such translations are
coming, so cache that start on scx_sched as @arena_kern_base at arena attach
and wrap both directions in scx_arena_to_kaddr() and scx_kaddr_to_arena().
Convert the existing open-coded subtraction in scx_call_op_set_cpumask() to
use scx_kaddr_to_arena().

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/sched/ext.c          | 19 +++++++++----------
 kernel/sched/ext_internal.h | 36 +++++++++++++++++++++++++++++++-----
 2 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 62769abb553a..6567f626b3f0 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -622,19 +622,13 @@ static inline void scx_call_op_set_cpumask(struct scx_sched *sch, struct rq *rq,
 
 	if (scx_is_cid_type()) {
 		struct scx_cmask *kern_va = *this_cpu_ptr(sch->set_cmask_scratch);
-		unsigned long uaddr = (unsigned long)kern_va;
-
-		/* arena.o, which defines these, is built only on MMU && 64BIT */
-#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
-		uaddr -= bpf_arena_map_kern_vm_start(sch->arena_map);
-#endif
 		/*
-		 * Build the per-CPU arena cmask and hand BPF the uaddr. Caller
-		 * holds the rq lock with IRQs disabled, which makes us the sole
-		 * user of the scratch area.
+		 * Build the per-CPU arena cmask and hand BPF its arena address.
+		 * Caller holds the rq lock with IRQs disabled, which makes us
+		 * the sole user of the scratch area.
 		 */
 		scx_cpumask_to_cmask(cpumask, kern_va);
-		sch->ops_cid.set_cmask(task, (struct scx_cmask *)uaddr);
+		sch->ops_cid.set_cmask(task, scx_kaddr_to_arena(sch, kern_va));
 	} else {
 		sch->ops.set_cpumask(task, cpumask);
 	}
@@ -6977,6 +6971,11 @@ static struct scx_sched *scx_alloc_and_add_sched(struct scx_enable_cmd *cmd,
 	 * runs through scx_sched_free_rcu_work() which puts it.
 	 */
 	sch->arena_map = cmd->arena_map;
+	/* BPF arena is only available on MMU && 64BIT */
+#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
+	if (sch->arena_map)
+		sch->arena_kern_base = bpf_arena_map_kern_vm_start(sch->arena_map);
+#endif
 	cmd->arena_map = NULL;
 	return sch;
 
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index 9bb65367f510..b04701190b23 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -1118,17 +1118,18 @@ struct scx_sched {
 	 * progs. NULL on cpu-form.
 	 *
 	 * @arena_pool sub-allocates @arena_map. Each gen_pool chunk is added
-	 * at the kernel-side mapping address. Grows on demand and pages are
-	 * not released until sched destroy.
+	 * at the kernel-side mapping address. @arena_kern_base is the start
+	 * of the arena's kern_vm range. See scx_arena_to_kaddr() and
+	 * scx_kaddr_to_arena().
 	 */
 	struct bpf_map		*arena_map;
 	struct gen_pool		*arena_pool;
+	uintptr_t		arena_kern_base;
 
 	/*
 	 * Per-CPU arena cmask used by scx_call_op_set_cpumask() to hand a cmask
-	 * to ops_cid.set_cmask(). The kernel writes through the stored kern_va;
-	 * the BPF-arena uaddr handed to BPF is recovered by subtracting the
-	 * arena's kern_vm_start.
+	 * to ops_cid.set_cmask(). The kernel writes through the stored kern_va
+	 * and hands BPF its arena pointer via scx_kaddr_to_arena().
 	 */
 	struct scx_cmask * __percpu *set_cmask_scratch;
 
@@ -1205,6 +1206,31 @@ struct scx_sched {
 	struct scx_sched	*ancestors[];
 };
 
+/**
+ * scx_arena_to_kaddr - Translate a BPF-arena pointer to its kernel address
+ * @sch: scheduler whose arena hosts @bpf_ptr
+ * @bpf_ptr: BPF-arena pointer, only the low 32 bits are used
+ *
+ * The (u32) cast normalizes any input into the arena's 4 GiB kern_vm range,
+ * which combined with scratch-page fault recovery makes the returned pointer
+ * safe to dereference up to GUARD_SZ / 2 past the intended object. Accesses
+ * larger than GUARD_SZ / 2 must be explicitly bounds-checked.
+ */
+static inline void *scx_arena_to_kaddr(struct scx_sched *sch, const void *bpf_ptr)
+{
+	return (void *)(sch->arena_kern_base + (u32)(uintptr_t)bpf_ptr);
+}
+
+/**
+ * scx_kaddr_to_arena - Translate a kernel arena address to its BPF form
+ * @sch: scheduler whose arena hosts @kaddr
+ * @kaddr: kernel-side arena address, supplied by trusted kernel code
+ */
+static inline void *scx_kaddr_to_arena(struct scx_sched *sch, const void *kaddr)
+{
+	return (void *)((uintptr_t)kaddr - sch->arena_kern_base);
+}
+
 enum scx_wake_flags {
 	/* expose select WF_* flags as enums */
 	SCX_WAKE_FORK		= WF_FORK,
-- 
2.54.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-06-05 19:18 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-04 10:02 [PATCH sched_ext/for-7.2] sched_ext: Add scx_arena_to_kaddr() / scx_kaddr_to_arena() Tejun Heo
2026-06-05 19:18 ` Tejun Heo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.