From: Tejun Heo <tj@kernel.org>
To: David Vernet <void@manifault.com>,
Andrea Righi <arighi@nvidia.com>,
Changwoo Min <changwoo@igalia.com>
Cc: Emil Tsalapatis <emil@etsalapatis.com>,
sched-ext@lists.linux.dev, linux-kernel@vger.kernel.org,
Tejun Heo <tj@kernel.org>
Subject: [PATCH sched_ext/for-7.2] sched_ext: Add scx_arena_to_kaddr() / scx_kaddr_to_arena()
Date: Thu, 4 Jun 2026 00:02:23 -1000 [thread overview]
Message-ID: <20260604100223.3831303-1-tj@kernel.org> (raw)
Translating between a BPF-arena pointer and its kernel-side address is just
an addition or subtraction of the arena's kern_vm start. More such
translations are coming, so cache that start on scx_sched as
@arena_kern_base at arena attach and wrap both directions in helpers,
scx_arena_to_kaddr() and scx_kaddr_to_arena(). Convert the existing
open-coded subtraction in scx_call_op_set_cpumask() to use
scx_kaddr_to_arena().
Signed-off-by: Tejun Heo <tj@kernel.org>
---
kernel/sched/ext.c | 19 +++++++++----------
kernel/sched/ext_internal.h | 36 +++++++++++++++++++++++++++++++-----
2 files changed, 40 insertions(+), 15 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 62769abb553a..6567f626b3f0 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -622,19 +622,13 @@ static inline void scx_call_op_set_cpumask(struct scx_sched *sch, struct rq *rq,
if (scx_is_cid_type()) {
struct scx_cmask *kern_va = *this_cpu_ptr(sch->set_cmask_scratch);
- unsigned long uaddr = (unsigned long)kern_va;
-
- /* arena.o, which defines these, is built only on MMU && 64BIT */
-#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
- uaddr -= bpf_arena_map_kern_vm_start(sch->arena_map);
-#endif
/*
- * Build the per-CPU arena cmask and hand BPF the uaddr. Caller
- * holds the rq lock with IRQs disabled, which makes us the sole
- * user of the scratch area.
+ * Build the per-CPU arena cmask and hand BPF its arena address.
+ * Caller holds the rq lock with IRQs disabled, which makes us
+ * the sole user of the scratch area.
*/
scx_cpumask_to_cmask(cpumask, kern_va);
- sch->ops_cid.set_cmask(task, (struct scx_cmask *)uaddr);
+ sch->ops_cid.set_cmask(task, scx_kaddr_to_arena(sch, kern_va));
} else {
sch->ops.set_cpumask(task, cpumask);
}
@@ -6977,6 +6971,11 @@ static struct scx_sched *scx_alloc_and_add_sched(struct scx_enable_cmd *cmd,
* runs through scx_sched_free_rcu_work() which puts it.
*/
sch->arena_map = cmd->arena_map;
+ /* BPF arena is only available on MMU && 64BIT */
+#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
+ if (sch->arena_map)
+ sch->arena_kern_base = bpf_arena_map_kern_vm_start(sch->arena_map);
+#endif
cmd->arena_map = NULL;
return sch;
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index 9bb65367f510..b04701190b23 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -1118,17 +1118,18 @@ struct scx_sched {
* progs. NULL on cpu-form.
*
* @arena_pool sub-allocates @arena_map. Each gen_pool chunk is added
- * at the kernel-side mapping address. Grows on demand and pages are
- * not released until sched destroy.
+ * at the kernel-side mapping address. @arena_kern_base is the start
+ * of the arena's kern_vm range. See scx_arena_to_kaddr() and
+ * scx_kaddr_to_arena().
*/
struct bpf_map *arena_map;
struct gen_pool *arena_pool;
+ uintptr_t arena_kern_base;
/*
* Per-CPU arena cmask used by scx_call_op_set_cpumask() to hand a cmask
- * to ops_cid.set_cmask(). The kernel writes through the stored kern_va;
- * the BPF-arena uaddr handed to BPF is recovered by subtracting the
- * arena's kern_vm_start.
+ * to ops_cid.set_cmask(). The kernel writes through the stored kern_va
+ * and hands BPF its arena pointer via scx_kaddr_to_arena().
*/
struct scx_cmask * __percpu *set_cmask_scratch;
@@ -1205,6 +1206,31 @@ struct scx_sched {
struct scx_sched *ancestors[];
};
+/**
+ * scx_arena_to_kaddr - Translate a BPF-arena pointer to its kernel address
+ * @sch: scheduler whose arena hosts @bpf_ptr
+ * @bpf_ptr: BPF-arena pointer; only the low 32 bits are used
+ *
+ * The (u32) cast normalizes any input into the arena's 4 GiB kern_vm range.
+ * Combined with scratch-page fault recovery, this makes the returned pointer
+ * safe to dereference up to GUARD_SZ / 2 past the intended object. Accesses
+ * larger than GUARD_SZ / 2 must be explicitly bounds-checked.
+ */
+static inline void *scx_arena_to_kaddr(struct scx_sched *sch, const void *bpf_ptr)
+{
+ return (void *)(sch->arena_kern_base + (u32)(uintptr_t)bpf_ptr);
+}
+
+/**
+ * scx_kaddr_to_arena - Translate a kernel arena address to its BPF form
+ * @sch: scheduler whose arena hosts @kaddr
+ * @kaddr: kernel-side arena address, supplied by trusted kernel code
+ */
+static inline void *scx_kaddr_to_arena(struct scx_sched *sch, const void *kaddr)
+{
+ return (void *)((uintptr_t)kaddr - sch->arena_kern_base);
+}
+
enum scx_wake_flags {
/* expose select WF_* flags as enums */
SCX_WAKE_FORK = WF_FORK,
--
2.54.0
next reply other threads:[~2026-06-04 10:02 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-04 10:02 Tejun Heo [this message]
2026-06-05 19:18 ` [PATCH sched_ext/for-7.2] sched_ext: Add scx_arena_to_kaddr() / scx_kaddr_to_arena() Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260604100223.3831303-1-tj@kernel.org \
--to=tj@kernel.org \
--cc=arighi@nvidia.com \
--cc=changwoo@igalia.com \
--cc=emil@etsalapatis.com \
--cc=linux-kernel@vger.kernel.org \
--cc=sched-ext@lists.linux.dev \
--cc=void@manifault.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.