From: Tejun Heo <tj@kernel.org>
To: David Vernet <void@manifault.com>,
Andrea Righi <arighi@nvidia.com>,
Changwoo Min <changwoo@igalia.com>,
Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Martin KaFai Lau <martin.lau@linux.dev>,
Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>,
Will Deacon <will@kernel.org>, Thomas Gleixner <tglx@kernel.org>,
Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
Dave Hansen <dave.hansen@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
David Hildenbrand <david@kernel.org>,
Mike Rapoport <rppt@kernel.org>,
Emil Tsalapatis <emil@etsalapatis.com>,
sched-ext@lists.linux.dev, bpf@vger.kernel.org, x86@kernel.org,
linux-arm-kernel@lists.infradead.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org, Tejun Heo <tj@kernel.org>
Subject: [PATCH 6/8] sched_ext: Require an arena for cid-form schedulers
Date: Sun, 17 May 2026 11:12:30 -1000 [thread overview]
Message-ID: <20260517211232.1670594-7-tj@kernel.org> (raw)
In-Reply-To: <20260517211232.1670594-1-tj@kernel.org>
Upcoming patches will let the kernel place arena-resident scratch shared
with the BPF program (e.g. per-CPU set_cmask cmask) so the BPF side can
dereference it directly via __arena pointers, replacing the current
cmask_copy_from_kernel() probe-read loop. That requires each cid-form
scheduler to expose its arena to the kernel. Kernel- side accesses are
recovered by the per-arena scratch-page mechanism.
bpf_scx_reg_cid() walks the struct_ops member progs via
bpf_struct_ops_for_each_prog() and reads each prog's arena via
bpf_prog_arena(). The verifier enforces one arena per program, so each
member prog contributes at most one arena. All non-NULL contributions must
match and at least one member prog must use an arena. The map ref is held on
scx_sched and dropped on sched destroy. cpu-form schedulers (bpf_scx_reg)
are unchanged - no arena requirement.
Signed-off-by: Tejun Heo <tj@kernel.org>
---
kernel/sched/ext.c | 56 ++++++++++++++++++++++++++++++++++++-
kernel/sched/ext_internal.h | 8 ++++++
2 files changed, 63 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 64f8a096f133..94aab7037329 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -5009,6 +5009,8 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
rhashtable_free_and_destroy(&sch->dsq_hash, NULL, NULL);
free_exit_info(sch->exit_info);
+ if (sch->arena_map)
+ bpf_map_put(sch->arena_map);
kfree(sch);
}
@@ -6732,6 +6734,7 @@ struct scx_enable_cmd {
struct sched_ext_ops_cid *ops_cid;
};
bool is_cid_type;
+ struct bpf_map *arena_map; /* arena ref to transfer to sch */
int ret;
};
@@ -6898,6 +6901,15 @@ static struct scx_sched *scx_alloc_and_add_sched(struct scx_enable_cmd *cmd,
return ERR_PTR(ret);
}
#endif /* CONFIG_EXT_SUB_SCHED */
+
+ /*
+ * Consume the arena_map ref bpf_scx_reg_cid() took. Defer to here so
+ * earlier failure paths leave cmd->arena_map set and bpf_scx_reg_cid
+ * drops the ref. After this point, sch owns the ref and any cleanup
+ * runs through scx_sched_free_rcu_work() which puts it.
+ */
+ sch->arena_map = cmd->arena_map;
+ cmd->arena_map = NULL;
return sch;
#ifdef CONFIG_EXT_SUB_SCHED
@@ -7884,11 +7896,53 @@ static int bpf_scx_reg(void *kdata, struct bpf_link *link)
return scx_enable(&cmd, link);
}
+struct scx_arena_scan {
+ struct bpf_map *arena;
+ int err;
+};
+
+/*
+ * The verifier enforces one arena per BPF program, so each struct_ops
+ * member prog contributes at most one arena via bpf_prog_arena().
+ * Require all non-NULL contributions to match.
+ */
+static int scx_arena_scan_prog(struct bpf_prog *prog, void *data)
+{
+ struct scx_arena_scan *s = data;
+ struct bpf_map *arena = bpf_prog_arena(prog);
+
+ if (!arena)
+ return 0;
+ if (s->arena && s->arena != arena) {
+ s->err = -EINVAL;
+ return 1;
+ }
+ s->arena = arena;
+ return 0;
+}
+
static int bpf_scx_reg_cid(void *kdata, struct bpf_link *link)
{
struct scx_enable_cmd cmd = { .ops_cid = kdata, .is_cid_type = true };
+ struct scx_arena_scan scan = {};
+ int ret;
- return scx_enable(&cmd, link);
+ bpf_struct_ops_for_each_prog(kdata, scx_arena_scan_prog, &scan);
+ if (scan.err) {
+ pr_err("sched_ext: cid-form scheduler uses multiple arena maps\n");
+ return scan.err;
+ }
+ if (!scan.arena) {
+ pr_err("sched_ext: cid-form scheduler must use a BPF arena map\n");
+ return -EINVAL;
+ }
+
+ bpf_map_inc(scan.arena);
+ cmd.arena_map = scan.arena;
+ ret = scx_enable(&cmd, link);
+ if (cmd.arena_map) /* not consumed by scx_alloc_and_add_sched() */
+ bpf_map_put(cmd.arena_map);
+ return ret;
}
static void bpf_scx_unreg(void *kdata, struct bpf_link *link)
diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
index 7258aea94b9f..d40cfd29ddaa 100644
--- a/kernel/sched/ext_internal.h
+++ b/kernel/sched/ext_internal.h
@@ -1111,6 +1111,14 @@ struct scx_sched {
struct sched_ext_ops_cid ops_cid;
};
bool is_cid_type; /* true if registered via bpf_sched_ext_ops_cid */
+
+ /*
+ * Arena map auto-discovered from member progs at struct_ops attach.
+ * cid-form schedulers must use exactly one arena across all member
+ * progs. NULL on cpu-form.
+ */
+ struct bpf_map *arena_map;
+
DECLARE_BITMAP(has_op, SCX_OPI_END);
/*
--
2.54.0
next prev parent reply other threads:[~2026-05-17 21:12 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-17 21:12 [PATCHSET v2 sched_ext/for-7.2] bpf/arena: Direct kernel-side access Tejun Heo
2026-05-17 21:12 ` [PATCH 1/8] mm: Add ptep_try_install() for lockless empty-slot installs Tejun Heo
2026-05-17 21:12 ` [PATCH 2/8] bpf: Recover arena kernel faults with scratch page Tejun Heo
2026-05-17 21:12 ` [PATCH 3/8] bpf: Add sleepable variant of bpf_arena_alloc_pages for kernel callers Tejun Heo
2026-05-17 21:12 ` [PATCH 4/8] bpf: Add bpf_struct_ops_for_each_prog() Tejun Heo
2026-05-17 21:12 ` [PATCH 5/8] bpf/arena: Add bpf_arena_map_kern_vm_start() and bpf_prog_arena() Tejun Heo
2026-05-17 21:12 ` Tejun Heo [this message]
2026-05-17 21:12 ` [PATCH 7/8] sched_ext: Sub-allocator over kernel-claimed BPF arena pages Tejun Heo
2026-05-17 21:12 ` [PATCH 8/8] sched_ext: Convert ops.set_cmask() to arena-resident cmask Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260517211232.1670594-7-tj@kernel.org \
--to=tj@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=andrii@kernel.org \
--cc=arighi@nvidia.com \
--cc=ast@kernel.org \
--cc=bp@alien8.de \
--cc=bpf@vger.kernel.org \
--cc=catalin.marinas@arm.com \
--cc=changwoo@igalia.com \
--cc=daniel@iogearbox.net \
--cc=dave.hansen@linux.intel.com \
--cc=david@kernel.org \
--cc=emil@etsalapatis.com \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=martin.lau@linux.dev \
--cc=memxor@gmail.com \
--cc=mingo@redhat.com \
--cc=rppt@kernel.org \
--cc=sched-ext@lists.linux.dev \
--cc=tglx@kernel.org \
--cc=void@manifault.com \
--cc=will@kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox