From: "Emil Tsalapatis" <emil@etsalapatis.com>
To: "Zhao Mengmeng" <zhaomzhao@126.com>, <tj@kernel.org>,
<void@manifault.com>, <arighi@nvidia.com>, <changwoo@igalia.com>,
<nathan@kernel.org>, <nick.desaulniers+lkml@gmail.com>,
<morbo@google.com>, <justinstitt@google.com>
Cc: <sched-ext@lists.linux.dev>, <linux-kernel@vger.kernel.org>,
<bpf@vger.kernel.org>, <llvm@lists.linux.dev>,
<zhaomengmeng@kylinos.cn>
Subject: Re: [PATCH v2 2/2] tools/sched_ext: scx_sdt: Fix BPF verifier rejection on older LLVMs
Date: Mon, 09 Mar 2026 12:36:40 -0400 [thread overview]
Message-ID: <DGYECIPVC266.25320R7256YJT@etsalapatis.com> (raw)
In-Reply-To: <20260309022847.106150-3-zhaomzhao@126.com>
On Sun Mar 8, 2026 at 10:28 PM EDT, Zhao Mengmeng wrote:
> From: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
>
> Under Clang 17/18, when running scx_sdt scheduler, it fails with:
>
> libbpf: prog 'sdt_init_task': BPF program load failed: -EACCES
> libbpf: prog 'sdt_init_task': -- BEGIN PROG LOAD LOG --
> ...
> ; desc = desc_find_empty(alloc->root, &idx); @ scx_sdt.bpf.c:479
> 43: (79) r8 = *(u64 *)(r6 +32) ; frame1: R6=map_value(map=scx_sdt.bss,ks=4,vs=200,off=120) R8=scalar()
> ; for (level = zero; level < SDT_TASK_LEVELS && can_loop; level++) { @ scx_sdt.bpf.c:407
> 44: (e5) may_goto pc+51
> ; idx |= pos; @ scx_sdt.bpf.c:418
> 96: (bf) r7 = r2 ; frame1: R2=0 R7=0
> 97: (bf) r1 = r10 ; frame1: R1=fp0 R10=fp0
> ; @ scx_sdt.bpf.c:0
> 98: (07) r1 += -56 ; frame1: R1=fp-56
> ; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
> 99: (b4) w2 = 0 ; frame1: R2=0
> 100: (b4) w3 = 3 ; frame1: R3=3
> 101: (85) call bpf_iter_num_new#82234 ; frame1: R0=scalar() fp-56=iter_num(ref_id=2,state=active,depth=0)
> 102: (18) r9 = 0x1ffffffffffffff8 ; frame1: R9=0x1ffffffffffffff8
> 104: (bf) r1 = r10 ; frame1: R1=fp0 R10=fp0
> ; @ scx_sdt.bpf.c:0
> 105: (07) r1 += -56 ; frame1: R1=fp-56
> ; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
> 106: (85) call bpf_iter_num_next#82235 ; frame1: R0=0 fp-56=iter_num(ref_id=2,state=drained,depth=0)
> 107: (15) if r0 == 0x0 goto pc+29 ; frame1: R0=0
> ; if (tmp->nr_free > 0) @ scx_sdt.bpf.c:456
> 137: (bf) r1 = r10 ; frame1: R1=fp0 R10=fp0
> ; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
> 138: (07) r1 += -56 ; frame1: R1=fp-56
> 139: (85) call bpf_iter_num_destroy#82232 ; frame1:
> 140: (b7) r9 = 0 ; frame1: R9=0
> ; if (unlikely(desc == NULL)) { @ scx_sdt.bpf.c:480
> 141: (15) if r8 == 0x0 goto pc+15 ; frame1: R8=scalar(umin=1)
> ; chunk = desc->chunk; @ scx_sdt.bpf.c:485
> 142: (79) r4 = *(u64 *)(r8 +72)
> R8 invalid mem access 'scalar'
>
> The reason is that these older compilers lack native support for
> __BPF_FEATURE_ADDR_SPACE_CAST, so the __arena macro is defined as empty.
>
> Fix it by adding cast_kern() before dereferencing variables with the __arena tag.
>
I am not sure if we want to support older Clang versions at this point.
This issue is fixed as of Clang 19, and adding the macros back in makes the
code confusing for those who use it as a starting point. And while
it would be nice to support older Clang versions, we already don't
handle Clang 15/16, which lack arena support. So it's not
unreasonable to say that Clang 17/18 are also incompatible with this
example.
On the other hand, maybe the extra compatibility is worth re-adding
cast_kern/cast_user to the code. I am slightly in favor of keeping it
as-is to avoid churn, but can easily see why we'd go the other way.
@htejun WDYT?
> Signed-off-by: Zhao Mengmeng <zhaomengmeng@kylinos.cn>
> ---
> tools/sched_ext/scx_sdt.bpf.c | 20 +++++++++++++++++++-
> 1 file changed, 19 insertions(+), 1 deletion(-)
>
> diff --git a/tools/sched_ext/scx_sdt.bpf.c b/tools/sched_ext/scx_sdt.bpf.c
> index 31b09958e8d5..caacc55bd7a5 100644
> --- a/tools/sched_ext/scx_sdt.bpf.c
> +++ b/tools/sched_ext/scx_sdt.bpf.c
> @@ -148,6 +148,7 @@ static sdt_desc_t *scx_alloc_chunk(void)
>
> out = desc;
>
> + cast_kern(desc);
> desc->nr_free = SDT_TASK_ENTS_PER_CHUNK;
> desc->chunk = chunk;
>
> @@ -244,6 +245,7 @@ int mark_nodes_avail(sdt_desc_t *lv_desc[SDT_TASK_LEVELS], __u64 lv_pos[SDT_TASK
> /* Only propagate upwards if we are the parent's only free chunk. */
> desc = lv_desc[level];
>
> + cast_kern(desc);
> ret = set_idx_state(desc, lv_pos[level], false);
> if (unlikely(ret != 0))
> return ret;
> @@ -298,20 +300,26 @@ int scx_alloc_free_idx(struct scx_allocator *alloc, __u64 idx)
> if (level == SDT_TASK_LEVELS - 1)
> break;
>
> + cast_kern(desc);
> chunk = desc->chunk;
>
> + cast_kern(chunk);
> desc_children = (sdt_desc_t * __arena *)chunk->descs;
> + cast_kern(desc_children);
> desc = desc_children[pos];
>
> if (unlikely(!desc))
> return -EINVAL;
> }
>
> + cast_kern(desc);
> chunk = desc->chunk;
>
> pos = idx & mask;
> + cast_kern(chunk);
> data = chunk->data[pos];
> if (likely(data)) {
> + cast_kern(data);
> *data = (struct sdt_data) {
> .tid.genn = data->tid.genn + 1,
> };
> @@ -378,6 +386,7 @@ __u64 chunk_find_empty(sdt_desc_t __arg_arena *desc)
> __u64 freeslots;
> __u64 i;
>
> + cast_kern(desc);
> for (i = 0; i < SDT_TASK_CHUNK_BITMAP_U64S; i++) {
> freeslots = ~desc->allocated[i];
> if (freeslots == (__u64)0)
> @@ -426,9 +435,12 @@ static sdt_desc_t * desc_find_empty(sdt_desc_t *desc, __u64 *idxp)
> break;
>
> /* Allocate an internal node if necessary. */
> + cast_kern(desc);
> chunk = desc->chunk;
> + cast_kern(chunk);
> desc_children = (sdt_desc_t * __arena *)chunk->descs;
>
> + cast_kern(desc_children);
> desc = desc_children[pos];
> if (!desc) {
> desc = scx_alloc_chunk();
> @@ -448,6 +460,7 @@ static sdt_desc_t * desc_find_empty(sdt_desc_t *desc, __u64 *idxp)
> level = SDT_TASK_LEVELS - 1 - u;
> tmp = lv_desc[level];
>
> + cast_kern(tmp);
> ret = set_idx_state(tmp, lv_pos[level], true);
> if (ret != 0)
> break;
> @@ -482,10 +495,12 @@ void __arena *scx_alloc(struct scx_allocator *alloc)
> return NULL;
> }
>
> + cast_kern(desc);
> chunk = desc->chunk;
>
> /* Populate the leaf node if necessary. */
> pos = idx & (SDT_TASK_ENTS_PER_CHUNK - 1);
> + cast_kern(chunk);
> data = chunk->data[pos];
> if (!data) {
> data = scx_alloc_from_pool(&alloc->pool);
> @@ -503,10 +518,12 @@ void __arena *scx_alloc(struct scx_allocator *alloc)
> alloc_stats.alloc_ops += 1;
> alloc_stats.active_allocs += 1;
>
> + cast_kern(data);
> data->tid.idx = idx;
>
> bpf_spin_unlock(&alloc_lock);
>
> + cast_user(data);
> return data;
> }
>
> @@ -544,9 +561,10 @@ void __arena *scx_task_alloc(struct task_struct *p)
> if (unlikely(!data))
> return NULL;
>
> + mval->data = data;
> + cast_kern(data);
> mval->tid = data->tid;
> mval->tptr = (__u64) p;
> - mval->data = data;
>
> return (void __arena *)data->payload;
> }
next prev parent reply other threads:[~2026-03-09 16:36 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-09 2:28 [PATCH v2 0/2] sched_ext cleanup and toolchain compatibility fixes Zhao Mengmeng
2026-03-09 2:28 ` [PATCH v2 1/2] sched_ext: remove SCX_OPS_HAS_CGROUP_WEIGHT Zhao Mengmeng
2026-03-09 2:28 ` [PATCH v2 2/2] tools/sched_ext: scx_sdt: Fix BPF verifier rejection on older LLVMs Zhao Mengmeng
2026-03-09 16:36 ` Emil Tsalapatis [this message]
2026-03-09 16:43 ` Alexei Starovoitov
2026-03-09 16:44 ` Tejun Heo
2026-03-09 16:58 ` Zhao mengmeng
2026-03-09 19:49 ` [PATCH v2 0/2] sched_ext: cleanup and toolchain compatibility fixes Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=DGYECIPVC266.25320R7256YJT@etsalapatis.com \
--to=emil@etsalapatis.com \
--cc=arighi@nvidia.com \
--cc=bpf@vger.kernel.org \
--cc=changwoo@igalia.com \
--cc=justinstitt@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=llvm@lists.linux.dev \
--cc=morbo@google.com \
--cc=nathan@kernel.org \
--cc=nick.desaulniers+lkml@gmail.com \
--cc=sched-ext@lists.linux.dev \
--cc=tj@kernel.org \
--cc=void@manifault.com \
--cc=zhaomengmeng@kylinos.cn \
--cc=zhaomzhao@126.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox