* Re: [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack
2026-02-16 15:22 [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack adubey
@ 2026-02-16 10:50 ` bot+bpf-ci
2026-02-16 15:22 ` [PATCH 2/2] selftests/bpf: Enable private stack tests for powerpc64 adubey
` (3 subsequent siblings)
4 siblings, 0 replies; 11+ messages in thread
From: bot+bpf-ci @ 2026-02-16 10:50 UTC (permalink / raw)
To: adubey, bpf
Cc: hbathini, linuxppc-dev, ast, daniel, andrii, martin.lau, eddyz87,
yonghong.song, clm, ihor.solodrai, chleroy, adubey, ast, andrii,
daniel, martin.lau, eddyz87, yonghong.song, clm, ihor.solodrai
[-- Attachment #1: Type: text/plain, Size: 2617 bytes --]
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
[ ... ]
> @@ -140,6 +173,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> struct codegen_context cgctx;
> int pass;
> int flen;
> + int priv_stack_alloc_size;
> + void __percpu *priv_stack_ptr = NULL;
> struct bpf_binary_header *fhdr = NULL;
> struct bpf_binary_header *hdr = NULL;
> struct bpf_prog *org_fp = fp;
[ ... ]
> }
>
> + if (!priv_stack_ptr && fp->aux->jits_use_priv_stack) {
On extra_pass (when jit_data->addrs is non-NULL), the local
priv_stack_ptr is still NULL because it was initialized to NULL above
and is never loaded from fp->aux->priv_stack_ptr. This means the
allocation will happen again on extra_pass, leaking the per-CPU
memory allocated during the first pass and overwriting
fp->aux->priv_stack_ptr with the new allocation.
Both the x86 and arm64 implementations load the existing pointer
before this check:
arch/x86/net/bpf_jit_comp.c:bpf_int_jit_compile() {
...
priv_stack_ptr = prog->aux->priv_stack_ptr;
if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
...
}
arch/arm64/net/bpf_jit_comp.c:bpf_int_jit_compile() {
...
priv_stack_ptr = prog->aux->priv_stack_ptr;
if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) {
...
}
Is this missing a similar assignment before the check?
> + priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
> + 2 * PRIV_STACK_GUARD_SZ;
> + priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_size, 16, GFP_KERNEL);
> + if (!priv_stack_ptr) {
> + fp = org_fp;
> + goto out_priv_stack;
> + }
> +
> + priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_size);
> + fp->aux->priv_stack_ptr = priv_stack_ptr;
> + }
---
AI reviewed your patch. Please fix the bug, or reply by email explaining why it is not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/22059200816
AI-authorship-score: low
AI-authorship-explanation: The code shows human copy-and-adapt patterns from existing x86/arm64 implementations, with a characteristic human omission bug and copied pre-existing typo, inconsistent with typical AI code generation.
issues-found: 1
issue-severity-score: medium
issue-severity-explanation: Per-CPU memory leak on extra_pass in bpf_int_jit_compile due to missing load of existing priv_stack_ptr from fp->aux before allocation check, affecting programs with subprogs using private stack on powerpc64.
^ permalink raw reply [flat|nested] 11+ messages in thread* [PATCH 2/2] selftests/bpf: Enable private stack tests for powerpc64
2026-02-16 15:22 [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack adubey
2026-02-16 10:50 ` bot+bpf-ci
@ 2026-02-16 15:22 ` adubey
2026-02-23 11:11 ` Christophe Leroy (CS GROUP)
2026-02-16 19:14 ` [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack kernel test robot
` (2 subsequent siblings)
4 siblings, 1 reply; 11+ messages in thread
From: adubey @ 2026-02-16 15:22 UTC (permalink / raw)
To: bpf
Cc: hbathini, linuxppc-dev, ast, daniel, andrii, martin.lau, eddyz87,
yonghong.song, clm, ihor.solodrai, chleroy, Abhishek Dubey
From: Abhishek Dubey <adubey@linux.ibm.com>
With support for private stack, the relevant tests must pass
on powerpc64.
#./test_progs -t struct_ops_private_stack
#434/1 struct_ops_private_stack/private_stack:OK
#434/2 struct_ops_private_stack/private_stack_fail:OK
#434/3 struct_ops_private_stack/private_stack_recur:OK
#434 struct_ops_private_stack:OK
Summary: 1/3 PASSED, 0 SKIPPED, 0 FAILED
Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
---
tools/testing/selftests/bpf/progs/struct_ops_private_stack.c | 3 ++-
.../selftests/bpf/progs/struct_ops_private_stack_fail.c | 3 ++-
.../selftests/bpf/progs/struct_ops_private_stack_recur.c | 3 ++-
3 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
index dbe646013811..1df1111cd029 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
@@ -7,7 +7,8 @@
char _license[] SEC("license") = "GPL";
-#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) \
+ || defined(__TARGET_ARCH_powerpc)
bool skip __attribute((__section__(".data"))) = false;
#else
bool skip = true;
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
index 3d89ad7cbe2a..e09c1a8782b4 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
@@ -7,7 +7,8 @@
char _license[] SEC("license") = "GPL";
-#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) \
+ || defined(__TARGET_ARCH_powerpc)
bool skip __attribute((__section__(".data"))) = false;
#else
bool skip = true;
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
index b1f6d7e5a8e5..791800835673 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
@@ -7,7 +7,8 @@
char _license[] SEC("license") = "GPL";
-#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) \
+ || defined(__TARGET_ARCH_powerpc)
bool skip __attribute((__section__(".data"))) = false;
#else
bool skip = true;
--
2.52.0
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [PATCH 2/2] selftests/bpf: Enable private stack tests for powerpc64
2026-02-16 15:22 ` [PATCH 2/2] selftests/bpf: Enable private stack tests for powerpc64 adubey
@ 2026-02-23 11:11 ` Christophe Leroy (CS GROUP)
2026-02-23 19:32 ` adubey
0 siblings, 1 reply; 11+ messages in thread
From: Christophe Leroy (CS GROUP) @ 2026-02-23 11:11 UTC (permalink / raw)
To: adubey, bpf
Cc: hbathini, linuxppc-dev, ast, daniel, andrii, martin.lau, eddyz87,
yonghong.song, clm, ihor.solodrai
Le 16/02/2026 à 16:22, adubey@linux.ibm.com a écrit :
> From: Abhishek Dubey <adubey@linux.ibm.com>
>
> With support of private stack, relevant tests must pass
> on powerpc64.
>
> #./test_progs -t struct_ops_private_stack
> #434/1 struct_ops_private_stack/private_stack:OK
> #434/2 struct_ops_private_stack/private_stack_fail:OK
> #434/3 struct_ops_private_stack/private_stack_recur:OK
> #434 struct_ops_private_stack:OK
> Summary: 1/3 PASSED, 0 SKIPPED, 0 FAILED
>
> Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
> ---
> tools/testing/selftests/bpf/progs/struct_ops_private_stack.c | 3 ++-
> .../selftests/bpf/progs/struct_ops_private_stack_fail.c | 3 ++-
> .../selftests/bpf/progs/struct_ops_private_stack_recur.c | 3 ++-
> 3 files changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
> index dbe646013811..1df1111cd029 100644
> --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
> +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
> @@ -7,7 +7,8 @@
>
> char _license[] SEC("license") = "GPL";
>
> -#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
> +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) \
> + || defined(__TARGET_ARCH_powerpc)
Won't this also trigger the test on powerpc/32 ?
> bool skip __attribute((__section__(".data"))) = false;
> #else
> bool skip = true;
> diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
> index 3d89ad7cbe2a..e09c1a8782b4 100644
> --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
> +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
> @@ -7,7 +7,8 @@
>
> char _license[] SEC("license") = "GPL";
>
> -#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
> +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) \
> + || defined(__TARGET_ARCH_powerpc)
> bool skip __attribute((__section__(".data"))) = false;
> #else
> bool skip = true;
> diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
> index b1f6d7e5a8e5..791800835673 100644
> --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
> +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
> @@ -7,7 +7,8 @@
>
> char _license[] SEC("license") = "GPL";
>
> -#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
> +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) \
> + || defined(__TARGET_ARCH_powerpc)
> bool skip __attribute((__section__(".data"))) = false;
> #else
> bool skip = true;
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH 2/2] selftests/bpf: Enable private stack tests for powerpc64
2026-02-23 11:11 ` Christophe Leroy (CS GROUP)
@ 2026-02-23 19:32 ` adubey
0 siblings, 0 replies; 11+ messages in thread
From: adubey @ 2026-02-23 19:32 UTC (permalink / raw)
To: Christophe Leroy (CS GROUP)
Cc: bpf, hbathini, linuxppc-dev, ast, daniel, andrii, martin.lau,
eddyz87, yonghong.song, clm, ihor.solodrai
On 2026-02-23 16:41, Christophe Leroy (CS GROUP) wrote:
> Le 16/02/2026 à 16:22, adubey@linux.ibm.com a écrit :
>> From: Abhishek Dubey <adubey@linux.ibm.com>
>>
>> With support of private stack, relevant tests must pass
>> on powerpc64.
>>
>> #./test_progs -t struct_ops_private_stack
>> #434/1 struct_ops_private_stack/private_stack:OK
>> #434/2 struct_ops_private_stack/private_stack_fail:OK
>> #434/3 struct_ops_private_stack/private_stack_recur:OK
>> #434 struct_ops_private_stack:OK
>> Summary: 1/3 PASSED, 0 SKIPPED, 0 FAILED
>>
>> Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
>> ---
>> tools/testing/selftests/bpf/progs/struct_ops_private_stack.c | 3
>> ++-
>> .../selftests/bpf/progs/struct_ops_private_stack_fail.c | 3
>> ++-
>> .../selftests/bpf/progs/struct_ops_private_stack_recur.c | 3
>> ++-
>> 3 files changed, 6 insertions(+), 3 deletions(-)
>>
>> diff --git
>> a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
>> b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
>> index dbe646013811..1df1111cd029 100644
>> --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
>> +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
>> @@ -7,7 +7,8 @@
>> char _license[] SEC("license") = "GPL";
>> -#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
>> +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) \
>> + || defined(__TARGET_ARCH_powerpc)
>
> Won't this also trigger the test on powerpc/32 ?
We have a patch ready that separates the tests between 32-bit and 64-bit powerpc.
We plan to post it immediately after a few concurrent patches are frozen.
With the current tag, yes, the test is also invoked on 32-bit. Our intent is to
support private stack for ppc64 only in this series.
>
>
>> bool skip __attribute((__section__(".data"))) = false;
>> #else
>> bool skip = true;
>> diff --git
>> a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
>> b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
>> index 3d89ad7cbe2a..e09c1a8782b4 100644
>> ---
>> a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
>> +++
>> b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
>> @@ -7,7 +7,8 @@
>> char _license[] SEC("license") = "GPL";
>> -#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
>> +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) \
>> + || defined(__TARGET_ARCH_powerpc)
>> bool skip __attribute((__section__(".data"))) = false;
>> #else
>> bool skip = true;
>> diff --git
>> a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
>> b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
>> index b1f6d7e5a8e5..791800835673 100644
>> ---
>> a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
>> +++
>> b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
>> @@ -7,7 +7,8 @@
>> char _license[] SEC("license") = "GPL";
>> -#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
>> +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) \
>> + || defined(__TARGET_ARCH_powerpc)
>> bool skip __attribute((__section__(".data"))) = false;
>> #else
>> bool skip = true;
-Abhishek
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack
2026-02-16 15:22 [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack adubey
2026-02-16 10:50 ` bot+bpf-ci
2026-02-16 15:22 ` [PATCH 2/2] selftests/bpf: Enable private stack tests for powerpc64 adubey
@ 2026-02-16 19:14 ` kernel test robot
2026-02-25 10:44 ` adubey
2026-02-17 1:49 ` kernel test robot
2026-02-23 11:10 ` Christophe Leroy (CS GROUP)
4 siblings, 1 reply; 11+ messages in thread
From: kernel test robot @ 2026-02-16 19:14 UTC (permalink / raw)
To: adubey, bpf
Cc: oe-kbuild-all, hbathini, linuxppc-dev, ast, daniel, andrii,
martin.lau, eddyz87, yonghong.song, clm, ihor.solodrai, chleroy,
Abhishek Dubey
Hi,
kernel test robot noticed the following build warnings:
[auto build test WARNING on bpf-next/master]
[also build test WARNING on bpf/master powerpc/next linus/master next-20260216]
[cannot apply to bpf-next/net powerpc/fixes v6.19]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/adubey-linux-ibm-com/selftests-bpf-Enable-private-stack-tests-for-powerpc64/20260216-182353
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link: https://lore.kernel.org/r/20260216152234.36632-1-adubey%40linux.ibm.com
patch subject: [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack
config: powerpc-randconfig-001-20260217 (https://download.01.org/0day-ci/archive/20260217/202602170316.V2RZWuVa-lkp@intel.com/config)
compiler: powerpc-linux-gcc (GCC) 8.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260217/202602170316.V2RZWuVa-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202602170316.V2RZWuVa-lkp@intel.com/
All warnings (new ones prefixed by >>):
arch/powerpc/net/bpf_jit_comp.c: In function 'bpf_int_jit_compile':
>> arch/powerpc/net/bpf_jit_comp.c:266:35: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
cgctx.priv_sp = priv_stack_ptr ? (u64)priv_stack_ptr : 0;
^
vim +266 arch/powerpc/net/bpf_jit_comp.c
164
165 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
166 {
167 u32 proglen;
168 u32 alloclen;
169 u8 *image = NULL;
170 u32 *code_base;
171 u32 *addrs;
172 struct powerpc_jit_data *jit_data;
173 struct codegen_context cgctx;
174 int pass;
175 int flen;
176 int priv_stack_alloc_size;
177 void __percpu *priv_stack_ptr = NULL;
178 struct bpf_binary_header *fhdr = NULL;
179 struct bpf_binary_header *hdr = NULL;
180 struct bpf_prog *org_fp = fp;
181 struct bpf_prog *tmp_fp;
182 bool bpf_blinded = false;
183 bool extra_pass = false;
184 u8 *fimage = NULL;
185 u32 *fcode_base;
186 u32 extable_len;
187 u32 fixup_len;
188
189 if (!fp->jit_requested)
190 return org_fp;
191
192 tmp_fp = bpf_jit_blind_constants(org_fp);
193 if (IS_ERR(tmp_fp))
194 return org_fp;
195
196 if (tmp_fp != org_fp) {
197 bpf_blinded = true;
198 fp = tmp_fp;
199 }
200
201 jit_data = fp->aux->jit_data;
202 if (!jit_data) {
203 jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
204 if (!jit_data) {
205 fp = org_fp;
206 goto out;
207 }
208 fp->aux->jit_data = jit_data;
209 }
210
211 if (!priv_stack_ptr && fp->aux->jits_use_priv_stack) {
212 /*
213 * Allocate private stack of size equivalent to
214 * verifier-calculated stack size plus two memory
215 * guard regions to detect private stack overflow
216 * and underflow.
217 */
218 priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
219 2 * PRIV_STACK_GUARD_SZ;
220 priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_size, 16, GFP_KERNEL);
221 if (!priv_stack_ptr) {
222 fp = org_fp;
223 goto out_priv_stack;
224 }
225
226 priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_size);
227 fp->aux->priv_stack_ptr = priv_stack_ptr;
228 }
229
230 flen = fp->len;
231 addrs = jit_data->addrs;
232 if (addrs) {
233 cgctx = jit_data->ctx;
234 /*
235 * JIT compiled to a writable location (image/code_base) first.
236 * It is then moved to the readonly final location (fimage/fcode_base)
237 * using instruction patching.
238 */
239 fimage = jit_data->fimage;
240 fhdr = jit_data->fhdr;
241 proglen = jit_data->proglen;
242 hdr = jit_data->hdr;
243 image = (void *)hdr + ((void *)fimage - (void *)fhdr);
244 extra_pass = true;
245 /* During extra pass, ensure index is reset before repopulating extable entries */
246 cgctx.exentry_idx = 0;
247 goto skip_init_ctx;
248 }
249
250 addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
251 if (addrs == NULL) {
252 fp = org_fp;
253 goto out_addrs;
254 }
255
256 memset(&cgctx, 0, sizeof(struct codegen_context));
257 bpf_jit_init_reg_mapping(&cgctx);
258
259 /* Make sure that the stack is quadword aligned. */
260 cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
261 cgctx.arena_vm_start = bpf_arena_get_kern_vm_start(fp->aux->arena);
262 cgctx.user_vm_start = bpf_arena_get_user_vm_start(fp->aux->arena);
263 cgctx.is_subprog = bpf_is_subprog(fp);
264 cgctx.exception_boundary = fp->aux->exception_boundary;
265 cgctx.exception_cb = fp->aux->exception_cb;
> 266 cgctx.priv_sp = priv_stack_ptr ? (u64)priv_stack_ptr : 0;
267
268 /* Scouting faux-generate pass 0 */
269 if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
270 /* We hit something illegal or unsupported. */
271 fp = org_fp;
272 goto out_addrs;
273 }
274
275 /*
276 * If we have seen a tail call, we need a second pass.
277 * This is because bpf_jit_emit_common_epilogue() is called
278 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
279 * We also need a second pass if we ended up with too large
280 * a program so as to ensure BPF_EXIT branches are in range.
281 */
282 if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
283 cgctx.idx = 0;
284 if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
285 fp = org_fp;
286 goto out_addrs;
287 }
288 }
289
290 bpf_jit_realloc_regs(&cgctx);
291 /*
292 * Pretend to build prologue, given the features we've seen. This will
293 * update ctgtx.idx as it pretends to output instructions, then we can
294 * calculate total size from idx.
295 */
296 bpf_jit_build_prologue(NULL, &cgctx);
297 addrs[fp->len] = cgctx.idx * 4;
298 bpf_jit_build_epilogue(NULL, &cgctx);
299
300 fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4;
301 extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry);
302
303 proglen = cgctx.idx * 4;
304 alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
305
306 fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
307 bpf_jit_fill_ill_insns);
308 if (!fhdr) {
309 fp = org_fp;
310 goto out_addrs;
311 }
312
313 if (extable_len)
314 fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;
315
316 skip_init_ctx:
317 code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
318 fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
319
320 /* Code generation passes 1-2 */
321 for (pass = 1; pass < 3; pass++) {
322 /* Now build the prologue, body code & epilogue for real. */
323 cgctx.idx = 0;
324 cgctx.alt_exit_addr = 0;
325 bpf_jit_build_prologue(code_base, &cgctx);
326 if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass,
327 extra_pass)) {
328 bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
329 bpf_jit_binary_pack_free(fhdr, hdr);
330 fp = org_fp;
331 goto out_addrs;
332 }
333 bpf_jit_build_epilogue(code_base, &cgctx);
334
335 if (bpf_jit_enable > 1)
336 pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
337 proglen - (cgctx.idx * 4), cgctx.seen);
338 }
339
340 if (bpf_jit_enable > 1)
341 /*
342 * Note that we output the base address of the code_base
343 * rather than image, since opcodes are in code_base.
344 */
345 bpf_jit_dump(flen, proglen, pass, code_base);
346
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack
2026-02-16 19:14 ` [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack kernel test robot
@ 2026-02-25 10:44 ` adubey
0 siblings, 0 replies; 11+ messages in thread
From: adubey @ 2026-02-25 10:44 UTC (permalink / raw)
To: kernel test robot
Cc: bpf, oe-kbuild-all, hbathini, linuxppc-dev, ast, daniel, andrii,
martin.lau, eddyz87, yonghong.song, clm, ihor.solodrai, chleroy
On 2026-02-17 00:44, kernel test robot wrote:
> Hi,
>
> kernel test robot noticed the following build warnings:
>
> [auto build test WARNING on bpf-next/master]
> [also build test WARNING on bpf/master powerpc/next linus/master
> next-20260216]
> [cannot apply to bpf-next/net powerpc/fixes v6.19]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>
> url:
> https://github.com/intel-lab-lkp/linux/commits/adubey-linux-ibm-com/selftests-bpf-Enable-private-stack-tests-for-powerpc64/20260216-182353
> base:
> https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
> patch link:
> https://lore.kernel.org/r/20260216152234.36632-1-adubey%40linux.ibm.com
> patch subject: [PATCH 1/2] powerpc64/bpf: Implement JIT support for
> private stack
> config: powerpc-randconfig-001-20260217
> (https://download.01.org/0day-ci/archive/20260217/202602170316.V2RZWuVa-lkp@intel.com/config)
> compiler: powerpc-linux-gcc (GCC) 8.5.0
> reproduce (this is a W=1 build):
> (https://download.01.org/0day-ci/archive/20260217/202602170316.V2RZWuVa-lkp@intel.com/reproduce)
>
> If you fix the issue in a separate patch/commit (i.e. not just a new
> version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes:
> https://lore.kernel.org/oe-kbuild-all/202602170316.V2RZWuVa-lkp@intel.com/
>
> All warnings (new ones prefixed by >>):
>
> arch/powerpc/net/bpf_jit_comp.c: In function 'bpf_int_jit_compile':
>>> arch/powerpc/net/bpf_jit_comp.c:266:35: warning: cast from pointer to
>>> integer of different size [-Wpointer-to-int-cast]
> cgctx.priv_sp = priv_stack_ptr ? (u64)priv_stack_ptr : 0;
> ^
>
>
> vim +266 arch/powerpc/net/bpf_jit_comp.c
>
> 164
> 165 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> 166 {
> 167 u32 proglen;
> 168 u32 alloclen;
> 169 u8 *image = NULL;
> 170 u32 *code_base;
> 171 u32 *addrs;
> 172 struct powerpc_jit_data *jit_data;
> 173 struct codegen_context cgctx;
> 174 int pass;
> 175 int flen;
> 176 int priv_stack_alloc_size;
> 177 void __percpu *priv_stack_ptr = NULL;
> 178 struct bpf_binary_header *fhdr = NULL;
> 179 struct bpf_binary_header *hdr = NULL;
> 180 struct bpf_prog *org_fp = fp;
> 181 struct bpf_prog *tmp_fp;
> 182 bool bpf_blinded = false;
> 183 bool extra_pass = false;
> 184 u8 *fimage = NULL;
> 185 u32 *fcode_base;
> 186 u32 extable_len;
> 187 u32 fixup_len;
> 188
> 189 if (!fp->jit_requested)
> 190 return org_fp;
> 191
> 192 tmp_fp = bpf_jit_blind_constants(org_fp);
> 193 if (IS_ERR(tmp_fp))
> 194 return org_fp;
> 195
> 196 if (tmp_fp != org_fp) {
> 197 bpf_blinded = true;
> 198 fp = tmp_fp;
> 199 }
> 200
> 201 jit_data = fp->aux->jit_data;
> 202 if (!jit_data) {
> 203 jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
> 204 if (!jit_data) {
> 205 fp = org_fp;
> 206 goto out;
> 207 }
> 208 fp->aux->jit_data = jit_data;
> 209 }
> 210
> 211 if (!priv_stack_ptr && fp->aux->jits_use_priv_stack) {
> 212 /*
> 213 * Allocate private stack of size equivalent to
> 214 * verifier-calculated stack size plus two memory
> 215 * guard regions to detect private stack overflow
> 216 * and underflow.
> 217 */
> 218 priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
> 219 2 * PRIV_STACK_GUARD_SZ;
> 220 priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_size,
> 16, GFP_KERNEL);
> 221 if (!priv_stack_ptr) {
> 222 fp = org_fp;
> 223 goto out_priv_stack;
> 224 }
> 225
> 226 priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_size);
> 227 fp->aux->priv_stack_ptr = priv_stack_ptr;
> 228 }
> 229
> 230 flen = fp->len;
> 231 addrs = jit_data->addrs;
> 232 if (addrs) {
> 233 cgctx = jit_data->ctx;
> 234 /*
> 235 * JIT compiled to a writable location (image/code_base)
> first.
> 236 * It is then moved to the readonly final location
> (fimage/fcode_base)
> 237 * using instruction patching.
> 238 */
> 239 fimage = jit_data->fimage;
> 240 fhdr = jit_data->fhdr;
> 241 proglen = jit_data->proglen;
> 242 hdr = jit_data->hdr;
> 243 image = (void *)hdr + ((void *)fimage - (void *)fhdr);
> 244 extra_pass = true;
> 245 /* During extra pass, ensure index is reset before
> repopulating extable entries */
> 246 cgctx.exentry_idx = 0;
> 247 goto skip_init_ctx;
> 248 }
> 249
> 250 addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
> 251 if (addrs == NULL) {
> 252 fp = org_fp;
> 253 goto out_addrs;
> 254 }
> 255
> 256 memset(&cgctx, 0, sizeof(struct codegen_context));
> 257 bpf_jit_init_reg_mapping(&cgctx);
> 258
> 259 /* Make sure that the stack is quadword aligned. */
> 260 cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
> 261 cgctx.arena_vm_start =
> bpf_arena_get_kern_vm_start(fp->aux->arena);
> 262 cgctx.user_vm_start =
> bpf_arena_get_user_vm_start(fp->aux->arena);
> 263 cgctx.is_subprog = bpf_is_subprog(fp);
> 264 cgctx.exception_boundary = fp->aux->exception_boundary;
> 265 cgctx.exception_cb = fp->aux->exception_cb;
> > 266 cgctx.priv_sp = priv_stack_ptr ? (u64)priv_stack_ptr : 0;
> 267
> 268 /* Scouting faux-generate pass 0 */
> 269 if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0,
> false)) {
> 270 /* We hit something illegal or unsupported. */
> 271 fp = org_fp;
> 272 goto out_addrs;
> 273 }
> 274
> 275 /*
> 276 * If we have seen a tail call, we need a second pass.
> 277 * This is because bpf_jit_emit_common_epilogue() is called
> 278 * from bpf_jit_emit_tail_call() with a not yet stable
> ctx->seen.
> 279 * We also need a second pass if we ended up with too large
> 280 * a program so as to ensure BPF_EXIT branches are in range.
> 281 */
> 282 if (cgctx.seen & SEEN_TAILCALL ||
> !is_offset_in_branch_range((long)cgctx.idx * 4)) {
> 283 cgctx.idx = 0;
> 284 if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0,
> false)) {
> 285 fp = org_fp;
> 286 goto out_addrs;
> 287 }
> 288 }
> 289
> 290 bpf_jit_realloc_regs(&cgctx);
> 291 /*
> 292 * Pretend to build prologue, given the features we've seen.
> This will
> 293 * update ctgtx.idx as it pretends to output instructions, then
> we can
> 294 * calculate total size from idx.
> 295 */
> 296 bpf_jit_build_prologue(NULL, &cgctx);
> 297 addrs[fp->len] = cgctx.idx * 4;
> 298 bpf_jit_build_epilogue(NULL, &cgctx);
> 299
> 300 fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4;
> 301 extable_len = fp->aux->num_exentries * sizeof(struct
> exception_table_entry);
> 302
> 303 proglen = cgctx.idx * 4;
> 304 alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len +
> extable_len;
> 305
> 306 fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr,
> &image,
> 307 bpf_jit_fill_ill_insns);
> 308 if (!fhdr) {
> 309 fp = org_fp;
> 310 goto out_addrs;
> 311 }
> 312
> 313 if (extable_len)
> 314 fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE +
> proglen + fixup_len;
> 315
> 316 skip_init_ctx:
> 317 code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
> 318 fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
> 319
> 320 /* Code generation passes 1-2 */
> 321 for (pass = 1; pass < 3; pass++) {
> 322 /* Now build the prologue, body code & epilogue for real. */
> 323 cgctx.idx = 0;
> 324 cgctx.alt_exit_addr = 0;
> 325 bpf_jit_build_prologue(code_base, &cgctx);
> 326 if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx,
> addrs, pass,
> 327 extra_pass)) {
> 328 bpf_arch_text_copy(&fhdr->size, &hdr->size,
> sizeof(hdr->size));
> 329 bpf_jit_binary_pack_free(fhdr, hdr);
> 330 fp = org_fp;
> 331 goto out_addrs;
> 332 }
> 333 bpf_jit_build_epilogue(code_base, &cgctx);
> 334
> 335 if (bpf_jit_enable > 1)
> 336 pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
> 337 proglen - (cgctx.idx * 4), cgctx.seen);
> 338 }
> 339
> 340 if (bpf_jit_enable > 1)
> 341 /*
> 342 * Note that we output the base address of the code_base
> 343 * rather than image, since opcodes are in code_base.
> 344 */
> 345 bpf_jit_dump(flen, proglen, pass, code_base);
> 346
Fixed in v2 :
https://lore.kernel.org/bpf/20260216152234.36632-1-adubey@linux.ibm.com/
-Abhishek
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack
2026-02-16 15:22 [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack adubey
` (2 preceding siblings ...)
2026-02-16 19:14 ` [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack kernel test robot
@ 2026-02-17 1:49 ` kernel test robot
2026-02-25 10:43 ` adubey
2026-02-23 11:10 ` Christophe Leroy (CS GROUP)
4 siblings, 1 reply; 11+ messages in thread
From: kernel test robot @ 2026-02-17 1:49 UTC (permalink / raw)
To: adubey, bpf
Cc: oe-kbuild-all, hbathini, linuxppc-dev, ast, daniel, andrii,
martin.lau, eddyz87, yonghong.song, clm, ihor.solodrai, chleroy,
Abhishek Dubey
Hi,
kernel test robot noticed the following build warnings:
[auto build test WARNING on bpf-next/master]
[also build test WARNING on bpf/master powerpc/next linus/master next-20260216]
[cannot apply to bpf-next/net powerpc/fixes v6.19]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/adubey-linux-ibm-com/selftests-bpf-Enable-private-stack-tests-for-powerpc64/20260216-182353
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link: https://lore.kernel.org/r/20260216152234.36632-1-adubey%40linux.ibm.com
patch subject: [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack
config: powerpc64-randconfig-r133-20260217 (https://download.01.org/0day-ci/archive/20260217/202602170941.f1KIvBuf-lkp@intel.com/config)
compiler: powerpc64-linux-gcc (GCC) 8.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260217/202602170941.f1KIvBuf-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202602170941.f1KIvBuf-lkp@intel.com/
sparse warnings: (new ones prefixed by >>)
>> arch/powerpc/net/bpf_jit_comp.c:266:43: sparse: sparse: cast removes address space '__percpu' of expression
--
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes address space '__percpu' of expression
>> arch/powerpc/net/bpf_jit_comp64.c:212:32: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected void [noderef] __percpu *priv_frame_ptr @@ got void * @@
arch/powerpc/net/bpf_jit_comp64.c:212:32: sparse: expected void [noderef] __percpu *priv_frame_ptr
arch/powerpc/net/bpf_jit_comp64.c:212:32: sparse: got void *
arch/powerpc/net/bpf_jit_comp64.c:1476:41: sparse: sparse: cast truncates bits from constant value (8000000000000000 becomes 0)
arch/powerpc/net/bpf_jit_comp64.c:1476:41: sparse: sparse: cast truncates bits from constant value (8000000000000000 becomes 0)
arch/powerpc/net/bpf_jit_comp64.c:1478:41: sparse: sparse: cast truncates bits from constant value (c000000000000000 becomes 0)
arch/powerpc/net/bpf_jit_comp64.c:1478:41: sparse: sparse: cast truncates bits from constant value (c000000000000000 becomes 0)
vim +/__percpu +266 arch/powerpc/net/bpf_jit_comp.c
164
165 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
166 {
167 u32 proglen;
168 u32 alloclen;
169 u8 *image = NULL;
170 u32 *code_base;
171 u32 *addrs;
172 struct powerpc_jit_data *jit_data;
173 struct codegen_context cgctx;
174 int pass;
175 int flen;
176 int priv_stack_alloc_size;
177 void __percpu *priv_stack_ptr = NULL;
178 struct bpf_binary_header *fhdr = NULL;
179 struct bpf_binary_header *hdr = NULL;
180 struct bpf_prog *org_fp = fp;
181 struct bpf_prog *tmp_fp;
182 bool bpf_blinded = false;
183 bool extra_pass = false;
184 u8 *fimage = NULL;
185 u32 *fcode_base;
186 u32 extable_len;
187 u32 fixup_len;
188
189 if (!fp->jit_requested)
190 return org_fp;
191
192 tmp_fp = bpf_jit_blind_constants(org_fp);
193 if (IS_ERR(tmp_fp))
194 return org_fp;
195
196 if (tmp_fp != org_fp) {
197 bpf_blinded = true;
198 fp = tmp_fp;
199 }
200
201 jit_data = fp->aux->jit_data;
202 if (!jit_data) {
203 jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
204 if (!jit_data) {
205 fp = org_fp;
206 goto out;
207 }
208 fp->aux->jit_data = jit_data;
209 }
210
211 if (!priv_stack_ptr && fp->aux->jits_use_priv_stack) {
212 /*
213 * Allocate private stack of size equivalent to
214 * verifier-calculated stack size plus two memory
215 * guard regions to detect private stack overflow
216 * and underflow.
217 */
218 priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
219 2 * PRIV_STACK_GUARD_SZ;
220 priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_size, 16, GFP_KERNEL);
221 if (!priv_stack_ptr) {
222 fp = org_fp;
223 goto out_priv_stack;
224 }
225
226 priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_size);
227 fp->aux->priv_stack_ptr = priv_stack_ptr;
228 }
229
230 flen = fp->len;
231 addrs = jit_data->addrs;
232 if (addrs) {
233 cgctx = jit_data->ctx;
234 /*
235 * JIT compiled to a writable location (image/code_base) first.
236 * It is then moved to the readonly final location (fimage/fcode_base)
237 * using instruction patching.
238 */
239 fimage = jit_data->fimage;
240 fhdr = jit_data->fhdr;
241 proglen = jit_data->proglen;
242 hdr = jit_data->hdr;
243 image = (void *)hdr + ((void *)fimage - (void *)fhdr);
244 extra_pass = true;
245 /* During extra pass, ensure index is reset before repopulating extable entries */
246 cgctx.exentry_idx = 0;
247 goto skip_init_ctx;
248 }
249
250 addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
251 if (addrs == NULL) {
252 fp = org_fp;
253 goto out_addrs;
254 }
255
256 memset(&cgctx, 0, sizeof(struct codegen_context));
257 bpf_jit_init_reg_mapping(&cgctx);
258
259 /* Make sure that the stack is quadword aligned. */
260 cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
261 cgctx.arena_vm_start = bpf_arena_get_kern_vm_start(fp->aux->arena);
262 cgctx.user_vm_start = bpf_arena_get_user_vm_start(fp->aux->arena);
263 cgctx.is_subprog = bpf_is_subprog(fp);
264 cgctx.exception_boundary = fp->aux->exception_boundary;
265 cgctx.exception_cb = fp->aux->exception_cb;
> 266 cgctx.priv_sp = priv_stack_ptr ? (u64)priv_stack_ptr : 0;
267
268 /* Scouting faux-generate pass 0 */
269 if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
270 /* We hit something illegal or unsupported. */
271 fp = org_fp;
272 goto out_addrs;
273 }
274
275 /*
276 * If we have seen a tail call, we need a second pass.
277 * This is because bpf_jit_emit_common_epilogue() is called
278 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
279 * We also need a second pass if we ended up with too large
280 * a program so as to ensure BPF_EXIT branches are in range.
281 */
282 if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
283 cgctx.idx = 0;
284 if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
285 fp = org_fp;
286 goto out_addrs;
287 }
288 }
289
290 bpf_jit_realloc_regs(&cgctx);
291 /*
292 * Pretend to build prologue, given the features we've seen. This will
293 * update ctgtx.idx as it pretends to output instructions, then we can
294 * calculate total size from idx.
295 */
296 bpf_jit_build_prologue(NULL, &cgctx);
297 addrs[fp->len] = cgctx.idx * 4;
298 bpf_jit_build_epilogue(NULL, &cgctx);
299
300 fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4;
301 extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry);
302
303 proglen = cgctx.idx * 4;
304 alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
305
306 fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
307 bpf_jit_fill_ill_insns);
308 if (!fhdr) {
309 fp = org_fp;
310 goto out_addrs;
311 }
312
313 if (extable_len)
314 fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;
315
316 skip_init_ctx:
317 code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
318 fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
319
320 /* Code generation passes 1-2 */
321 for (pass = 1; pass < 3; pass++) {
322 /* Now build the prologue, body code & epilogue for real. */
323 cgctx.idx = 0;
324 cgctx.alt_exit_addr = 0;
325 bpf_jit_build_prologue(code_base, &cgctx);
326 if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass,
327 extra_pass)) {
328 bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
329 bpf_jit_binary_pack_free(fhdr, hdr);
330 fp = org_fp;
331 goto out_addrs;
332 }
333 bpf_jit_build_epilogue(code_base, &cgctx);
334
335 if (bpf_jit_enable > 1)
336 pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
337 proglen - (cgctx.idx * 4), cgctx.seen);
338 }
339
340 if (bpf_jit_enable > 1)
341 /*
342 * Note that we output the base address of the code_base
343 * rather than image, since opcodes are in code_base.
344 */
345 bpf_jit_dump(flen, proglen, pass, code_base);
346
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack
2026-02-17 1:49 ` kernel test robot
@ 2026-02-25 10:43 ` adubey
0 siblings, 0 replies; 11+ messages in thread
From: adubey @ 2026-02-25 10:43 UTC (permalink / raw)
To: kernel test robot
Cc: bpf, oe-kbuild-all, hbathini, linuxppc-dev, ast, daniel, andrii,
martin.lau, eddyz87, yonghong.song, clm, ihor.solodrai, chleroy
On 2026-02-17 07:19, kernel test robot wrote:
> Hi,
>
> kernel test robot noticed the following build warnings:
>
> [auto build test WARNING on bpf-next/master]
> [also build test WARNING on bpf/master powerpc/next linus/master
> next-20260216]
> [cannot apply to bpf-next/net powerpc/fixes v6.19]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>
> url:
> https://github.com/intel-lab-lkp/linux/commits/adubey-linux-ibm-com/selftests-bpf-Enable-private-stack-tests-for-powerpc64/20260216-182353
> base:
> https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
> patch link:
> https://lore.kernel.org/r/20260216152234.36632-1-adubey%40linux.ibm.com
> patch subject: [PATCH 1/2] powerpc64/bpf: Implement JIT support for
> private stack
> config: powerpc64-randconfig-r133-20260217
> (https://download.01.org/0day-ci/archive/20260217/202602170941.f1KIvBuf-lkp@intel.com/config)
> compiler: powerpc64-linux-gcc (GCC) 8.5.0
> reproduce (this is a W=1 build):
> (https://download.01.org/0day-ci/archive/20260217/202602170941.f1KIvBuf-lkp@intel.com/reproduce)
>
> If you fix the issue in a separate patch/commit (i.e. not just a new
> version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes:
> https://lore.kernel.org/oe-kbuild-all/202602170941.f1KIvBuf-lkp@intel.com/
>
> sparse warnings: (new ones prefixed by >>)
>>> arch/powerpc/net/bpf_jit_comp.c:266:43: sparse: sparse: cast removes
>>> address space '__percpu' of expression
> --
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:186:9: sparse: sparse: cast removes
>>> address space '__percpu' of expression
>>> arch/powerpc/net/bpf_jit_comp64.c:212:32: sparse: sparse: incorrect
>>> type in assignment (different address spaces) @@ expected void
>>> [noderef] __percpu *priv_frame_ptr @@ got void * @@
> arch/powerpc/net/bpf_jit_comp64.c:212:32: sparse: expected void
> [noderef] __percpu *priv_frame_ptr
> arch/powerpc/net/bpf_jit_comp64.c:212:32: sparse: got void *
> arch/powerpc/net/bpf_jit_comp64.c:1476:41: sparse: sparse: cast
> truncates bits from constant value (8000000000000000 becomes 0)
> arch/powerpc/net/bpf_jit_comp64.c:1476:41: sparse: sparse: cast
> truncates bits from constant value (8000000000000000 becomes 0)
> arch/powerpc/net/bpf_jit_comp64.c:1478:41: sparse: sparse: cast
> truncates bits from constant value (c000000000000000 becomes 0)
> arch/powerpc/net/bpf_jit_comp64.c:1478:41: sparse: sparse: cast
> truncates bits from constant value (c000000000000000 becomes 0)
>
> vim +/__percpu +266 arch/powerpc/net/bpf_jit_comp.c
>
> 164
> 165 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> 166 {
> 167 u32 proglen;
> 168 u32 alloclen;
> 169 u8 *image = NULL;
> 170 u32 *code_base;
> 171 u32 *addrs;
> 172 struct powerpc_jit_data *jit_data;
> 173 struct codegen_context cgctx;
> 174 int pass;
> 175 int flen;
> 176 int priv_stack_alloc_size;
> 177 void __percpu *priv_stack_ptr = NULL;
> 178 struct bpf_binary_header *fhdr = NULL;
> 179 struct bpf_binary_header *hdr = NULL;
> 180 struct bpf_prog *org_fp = fp;
> 181 struct bpf_prog *tmp_fp;
> 182 bool bpf_blinded = false;
> 183 bool extra_pass = false;
> 184 u8 *fimage = NULL;
> 185 u32 *fcode_base;
> 186 u32 extable_len;
> 187 u32 fixup_len;
> 188
> 189 if (!fp->jit_requested)
> 190 return org_fp;
> 191
> 192 tmp_fp = bpf_jit_blind_constants(org_fp);
> 193 if (IS_ERR(tmp_fp))
> 194 return org_fp;
> 195
> 196 if (tmp_fp != org_fp) {
> 197 bpf_blinded = true;
> 198 fp = tmp_fp;
> 199 }
> 200
> 201 jit_data = fp->aux->jit_data;
> 202 if (!jit_data) {
> 203 jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
> 204 if (!jit_data) {
> 205 fp = org_fp;
> 206 goto out;
> 207 }
> 208 fp->aux->jit_data = jit_data;
> 209 }
> 210
> 211 if (!priv_stack_ptr && fp->aux->jits_use_priv_stack) {
> 212 /*
> 213 * Allocate private stack of size equivalent to
> 214 * verifier-calculated stack size plus two memory
> 215 * guard regions to detect private stack overflow
> 216 * and underflow.
> 217 */
> 218 priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
> 219 2 * PRIV_STACK_GUARD_SZ;
> 220 priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_size,
> 16, GFP_KERNEL);
> 221 if (!priv_stack_ptr) {
> 222 fp = org_fp;
> 223 goto out_priv_stack;
> 224 }
> 225
> 226 priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_size);
> 227 fp->aux->priv_stack_ptr = priv_stack_ptr;
> 228 }
> 229
> 230 flen = fp->len;
> 231 addrs = jit_data->addrs;
> 232 if (addrs) {
> 233 cgctx = jit_data->ctx;
> 234 /*
> 235 * JIT compiled to a writable location (image/code_base)
> first.
> 236 * It is then moved to the readonly final location
> (fimage/fcode_base)
> 237 * using instruction patching.
> 238 */
> 239 fimage = jit_data->fimage;
> 240 fhdr = jit_data->fhdr;
> 241 proglen = jit_data->proglen;
> 242 hdr = jit_data->hdr;
> 243 image = (void *)hdr + ((void *)fimage - (void *)fhdr);
> 244 extra_pass = true;
> 245 /* During extra pass, ensure index is reset before
> repopulating extable entries */
> 246 cgctx.exentry_idx = 0;
> 247 goto skip_init_ctx;
> 248 }
> 249
> 250 addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
> 251 if (addrs == NULL) {
> 252 fp = org_fp;
> 253 goto out_addrs;
> 254 }
> 255
> 256 memset(&cgctx, 0, sizeof(struct codegen_context));
> 257 bpf_jit_init_reg_mapping(&cgctx);
> 258
> 259 /* Make sure that the stack is quadword aligned. */
> 260 cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
> 261 cgctx.arena_vm_start =
> bpf_arena_get_kern_vm_start(fp->aux->arena);
> 262 cgctx.user_vm_start =
> bpf_arena_get_user_vm_start(fp->aux->arena);
> 263 cgctx.is_subprog = bpf_is_subprog(fp);
> 264 cgctx.exception_boundary = fp->aux->exception_boundary;
> 265 cgctx.exception_cb = fp->aux->exception_cb;
> > 266 cgctx.priv_sp = priv_stack_ptr ? (u64)priv_stack_ptr : 0;
> 267
> 268 /* Scouting faux-generate pass 0 */
> 269 if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0,
> false)) {
> 270 /* We hit something illegal or unsupported. */
> 271 fp = org_fp;
> 272 goto out_addrs;
> 273 }
> 274
> 275 /*
> 276 * If we have seen a tail call, we need a second pass.
> 277 * This is because bpf_jit_emit_common_epilogue() is called
> 278 * from bpf_jit_emit_tail_call() with a not yet stable
> ctx->seen.
> 279 * We also need a second pass if we ended up with too large
> 280 * a program so as to ensure BPF_EXIT branches are in range.
> 281 */
> 282 if (cgctx.seen & SEEN_TAILCALL ||
> !is_offset_in_branch_range((long)cgctx.idx * 4)) {
> 283 cgctx.idx = 0;
> 284 if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0,
> false)) {
> 285 fp = org_fp;
> 286 goto out_addrs;
> 287 }
> 288 }
> 289
> 290 bpf_jit_realloc_regs(&cgctx);
> 291 /*
> 292 * Pretend to build prologue, given the features we've seen.
> This will
> 293 * update ctgtx.idx as it pretends to output instructions, then
> we can
> 294 * calculate total size from idx.
> 295 */
> 296 bpf_jit_build_prologue(NULL, &cgctx);
> 297 addrs[fp->len] = cgctx.idx * 4;
> 298 bpf_jit_build_epilogue(NULL, &cgctx);
> 299
> 300 fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4;
> 301 extable_len = fp->aux->num_exentries * sizeof(struct
> exception_table_entry);
> 302
> 303 proglen = cgctx.idx * 4;
> 304 alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len +
> extable_len;
> 305
> 306 fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr,
> &image,
> 307 bpf_jit_fill_ill_insns);
> 308 if (!fhdr) {
> 309 fp = org_fp;
> 310 goto out_addrs;
> 311 }
> 312
> 313 if (extable_len)
> 314 fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE +
> proglen + fixup_len;
> 315
> 316 skip_init_ctx:
> 317 code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
> 318 fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
> 319
> 320 /* Code generation passes 1-2 */
> 321 for (pass = 1; pass < 3; pass++) {
> 322 /* Now build the prologue, body code & epilogue for real. */
> 323 cgctx.idx = 0;
> 324 cgctx.alt_exit_addr = 0;
> 325 bpf_jit_build_prologue(code_base, &cgctx);
> 326 if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx,
> addrs, pass,
> 327 extra_pass)) {
> 328 bpf_arch_text_copy(&fhdr->size, &hdr->size,
> sizeof(hdr->size));
> 329 bpf_jit_binary_pack_free(fhdr, hdr);
> 330 fp = org_fp;
> 331 goto out_addrs;
> 332 }
> 333 bpf_jit_build_epilogue(code_base, &cgctx);
> 334
> 335 if (bpf_jit_enable > 1)
> 336 pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
> 337 proglen - (cgctx.idx * 4), cgctx.seen);
> 338 }
> 339
> 340 if (bpf_jit_enable > 1)
> 341 /*
> 342 * Note that we output the base address of the code_base
> 343 * rather than image, since opcodes are in code_base.
> 344 */
> 345 bpf_jit_dump(flen, proglen, pass, code_base);
> 346
Posted v2 with fix:
https://lore.kernel.org/bpf/20260216152234.36632-1-adubey@linux.ibm.com/
-Abhishek
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack
2026-02-16 15:22 [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack adubey
` (3 preceding siblings ...)
2026-02-17 1:49 ` kernel test robot
@ 2026-02-23 11:10 ` Christophe Leroy (CS GROUP)
2026-02-23 19:24 ` adubey
4 siblings, 1 reply; 11+ messages in thread
From: Christophe Leroy (CS GROUP) @ 2026-02-23 11:10 UTC (permalink / raw)
To: adubey, bpf
Cc: hbathini, linuxppc-dev, ast, daniel, andrii, martin.lau, eddyz87,
yonghong.song, clm, ihor.solodrai
Le 16/02/2026 à 16:22, adubey@linux.ibm.com a écrit :
> From: Abhishek Dubey <adubey@linux.ibm.com>
>
> Provision the private stack as a per-CPU allocation during
> bpf_int_jit_compile(). Align the stack to 16 bytes and place guard
> regions at both ends to detect runtime stack overflow and underflow.
>
> Round the private stack size up to the nearest 16-byte boundary.
> Make each guard region 16 bytes to preserve the required overall
> 16-byte alignment. When private stack is set, skip bpf stack size
> accounting in kernel stack.
>
> There is no stack pointer in powerpc. Stack referencing during JIT
> is done using frame pointer. Frame pointer calculation goes like:
>
> BPF frame pointer = Priv stack allocation start address +
> Overflow guard +
> Actual stack size defined by verifier
>
> Memory layout:
>
> High Addr +--------------------------------------------------+
> | |
> | 16 bytes Underflow guard (0xEB9F12345678eb9fULL) |
> | |
> BPF FP -> +--------------------------------------------------+
> | |
> | Private stack - determined by verifier |
> | 16-bytes aligned |
> | |
> +--------------------------------------------------+
> | |
> Lower Addr | 16 byte Overflow guard (0xEB9F12345678eb9fULL) |
> | |
> Priv stack alloc ->+--------------------------------------------------+
> start
>
> Update BPF_REG_FP to point to the calculated offset within the
> allocated private stack buffer. Now, BPF stack usage reference
> in the allocated private stack.
>
> The patch is rebase over fixes by Hari:
> https://lore.kernel.org/bpf/20260216065639.1750181-1-hbathini@linux.ibm.com/T/#mf02cad9096fa4ad1f05610b1f464da1cddf7445a
>
> Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
> ---
> arch/powerpc/net/bpf_jit.h | 5 +++
> arch/powerpc/net/bpf_jit_comp.c | 74 +++++++++++++++++++++++++++++++
> arch/powerpc/net/bpf_jit_comp64.c | 38 +++++++++++++---
> 3 files changed, 112 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index 7354e1d72f79..5a115c54e43a 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -178,8 +178,13 @@ struct codegen_context {
> bool is_subprog;
> bool exception_boundary;
> bool exception_cb;
> + u64 priv_sp;
Are you sure you want a u64, not a long ?
> };
>
> +/* Memory size & magic-value to detect private stack overflow/underflow */
> +#define PRIV_STACK_GUARD_SZ 16
> +#define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL
> +
> #define bpf_to_ppc(r) (ctx->b2p[r])
>
> #ifdef CONFIG_PPC32
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 278e09b57560..7a78e03d482f 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -129,6 +129,39 @@ bool bpf_jit_needs_zext(void)
> return true;
> }
>
> +static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
> +{
> + int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
> + u64 *stack_ptr;
> +
> + for_each_possible_cpu(cpu) {
> + stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
> + stack_ptr[0] = PRIV_STACK_GUARD_VAL;
> + stack_ptr[1] = PRIV_STACK_GUARD_VAL;
> + stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
> + stack_ptr[underflow_idx + 1] = PRIV_STACK_GUARD_VAL;
> + }
> +}
> +
> +static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
> + struct bpf_prog *fp)
> +{
> + int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
> + u64 *stack_ptr;
> +
> + for_each_possible_cpu(cpu) {
> + stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
> + if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
> + stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
> + stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
> + stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
> + pr_err("BPF private stack overflow/underflow detected for prog %sx\n",
> + bpf_jit_get_prog_name(fp));
> + break;
> + }
> + }
> +}
> +
> struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> {
> u32 proglen;
> @@ -140,6 +173,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> struct codegen_context cgctx;
> int pass;
> int flen;
> + int priv_stack_alloc_size;
> + void __percpu *priv_stack_ptr = NULL;
> struct bpf_binary_header *fhdr = NULL;
> struct bpf_binary_header *hdr = NULL;
> struct bpf_prog *org_fp = fp;
> @@ -173,6 +208,25 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> fp->aux->jit_data = jit_data;
> }
>
> + if (!priv_stack_ptr && fp->aux->jits_use_priv_stack) {
> + /*
> + * Allocate private stack of size equivalent to
> + * verifier-calculated stack size plus two memory
> + * guard regions to detect private stack overflow
> + * and underflow.
> + */
> + priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
> + 2 * PRIV_STACK_GUARD_SZ;
> + priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_size, 16, GFP_KERNEL);
> + if (!priv_stack_ptr) {
> + fp = org_fp;
> + goto out_priv_stack;
> + }
> +
> + priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_size);
> + fp->aux->priv_stack_ptr = priv_stack_ptr;
> + }
> +
> flen = fp->len;
> addrs = jit_data->addrs;
> if (addrs) {
> @@ -209,6 +263,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> cgctx.is_subprog = bpf_is_subprog(fp);
> cgctx.exception_boundary = fp->aux->exception_boundary;
> cgctx.exception_cb = fp->aux->exception_cb;
> + cgctx.priv_sp = priv_stack_ptr ? (u64)priv_stack_ptr : 0;
priv_stack_ptr is a pointer, ie a long.
priv_sp is a u64, ie a long long.
This will mismatch on powerpc/32
>
> /* Scouting faux-generate pass 0 */
> if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
> @@ -306,7 +361,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> }
> bpf_prog_fill_jited_linfo(fp, addrs);
> out_addrs:
> + if (!image && priv_stack_ptr) {
> + free_percpu(priv_stack_ptr);
> + fp->aux->priv_stack_ptr = NULL;
Shouldn't those two lines be swapped ?
> + }
> kfree(addrs);
> +out_priv_stack:
> kfree(jit_data);
> fp->aux->jit_data = NULL;
> } else {
> @@ -419,6 +479,8 @@ void bpf_jit_free(struct bpf_prog *fp)
> if (fp->jited) {
> struct powerpc_jit_data *jit_data = fp->aux->jit_data;
> struct bpf_binary_header *hdr;
> + void __percpu *priv_stack_ptr;
> + int priv_stack_alloc_size;
>
> /*
> * If we fail the final pass of JIT (from jit_subprogs),
> @@ -432,6 +494,13 @@ void bpf_jit_free(struct bpf_prog *fp)
> }
> hdr = bpf_jit_binary_pack_hdr(fp);
> bpf_jit_binary_pack_free(hdr, NULL);
> + priv_stack_ptr = fp->aux->priv_stack_ptr;
> + if (priv_stack_ptr) {
> + priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
> + 2 * PRIV_STACK_GUARD_SZ;
> + priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_size, fp);
> + free_percpu(priv_stack_ptr);
> + }
> WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
> }
>
> @@ -453,6 +522,11 @@ bool bpf_jit_supports_kfunc_call(void)
> return true;
> }
>
> +bool bpf_jit_supports_private_stack(void)
> +{
> + return IS_ENABLED(CONFIG_PPC64);
> +}
> +
> bool bpf_jit_supports_arena(void)
> {
> return IS_ENABLED(CONFIG_PPC64);
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 640b84409687..206ef43b4090 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -183,6 +183,20 @@ void bpf_jit_realloc_regs(struct codegen_context *ctx)
> {
> }
>
> +static void emit_fp_priv_stack(u32 *image, struct codegen_context *ctx, void __percpu *ptr)
> +{
> + /* Load percpu data offset */
> + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R13,
> + offsetof(struct paca_struct, data_offset)));
> + PPC_LI64(bpf_to_ppc(BPF_REG_FP), (u64)ptr);
> + /*
> + * Set frame pointer with percpu allocated
> + * buffer for private stack.
> + */
> + EMIT(PPC_RAW_ADD(bpf_to_ppc(BPF_REG_FP),
> + bpf_to_ppc(TMP_REG_1), bpf_to_ppc(BPF_REG_FP)));
> +}
> +
> /*
> * For exception boundary & exception_cb progs:
> * return increased size to accommodate additional NVRs.
> @@ -197,6 +211,12 @@ static int bpf_jit_stack_size(struct codegen_context *ctx)
> void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
> {
> int i;
> + void __percpu *priv_frame_ptr = NULL;
> +
> + if (ctx->priv_sp) {
> + priv_frame_ptr = (void*) ctx->priv_sp + PRIV_STACK_GUARD_SZ +
> + round_up(ctx->stack_size, 16);
> + }
>
> /* Instruction for trampoline attach */
> EMIT(PPC_RAW_NOP());
> @@ -251,7 +271,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
> }
>
> EMIT(PPC_RAW_STDU(_R1, _R1,
> - -(bpf_jit_stack_size(ctx) + ctx->stack_size)));
> + -(bpf_jit_stack_size(ctx) + (ctx->priv_sp ? 0 : ctx->stack_size))));
> }
>
> /*
> @@ -307,9 +327,16 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
> * Exception_cb not restricted from using stack area or arena.
> * Setup frame pointer to point to the bpf stack area
> */
> - if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)))
> - EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
> - STACK_FRAME_MIN_SIZE + ctx->stack_size));
> + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) {
> + if (ctx->priv_sp && priv_frame_ptr) {
> + /* Set up private stack pointer */
> + emit_fp_priv_stack(image, ctx, priv_frame_ptr);
> + } else {
> + /* Setup frame pointer to point to the bpf stack area */
> + EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
> + STACK_FRAME_MIN_SIZE + ctx->stack_size));
> + }
> + }
>
> if (ctx->arena_vm_start)
> PPC_LI64(bpf_to_ppc(ARENA_VM_START), ctx->arena_vm_start);
> @@ -339,7 +366,8 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
>
> /* Tear down our stack frame */
> if (bpf_has_stack_frame(ctx)) {
> - EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_jit_stack_size(ctx) + ctx->stack_size));
> + EMIT(PPC_RAW_ADDI(_R1, _R1,
> + bpf_jit_stack_size(ctx) + (ctx->priv_sp ? 0 : ctx->stack_size)));
>
> if (ctx->seen & SEEN_FUNC || ctx->exception_cb) {
> EMIT(PPC_RAW_LD(_R0, _R1, PPC_LR_STKOFF));
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack
2026-02-23 11:10 ` Christophe Leroy (CS GROUP)
@ 2026-02-23 19:24 ` adubey
0 siblings, 0 replies; 11+ messages in thread
From: adubey @ 2026-02-23 19:24 UTC (permalink / raw)
To: Christophe Leroy (CS GROUP)
Cc: bpf, hbathini, linuxppc-dev, ast, daniel, andrii, martin.lau,
eddyz87, yonghong.song, clm, ihor.solodrai
On 2026-02-23 16:40, Christophe Leroy (CS GROUP) wrote:
> Le 16/02/2026 à 16:22, adubey@linux.ibm.com a écrit :
>> From: Abhishek Dubey <adubey@linux.ibm.com>
>>
>> Provision the private stack as a per-CPU allocation during
>> bpf_int_jit_compile(). Align the stack to 16 bytes and place guard
>> regions at both ends to detect runtime stack overflow and underflow.
>>
>> Round the private stack size up to the nearest 16-byte boundary.
>> Make each guard region 16 bytes to preserve the required overall
>> 16-byte alignment. When private stack is set, skip bpf stack size
>> accounting in kernel stack.
>>
>> There is no stack pointer in powerpc. Stack referencing during JIT
>> is done using frame pointer. Frame pointer calculation goes like:
>>
>> BPF frame pointer = Priv stack allocation start address +
>> Overflow guard +
>> Actual stack size defined by verifier
>>
>> Memory layout:
>>
>> High Addr
>> +--------------------------------------------------+
>> |
>> |
>> | 16 bytes Underflow guard (0xEB9F12345678eb9fULL)
>> |
>> |
>> |
>> BPF FP ->
>> +--------------------------------------------------+
>> |
>> |
>> | Private stack - determined by verifier
>> |
>> | 16-bytes aligned
>> |
>> |
>> |
>>
>> +--------------------------------------------------+
>> |
>> |
>> Lower Addr | 16 byte Overflow guard (0xEB9F12345678eb9fULL)
>> |
>> |
>> |
>> Priv stack alloc
>> ->+--------------------------------------------------+
>> start
>>
>> Update BPF_REG_FP to point to the calculated offset within the
>> allocated private stack buffer. Now, BPF stack usage references
>> point into the allocated private stack.
>>
>> The patch is rebase over fixes by Hari:
>> https://lore.kernel.org/bpf/20260216065639.1750181-1-hbathini@linux.ibm.com/T/#mf02cad9096fa4ad1f05610b1f464da1cddf7445a
>>
>> Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
>> ---
>> arch/powerpc/net/bpf_jit.h | 5 +++
>> arch/powerpc/net/bpf_jit_comp.c | 74
>> +++++++++++++++++++++++++++++++
>> arch/powerpc/net/bpf_jit_comp64.c | 38 +++++++++++++---
>> 3 files changed, 112 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
>> index 7354e1d72f79..5a115c54e43a 100644
>> --- a/arch/powerpc/net/bpf_jit.h
>> +++ b/arch/powerpc/net/bpf_jit.h
>> @@ -178,8 +178,13 @@ struct codegen_context {
>> bool is_subprog;
>> bool exception_boundary;
>> bool exception_cb;
>> + u64 priv_sp;
>
> Are you sure you want a u64, not a long ?
After the bot's warning, I was considering using void __percpu *priv_sp in
codegen_context, to keep the compiler happy throughout.
Failing that, I would prefer long (~= pointer). Thoughts? @Christophe
>
>> };
>> +/* Memory size & magic-value to detect private stack
>> overflow/underflow */
>> +#define PRIV_STACK_GUARD_SZ 16
>> +#define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL
>> +
>> #define bpf_to_ppc(r) (ctx->b2p[r])
>> #ifdef CONFIG_PPC32
>> diff --git a/arch/powerpc/net/bpf_jit_comp.c
>> b/arch/powerpc/net/bpf_jit_comp.c
>> index 278e09b57560..7a78e03d482f 100644
>> --- a/arch/powerpc/net/bpf_jit_comp.c
>> +++ b/arch/powerpc/net/bpf_jit_comp.c
>> @@ -129,6 +129,39 @@ bool bpf_jit_needs_zext(void)
>> return true;
>> }
>> +static void priv_stack_init_guard(void __percpu *priv_stack_ptr,
>> int alloc_size)
>> +{
>> + int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
>> + u64 *stack_ptr;
>> +
>> + for_each_possible_cpu(cpu) {
>> + stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
>> + stack_ptr[0] = PRIV_STACK_GUARD_VAL;
>> + stack_ptr[1] = PRIV_STACK_GUARD_VAL;
>> + stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
>> + stack_ptr[underflow_idx + 1] = PRIV_STACK_GUARD_VAL;
>> + }
>> +}
>> +
>> +static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int
>> alloc_size,
>> + struct bpf_prog *fp)
>> +{
>> + int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
>> + u64 *stack_ptr;
>> +
>> + for_each_possible_cpu(cpu) {
>> + stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
>> + if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
>> + stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
>> + stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
>> + stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
>> + pr_err("BPF private stack overflow/underflow detected for prog
>> %sx\n",
>> + bpf_jit_get_prog_name(fp));
>> + break;
>> + }
>> + }
>> +}
>> +
>> struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>> {
>> u32 proglen;
>> @@ -140,6 +173,8 @@ struct bpf_prog *bpf_int_jit_compile(struct
>> bpf_prog *fp)
>> struct codegen_context cgctx;
>> int pass;
>> int flen;
>> + int priv_stack_alloc_size;
>> + void __percpu *priv_stack_ptr = NULL;
>> struct bpf_binary_header *fhdr = NULL;
>> struct bpf_binary_header *hdr = NULL;
>> struct bpf_prog *org_fp = fp;
>> @@ -173,6 +208,25 @@ struct bpf_prog *bpf_int_jit_compile(struct
>> bpf_prog *fp)
>> fp->aux->jit_data = jit_data;
>> }
>> + if (!priv_stack_ptr && fp->aux->jits_use_priv_stack) {
>> + /*
>> + * Allocate private stack of size equivalent to
>> + * verifier-calculated stack size plus two memory
>> + * guard regions to detect private stack overflow
>> + * and underflow.
>> + */
>> + priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
>> + 2 * PRIV_STACK_GUARD_SZ;
>> + priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_size, 16,
>> GFP_KERNEL);
>> + if (!priv_stack_ptr) {
>> + fp = org_fp;
>> + goto out_priv_stack;
>> + }
>> +
>> + priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_size);
>> + fp->aux->priv_stack_ptr = priv_stack_ptr;
>> + }
>> +
>> flen = fp->len;
>> addrs = jit_data->addrs;
>> if (addrs) {
>> @@ -209,6 +263,7 @@ struct bpf_prog *bpf_int_jit_compile(struct
>> bpf_prog *fp)
>> cgctx.is_subprog = bpf_is_subprog(fp);
>> cgctx.exception_boundary = fp->aux->exception_boundary;
>> cgctx.exception_cb = fp->aux->exception_cb;
>> + cgctx.priv_sp = priv_stack_ptr ? (u64)priv_stack_ptr : 0;
>
> priv_stack_ptr is a pointer, ie a long.
> priv_sp is a u64, ie a long long.
Ack!
>
> This will mismatch on powerpc/32
long/void* should work fine everywhere.
>
>> /* Scouting faux-generate pass 0 */
>> if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
>> @@ -306,7 +361,12 @@ struct bpf_prog *bpf_int_jit_compile(struct
>> bpf_prog *fp)
>> }
>> bpf_prog_fill_jited_linfo(fp, addrs);
>> out_addrs:
>> + if (!image && priv_stack_ptr) {
>> + free_percpu(priv_stack_ptr);
>> + fp->aux->priv_stack_ptr = NULL;
>
> Shouldn't those two lines be swapped ?
Good catch! This can lead to a use-after-free situation.
>
>> + }
>> kfree(addrs);
>> +out_priv_stack:
>> kfree(jit_data);
>> fp->aux->jit_data = NULL;
>> } else {
>> @@ -419,6 +479,8 @@ void bpf_jit_free(struct bpf_prog *fp)
>> if (fp->jited) {
>> struct powerpc_jit_data *jit_data = fp->aux->jit_data;
>> struct bpf_binary_header *hdr;
>> + void __percpu *priv_stack_ptr;
>> + int priv_stack_alloc_size;
>> /*
>> * If we fail the final pass of JIT (from jit_subprogs),
>> @@ -432,6 +494,13 @@ void bpf_jit_free(struct bpf_prog *fp)
>> }
>> hdr = bpf_jit_binary_pack_hdr(fp);
>> bpf_jit_binary_pack_free(hdr, NULL);
>> + priv_stack_ptr = fp->aux->priv_stack_ptr;
>> + if (priv_stack_ptr) {
>> + priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
>> + 2 * PRIV_STACK_GUARD_SZ;
>> + priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_size, fp);
>> + free_percpu(priv_stack_ptr);
>> + }
>> WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
>> }
>> @@ -453,6 +522,11 @@ bool bpf_jit_supports_kfunc_call(void)
>> return true;
>> }
>> +bool bpf_jit_supports_private_stack(void)
>> +{
>> + return IS_ENABLED(CONFIG_PPC64);
>> +}
>> +
>> bool bpf_jit_supports_arena(void)
>> {
>> return IS_ENABLED(CONFIG_PPC64);
>> diff --git a/arch/powerpc/net/bpf_jit_comp64.c
>> b/arch/powerpc/net/bpf_jit_comp64.c
>> index 640b84409687..206ef43b4090 100644
>> --- a/arch/powerpc/net/bpf_jit_comp64.c
>> +++ b/arch/powerpc/net/bpf_jit_comp64.c
>> @@ -183,6 +183,20 @@ void bpf_jit_realloc_regs(struct codegen_context
>> *ctx)
>> {
>> }
>> +static void emit_fp_priv_stack(u32 *image, struct codegen_context
>> *ctx, void __percpu *ptr)
>> +{
>> + /* Load percpu data offset */
>> + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R13,
>> + offsetof(struct paca_struct, data_offset)));
>> + PPC_LI64(bpf_to_ppc(BPF_REG_FP), (u64)ptr);
>> + /*
>> + * Set frame pointer with percpu allocated
>> + * buffer for private stack.
>> + */
>> + EMIT(PPC_RAW_ADD(bpf_to_ppc(BPF_REG_FP),
>> + bpf_to_ppc(TMP_REG_1), bpf_to_ppc(BPF_REG_FP)));
>> +}
>> +
>> /*
>> * For exception boundary & exception_cb progs:
>> * return increased size to accommodate additional NVRs.
>> @@ -197,6 +211,12 @@ static int bpf_jit_stack_size(struct
>> codegen_context *ctx)
>> void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
>> {
>> int i;
>> + void __percpu *priv_frame_ptr = NULL;
>> +
>> + if (ctx->priv_sp) {
>> + priv_frame_ptr = (void*) ctx->priv_sp + PRIV_STACK_GUARD_SZ +
>> + round_up(ctx->stack_size, 16);
>> + }
>> /* Instruction for trampoline attach */
>> EMIT(PPC_RAW_NOP());
>> @@ -251,7 +271,7 @@ void bpf_jit_build_prologue(u32 *image, struct
>> codegen_context *ctx)
>> }
>> EMIT(PPC_RAW_STDU(_R1, _R1,
>> - -(bpf_jit_stack_size(ctx) + ctx->stack_size)));
>> + -(bpf_jit_stack_size(ctx) + (ctx->priv_sp ? 0 :
>> ctx->stack_size))));
>> }
>> /*
>> @@ -307,9 +327,16 @@ void bpf_jit_build_prologue(u32 *image, struct
>> codegen_context *ctx)
>> * Exception_cb not restricted from using stack area or arena.
>> * Setup frame pointer to point to the bpf stack area
>> */
>> - if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)))
>> - EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
>> - STACK_FRAME_MIN_SIZE + ctx->stack_size));
>> + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) {
>> + if (ctx->priv_sp && priv_frame_ptr) {
>> + /* Set up private stack pointer */
>> + emit_fp_priv_stack(image, ctx, priv_frame_ptr);
>> + } else {
>> + /* Setup frame pointer to point to the bpf stack area */
>> + EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
>> + STACK_FRAME_MIN_SIZE + ctx->stack_size));
>> + }
>> + }
>> if (ctx->arena_vm_start)
>> PPC_LI64(bpf_to_ppc(ARENA_VM_START), ctx->arena_vm_start);
>> @@ -339,7 +366,8 @@ static void bpf_jit_emit_common_epilogue(u32
>> *image, struct codegen_context *ctx
>> /* Tear down our stack frame */
>> if (bpf_has_stack_frame(ctx)) {
>> - EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_jit_stack_size(ctx) +
>> ctx->stack_size));
>> + EMIT(PPC_RAW_ADDI(_R1, _R1,
>> + bpf_jit_stack_size(ctx) + (ctx->priv_sp ? 0 : ctx->stack_size)));
>> if (ctx->seen & SEEN_FUNC || ctx->exception_cb) {
>> EMIT(PPC_RAW_LD(_R0, _R1, PPC_LR_STKOFF));
-Abhishek
^ permalink raw reply [flat|nested] 11+ messages in thread