public inbox for linuxppc-dev@ozlabs.org
 help / color / mirror / Atom feed
From: Hari Bathini <hbathini@linux.ibm.com>
To: adubey@linux.ibm.com, linuxppc-dev@lists.ozlabs.org
Cc: bpf@vger.kernel.org, maddy@linux.ibm.com, ast@kernel.org,
	andrii@kernel.org, daniel@iogearbox.net
Subject: Re: [PATCH v6 1/2] powerpc64/bpf: Implement JIT support for private stack
Date: Tue, 31 Mar 2026 10:37:18 +0530	[thread overview]
Message-ID: <c953ad0b-df8a-4503-8ca1-d9bf5b285dc9@linux.ibm.com> (raw)
In-Reply-To: <20260331080309.173612-1-adubey@linux.ibm.com>



On 31/03/26 1:33 pm, adubey@linux.ibm.com wrote:
> From: Abhishek Dubey <adubey@linux.ibm.com>
> 
> Provision the private stack as a per-CPU allocation during
> bpf_int_jit_compile(). Align the stack to 16 bytes and place guard
> regions at both ends to detect runtime stack overflow and underflow.
> 
> Round the private stack size up to the nearest 16-byte boundary.
> Make each guard region 16 bytes to preserve the required overall
> 16-byte alignment. When private stack is set, skip bpf stack size
> accounting in kernel stack.
> 
> There is no stack pointer in powerpc. Stack referencing during JIT
> is done using frame pointer. Frame pointer calculation goes like:
> 
> BPF frame pointer = Priv stack allocation start address +
>                      Overflow guard +
>                      Actual stack size defined by verifier
> 
> Memory layout:
> 
> High Addr          +--------------------------------------------------+
>                     |                                                  |
>                     | 16 bytes Underflow guard (0xEB9F12345678eb9fULL) |
>                     |                                                  |
>           BPF FP -> +--------------------------------------------------+
>                     |                                                  |
>                     | Private stack - determined by verifier           |
>                     | 16-bytes aligned                                 |
>                     |                                                  |
>                     +--------------------------------------------------+
>                     |                                                  |
> Lower Addr         | 16 byte Overflow guard (0xEB9F12345678eb9fULL)   |
>                     |                                                  |
> Priv stack alloc ->+--------------------------------------------------+
> start
> 
> Update BPF_REG_FP to point to the calculated offset within the
> allocated private stack buffer. Now, BPF stack usage references
> the allocated private stack.
> 
> v5->v6:
>    No change
> v4->v5:
>    Rebasing over latest changes
> v3->v4:
>    Added new field to fix priv_stack allocation
> v2->v3:
>    Fix ci-bot bug targeting clobbered NVRs on stack rollback
> v1->v2:
>    Fix ci-bot warning for percpu pointer casting
>    Minor refactoring
> 
> [v5]: https://lore.kernel.org/bpf/20260330232034.44776-1-adubey@linux.ibm.com
> [v4]: https://lore.kernel.org/bpf/20260226031324.17352-1-adubey@linux.ibm.com
> [v3]: https://lore.kernel.org/bpf/20260226005440.9570-1-adubey@linux.ibm.com
> [v2]: https://lore.kernel.org/bpf/20260225153950.15331-1-adubey@linux.ibm.com
> [v1]: https://lore.kernel.org/bpf/20260216152234.36632-1-adubey@linux.ibm.com
> 
> Signed-off-by: Abhishek Dubey <adubey@linux.ibm.com>
> ---
>   arch/powerpc/net/bpf_jit.h        |  6 ++
>   arch/powerpc/net/bpf_jit_comp.c   | 97 +++++++++++++++++++++++++++++--
>   arch/powerpc/net/bpf_jit_comp64.c | 29 ++++++++-
>   3 files changed, 124 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index 7354e1d72f79..a232f3fb73be 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -178,8 +178,14 @@ struct codegen_context {
>   	bool is_subprog;
>   	bool exception_boundary;
>   	bool exception_cb;
> +	void __percpu *priv_sp;
> +	unsigned int priv_stack_size;
>   };
>   
> +/* Memory size & magic-value to detect private stack overflow/underflow */
> +#define PRIV_STACK_GUARD_SZ    16
> +#define PRIV_STACK_GUARD_VAL   0xEB9F12345678eb9fULL
> +
>   #define bpf_to_ppc(r)	(ctx->b2p[r])
>   
>   #ifdef CONFIG_PPC32
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index a62a9a92b7b5..2018260f56c6 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -129,25 +129,60 @@ bool bpf_jit_needs_zext(void)
>   	return true;
>   }
>   
> +static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
> +{
> +	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
> +	u64 *stack_ptr;
> +
> +	for_each_possible_cpu(cpu) {
> +		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
> +		stack_ptr[0] = PRIV_STACK_GUARD_VAL;
> +		stack_ptr[1] = PRIV_STACK_GUARD_VAL;
> +		stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
> +		stack_ptr[underflow_idx + 1] = PRIV_STACK_GUARD_VAL;
> +	}
> +}
> +
> +static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
> +								struct bpf_prog *fp)
> +{
> +	int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
> +	u64 *stack_ptr;
> +
> +	for_each_possible_cpu(cpu) {
> +		stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
> +		if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
> +			stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
> +			stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
> +			stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
> +			pr_err("BPF private stack overflow/underflow detected for prog %s\n",
> +			bpf_jit_get_prog_name(fp));
> +			break;
> +		}
> +	}
> +}
> +
>   struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   {
>   	u32 proglen;
>   	u32 alloclen;
>   	u8 *image = NULL;
> -	u32 *code_base;
> -	u32 *addrs;
> -	struct powerpc_jit_data *jit_data;
> +	u32 *code_base = NULL;
> +	u32 *addrs = NULL;
> +	struct powerpc_jit_data *jit_data = NULL;
>   	struct codegen_context cgctx;
>   	int pass;
>   	int flen;
> +	int priv_stack_alloc_size;
> +	void __percpu *priv_stack_ptr = NULL;
>   	struct bpf_binary_header *fhdr = NULL;
>   	struct bpf_binary_header *hdr = NULL;
>   	struct bpf_prog *org_fp = fp;
> -	struct bpf_prog *tmp_fp;
> +	struct bpf_prog *tmp_fp = NULL;
>   	bool bpf_blinded = false;
>   	bool extra_pass = false;
>   	u8 *fimage = NULL;
> -	u32 *fcode_base;
> +	u32 *fcode_base = NULL;
>   	u32 extable_len;
>   	u32 fixup_len;
>   
> @@ -173,6 +208,26 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   		fp->aux->jit_data = jit_data;
>   	}
>   
> +	priv_stack_ptr = fp->aux->priv_stack_ptr;
> +	if (!priv_stack_ptr && fp->aux->jits_use_priv_stack) {
> +		/*
> +		 * Allocate private stack of size equivalent to
> +		 * verifier-calculated stack size plus two memory
> +		 * guard regions to detect private stack overflow
> +		 * and underflow.
> +		 */
> +		priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
> +							2 * PRIV_STACK_GUARD_SZ;
> +		priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_size, 16, GFP_KERNEL);
> +		if (!priv_stack_ptr) {
> +			fp = org_fp;
> +			goto out_priv_stack;
> +		}
> +
> +		priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_size);
> +		fp->aux->priv_stack_ptr = priv_stack_ptr;
> +	}
> +
>   	flen = fp->len;
>   	addrs = jit_data->addrs;
>   	if (addrs) {
> @@ -209,6 +264,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   	cgctx.is_subprog = bpf_is_subprog(fp);
>   	cgctx.exception_boundary = fp->aux->exception_boundary;
>   	cgctx.exception_cb = fp->aux->exception_cb;
> +	cgctx.priv_sp = priv_stack_ptr;
> +	cgctx.priv_stack_size = 0;
> +	if (priv_stack_ptr) {
> +		/*
> +		 * priv_stack_size required for setting bpf FP inside
> +		 * percpu allocation.
> +		 * stack_size is marked 0 to prevent allocation on
> +		 * general stack and offset calculation don't go for
> +		 * a toss in bpf_jit_stack_offsetof() & bpf_jit_stack_local()
> +		 */
> +		cgctx.priv_stack_size = cgctx.stack_size;
> +		cgctx.stack_size = 0;
> +	}
>   
>   	/* Scouting faux-generate pass 0 */
>   	if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
> @@ -306,6 +374,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   		}
>   		bpf_prog_fill_jited_linfo(fp, addrs);
>   out_addrs:
> +		if (!image && priv_stack_ptr) {
> +			fp->aux->priv_stack_ptr = NULL;
> +			free_percpu(priv_stack_ptr);
> +		}
> +out_priv_stack:
>   		kfree(addrs);
>   		kfree(jit_data);
>   		fp->aux->jit_data = NULL;
> @@ -419,6 +492,8 @@ void bpf_jit_free(struct bpf_prog *fp)
>   	if (fp->jited) {
>   		struct powerpc_jit_data *jit_data = fp->aux->jit_data;
>   		struct bpf_binary_header *hdr;
> +		void __percpu *priv_stack_ptr;
> +		int priv_stack_alloc_size;
>   
>   		/*
>   		 * If we fail the final pass of JIT (from jit_subprogs),
> @@ -432,6 +507,13 @@ void bpf_jit_free(struct bpf_prog *fp)
>   		}
>   		hdr = bpf_jit_binary_pack_hdr(fp);
>   		bpf_jit_binary_pack_free(hdr, NULL);
> +		priv_stack_ptr = fp->aux->priv_stack_ptr;
> +		if (priv_stack_ptr) {
> +			priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
> +							2 * PRIV_STACK_GUARD_SZ;
> +			priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_size, fp);
> +			free_percpu(priv_stack_ptr);
> +		}
>   		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
>   	}
>   
> @@ -453,6 +535,11 @@ bool bpf_jit_supports_kfunc_call(void)
>   	return IS_ENABLED(CONFIG_PPC64);
>   }
>   
> +bool bpf_jit_supports_private_stack(void)
> +{
> +	return IS_ENABLED(CONFIG_PPC64);
> +}
> +
>   bool bpf_jit_supports_arena(void)
>   {
>   	return IS_ENABLED(CONFIG_PPC64);


> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index c5e26d231cd5..8101e098d125 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -183,6 +183,22 @@ void bpf_jit_realloc_regs(struct codegen_context *ctx)
>   {
>   }
>   
> +static void emit_fp_priv_stack(u32 *image, struct codegen_context *ctx)
> +{
> +	/* Load percpu data offset */
> +	EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R13,
> +			offsetof(struct paca_struct, data_offset)));
> +	PPC_LI64(bpf_to_ppc(BPF_REG_FP), (__force long)ctx->priv_sp);
> +	/*
> +	 * Load base percpu pointer of private stack allocation.
> +	 * Runtime per-cpu address = (base + data_offset) + (guard + stack_size)
> +	 */
> +	EMIT(PPC_RAW_ADD(bpf_to_ppc(BPF_REG_FP),
> +			bpf_to_ppc(TMP_REG_1), bpf_to_ppc(BPF_REG_FP)));
> +	EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), bpf_to_ppc(BPF_REG_FP),
> +			PRIV_STACK_GUARD_SZ + round_up(ctx->priv_stack_size, 16)));
> +}

This will fail for !CONFIG_SMP case.

This change on top of the patch should fix it:

diff --git a/arch/powerpc/net/bpf_jit_comp64.c 
b/arch/powerpc/net/bpf_jit_comp64.c
index 8101e098d125..2971dafaa5d1 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -185,16 +185,18 @@ void bpf_jit_realloc_regs(struct codegen_context *ctx)

  static void emit_fp_priv_stack(u32 *image, struct codegen_context *ctx)
  {
-	/* Load percpu data offset */
-	EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R13,
-			offsetof(struct paca_struct, data_offset)));
  	PPC_LI64(bpf_to_ppc(BPF_REG_FP), (__force long)ctx->priv_sp);
+
  	/*
  	 * Load base percpu pointer of private stack allocation.
  	 * Runtime per-cpu address = (base + data_offset) + (guard + stack_size)
  	 */
+#ifdef CONFIG_SMP
+	EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R13,
+			offsetof(struct paca_struct, data_offset)));
  	EMIT(PPC_RAW_ADD(bpf_to_ppc(BPF_REG_FP),
  			bpf_to_ppc(TMP_REG_1), bpf_to_ppc(BPF_REG_FP)));
+#endif
  	EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), bpf_to_ppc(BPF_REG_FP),
  			PRIV_STACK_GUARD_SZ + round_up(ctx->priv_stack_size, 16)));
  }

Except for the above change, rest of the patch looks good to me.

Acked-by: Hari Bathini <hbathini@linux.ibm.com>

> +
>   /*
>    * For exception boundary & exception_cb progs:
>    *     return increased size to accommodate additional NVRs.
> @@ -307,9 +323,16 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
>   	 * Exception_cb not restricted from using stack area or arena.
>   	 * Setup frame pointer to point to the bpf stack area
>   	 */
> -	if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)))
> -		EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
> -			STACK_FRAME_MIN_SIZE + ctx->stack_size));
> +	if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) {
> +		if (ctx->priv_sp) {
> +			/* Set up fp in private stack */
> +			emit_fp_priv_stack(image, ctx);
> +		} else {
> +			/* Setup frame pointer to point to the bpf stack area */
> +			EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
> +				STACK_FRAME_MIN_SIZE + ctx->stack_size));
> +		}
> +	}
>   
>   	if (ctx->arena_vm_start)
>   		PPC_LI64(bpf_to_ppc(ARENA_VM_START), ctx->arena_vm_start);



  reply	other threads:[~2026-03-31  5:07 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-31  8:03 [PATCH v6 1/2] powerpc64/bpf: Implement JIT support for private stack adubey
2026-03-31  5:07 ` Hari Bathini [this message]
2026-03-31  8:03 ` [PATCH v6 2/2] selftests/bpf: Enable private stack tests for powerpc64 adubey
2026-03-31  5:11   ` Hari Bathini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c953ad0b-df8a-4503-8ca1-d9bf5b285dc9@linux.ibm.com \
    --to=hbathini@linux.ibm.com \
    --cc=adubey@linux.ibm.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=maddy@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox