linuxppc-dev.lists.ozlabs.org archive mirror
* [PATCH v2] powerpc64/bpf: fix JIT code size calculation of bpf trampoline
@ 2025-04-16 19:40 Hari Bathini
  2025-04-17 14:36 ` Venkat Rao Bagalkote
  2025-04-19 11:01 ` Naveen N Rao
  0 siblings, 2 replies; 5+ messages in thread
From: Hari Bathini @ 2025-04-16 19:40 UTC (permalink / raw)
  To: linuxppc-dev, bpf
  Cc: Madhavan Srinivasan, Naveen N. Rao, Michael Ellerman,
	Christophe Leroy, Nicholas Piggin, Venkat Rao Bagalkote,
	Daniel Borkmann, Andrii Nakryiko, Alexei Starovoitov, stable

arch_bpf_trampoline_size() provides the JIT size of the BPF trampoline
before the buffer for JIT'ing it is allocated. The total number of
instructions emitted for the BPF trampoline JIT code depends on where
the final image is located. So, the size arrived at with the dummy
pass in arch_bpf_trampoline_size() can vary from the actual size
needed in arch_prepare_bpf_trampoline(). When the instructions
accounted for in arch_bpf_trampoline_size() are fewer than the number
of instructions emitted during the actual JIT compile of the
trampoline, the below warning is produced:

  WARNING: CPU: 8 PID: 204190 at arch/powerpc/net/bpf_jit_comp.c:981 __arch_prepare_bpf_trampoline.isra.0+0xd2c/0xdcc

which is:

  /* Make sure the trampoline generation logic doesn't overflow */
  if (image && WARN_ON_ONCE(&image[ctx->idx] >
  			(u32 *)rw_image_end - BPF_INSN_SAFETY)) {

So, during the dummy pass, instead of providing some arbitrary image
location, account for the maximum possible number of instructions
wherever the emitted code depends on the image location.
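
For instance, a 64-bit immediate load can take anywhere from one to
five instructions depending on the value, and a call can be a single
relative branch only when the target is within reach of the final
image. A minimal sketch of the accounting pattern (illustrative only;
in_range()/emit_*() and MAX_CALL_INSNS are made-up names, not kernel
APIs):

  if (!image) {
  	ctx->idx += MAX_CALL_INSNS;		/* dummy pass: charge the worst case */
  } else if (in_range(&image[ctx->idx], target)) {
  	emit_rel_branch(image, ctx, target);	/* 1 instruction */
  } else {
  	emit_indirect_call(image, ctx, target);	/* up to MAX_CALL_INSNS */
  }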

Fixes: d243b62b7bd3 ("powerpc64/bpf: Add support for bpf trampolines")
Reported-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Closes: https://lore.kernel.org/all/6168bfc8-659f-4b5a-a6fb-90a916dde3b3@linux.ibm.com/
Cc: stable@vger.kernel.org # v6.13+
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---

Changes since v1:
- Pass NULL for image during the initial pass and account for the
  maximum possible instructions during this pass, as Naveen suggested.


 arch/powerpc/net/bpf_jit.h        | 20 ++++++++++++++++---
 arch/powerpc/net/bpf_jit_comp.c   | 33 ++++++++++---------------------
 arch/powerpc/net/bpf_jit_comp64.c |  9 +++++++++
 3 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 6beacaec63d3..4c26912c2e3c 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -51,8 +51,16 @@
 		EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc));					\
 	} while (0)
 
-/* Sign-extended 32-bit immediate load */
+/*
+ * Sign-extended 32-bit immediate load
+ *
+ * If this is a dummy pass (!image), account for
+ * maximum possible instructions.
+ */
 #define PPC_LI32(d, i)		do {					      \
+	if (!image)							      \
+		ctx->idx += 2;						      \
+	else {								      \
 		if ((int)(uintptr_t)(i) >= -32768 &&			      \
 				(int)(uintptr_t)(i) < 32768)		      \
 			EMIT(PPC_RAW_LI(d, i));				      \
@@ -60,10 +68,15 @@
 			EMIT(PPC_RAW_LIS(d, IMM_H(i)));			      \
 			if (IMM_L(i))					      \
 				EMIT(PPC_RAW_ORI(d, d, IMM_L(i)));	      \
-		} } while(0)
+		}							      \
+	} } while (0)
 
 #ifdef CONFIG_PPC64
+/* If dummy pass (!image), account for maximum possible instructions */
 #define PPC_LI64(d, i)		do {					      \
+	if (!image)							      \
+		ctx->idx += 5;						      \
+	else {								      \
 		if ((long)(i) >= -2147483648 &&				      \
 				(long)(i) < 2147483648)			      \
 			PPC_LI32(d, i);					      \
@@ -84,7 +97,8 @@
 			if ((uintptr_t)(i) & 0x000000000000ffffULL)	      \
 				EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) &       \
 							0xffff));             \
-		} } while (0)
+		}							      \
+	} } while (0)
 #define PPC_LI_ADDR	PPC_LI64
 
 #ifndef CONFIG_PPC_KERNEL_PCREL
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 2991bb171a9b..c0684733e9d6 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -504,10 +504,11 @@ static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ct
 	EMIT(PPC_RAW_ADDI(_R3, _R1, regs_off));
 	if (!p->jited)
 		PPC_LI_ADDR(_R4, (unsigned long)p->insnsi);
-	if (!create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx], (unsigned long)p->bpf_func,
-			   BRANCH_SET_LINK)) {
-		if (image)
-			image[ctx->idx] = ppc_inst_val(branch_insn);
+	/* Account for max possible instructions during dummy pass for size calculation */
+	if (image && !create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx],
+				    (unsigned long)p->bpf_func,
+				    BRANCH_SET_LINK)) {
+		image[ctx->idx] = ppc_inst_val(branch_insn);
 		ctx->idx++;
 	} else {
 		EMIT(PPC_RAW_LL(_R12, _R25, offsetof(struct bpf_prog, bpf_func)));
@@ -889,7 +890,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 			bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off);
 
 		/* Reserve space to patch branch instruction to skip fexit progs */
-		im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
+		if (ro_image) /* image is NULL for dummy pass */
+			im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
 		EMIT(PPC_RAW_NOP());
 	}
 
@@ -912,7 +914,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		}
 
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
-		im->ip_epilogue = &((u32 *)ro_image)[ctx->idx];
+		if (ro_image) /* image is NULL for dummy pass */
+			im->ip_epilogue = &((u32 *)ro_image)[ctx->idx];
 		PPC_LI_ADDR(_R3, im);
 		ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
 						 (unsigned long)__bpf_tramp_exit);
@@ -973,25 +976,9 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
 			     struct bpf_tramp_links *tlinks, void *func_addr)
 {
 	struct bpf_tramp_image im;
-	void *image;
 	int ret;
 
-	/*
-	 * Allocate a temporary buffer for __arch_prepare_bpf_trampoline().
-	 * This will NOT cause fragmentation in direct map, as we do not
-	 * call set_memory_*() on this buffer.
-	 *
-	 * We cannot use kvmalloc here, because we need image to be in
-	 * module memory range.
-	 */
-	image = bpf_jit_alloc_exec(PAGE_SIZE);
-	if (!image)
-		return -ENOMEM;
-
-	ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
-					    m, flags, tlinks, func_addr);
-	bpf_jit_free_exec(image);
-
+	ret = __arch_prepare_bpf_trampoline(&im, NULL, NULL, NULL, m, flags, tlinks, func_addr);
 	return ret;
 }
 
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 233703b06d7c..91f9efe8b8d7 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -225,6 +225,15 @@ int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *
 	}
 
 #ifdef CONFIG_PPC_KERNEL_PCREL
+	/*
+	 * If fimage is NULL (the initial pass to find image size),
+	 * account for the maximum no. of instructions possible.
+	 */
+	if (!fimage) {
+		ctx->idx += 7;
+		return 0;
+	}
+
 	reladdr = func_addr - local_paca->kernelbase;
 
 	if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) {
-- 
2.49.0




* Re: [PATCH v2] powerpc64/bpf: fix JIT code size calculation of bpf trampoline
  2025-04-16 19:40 [PATCH v2] powerpc64/bpf: fix JIT code size calculation of bpf trampoline Hari Bathini
@ 2025-04-17 14:36 ` Venkat Rao Bagalkote
  2025-04-22  8:30   ` Hari Bathini
  2025-04-19 11:01 ` Naveen N Rao
  1 sibling, 1 reply; 5+ messages in thread
From: Venkat Rao Bagalkote @ 2025-04-17 14:36 UTC (permalink / raw)
  To: Hari Bathini, linuxppc-dev, bpf
  Cc: Madhavan Srinivasan, Naveen N. Rao, Michael Ellerman,
	Christophe Leroy, Nicholas Piggin, Daniel Borkmann,
	Andrii Nakryiko, Alexei Starovoitov, stable


On 17/04/25 1:10 am, Hari Bathini wrote:
> arch_bpf_trampoline_size() provides the JIT size of the BPF trampoline
> before the buffer for JIT'ing it is allocated.
> [...]

The above patch fixes the reported issue. I ran the test (./test_progs)
three times and the issue is not seen. Hence,


Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>


Regards,

Venkat.




* Re: [PATCH v2] powerpc64/bpf: fix JIT code size calculation of bpf trampoline
  2025-04-16 19:40 [PATCH v2] powerpc64/bpf: fix JIT code size calculation of bpf trampoline Hari Bathini
  2025-04-17 14:36 ` Venkat Rao Bagalkote
@ 2025-04-19 11:01 ` Naveen N Rao
  2025-04-22  8:29   ` Hari Bathini
  1 sibling, 1 reply; 5+ messages in thread
From: Naveen N Rao @ 2025-04-19 11:01 UTC (permalink / raw)
  To: Hari Bathini
  Cc: linuxppc-dev, bpf, Madhavan Srinivasan, Michael Ellerman,
	Christophe Leroy, Nicholas Piggin, Venkat Rao Bagalkote,
	Daniel Borkmann, Andrii Nakryiko, Alexei Starovoitov, stable

On Thu, Apr 17, 2025 at 01:10:37AM +0530, Hari Bathini wrote:
> [...]
>  #ifdef CONFIG_PPC64
> +/* If dummy pass (!image), account for maximum possible instructions */
>  #define PPC_LI64(d, i)		do {					      \
> +	if (!image)							      \
> +		ctx->idx += 5;						      \
> +	else {								      \
>  		if ((long)(i) >= -2147483648 &&				      \
>  				(long)(i) < 2147483648)			      \
>  			PPC_LI32(d, i);					      \
> @@ -84,7 +97,8 @@
>  			if ((uintptr_t)(i) & 0x000000000000ffffULL)	      \
>  				EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) &       \
>  							0xffff));             \
> -		} } while (0)
> +		}							      \
> +	} } while (0)

You should now also be able to remove the padding we add in 
bpf_jit_comp64.c for 'case BPF_LD | BPF_IMM | BPF_DW:'
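
For readers without the file at hand, the padding in question looks
roughly like this (paraphrased, not a verbatim quote of the kernel
source):

	case BPF_LD | BPF_IMM | BPF_DW:
		imm64 = ((u64)(u32)insn[i].imm) |
			(((u64)(u32)insn[i + 1].imm) << 32);
		tmp_idx = ctx->idx;
		PPC_LI64(dst_reg, imm64);
		/* pad with nops so the load always occupies 5 slots */
		for (j = ctx->idx - tmp_idx; j < 5; j++)
			EMIT(PPC_RAW_NOP());

With PPC_LI64() charging a fixed 5 instructions whenever image is
NULL, the sizing pass no longer depends on that padding.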

> [...]
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 233703b06d7c..91f9efe8b8d7 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -225,6 +225,15 @@ int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *
>  	}
>  
>  #ifdef CONFIG_PPC_KERNEL_PCREL
> +	/*
> +	 * If fimage is NULL (the initial pass to find image size),
> +	 * account for the maximum no. of instructions possible.
> +	 */
> +	if (!fimage) {
> +		ctx->idx += 7;
> +		return 0;
> +	}
> +

I would merge this with the below if conditional so that this gets 
noticed if the instruction sequence below ever changes.

>  	reladdr = func_addr - local_paca->kernelbase;
>  
>  	if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) {
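
One possible shape of that merge (an illustrative sketch; the exact
form is whatever lands in v3):

	reladdr = func_addr - local_paca->kernelbase;

	if (fimage && reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) {
		/* short call sequence built relative to kernelbase */
	} else {
		/*
		 * Dummy pass (!fimage) or out-of-range target: the worst
		 * case of 7 instructions, since PPC_LI64() already charges
		 * its maximum of 5 when image is NULL.
		 */
		PPC_LI64(_R12, func_addr);
		EMIT(PPC_RAW_MTCTR(_R12));
		EMIT(PPC_RAW_BCTRL());
	}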

Other than that:
Acked-by: Naveen N Rao (AMD) <naveen@kernel.org>


- Naveen




* Re: [PATCH v2] powerpc64/bpf: fix JIT code size calculation of bpf trampoline
  2025-04-19 11:01 ` Naveen N Rao
@ 2025-04-22  8:29   ` Hari Bathini
  0 siblings, 0 replies; 5+ messages in thread
From: Hari Bathini @ 2025-04-22  8:29 UTC (permalink / raw)
  To: Naveen N Rao
  Cc: linuxppc-dev, bpf, Madhavan Srinivasan, Michael Ellerman,
	Christophe Leroy, Nicholas Piggin, Venkat Rao Bagalkote,
	Daniel Borkmann, Andrii Nakryiko, Alexei Starovoitov, stable



On 19/04/25 4:31 pm, Naveen N Rao wrote:
> On Thu, Apr 17, 2025 at 01:10:37AM +0530, Hari Bathini wrote:
>> [...]
> 
> You should now also be able to remove the padding we add in
> bpf_jit_comp64.c for 'case BPF_LD | BPF_IMM | BPF_DW:'
> 
>> [...]
> 
> I would merge this with the below if conditional so that this gets
> noticed if the instruction sequence below ever changes.
> 
> Other than that:
> Acked-by: Naveen N Rao (AMD) <naveen@kernel.org>

Thanks for the review, Naveen.
Posted v3 with the suggested change:
https://lore.kernel.org/linuxppc-dev/20250422082609.949301-1-hbathini@linux.ibm.com/

- Hari



* Re: [PATCH v2] powerpc64/bpf: fix JIT code size calculation of bpf trampoline
  2025-04-17 14:36 ` Venkat Rao Bagalkote
@ 2025-04-22  8:30   ` Hari Bathini
  0 siblings, 0 replies; 5+ messages in thread
From: Hari Bathini @ 2025-04-22  8:30 UTC (permalink / raw)
  To: Venkat Rao Bagalkote, linuxppc-dev, bpf
  Cc: Madhavan Srinivasan, Naveen N. Rao, Michael Ellerman,
	Christophe Leroy, Nicholas Piggin, Daniel Borkmann,
	Andrii Nakryiko, Alexei Starovoitov, stable



On 17/04/25 8:06 pm, Venkat Rao Bagalkote wrote:
> 
> On 17/04/25 1:10 am, Hari Bathini wrote:
>> [...]
> 
> The above patch fixes the reported issue. I ran the test (./test_progs)
> three times and the issue is not seen. Hence,
> 
> 
> Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>

Thanks for testing, Venkat.
Posted v3 carrying your Tested-by tag:
https://lore.kernel.org/linuxppc-dev/20250422082609.949301-1-hbathini@linux.ibm.com/

- Hari


