linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 4/4] powerpc/bpf: use bpf_jit_binary_pack_[alloc|finalize|free]
  2023-03-09 18:00 [PATCH v2 0/4] enable bpf_prog_pack allocator for powerpc Hari Bathini
@ 2023-03-09 18:00 ` Hari Bathini
  0 siblings, 0 replies; 15+ messages in thread
From: Hari Bathini @ 2023-03-09 18:00 UTC (permalink / raw)
  To: linuxppc-dev, bpf
  Cc: Song Liu, Daniel Borkmann, Alexei Starovoitov, Andrii Nakryiko,
	Naveen N. Rao

Use bpf_jit_binary_pack_alloc in powerpc jit. The jit engine first
writes the program to the rw buffer. When the jit is done, the program
is copied to the final location with bpf_jit_binary_pack_finalize.
With multiple jit_subprogs, bpf_jit_free is called on some subprograms
that haven't got bpf_jit_binary_pack_finalize() yet. Implement custom
bpf_jit_free() like in commit 1d5f82d9dd47 ("bpf, x86: fix freeing of
not-finalized bpf_prog_pack") to call bpf_jit_binary_pack_finalize(),
if necessary. While here, correct the misnomer powerpc64_jit_data to
powerpc_jit_data as it is meant for both ppc32 and ppc64.

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
 arch/powerpc/net/bpf_jit.h        |   7 +-
 arch/powerpc/net/bpf_jit_comp.c   | 104 +++++++++++++++++++++---------
 arch/powerpc/net/bpf_jit_comp32.c |   4 +-
 arch/powerpc/net/bpf_jit_comp64.c |   6 +-
 4 files changed, 83 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index d767e39d5645..a8b7480c4d43 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -168,15 +168,16 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
 
 void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
 int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
-int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
 		       u32 *addrs, int pass, bool extra_pass);
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_realloc_regs(struct codegen_context *ctx);
 int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
 
-int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
-			  int insn_idx, int jmp_off, int dst_reg);
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
+			  struct codegen_context *ctx, int insn_idx,
+			  int jmp_off, int dst_reg);
 
 #endif
 
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index d1794d9f0154..ece75c829499 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -42,10 +42,11 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg,
 	return 0;
 }
 
-struct powerpc64_jit_data {
-	struct bpf_binary_header *header;
+struct powerpc_jit_data {
+	struct bpf_binary_header *hdr;
+	struct bpf_binary_header *fhdr;
 	u32 *addrs;
-	u8 *image;
+	u8 *fimage;
 	u32 proglen;
 	struct codegen_context ctx;
 };
@@ -62,15 +63,18 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	u8 *image = NULL;
 	u32 *code_base;
 	u32 *addrs;
-	struct powerpc64_jit_data *jit_data;
+	struct powerpc_jit_data *jit_data;
 	struct codegen_context cgctx;
 	int pass;
 	int flen;
-	struct bpf_binary_header *bpf_hdr;
+	struct bpf_binary_header *fhdr = NULL;
+	struct bpf_binary_header *hdr = NULL;
 	struct bpf_prog *org_fp = fp;
 	struct bpf_prog *tmp_fp;
 	bool bpf_blinded = false;
 	bool extra_pass = false;
+	u8 *fimage = NULL;
+	u32 *fcode_base;
 	u32 extable_len;
 	u32 fixup_len;
 
@@ -100,9 +104,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	addrs = jit_data->addrs;
 	if (addrs) {
 		cgctx = jit_data->ctx;
-		image = jit_data->image;
-		bpf_hdr = jit_data->header;
+		fimage = jit_data->fimage;
+		fhdr = jit_data->fhdr;
 		proglen = jit_data->proglen;
+		hdr = jit_data->hdr;
+		image = (void *)hdr + ((void *)fimage - (void *)fhdr);
 		extra_pass = true;
 		goto skip_init_ctx;
 	}
@@ -120,7 +126,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
 
 	/* Scouting faux-generate pass 0 */
-	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
+	if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
 		/* We hit something illegal or unsupported. */
 		fp = org_fp;
 		goto out_addrs;
@@ -135,7 +141,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	 */
 	if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
 		cgctx.idx = 0;
-		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
+		if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
 			fp = org_fp;
 			goto out_addrs;
 		}
@@ -157,17 +163,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	proglen = cgctx.idx * 4;
 	alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
 
-	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
-	if (!bpf_hdr) {
+	fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
+					      bpf_jit_fill_ill_insns);
+	if (!fhdr) {
 		fp = org_fp;
 		goto out_addrs;
 	}
 
 	if (extable_len)
-		fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len;
+		fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;
 
 skip_init_ctx:
 	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
+	fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
 
 	/* Code generation passes 1-2 */
 	for (pass = 1; pass < 3; pass++) {
@@ -175,8 +183,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		cgctx.idx = 0;
 		cgctx.alt_exit_addr = 0;
 		bpf_jit_build_prologue(code_base, &cgctx);
-		if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass, extra_pass)) {
-			bpf_jit_binary_free(bpf_hdr);
+		if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass, extra_pass)) {
+			bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
+			bpf_jit_binary_pack_free(fhdr, hdr);
 			fp = org_fp;
 			goto out_addrs;
 		}
@@ -192,21 +201,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		 * Note that we output the base address of the code_base
 		 * rather than image, since opcodes are in code_base.
 		 */
-		bpf_jit_dump(flen, proglen, pass, code_base);
+		bpf_jit_dump(flen, proglen, pass, fcode_base);
 
 #ifdef CONFIG_PPC64_ELF_ABI_V1
 	/* Function descriptor nastiness: Address + TOC */
-	((u64 *)image)[0] = (u64)code_base;
+	((u64 *)image)[0] = (u64)fcode_base;
 	((u64 *)image)[1] = local_paca->kernel_toc;
 #endif
 
-	fp->bpf_func = (void *)image;
+	fp->bpf_func = (void *)fimage;
 	fp->jited = 1;
 	fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
 
-	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size);
 	if (!fp->is_func || extra_pass) {
-		bpf_jit_binary_lock_ro(bpf_hdr);
+		if (bpf_jit_binary_pack_finalize(fp, fhdr, hdr)) {
+			fp = org_fp;
+			goto out_addrs;
+		}
 		bpf_prog_fill_jited_linfo(fp, addrs);
 out_addrs:
 		kfree(addrs);
@@ -216,8 +227,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		jit_data->addrs = addrs;
 		jit_data->ctx = cgctx;
 		jit_data->proglen = proglen;
-		jit_data->image = image;
-		jit_data->header = bpf_hdr;
+		jit_data->fimage = fimage;
+		jit_data->fhdr = fhdr;
+		jit_data->hdr = hdr;
 	}
 
 out:
@@ -231,12 +243,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
  * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
  * this function, as this only applies to BPF_PROBE_MEM, for now.
  */
-int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
-			  int insn_idx, int jmp_off, int dst_reg)
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
+			  struct codegen_context *ctx, int insn_idx, int jmp_off,
+			  int dst_reg)
 {
 	off_t offset;
 	unsigned long pc;
-	struct exception_table_entry *ex;
+	struct exception_table_entry *ex, *ex_entry;
 	u32 *fixup;
 
 	/* Populate extable entries only in the last pass */
@@ -247,9 +260,16 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
 	    WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
 		return -EINVAL;
 
+	/*
+	 * Program is first written to image before copying to the
+	 * final location (fimage). Accordingly, update in the image first.
+	 * As all offsets used are relative, copying as is to the
+	 * final location should be alright.
+	 */
 	pc = (unsigned long)&image[insn_idx];
+	ex = (void *)fp->aux->extable - (void *)fimage + (void *)image;
 
-	fixup = (void *)fp->aux->extable -
+	fixup = (void *)ex -
 		(fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
 		(ctx->exentry_idx * BPF_FIXUP_LEN * 4);
 
@@ -260,17 +280,17 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
 	fixup[BPF_FIXUP_LEN - 1] =
 		PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
 
-	ex = &fp->aux->extable[ctx->exentry_idx];
+	ex_entry = &ex[ctx->exentry_idx];
 
-	offset = pc - (long)&ex->insn;
+	offset = pc - (long)&ex_entry->insn;
 	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
 		return -ERANGE;
-	ex->insn = offset;
+	ex_entry->insn = offset;
 
-	offset = (long)fixup - (long)&ex->fixup;
+	offset = (long)fixup - (long)&ex_entry->fixup;
 	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
 		return -ERANGE;
-	ex->fixup = offset;
+	ex_entry->fixup = offset;
 
 	ctx->exentry_idx++;
 	return 0;
@@ -308,3 +328,27 @@ int bpf_arch_text_invalidate(void *dst, size_t len)
 
 	return ret;
 }
+
+void bpf_jit_free(struct bpf_prog *fp)
+{
+	if (fp->jited) {
+		struct powerpc_jit_data *jit_data = fp->aux->jit_data;
+		struct bpf_binary_header *hdr;
+
+		/*
+		 * If we fail the final pass of JIT (from jit_subprogs),
+		 * the program may not be finalized yet. Call finalize here
+		 * before freeing it.
+		 */
+		if (jit_data) {
+			bpf_jit_binary_pack_finalize(fp, jit_data->fhdr, jit_data->hdr);
+			kvfree(jit_data->addrs);
+			kfree(jit_data);
+		}
+		hdr = bpf_jit_binary_pack_hdr(fp);
+		bpf_jit_binary_pack_free(hdr, NULL);
+		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
+	}
+
+	bpf_prog_unlock_free(fp);
+}
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 7f91ea064c08..fb2761b54d64 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -278,7 +278,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
 }
 
 /* Assemble the body code between the prologue & epilogue */
-int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
 		       u32 *addrs, int pass, bool extra_pass)
 {
 	const struct bpf_insn *insn = fp->insnsi;
@@ -997,7 +997,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 					jmp_off += 4;
 				}
 
-				ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx,
+				ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, insn_idx,
 							    jmp_off, dst_reg);
 				if (ret)
 					return ret;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 8dd3cabaa83a..37a8970a7065 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -343,7 +343,7 @@ asm (
 );
 
 /* Assemble the body code between the prologue & epilogue */
-int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
 		       u32 *addrs, int pass, bool extra_pass)
 {
 	enum stf_barrier_type stf_barrier = stf_barrier_type_get();
@@ -922,8 +922,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				addrs[++i] = ctx->idx * 4;
 
 			if (BPF_MODE(code) == BPF_PROBE_MEM) {
-				ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1,
-							    4, dst_reg);
+				ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx,
+							    ctx->idx - 1, 4, dst_reg);
 				if (ret)
 					return ret;
 			}
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 0/4] enable bpf_prog_pack allocator for powerpc
@ 2023-03-09 18:02 Hari Bathini
  2023-03-09 18:02 ` [PATCH v2 1/4] powerpc/code-patching: introduce patch_instructions() Hari Bathini
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Hari Bathini @ 2023-03-09 18:02 UTC (permalink / raw)
  To: linuxppc-dev, bpf
  Cc: Song Liu, Daniel Borkmann, Alexei Starovoitov, Andrii Nakryiko,
	Naveen N. Rao

Most BPF programs are small, but they consume a page each. For systems
with busy traffic and many BPF programs, this may also add significant
pressure on instruction TLB. High iTLB pressure usually slows down the
whole system causing visible performance degradation for production
workloads.

bpf_prog_pack, a customized allocator that packs multiple bpf programs
into preallocated memory chunks, was proposed [1] to address it. This
series extends this support on powerpc.

The first patch introduces patch_instructions() function to enable
patching more than one instruction at a time. This change showed
around 5X improvement in the time taken to run test_bpf test cases.
Patches 2 & 3 add the arch specific functions needed to support this
feature. Patch 4 enables the support for powerpc and ensures cleanup
is handled gracefully. Tested the changes successfully.

[1] https://lore.kernel.org/bpf/20220204185742.271030-1-song@kernel.org/
[2] https://lore.kernel.org/all/20221110184303.393179-1-hbathini@linux.ibm.com/ 

Changes in v2:
* Introduced patch_instructions() to help with patching bpf programs.

Hari Bathini (4):
  powerpc/code-patching: introduce patch_instructions()
  powerpc/bpf: implement bpf_arch_text_copy
  powerpc/bpf: implement bpf_arch_text_invalidate for bpf_prog_pack
  powerpc/bpf: use bpf_jit_binary_pack_[alloc|finalize|free]

 arch/powerpc/include/asm/code-patching.h |   1 +
 arch/powerpc/lib/code-patching.c         | 151 ++++++++++++++++-------
 arch/powerpc/net/bpf_jit.h               |   7 +-
 arch/powerpc/net/bpf_jit_comp.c          | 142 ++++++++++++++++-----
 arch/powerpc/net/bpf_jit_comp32.c        |   4 +-
 arch/powerpc/net/bpf_jit_comp64.c        |   6 +-
 6 files changed, 226 insertions(+), 85 deletions(-)

-- 
2.39.2


^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH v2 1/4] powerpc/code-patching: introduce patch_instructions()
  2023-03-09 18:02 [PATCH v2 0/4] enable bpf_prog_pack allocator for powerpc Hari Bathini
@ 2023-03-09 18:02 ` Hari Bathini
  2023-03-10 18:26   ` Christophe Leroy
  2023-03-11  8:02   ` Christophe Leroy
  2023-03-09 18:02 ` [PATCH v2 2/4] powerpc/bpf: implement bpf_arch_text_copy Hari Bathini
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 15+ messages in thread
From: Hari Bathini @ 2023-03-09 18:02 UTC (permalink / raw)
  To: linuxppc-dev, bpf
  Cc: Song Liu, Daniel Borkmann, Alexei Starovoitov, Andrii Nakryiko,
	Naveen N. Rao

patch_instruction() entails setting up pte, patching the instruction,
clearing the pte and flushing the tlb. If multiple instructions need
to be patched, every instruction would have to go through the above
drill unnecessarily. Instead, introduce function patch_instructions()
that patches multiple instructions at one go while setting up the pte,
clearing the pte and flushing the tlb only once per page range of
instructions. Observed ~5X improvement in speed of execution using
patch_instructions() over patch_instruction(), when more instructions
are to be patched.

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
 arch/powerpc/include/asm/code-patching.h |   1 +
 arch/powerpc/lib/code-patching.c         | 151 ++++++++++++++++-------
 2 files changed, 106 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 3f881548fb61..059fc4fe700e 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -74,6 +74,7 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
 int patch_branch(u32 *addr, unsigned long target, int flags);
 int patch_instruction(u32 *addr, ppc_inst_t instr);
 int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
+int patch_instructions(u32 *addr, u32 *code, bool fill_inst, size_t len);
 
 static inline unsigned long patch_site_addr(s32 *site)
 {
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index b00112d7ad46..33857b9b53de 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -278,77 +278,117 @@ static void unmap_patch_area(unsigned long addr)
 	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
 }
 
-static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
+static int __do_patch_instructions_mm(u32 *addr, u32 *code, bool fill_inst, size_t len)
 {
-	int err;
-	u32 *patch_addr;
-	unsigned long text_poke_addr;
-	pte_t *pte;
-	unsigned long pfn = get_patch_pfn(addr);
-	struct mm_struct *patching_mm;
-	struct mm_struct *orig_mm;
+	struct mm_struct *patching_mm, *orig_mm;
+	unsigned long text_poke_addr, pfn;
+	u32 *patch_addr, *end, *pend;
+	ppc_inst_t instr;
 	spinlock_t *ptl;
+	int ilen, err;
+	pte_t *pte;
 
 	patching_mm = __this_cpu_read(cpu_patching_context.mm);
 	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
-	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
 
 	pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
 	if (!pte)
 		return -ENOMEM;
 
-	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+	end = (void *)addr + len;
+	do {
+		pfn = get_patch_pfn(addr);
+		__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
 
-	/* order PTE update before use, also serves as the hwsync */
-	asm volatile("ptesync": : :"memory");
-
-	/* order context switch after arbitrary prior code */
-	isync();
-
-	orig_mm = start_using_temp_mm(patching_mm);
-
-	err = __patch_instruction(addr, instr, patch_addr);
+		/* order PTE update before use, also serves as the hwsync */
+		asm volatile("ptesync": : :"memory");
 
-	/* hwsync performed by __patch_instruction (sync) if successful */
-	if (err)
-		mb();  /* sync */
+		/* order context switch after arbitrary prior code */
+		isync();
+
+		orig_mm = start_using_temp_mm(patching_mm);
+
+		patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+		pend = (void *)addr + PAGE_SIZE - offset_in_page(addr);
+		if (end < pend)
+			pend = end;
+
+		while (addr < pend) {
+			instr = ppc_inst_read(code);
+			ilen = ppc_inst_len(instr);
+			err = __patch_instruction(addr, instr, patch_addr);
+			/* hwsync performed by __patch_instruction (sync) if successful */
+			if (err) {
+				mb();  /* sync */
+				break;
+			}
+
+			patch_addr = (void *)patch_addr + ilen;
+			addr = (void *)addr + ilen;
+			if (!fill_inst)
+				code = (void *)code + ilen;
+		}
 
-	/* context synchronisation performed by __patch_instruction (isync or exception) */
-	stop_using_temp_mm(patching_mm, orig_mm);
+		/* context synchronisation performed by __patch_instruction (isync or exception) */
+		stop_using_temp_mm(patching_mm, orig_mm);
 
-	pte_clear(patching_mm, text_poke_addr, pte);
-	/*
-	 * ptesync to order PTE update before TLB invalidation done
-	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
-	 */
-	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
+		pte_clear(patching_mm, text_poke_addr, pte);
+		/*
+		 * ptesync to order PTE update before TLB invalidation done
+		 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
+		 */
+		local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
+		if (err)
+			break;
+	} while (addr < end);
 
 	pte_unmap_unlock(pte, ptl);
 
 	return err;
 }
 
-static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
+static int __do_patch_instructions(u32 *addr, u32 *code, bool fill_inst, size_t len)
 {
-	int err;
-	u32 *patch_addr;
-	unsigned long text_poke_addr;
+	unsigned long text_poke_addr, pfn;
+	u32 *patch_addr, *end, *pend;
+	ppc_inst_t instr;
+	int ilen, err;
 	pte_t *pte;
-	unsigned long pfn = get_patch_pfn(addr);
 
 	text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
-	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
-
 	pte = __this_cpu_read(cpu_patching_context.pte);
-	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
-	/* See ptesync comment in radix__set_pte_at() */
-	if (radix_enabled())
-		asm volatile("ptesync": : :"memory");
 
-	err = __patch_instruction(addr, instr, patch_addr);
+	end = (void *)addr + len;
+	do {
+		pfn = get_patch_pfn(addr);
+		__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+		/* See ptesync comment in radix__set_pte_at() */
+		if (radix_enabled())
+			asm volatile("ptesync": : :"memory");
+
+		patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+		pend = (void *)addr + PAGE_SIZE - offset_in_page(addr);
+		if (end < pend)
+			pend = end;
+
+		while (addr < pend) {
+			instr = ppc_inst_read(code);
+			ilen = ppc_inst_len(instr);
+			err = __patch_instruction(addr, instr, patch_addr);
+			if (err)
+				break;
+
+			patch_addr = (void *)patch_addr + ilen;
+			addr = (void *)addr + ilen;
+			if (!fill_inst)
+				code = (void *)code + ilen;
+		}
 
-	pte_clear(&init_mm, text_poke_addr, pte);
-	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
+		pte_clear(&init_mm, text_poke_addr, pte);
+		flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
+		if (err)
+			break;
+	} while (addr < end);
 
 	return err;
 }
@@ -369,15 +409,34 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
 
 	local_irq_save(flags);
 	if (mm_patch_enabled())
-		err = __do_patch_instruction_mm(addr, instr);
+		err = __do_patch_instructions_mm(addr, (u32 *)&instr, false, ppc_inst_len(instr));
 	else
-		err = __do_patch_instruction(addr, instr);
+		err = __do_patch_instructions(addr, (u32 *)&instr, false, ppc_inst_len(instr));
 	local_irq_restore(flags);
 
 	return err;
 }
 NOKPROBE_SYMBOL(patch_instruction);
 
+/*
+ * Patch 'addr' with 'len' bytes of instructions from 'code'.
+ */
+int patch_instructions(u32 *addr, u32 *code, bool fill_inst, size_t len)
+{
+	unsigned long flags;
+	int err;
+
+	local_irq_save(flags);
+	if (mm_patch_enabled())
+		err = __do_patch_instructions_mm(addr, code, fill_inst, len);
+	else
+		err = __do_patch_instructions(addr, code, fill_inst, len);
+	local_irq_restore(flags);
+
+	return err;
+}
+NOKPROBE_SYMBOL(patch_instructions);
+
 int patch_branch(u32 *addr, unsigned long target, int flags)
 {
 	ppc_inst_t instr;
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 2/4] powerpc/bpf: implement bpf_arch_text_copy
  2023-03-09 18:02 [PATCH v2 0/4] enable bpf_prog_pack allocator for powerpc Hari Bathini
  2023-03-09 18:02 ` [PATCH v2 1/4] powerpc/code-patching: introduce patch_instructions() Hari Bathini
@ 2023-03-09 18:02 ` Hari Bathini
  2023-03-10 22:04   ` Song Liu
  2023-03-09 18:02 ` [PATCH v2 3/4] powerpc/bpf: implement bpf_arch_text_invalidate for bpf_prog_pack Hari Bathini
  2023-03-09 18:02 ` [PATCH v2 4/4] powerpc/bpf: use bpf_jit_binary_pack_[alloc|finalize|free] Hari Bathini
  3 siblings, 1 reply; 15+ messages in thread
From: Hari Bathini @ 2023-03-09 18:02 UTC (permalink / raw)
  To: linuxppc-dev, bpf
  Cc: Song Liu, Daniel Borkmann, Alexei Starovoitov, Andrii Nakryiko,
	Naveen N. Rao

bpf_arch_text_copy is used to dump JITed binary to RX page, allowing
multiple BPF programs to share the same page. Use the newly introduced
patch_instructions() to implement it. Around 5X improvement in speed
of execution observed, using the new patch_instructions() function
over patch_instruction(), while running the tests from test_bpf.ko.

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
 arch/powerpc/net/bpf_jit_comp.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index e93aefcfb83f..0a70319116d1 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -13,9 +13,12 @@
 #include <linux/netdevice.h>
 #include <linux/filter.h>
 #include <linux/if_vlan.h>
-#include <asm/kprobes.h>
+#include <linux/memory.h>
 #include <linux/bpf.h>
 
+#include <asm/kprobes.h>
+#include <asm/code-patching.h>
+
 #include "bpf_jit.h"
 
 static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
@@ -272,3 +275,21 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
 	ctx->exentry_idx++;
 	return 0;
 }
+
+void *bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+	void *ret = ERR_PTR(-EINVAL);
+	int err;
+
+	if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
+		return ret;
+
+	ret = dst;
+	mutex_lock(&text_mutex);
+	err = patch_instructions(dst, src, false, len);
+	if (err)
+		ret = ERR_PTR(err);
+	mutex_unlock(&text_mutex);
+
+	return ret;
+}
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 3/4] powerpc/bpf: implement bpf_arch_text_invalidate for bpf_prog_pack
  2023-03-09 18:02 [PATCH v2 0/4] enable bpf_prog_pack allocator for powerpc Hari Bathini
  2023-03-09 18:02 ` [PATCH v2 1/4] powerpc/code-patching: introduce patch_instructions() Hari Bathini
  2023-03-09 18:02 ` [PATCH v2 2/4] powerpc/bpf: implement bpf_arch_text_copy Hari Bathini
@ 2023-03-09 18:02 ` Hari Bathini
  2023-03-10 22:06   ` Song Liu
  2023-03-09 18:02 ` [PATCH v2 4/4] powerpc/bpf: use bpf_jit_binary_pack_[alloc|finalize|free] Hari Bathini
  3 siblings, 1 reply; 15+ messages in thread
From: Hari Bathini @ 2023-03-09 18:02 UTC (permalink / raw)
  To: linuxppc-dev, bpf
  Cc: Song Liu, Daniel Borkmann, Alexei Starovoitov, Andrii Nakryiko,
	Naveen N. Rao

Implement bpf_arch_text_invalidate and use it to fill unused part of
the bpf_prog_pack with trap instructions when a BPF program is freed.

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
 arch/powerpc/net/bpf_jit_comp.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 0a70319116d1..d1794d9f0154 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -293,3 +293,18 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len)
 
 	return ret;
 }
+
+int bpf_arch_text_invalidate(void *dst, size_t len)
+{
+	u32 inst = BREAKPOINT_INSTRUCTION;
+	int ret = -EINVAL;
+
+	if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
+		return ret;
+
+	mutex_lock(&text_mutex);
+	ret = patch_instructions(dst, &inst, true, len);
+	mutex_unlock(&text_mutex);
+
+	return ret;
+}
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH v2 4/4] powerpc/bpf: use bpf_jit_binary_pack_[alloc|finalize|free]
  2023-03-09 18:02 [PATCH v2 0/4] enable bpf_prog_pack allocator for powerpc Hari Bathini
                   ` (2 preceding siblings ...)
  2023-03-09 18:02 ` [PATCH v2 3/4] powerpc/bpf: implement bpf_arch_text_invalidate for bpf_prog_pack Hari Bathini
@ 2023-03-09 18:02 ` Hari Bathini
  2023-03-10 22:35   ` Song Liu
  2023-03-11 10:16   ` Christophe Leroy
  3 siblings, 2 replies; 15+ messages in thread
From: Hari Bathini @ 2023-03-09 18:02 UTC (permalink / raw)
  To: linuxppc-dev, bpf
  Cc: Song Liu, Daniel Borkmann, Alexei Starovoitov, Andrii Nakryiko,
	Naveen N. Rao

Use bpf_jit_binary_pack_alloc in powerpc jit. The jit engine first
writes the program to the rw buffer. When the jit is done, the program
is copied to the final location with bpf_jit_binary_pack_finalize.
With multiple jit_subprogs, bpf_jit_free is called on some subprograms
that haven't got bpf_jit_binary_pack_finalize() yet. Implement custom
bpf_jit_free() like in commit 1d5f82d9dd47 ("bpf, x86: fix freeing of
not-finalized bpf_prog_pack") to call bpf_jit_binary_pack_finalize(),
if necessary. While here, correct the misnomer powerpc64_jit_data to
powerpc_jit_data as it is meant for both ppc32 and ppc64.

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
 arch/powerpc/net/bpf_jit.h        |   7 +-
 arch/powerpc/net/bpf_jit_comp.c   | 104 +++++++++++++++++++++---------
 arch/powerpc/net/bpf_jit_comp32.c |   4 +-
 arch/powerpc/net/bpf_jit_comp64.c |   6 +-
 4 files changed, 83 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index d767e39d5645..a8b7480c4d43 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -168,15 +168,16 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
 
 void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
 int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
-int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
 		       u32 *addrs, int pass, bool extra_pass);
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_realloc_regs(struct codegen_context *ctx);
 int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
 
-int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
-			  int insn_idx, int jmp_off, int dst_reg);
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
+			  struct codegen_context *ctx, int insn_idx,
+			  int jmp_off, int dst_reg);
 
 #endif
 
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index d1794d9f0154..ece75c829499 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -42,10 +42,11 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg,
 	return 0;
 }
 
-struct powerpc64_jit_data {
-	struct bpf_binary_header *header;
+struct powerpc_jit_data {
+	struct bpf_binary_header *hdr;
+	struct bpf_binary_header *fhdr;
 	u32 *addrs;
-	u8 *image;
+	u8 *fimage;
 	u32 proglen;
 	struct codegen_context ctx;
 };
@@ -62,15 +63,18 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	u8 *image = NULL;
 	u32 *code_base;
 	u32 *addrs;
-	struct powerpc64_jit_data *jit_data;
+	struct powerpc_jit_data *jit_data;
 	struct codegen_context cgctx;
 	int pass;
 	int flen;
-	struct bpf_binary_header *bpf_hdr;
+	struct bpf_binary_header *fhdr = NULL;
+	struct bpf_binary_header *hdr = NULL;
 	struct bpf_prog *org_fp = fp;
 	struct bpf_prog *tmp_fp;
 	bool bpf_blinded = false;
 	bool extra_pass = false;
+	u8 *fimage = NULL;
+	u32 *fcode_base;
 	u32 extable_len;
 	u32 fixup_len;
 
@@ -100,9 +104,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	addrs = jit_data->addrs;
 	if (addrs) {
 		cgctx = jit_data->ctx;
-		image = jit_data->image;
-		bpf_hdr = jit_data->header;
+		fimage = jit_data->fimage;
+		fhdr = jit_data->fhdr;
 		proglen = jit_data->proglen;
+		hdr = jit_data->hdr;
+		image = (void *)hdr + ((void *)fimage - (void *)fhdr);
 		extra_pass = true;
 		goto skip_init_ctx;
 	}
@@ -120,7 +126,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
 
 	/* Scouting faux-generate pass 0 */
-	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
+	if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
 		/* We hit something illegal or unsupported. */
 		fp = org_fp;
 		goto out_addrs;
@@ -135,7 +141,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	 */
 	if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
 		cgctx.idx = 0;
-		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
+		if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
 			fp = org_fp;
 			goto out_addrs;
 		}
@@ -157,17 +163,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	proglen = cgctx.idx * 4;
 	alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
 
-	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
-	if (!bpf_hdr) {
+	fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
+					      bpf_jit_fill_ill_insns);
+	if (!fhdr) {
 		fp = org_fp;
 		goto out_addrs;
 	}
 
 	if (extable_len)
-		fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len;
+		fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;
 
 skip_init_ctx:
 	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
+	fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
 
 	/* Code generation passes 1-2 */
 	for (pass = 1; pass < 3; pass++) {
@@ -175,8 +183,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		cgctx.idx = 0;
 		cgctx.alt_exit_addr = 0;
 		bpf_jit_build_prologue(code_base, &cgctx);
-		if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass, extra_pass)) {
-			bpf_jit_binary_free(bpf_hdr);
+		if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass, extra_pass)) {
+			bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
+			bpf_jit_binary_pack_free(fhdr, hdr);
 			fp = org_fp;
 			goto out_addrs;
 		}
@@ -192,21 +201,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		 * Note that we output the base address of the code_base
 		 * rather than image, since opcodes are in code_base.
 		 */
-		bpf_jit_dump(flen, proglen, pass, code_base);
+		bpf_jit_dump(flen, proglen, pass, fcode_base);
 
 #ifdef CONFIG_PPC64_ELF_ABI_V1
 	/* Function descriptor nastiness: Address + TOC */
-	((u64 *)image)[0] = (u64)code_base;
+	((u64 *)image)[0] = (u64)fcode_base;
 	((u64 *)image)[1] = local_paca->kernel_toc;
 #endif
 
-	fp->bpf_func = (void *)image;
+	fp->bpf_func = (void *)fimage;
 	fp->jited = 1;
 	fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
 
-	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size);
 	if (!fp->is_func || extra_pass) {
-		bpf_jit_binary_lock_ro(bpf_hdr);
+		if (bpf_jit_binary_pack_finalize(fp, fhdr, hdr)) {
+			fp = org_fp;
+			goto out_addrs;
+		}
 		bpf_prog_fill_jited_linfo(fp, addrs);
 out_addrs:
 		kfree(addrs);
@@ -216,8 +227,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		jit_data->addrs = addrs;
 		jit_data->ctx = cgctx;
 		jit_data->proglen = proglen;
-		jit_data->image = image;
-		jit_data->header = bpf_hdr;
+		jit_data->fimage = fimage;
+		jit_data->fhdr = fhdr;
+		jit_data->hdr = hdr;
 	}
 
 out:
@@ -231,12 +243,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
  * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
  * this function, as this only applies to BPF_PROBE_MEM, for now.
  */
-int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
-			  int insn_idx, int jmp_off, int dst_reg)
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
+			  struct codegen_context *ctx, int insn_idx, int jmp_off,
+			  int dst_reg)
 {
 	off_t offset;
 	unsigned long pc;
-	struct exception_table_entry *ex;
+	struct exception_table_entry *ex, *ex_entry;
 	u32 *fixup;
 
 	/* Populate extable entries only in the last pass */
@@ -247,9 +260,16 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
 	    WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
 		return -EINVAL;
 
+	/*
+	 * Program is firt written to image before copying to the
+	 * final location (fimage). Accordingly, update in the image first.
+	 * As all offsets used are relative, copying as is to the
+	 * final location should be alright.
+	 */
 	pc = (unsigned long)&image[insn_idx];
+	ex = (void *)fp->aux->extable - (void *)fimage + (void *)image;
 
-	fixup = (void *)fp->aux->extable -
+	fixup = (void *)ex -
 		(fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
 		(ctx->exentry_idx * BPF_FIXUP_LEN * 4);
 
@@ -260,17 +280,17 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
 	fixup[BPF_FIXUP_LEN - 1] =
 		PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
 
-	ex = &fp->aux->extable[ctx->exentry_idx];
+	ex_entry = &ex[ctx->exentry_idx];
 
-	offset = pc - (long)&ex->insn;
+	offset = pc - (long)&ex_entry->insn;
 	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
 		return -ERANGE;
-	ex->insn = offset;
+	ex_entry->insn = offset;
 
-	offset = (long)fixup - (long)&ex->fixup;
+	offset = (long)fixup - (long)&ex_entry->fixup;
 	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
 		return -ERANGE;
-	ex->fixup = offset;
+	ex_entry->fixup = offset;
 
 	ctx->exentry_idx++;
 	return 0;
@@ -308,3 +328,27 @@ int bpf_arch_text_invalidate(void *dst, size_t len)
 
 	return ret;
 }
+
+void bpf_jit_free(struct bpf_prog *fp)
+{
+	if (fp->jited) {
+		struct powerpc_jit_data *jit_data = fp->aux->jit_data;
+		struct bpf_binary_header *hdr;
+
+		/*
+		 * If we fail the final pass of JIT (from jit_subprogs),
+		 * the program may not be finalized yet. Call finalize here
+		 * before freeing it.
+		 */
+		if (jit_data) {
+			bpf_jit_binary_pack_finalize(fp, jit_data->fhdr, jit_data->hdr);
+			kvfree(jit_data->addrs);
+			kfree(jit_data);
+		}
+		hdr = bpf_jit_binary_pack_hdr(fp);
+		bpf_jit_binary_pack_free(hdr, NULL);
+		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
+	}
+
+	bpf_prog_unlock_free(fp);
+}
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 7f91ea064c08..fb2761b54d64 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -278,7 +278,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
 }
 
 /* Assemble the body code between the prologue & epilogue */
-int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
 		       u32 *addrs, int pass, bool extra_pass)
 {
 	const struct bpf_insn *insn = fp->insnsi;
@@ -997,7 +997,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 					jmp_off += 4;
 				}
 
-				ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx,
+				ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, insn_idx,
 							    jmp_off, dst_reg);
 				if (ret)
 					return ret;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 8dd3cabaa83a..37a8970a7065 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -343,7 +343,7 @@ asm (
 );
 
 /* Assemble the body code between the prologue & epilogue */
-int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
 		       u32 *addrs, int pass, bool extra_pass)
 {
 	enum stf_barrier_type stf_barrier = stf_barrier_type_get();
@@ -922,8 +922,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
 				addrs[++i] = ctx->idx * 4;
 
 			if (BPF_MODE(code) == BPF_PROBE_MEM) {
-				ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1,
-							    4, dst_reg);
+				ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx,
+							    ctx->idx - 1, 4, dst_reg);
 				if (ret)
 					return ret;
 			}
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/4] powerpc/code-patching: introduce patch_instructions()
  2023-03-09 18:02 ` [PATCH v2 1/4] powerpc/code-patching: introduce patch_instructions() Hari Bathini
@ 2023-03-10 18:26   ` Christophe Leroy
  2023-03-11  7:42     ` Christophe Leroy
  2023-03-11  8:02   ` Christophe Leroy
  1 sibling, 1 reply; 15+ messages in thread
From: Christophe Leroy @ 2023-03-10 18:26 UTC (permalink / raw)
  To: Hari Bathini, linuxppc-dev, bpf@vger.kernel.org
  Cc: Song Liu, Daniel Borkmann, Alexei Starovoitov, Andrii Nakryiko,
	Naveen N. Rao



Le 09/03/2023 à 19:02, Hari Bathini a écrit :
> patch_instruction() entails setting up pte, patching the instruction,
> clearing the pte and flushing the tlb. If multiple instructions need
> to be patched, every instruction would have to go through the above
> drill unnecessarily. Instead, introduce function patch_instructions()
> that patches multiple instructions at one go while setting up the pte,
> clearing the pte and flushing the tlb only once per page range of
> instructions. Observed ~5X improvement in speed of execution using
> patch_instructions() over patch_instruction(), when more instructions
> are to be patched.

I get a 13% degradation on the time needed to activate ftrace on a 
powerpc 8xx.

Before your patch, activating ftrace takes 550k timebase ticks. After 
your patch it takes 620k timebase ticks.

Christophe

> 
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/code-patching.h |   1 +
>   arch/powerpc/lib/code-patching.c         | 151 ++++++++++++++++-------
>   2 files changed, 106 insertions(+), 46 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
> index 3f881548fb61..059fc4fe700e 100644
> --- a/arch/powerpc/include/asm/code-patching.h
> +++ b/arch/powerpc/include/asm/code-patching.h
> @@ -74,6 +74,7 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
>   int patch_branch(u32 *addr, unsigned long target, int flags);
>   int patch_instruction(u32 *addr, ppc_inst_t instr);
>   int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
> +int patch_instructions(u32 *addr, u32 *code, bool fill_inst, size_t len);
>   
>   static inline unsigned long patch_site_addr(s32 *site)
>   {
> diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
> index b00112d7ad46..33857b9b53de 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -278,77 +278,117 @@ static void unmap_patch_area(unsigned long addr)
>   	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
>   }
>   
> -static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
> +static int __do_patch_instructions_mm(u32 *addr, u32 *code, bool fill_inst, size_t len)
>   {
> -	int err;
> -	u32 *patch_addr;
> -	unsigned long text_poke_addr;
> -	pte_t *pte;
> -	unsigned long pfn = get_patch_pfn(addr);
> -	struct mm_struct *patching_mm;
> -	struct mm_struct *orig_mm;
> +	struct mm_struct *patching_mm, *orig_mm;
> +	unsigned long text_poke_addr, pfn;
> +	u32 *patch_addr, *end, *pend;
> +	ppc_inst_t instr;
>   	spinlock_t *ptl;
> +	int ilen, err;
> +	pte_t *pte;
>   
>   	patching_mm = __this_cpu_read(cpu_patching_context.mm);
>   	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
> -	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
>   
>   	pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
>   	if (!pte)
>   		return -ENOMEM;
>   
> -	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
> +	end = (void *)addr + len;
> +	do {
> +		pfn = get_patch_pfn(addr);
> +		__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
>   
> -	/* order PTE update before use, also serves as the hwsync */
> -	asm volatile("ptesync": : :"memory");
> -
> -	/* order context switch after arbitrary prior code */
> -	isync();
> -
> -	orig_mm = start_using_temp_mm(patching_mm);
> -
> -	err = __patch_instruction(addr, instr, patch_addr);
> +		/* order PTE update before use, also serves as the hwsync */
> +		asm volatile("ptesync": : :"memory");
>   
> -	/* hwsync performed by __patch_instruction (sync) if successful */
> -	if (err)
> -		mb();  /* sync */
> +		/* order context switch after arbitrary prior code */
> +		isync();
> +
> +		orig_mm = start_using_temp_mm(patching_mm);
> +
> +		patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
> +		pend = (void *)addr + PAGE_SIZE - offset_in_page(addr);
> +		if (end < pend)
> +			pend = end;
> +
> +		while (addr < pend) {
> +			instr = ppc_inst_read(code);
> +			ilen = ppc_inst_len(instr);
> +			err = __patch_instruction(addr, instr, patch_addr);
> +			/* hwsync performed by __patch_instruction (sync) if successful */
> +			if (err) {
> +				mb();  /* sync */
> +				break;
> +			}
> +
> +			patch_addr = (void *)patch_addr + ilen;
> +			addr = (void *)addr + ilen;
> +			if (!fill_inst)
> +				code = (void *)code + ilen;
> +		}
>   
> -	/* context synchronisation performed by __patch_instruction (isync or exception) */
> -	stop_using_temp_mm(patching_mm, orig_mm);
> +		/* context synchronisation performed by __patch_instruction (isync or exception) */
> +		stop_using_temp_mm(patching_mm, orig_mm);
>   
> -	pte_clear(patching_mm, text_poke_addr, pte);
> -	/*
> -	 * ptesync to order PTE update before TLB invalidation done
> -	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
> -	 */
> -	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
> +		pte_clear(patching_mm, text_poke_addr, pte);
> +		/*
> +		 * ptesync to order PTE update before TLB invalidation done
> +		 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
> +		 */
> +		local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
> +		if (err)
> +			break;
> +	} while (addr < end);
>   
>   	pte_unmap_unlock(pte, ptl);
>   
>   	return err;
>   }
>   
> -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
> +static int __do_patch_instructions(u32 *addr, u32 *code, bool fill_inst, size_t len)
>   {
> -	int err;
> -	u32 *patch_addr;
> -	unsigned long text_poke_addr;
> +	unsigned long text_poke_addr, pfn;
> +	u32 *patch_addr, *end, *pend;
> +	ppc_inst_t instr;
> +	int ilen, err;
>   	pte_t *pte;
> -	unsigned long pfn = get_patch_pfn(addr);
>   
>   	text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
> -	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
> -
>   	pte = __this_cpu_read(cpu_patching_context.pte);
> -	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
> -	/* See ptesync comment in radix__set_pte_at() */
> -	if (radix_enabled())
> -		asm volatile("ptesync": : :"memory");
>   
> -	err = __patch_instruction(addr, instr, patch_addr);
> +	end = (void *)addr + len;
> +	do {
> +		pfn = get_patch_pfn(addr);
> +		__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
> +		/* See ptesync comment in radix__set_pte_at() */
> +		if (radix_enabled())
> +			asm volatile("ptesync": : :"memory");
> +
> +		patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
> +		pend = (void *)addr + PAGE_SIZE - offset_in_page(addr);
> +		if (end < pend)
> +			pend = end;
> +
> +		while (addr < pend) {
> +			instr = ppc_inst_read(code);
> +			ilen = ppc_inst_len(instr);
> +			err = __patch_instruction(addr, instr, patch_addr);
> +			if (err)
> +				break;
> +
> +			patch_addr = (void *)patch_addr + ilen;
> +			addr = (void *)addr + ilen;
> +			if (!fill_inst)
> +				code = (void *)code + ilen;
> +		}
>   
> -	pte_clear(&init_mm, text_poke_addr, pte);
> -	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
> +		pte_clear(&init_mm, text_poke_addr, pte);
> +		flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
> +		if (err)
> +			break;
> +	} while (addr < end);
>   
>   	return err;
>   }
> @@ -369,15 +409,34 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
>   
>   	local_irq_save(flags);
>   	if (mm_patch_enabled())
> -		err = __do_patch_instruction_mm(addr, instr);
> +		err = __do_patch_instructions_mm(addr, (u32 *)&instr, false, ppc_inst_len(instr));
>   	else
> -		err = __do_patch_instruction(addr, instr);
> +		err = __do_patch_instructions(addr, (u32 *)&instr, false, ppc_inst_len(instr));
>   	local_irq_restore(flags);
>   
>   	return err;
>   }
>   NOKPROBE_SYMBOL(patch_instruction);
>   
> +/*
> + * Patch 'addr' with 'len' bytes of instructions from 'code'.
> + */
> +int patch_instructions(u32 *addr, u32 *code, bool fill_inst, size_t len)
> +{
> +	unsigned long flags;
> +	int err;
> +
> +	local_irq_save(flags);
> +	if (mm_patch_enabled())
> +		err = __do_patch_instructions_mm(addr, code, fill_inst, len);
> +	else
> +		err = __do_patch_instructions(addr, code, fill_inst, len);
> +	local_irq_restore(flags);
> +
> +	return err;
> +}
> +NOKPROBE_SYMBOL(patch_instructions);
> +
>   int patch_branch(u32 *addr, unsigned long target, int flags)
>   {
>   	ppc_inst_t instr;

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 2/4] powerpc/bpf: implement bpf_arch_text_copy
  2023-03-09 18:02 ` [PATCH v2 2/4] powerpc/bpf: implement bpf_arch_text_copy Hari Bathini
@ 2023-03-10 22:04   ` Song Liu
  0 siblings, 0 replies; 15+ messages in thread
From: Song Liu @ 2023-03-10 22:04 UTC (permalink / raw)
  To: Hari Bathini
  Cc: Song Liu, Daniel Borkmann, Alexei Starovoitov, Andrii Nakryiko,
	Naveen N. Rao, bpf, linuxppc-dev

On Thu, Mar 9, 2023 at 10:02 AM Hari Bathini <hbathini@linux.ibm.com> wrote:
>
> bpf_arch_text_copy is used to dump JITed binary to RX page, allowing
> multiple BPF programs to share the same page. Use the newly introduced
> patch_instructions() to implement it. Around 5X improvement in speed
> of execution observed, using the new patch_instructions() function
> over patch_instruction(), while running the tests from test_bpf.ko.
>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>  arch/powerpc/net/bpf_jit_comp.c | 23 ++++++++++++++++++++++-
>  1 file changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index e93aefcfb83f..0a70319116d1 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -13,9 +13,12 @@
>  #include <linux/netdevice.h>
>  #include <linux/filter.h>
>  #include <linux/if_vlan.h>
> -#include <asm/kprobes.h>
> +#include <linux/memory.h>
>  #include <linux/bpf.h>
>
> +#include <asm/kprobes.h>
> +#include <asm/code-patching.h>
> +
>  #include "bpf_jit.h"
>
>  static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
> @@ -272,3 +275,21 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>         ctx->exentry_idx++;
>         return 0;
>  }
> +
> +void *bpf_arch_text_copy(void *dst, void *src, size_t len)
> +{
> +       void *ret = ERR_PTR(-EINVAL);
> +       int err;
> +
> +       if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
> +               return ret;
> +
> +       ret = dst;
> +       mutex_lock(&text_mutex);
> +       err = patch_instructions(dst, src, false, len);
> +       if (err)
> +               ret = ERR_PTR(err);
> +       mutex_unlock(&text_mutex);
> +
> +       return ret;
> +}

It seems we don't really need "ret". How about something like:

+void *bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+       int err;
+
+       if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
+               return ERR_PTR(-EINVAL);
+
+       mutex_lock(&text_mutex);
+       err = patch_instructions(dst, src, false, len);
+       mutex_unlock(&text_mutex);
+
+       return err ? ERR_PTR(err) : dst;
+}

Song

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 3/4] powerpc/bpf: implement bpf_arch_text_invalidate for bpf_prog_pack
  2023-03-09 18:02 ` [PATCH v2 3/4] powerpc/bpf: implement bpf_arch_text_invalidate for bpf_prog_pack Hari Bathini
@ 2023-03-10 22:06   ` Song Liu
  0 siblings, 0 replies; 15+ messages in thread
From: Song Liu @ 2023-03-10 22:06 UTC (permalink / raw)
  To: Hari Bathini
  Cc: Song Liu, Daniel Borkmann, Alexei Starovoitov, Andrii Nakryiko,
	Naveen N. Rao, bpf, linuxppc-dev

On Thu, Mar 9, 2023 at 10:02 AM Hari Bathini <hbathini@linux.ibm.com> wrote:
>
> Implement bpf_arch_text_invalidate and use it to fill unused part of
> the bpf_prog_pack with trap instructions when a BPF program is freed.
>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>  arch/powerpc/net/bpf_jit_comp.c | 15 +++++++++++++++
>  1 file changed, 15 insertions(+)
>
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 0a70319116d1..d1794d9f0154 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -293,3 +293,18 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len)
>
>         return ret;
>  }
> +
> +int bpf_arch_text_invalidate(void *dst, size_t len)
> +{
> +       u32 inst = BREAKPOINT_INSTRUCTION;
> +       int ret = -EINVAL;

No need to set to -EINVAL here.

> +
> +       if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
> +               return ret;

Just return -EINVAL instead.

> +
> +       mutex_lock(&text_mutex);
> +       ret = patch_instructions(dst, &inst, true, len);
> +       mutex_unlock(&text_mutex);
> +
> +       return ret;
> +}
> --
> 2.39.2
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 4/4] powerpc/bpf: use bpf_jit_binary_pack_[alloc|finalize|free]
  2023-03-09 18:02 ` [PATCH v2 4/4] powerpc/bpf: use bpf_jit_binary_pack_[alloc|finalize|free] Hari Bathini
@ 2023-03-10 22:35   ` Song Liu
  2023-08-25 15:40     ` Hari Bathini
  2023-03-11 10:16   ` Christophe Leroy
  1 sibling, 1 reply; 15+ messages in thread
From: Song Liu @ 2023-03-10 22:35 UTC (permalink / raw)
  To: Hari Bathini
  Cc: Song Liu, Daniel Borkmann, Alexei Starovoitov, Andrii Nakryiko,
	Naveen N. Rao, bpf, linuxppc-dev

On Thu, Mar 9, 2023 at 10:03 AM Hari Bathini <hbathini@linux.ibm.com> wrote:
>
> Use bpf_jit_binary_pack_alloc in powerpc jit. The jit engine first
> writes the program to the rw buffer. When the jit is done, the program
> is copied to the final location with bpf_jit_binary_pack_finalize.
> With multiple jit_subprogs, bpf_jit_free is called on some subprograms
> that haven't got bpf_jit_binary_pack_finalize() yet. Implement custom
> bpf_jit_free() like in commit 1d5f82d9dd47 ("bpf, x86: fix freeing of
> not-finalized bpf_prog_pack") to call bpf_jit_binary_pack_finalize(),
> if necessary. While here, correct the misnomer powerpc64_jit_data to
> powerpc_jit_data as it is meant for both ppc32 and ppc64.
>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>  arch/powerpc/net/bpf_jit.h        |   7 +-
>  arch/powerpc/net/bpf_jit_comp.c   | 104 +++++++++++++++++++++---------
>  arch/powerpc/net/bpf_jit_comp32.c |   4 +-
>  arch/powerpc/net/bpf_jit_comp64.c |   6 +-
>  4 files changed, 83 insertions(+), 38 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index d767e39d5645..a8b7480c4d43 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -168,15 +168,16 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
>
>  void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
>  int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
> -int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
> +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
>                        u32 *addrs, int pass, bool extra_pass);
>  void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
>  void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
>  void bpf_jit_realloc_regs(struct codegen_context *ctx);
>  int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
>
> -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
> -                         int insn_idx, int jmp_off, int dst_reg);
> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
> +                         struct codegen_context *ctx, int insn_idx,
> +                         int jmp_off, int dst_reg);
>
>  #endif
>
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index d1794d9f0154..ece75c829499 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -42,10 +42,11 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg,
>         return 0;
>  }
>
> -struct powerpc64_jit_data {
> -       struct bpf_binary_header *header;
> +struct powerpc_jit_data {
> +       struct bpf_binary_header *hdr;
> +       struct bpf_binary_header *fhdr;
>         u32 *addrs;
> -       u8 *image;
> +       u8 *fimage;
>         u32 proglen;
>         struct codegen_context ctx;
>  };

Some comments about the f- prefix will be helpful. (Yes, I should have done
better job adding comments for the x86 counterpart..)

> @@ -62,15 +63,18 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>         u8 *image = NULL;
>         u32 *code_base;
>         u32 *addrs;
> -       struct powerpc64_jit_data *jit_data;
> +       struct powerpc_jit_data *jit_data;
>         struct codegen_context cgctx;
>         int pass;
>         int flen;
> -       struct bpf_binary_header *bpf_hdr;
> +       struct bpf_binary_header *fhdr = NULL;
> +       struct bpf_binary_header *hdr = NULL;
>         struct bpf_prog *org_fp = fp;
>         struct bpf_prog *tmp_fp;
>         bool bpf_blinded = false;
>         bool extra_pass = false;
> +       u8 *fimage = NULL;
> +       u32 *fcode_base;
>         u32 extable_len;
>         u32 fixup_len;
>
> @@ -100,9 +104,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>         addrs = jit_data->addrs;
>         if (addrs) {
>                 cgctx = jit_data->ctx;
> -               image = jit_data->image;
> -               bpf_hdr = jit_data->header;
> +               fimage = jit_data->fimage;
> +               fhdr = jit_data->fhdr;
>                 proglen = jit_data->proglen;
> +               hdr = jit_data->hdr;
> +               image = (void *)hdr + ((void *)fimage - (void *)fhdr);
>                 extra_pass = true;
>                 goto skip_init_ctx;
>         }
> @@ -120,7 +126,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>         cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
>
>         /* Scouting faux-generate pass 0 */
> -       if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
> +       if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
>                 /* We hit something illegal or unsupported. */
>                 fp = org_fp;
>                 goto out_addrs;
> @@ -135,7 +141,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>          */
>         if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
>                 cgctx.idx = 0;
> -               if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
> +               if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
>                         fp = org_fp;
>                         goto out_addrs;
>                 }
> @@ -157,17 +163,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>         proglen = cgctx.idx * 4;
>         alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
>
> -       bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
> -       if (!bpf_hdr) {
> +       fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
> +                                             bpf_jit_fill_ill_insns);
> +       if (!fhdr) {
>                 fp = org_fp;
>                 goto out_addrs;
>         }
>
>         if (extable_len)
> -               fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len;
> +               fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;
>
>  skip_init_ctx:
>         code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
> +       fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
>
>         /* Code generation passes 1-2 */
>         for (pass = 1; pass < 3; pass++) {
> @@ -175,8 +183,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>                 cgctx.idx = 0;
>                 cgctx.alt_exit_addr = 0;
>                 bpf_jit_build_prologue(code_base, &cgctx);
> -               if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass, extra_pass)) {
> -                       bpf_jit_binary_free(bpf_hdr);
> +               if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass, extra_pass)) {
> +                       bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
> +                       bpf_jit_binary_pack_free(fhdr, hdr);
>                         fp = org_fp;
>                         goto out_addrs;
>                 }
> @@ -192,21 +201,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>                  * Note that we output the base address of the code_base
>                  * rather than image, since opcodes are in code_base.
>                  */
Maybe update the comment above with fcode_base to avoid
confusion.

> -               bpf_jit_dump(flen, proglen, pass, code_base);
> +               bpf_jit_dump(flen, proglen, pass, fcode_base);
>
>  #ifdef CONFIG_PPC64_ELF_ABI_V1
>         /* Function descriptor nastiness: Address + TOC */
> -       ((u64 *)image)[0] = (u64)code_base;
> +       ((u64 *)image)[0] = (u64)fcode_base;
>         ((u64 *)image)[1] = local_paca->kernel_toc;
>  #endif
>
> -       fp->bpf_func = (void *)image;
> +       fp->bpf_func = (void *)fimage;
>         fp->jited = 1;
>         fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
>
> -       bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size);
>         if (!fp->is_func || extra_pass) {
> -               bpf_jit_binary_lock_ro(bpf_hdr);
> +               if (bpf_jit_binary_pack_finalize(fp, fhdr, hdr)) {
> +                       fp = org_fp;
> +                       goto out_addrs;
> +               }
>                 bpf_prog_fill_jited_linfo(fp, addrs);
>  out_addrs:
>                 kfree(addrs);
> @@ -216,8 +227,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>                 jit_data->addrs = addrs;
>                 jit_data->ctx = cgctx;
>                 jit_data->proglen = proglen;
> -               jit_data->image = image;
> -               jit_data->header = bpf_hdr;
> +               jit_data->fimage = fimage;
> +               jit_data->fhdr = fhdr;
> +               jit_data->hdr = hdr;
>         }
>
>  out:
> @@ -231,12 +243,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
>   * this function, as this only applies to BPF_PROBE_MEM, for now.
>   */
> -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
> -                         int insn_idx, int jmp_off, int dst_reg)
> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
> +                         struct codegen_context *ctx, int insn_idx, int jmp_off,
> +                         int dst_reg)
>  {
>         off_t offset;
>         unsigned long pc;
> -       struct exception_table_entry *ex;
> +       struct exception_table_entry *ex, *ex_entry;
>         u32 *fixup;
>
>         /* Populate extable entries only in the last pass */
> @@ -247,9 +260,16 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>             WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
>                 return -EINVAL;
>
> +       /*
> +        * Program is firt written to image before copying to the
s/firt/first/

> +        * final location (fimage). Accordingly, update in the image first.
> +        * As all offsets used are relative, copying as is to the
> +        * final location should be alright.
> +        */
>         pc = (unsigned long)&image[insn_idx];
> +       ex = (void *)fp->aux->extable - (void *)fimage + (void *)image;
>
> -       fixup = (void *)fp->aux->extable -
> +       fixup = (void *)ex -
>                 (fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
>                 (ctx->exentry_idx * BPF_FIXUP_LEN * 4);
>
> @@ -260,17 +280,17 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>         fixup[BPF_FIXUP_LEN - 1] =
>                 PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
>
> -       ex = &fp->aux->extable[ctx->exentry_idx];
> +       ex_entry = &ex[ctx->exentry_idx];
>
> -       offset = pc - (long)&ex->insn;
> +       offset = pc - (long)&ex_entry->insn;
>         if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
>                 return -ERANGE;
> -       ex->insn = offset;
> +       ex_entry->insn = offset;
>
> -       offset = (long)fixup - (long)&ex->fixup;
> +       offset = (long)fixup - (long)&ex_entry->fixup;
>         if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
>                 return -ERANGE;
> -       ex->fixup = offset;
> +       ex_entry->fixup = offset;
>
>         ctx->exentry_idx++;
>         return 0;
> @@ -308,3 +328,27 @@ int bpf_arch_text_invalidate(void *dst, size_t len)
>
>         return ret;
>  }
> +
> +void bpf_jit_free(struct bpf_prog *fp)
> +{
> +       if (fp->jited) {
> +               struct powerpc_jit_data *jit_data = fp->aux->jit_data;
> +               struct bpf_binary_header *hdr;
> +
> +               /*
> +                * If we fail the final pass of JIT (from jit_subprogs),
> +                * the program may not be finalized yet. Call finalize here
> +                * before freeing it.
> +                */
> +               if (jit_data) {
> +                       bpf_jit_binary_pack_finalize(fp, jit_data->fhdr, jit_data->hdr);

I just realized x86 is the same. But I think we only need the following
here?

bpf_arch_text_copy(&jit_data->fhdr->size, &jit_data->hdr->size,
sizeof(jit_data->hdr->size));

Right?

Thanks,
Song

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/4] powerpc/code-patching: introduce patch_instructions()
  2023-03-10 18:26   ` Christophe Leroy
@ 2023-03-11  7:42     ` Christophe Leroy
  0 siblings, 0 replies; 15+ messages in thread
From: Christophe Leroy @ 2023-03-11  7:42 UTC (permalink / raw)
  To: Hari Bathini, linuxppc-dev, bpf@vger.kernel.org
  Cc: Song Liu, Daniel Borkmann, Alexei Starovoitov, Andrii Nakryiko,
	Naveen N. Rao



Le 10/03/2023 à 19:26, Christophe Leroy a écrit :
> 
> 
> Le 09/03/2023 à 19:02, Hari Bathini a écrit :
>> patch_instruction() entails setting up pte, patching the instruction,
>> clearing the pte and flushing the tlb. If multiple instructions need
>> to be patched, every instruction would have to go through the above
>> drill unnecessarily. Instead, introduce function patch_instructions()
>> that patches multiple instructions at one go while setting up the pte,
>> clearing the pte and flushing the tlb only once per page range of
>> instructions. Observed ~5X improvement in speed of execution using
>> patch_instructions() over patch_instruction(), when more instructions
>> are to be patched.
> 
> I get a 13% degradation on the time needed to activate ftrace on a 
> powerpc 8xx.
> 
> Before your patch, activating ftrace takes 550k timebase ticks. After 
> your patch it takes 620k timebase ticks.
> 

More details about the problem:

Before your patch, patch_instruction() is a simple, stackless function 
(Note that the first branch is noped out after startup).

00000254 <patch_instruction>:
  254:	48 00 00 6c 	b       2c0 <patch_instruction+0x6c>
  258:	7c e0 00 a6 	mfmsr   r7
  25c:	7c 51 13 a6 	mtspr   81,r2
  260:	3d 40 00 00 	lis     r10,0
			262: R_PPC_ADDR16_HA	.data
  264:	39 4a 00 00 	addi    r10,r10,0
			266: R_PPC_ADDR16_LO	.data
  268:	7c 69 1b 78 	mr      r9,r3
  26c:	3d 29 40 00 	addis   r9,r9,16384
  270:	81 0a 00 08 	lwz     r8,8(r10)
  274:	55 29 00 26 	rlwinm  r9,r9,0,0,19
  278:	81 4a 00 04 	lwz     r10,4(r10)
  27c:	61 29 01 25 	ori     r9,r9,293
  280:	91 28 00 00 	stw     r9,0(r8)
  284:	55 49 00 26 	rlwinm  r9,r10,0,0,19
  288:	50 6a 05 3e 	rlwimi  r10,r3,0,20,31
  28c:	90 8a 00 00 	stw     r4,0(r10)
  290:	7c 00 50 6c 	dcbst   0,r10
  294:	7c 00 04 ac 	hwsync
  298:	7c 00 1f ac 	icbi    0,r3
  29c:	7c 00 04 ac 	hwsync
  2a0:	4c 00 01 2c 	isync
  2a4:	38 60 00 00 	li      r3,0
  2a8:	39 40 00 00 	li      r10,0
  2ac:	91 48 00 00 	stw     r10,0(r8)
  2b0:	7c 00 4a 64 	tlbie   r9,r0
  2b4:	7c 00 04 ac 	hwsync
  2b8:	7c e0 01 24 	mtmsr   r7
  2bc:	4e 80 00 20 	blr

  2c0:	90 83 00 00 	stw     r4,0(r3)
  2c4:	7c 00 18 6c 	dcbst   0,r3
  2c8:	7c 00 04 ac 	hwsync
  2cc:	7c 00 1f ac 	icbi    0,r3
  2d0:	7c 00 04 ac 	hwsync
  2d4:	4c 00 01 2c 	isync
  2d8:	38 60 00 00 	li      r3,0
  2dc:	4e 80 00 20 	blr
  2e0:	38 60 ff ff 	li      r3,-1
  2e4:	4b ff ff c4 	b       2a8 <patch_instruction+0x54>
  2e8:	38 60 ff ff 	li      r3,-1
  2ec:	4e 80 00 20 	blr


Once your patch is there, patch_instruction() becomes a function that 
has to step up a stack in order to call __do_patch_instructions().
And __do_patch_instructions() is quite a big function.

0000022c <__do_patch_instructions>:
  22c:	3d 20 00 00 	lis     r9,0
			22e: R_PPC_ADDR16_HA	.data
  230:	39 29 00 00 	addi    r9,r9,0
			232: R_PPC_ADDR16_LO	.data
  234:	81 69 00 04 	lwz     r11,4(r9)
  238:	2c 05 00 00 	cmpwi   r5,0
  23c:	81 89 00 08 	lwz     r12,8(r9)
  240:	7c c3 32 14 	add     r6,r3,r6
  244:	55 6b 00 26 	rlwinm  r11,r11,0,0,19
  248:	38 00 00 00 	li      r0,0
  24c:	54 6a 05 3e 	clrlwi  r10,r3,20
  250:	21 0a 10 00 	subfic  r8,r10,4096
  254:	7d 03 42 14 	add     r8,r3,r8
  258:	7c 69 1b 78 	mr      r9,r3
  25c:	7f 88 30 40 	cmplw   cr7,r8,r6
  260:	3d 29 40 00 	addis   r9,r9,16384
  264:	55 29 00 26 	rlwinm  r9,r9,0,0,19
  268:	61 29 01 25 	ori     r9,r9,293
  26c:	91 2c 00 00 	stw     r9,0(r12)
  270:	7d 4a 5b 78 	or      r10,r10,r11
  274:	40 9d 00 08 	ble     cr7,27c <__do_patch_instructions+0x50>
  278:	7c c8 33 78 	mr      r8,r6
  27c:	7f 83 40 40 	cmplw   cr7,r3,r8
  280:	40 9c 01 2c 	bge     cr7,3ac <__do_patch_instructions+0x180>
  284:	7c 69 18 f8 	not     r9,r3
  288:	7d 28 4a 14 	add     r9,r8,r9
  28c:	55 29 f7 fe 	rlwinm  r9,r9,30,31,31
  290:	7c e3 50 50 	subf    r7,r3,r10
  294:	80 a4 00 00 	lwz     r5,0(r4)
  298:	90 aa 00 00 	stw     r5,0(r10)
  29c:	7c 00 50 6c 	dcbst   0,r10
  2a0:	7c 00 04 ac 	hwsync
  2a4:	7c 00 1f ac 	icbi    0,r3
  2a8:	7c 00 04 ac 	hwsync
  2ac:	4c 00 01 2c 	isync
  2b0:	38 63 00 04 	addi    r3,r3,4
  2b4:	40 82 00 08 	bne     2bc <__do_patch_instructions+0x90>
  2b8:	38 84 00 04 	addi    r4,r4,4
  2bc:	7f 83 40 40 	cmplw   cr7,r3,r8
  2c0:	40 9c 00 a4 	bge     cr7,364 <__do_patch_instructions+0x138>
  2c4:	2f 89 00 00 	cmpwi   cr7,r9,0
  2c8:	41 9e 00 38 	beq     cr7,300 <__do_patch_instructions+0xd4>
  2cc:	7d 23 3a 14 	add     r9,r3,r7
  2d0:	81 44 00 00 	lwz     r10,0(r4)
  2d4:	91 49 00 00 	stw     r10,0(r9)
  2d8:	7c 00 48 6c 	dcbst   0,r9
  2dc:	7c 00 04 ac 	hwsync
  2e0:	7c 00 1f ac 	icbi    0,r3
  2e4:	7c 00 04 ac 	hwsync
  2e8:	4c 00 01 2c 	isync
  2ec:	38 63 00 04 	addi    r3,r3,4
  2f0:	40 82 00 08 	bne     2f8 <__do_patch_instructions+0xcc>
  2f4:	38 84 00 04 	addi    r4,r4,4
  2f8:	7f 83 40 40 	cmplw   cr7,r3,r8
  2fc:	40 9c 00 68 	bge     cr7,364 <__do_patch_instructions+0x138>
  300:	7d 23 3a 14 	add     r9,r3,r7
  304:	81 44 00 00 	lwz     r10,0(r4)
  308:	91 49 00 00 	stw     r10,0(r9)
  30c:	7c 00 48 6c 	dcbst   0,r9
  310:	7c 00 04 ac 	hwsync
  314:	7c 00 1f ac 	icbi    0,r3
  318:	7c 00 04 ac 	hwsync
  31c:	4c 00 01 2c 	isync
  320:	38 63 00 04 	addi    r3,r3,4
  324:	7c 69 1b 78 	mr      r9,r3
  328:	40 82 00 08 	bne     330 <__do_patch_instructions+0x104>
  32c:	38 84 00 04 	addi    r4,r4,4
  330:	7d 49 3a 14 	add     r10,r9,r7
  334:	80 a4 00 00 	lwz     r5,0(r4)
  338:	90 aa 00 00 	stw     r5,0(r10)
  33c:	7c 00 50 6c 	dcbst   0,r10
  340:	7c 00 04 ac 	hwsync
  344:	7c 00 4f ac 	icbi    0,r9
  348:	7c 00 04 ac 	hwsync
  34c:	4c 00 01 2c 	isync
  350:	38 69 00 04 	addi    r3,r9,4
  354:	7f 83 40 40 	cmplw   cr7,r3,r8
  358:	40 82 00 08 	bne     360 <__do_patch_instructions+0x134>
  35c:	38 84 00 04 	addi    r4,r4,4
  360:	41 9c ff a0 	blt     cr7,300 <__do_patch_instructions+0xd4>
  364:	90 0c 00 00 	stw     r0,0(r12)
  368:	39 20 00 00 	li      r9,0
  36c:	7c 00 5a 64 	tlbie   r11,r0
  370:	7c 00 04 ac 	hwsync
  374:	2f 89 00 00 	cmpwi   cr7,r9,0
  378:	40 9e 00 2c 	bne     cr7,3a4 <__do_patch_instructions+0x178>
  37c:	7f 86 18 40 	cmplw   cr7,r6,r3
  380:	41 9d fe cc 	bgt     cr7,24c <__do_patch_instructions+0x20>
  384:	38 60 00 00 	li      r3,0
  388:	4e 80 00 20 	blr
  38c:	90 0c 00 00 	stw     r0,0(r12)
  390:	39 20 ff ff 	li      r9,-1
  394:	7c 00 5a 64 	tlbie   r11,r0
  398:	7c 00 04 ac 	hwsync
  39c:	2f 89 00 00 	cmpwi   cr7,r9,0
  3a0:	41 9e ff dc 	beq     cr7,37c <__do_patch_instructions+0x150>
  3a4:	38 60 ff ff 	li      r3,-1
  3a8:	4e 80 00 20 	blr
  3ac:	39 20 00 00 	li      r9,0
  3b0:	91 2c 00 00 	stw     r9,0(r12)
  3b4:	7c 00 5a 64 	tlbie   r11,r0
  3b8:	7c 00 04 ac 	hwsync
  3bc:	4b ff ff c0 	b       37c <__do_patch_instructions+0x150>

000003e8 <patch_instruction>:
  3e8:	94 21 ff e0 	stwu    r1,-32(r1)
  3ec:	90 81 00 08 	stw     r4,8(r1)
  3f0:	48 00 00 40 	b       430 <patch_instruction+0x48>
  3f4:	7c 08 02 a6 	mflr    r0
  3f8:	90 01 00 24 	stw     r0,36(r1)
  3fc:	93 e1 00 1c 	stw     r31,28(r1)
  400:	7f e0 00 a6 	mfmsr   r31
  404:	7c 51 13 a6 	mtspr   81,r2
  408:	38 c0 00 04 	li      r6,4
  40c:	38 81 00 08 	addi    r4,r1,8
  410:	38 a0 00 00 	li      r5,0
  414:	4b ff fe 19 	bl      22c <__do_patch_instructions>
  418:	7f e0 01 24 	mtmsr   r31
  41c:	80 01 00 24 	lwz     r0,36(r1)
  420:	83 e1 00 1c 	lwz     r31,28(r1)
  424:	7c 08 03 a6 	mtlr    r0
  428:	38 21 00 20 	addi    r1,r1,32
  42c:	4e 80 00 20 	blr

  430:	81 21 00 08 	lwz     r9,8(r1)
  434:	91 23 00 00 	stw     r9,0(r3)
  438:	7c 00 18 6c 	dcbst   0,r3
  43c:	7c 00 04 ac 	hwsync
  440:	7c 00 1f ac 	icbi    0,r3
  444:	7c 00 04 ac 	hwsync
  448:	4c 00 01 2c 	isync
  44c:	38 60 00 00 	li      r3,0
  450:	4b ff ff d8 	b       428 <patch_instruction+0x40>
  454:	38 60 ff ff 	li      r3,-1
  458:	4b ff ff d0 	b       428 <patch_instruction+0x40>

Christophe

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/4] powerpc/code-patching: introduce patch_instructions()
  2023-03-09 18:02 ` [PATCH v2 1/4] powerpc/code-patching: introduce patch_instructions() Hari Bathini
  2023-03-10 18:26   ` Christophe Leroy
@ 2023-03-11  8:02   ` Christophe Leroy
  1 sibling, 0 replies; 15+ messages in thread
From: Christophe Leroy @ 2023-03-11  8:02 UTC (permalink / raw)
  To: Hari Bathini, linuxppc-dev, bpf@vger.kernel.org
  Cc: Song Liu, Daniel Borkmann, Alexei Starovoitov, Andrii Nakryiko,
	Naveen N. Rao



Le 09/03/2023 à 19:02, Hari Bathini a écrit :
> patch_instruction() entails setting up pte, patching the instruction,
> clearing the pte and flushing the tlb. If multiple instructions need
> to be patched, every instruction would have to go through the above
> drill unnecessarily. Instead, introduce function patch_instructions()
> that patches multiple instructions at one go while setting up the pte,
> clearing the pte and flushing the tlb only once per page range of
> instructions. Observed ~5X improvement in speed of execution using
> patch_instructions() over patch_instruction(), when more instructions
> are to be patched.
> 
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/code-patching.h |   1 +
>   arch/powerpc/lib/code-patching.c         | 151 ++++++++++++++++-------
>   2 files changed, 106 insertions(+), 46 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
> index 3f881548fb61..059fc4fe700e 100644
> --- a/arch/powerpc/include/asm/code-patching.h
> +++ b/arch/powerpc/include/asm/code-patching.h
> @@ -74,6 +74,7 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
>   int patch_branch(u32 *addr, unsigned long target, int flags);
>   int patch_instruction(u32 *addr, ppc_inst_t instr);
>   int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
> +int patch_instructions(u32 *addr, u32 *code, bool fill_inst, size_t len);
>   
>   static inline unsigned long patch_site_addr(s32 *site)
>   {
> diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
> index b00112d7ad46..33857b9b53de 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -278,77 +278,117 @@ static void unmap_patch_area(unsigned long addr)
>   	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
>   }
>   
> -static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
> +static int __do_patch_instructions_mm(u32 *addr, u32 *code, bool fill_inst, size_t len)

Some time ago we did a huge work to clean up use of u32 as code versus 
ppc_inst_t.

Please carefully audit all places where you use u32 instead of 
ppc_inst_t. If you do so, don't use anymore the word 'instruction' in 
the function name.

>   {
> -	int err;
> -	u32 *patch_addr;
> -	unsigned long text_poke_addr;
> -	pte_t *pte;
> -	unsigned long pfn = get_patch_pfn(addr);
> -	struct mm_struct *patching_mm;
> -	struct mm_struct *orig_mm;
> +	struct mm_struct *patching_mm, *orig_mm;

This change is cosmetic and not functionally linked to the patch.

> +	unsigned long text_poke_addr, pfn;
> +	u32 *patch_addr, *end, *pend;
> +	ppc_inst_t instr;
>   	spinlock_t *ptl;
> +	int ilen, err;
> +	pte_t *pte;

Why move this declaration ?

>   
>   	patching_mm = __this_cpu_read(cpu_patching_context.mm);
>   	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
> -	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
>   
>   	pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
>   	if (!pte)
>   		return -ENOMEM;
>   
> -	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
> +	end = (void *)addr + len;
> +	do {
> +		pfn = get_patch_pfn(addr);
> +		__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
>   
> -	/* order PTE update before use, also serves as the hwsync */
> -	asm volatile("ptesync": : :"memory");
> -
> -	/* order context switch after arbitrary prior code */
> -	isync();
> -
> -	orig_mm = start_using_temp_mm(patching_mm);
> -
> -	err = __patch_instruction(addr, instr, patch_addr);
> +		/* order PTE update before use, also serves as the hwsync */
> +		asm volatile("ptesync": : :"memory");
>   
> -	/* hwsync performed by __patch_instruction (sync) if successful */
> -	if (err)
> -		mb();  /* sync */
> +		/* order context switch after arbitrary prior code */
> +		isync();
> +
> +		orig_mm = start_using_temp_mm(patching_mm);
> +
> +		patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
> +		pend = (void *)addr + PAGE_SIZE - offset_in_page(addr);
> +		if (end < pend)
> +			pend = end;
> +
> +		while (addr < pend) {
> +			instr = ppc_inst_read(code);
> +			ilen = ppc_inst_len(instr);
> +			err = __patch_instruction(addr, instr, patch_addr);
> +			/* hwsync performed by __patch_instruction (sync) if successful */

As you aim at optimising the loops, you should perform the cache flushes 
outside this loop, with flush_dcache_range() and invalidate_dcache_range().

> +			if (err) {
> +				mb();  /* sync */
> +				break;
> +			}
> +
> +			patch_addr = (void *)patch_addr + ilen;
> +			addr = (void *)addr + ilen;

So patch_addr and addr are u32*, ilen is either 4 or 8, and you cast to 
void to do the math ? That looks odd and error prone.

> +			if (!fill_inst)
> +				code = (void *)code + ilen;
> +		}
>   
> -	/* context synchronisation performed by __patch_instruction (isync or exception) */
> -	stop_using_temp_mm(patching_mm, orig_mm);
> +		/* context synchronisation performed by __patch_instruction (isync or exception) */
> +		stop_using_temp_mm(patching_mm, orig_mm);
>   
> -	pte_clear(patching_mm, text_poke_addr, pte);
> -	/*
> -	 * ptesync to order PTE update before TLB invalidation done
> -	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
> -	 */
> -	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
> +		pte_clear(patching_mm, text_poke_addr, pte);
> +		/*
> +		 * ptesync to order PTE update before TLB invalidation done
> +		 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
> +		 */
> +		local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
> +		if (err)
> +			break;
> +	} while (addr < end);
>   
>   	pte_unmap_unlock(pte, ptl);
>   
>   	return err;
>   }
>   
> -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
> +static int __do_patch_instructions(u32 *addr, u32 *code, bool fill_inst, size_t len)
>   {
> -	int err;
> -	u32 *patch_addr;
> -	unsigned long text_poke_addr;
> +	unsigned long text_poke_addr, pfn;
> +	u32 *patch_addr, *end, *pend;
> +	ppc_inst_t instr;
> +	int ilen, err;
>   	pte_t *pte;
> -	unsigned long pfn = get_patch_pfn(addr);
>   
>   	text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
> -	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
> -
>   	pte = __this_cpu_read(cpu_patching_context.pte);
> -	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
> -	/* See ptesync comment in radix__set_pte_at() */
> -	if (radix_enabled())
> -		asm volatile("ptesync": : :"memory");
>   
> -	err = __patch_instruction(addr, instr, patch_addr);
> +	end = (void *)addr + len;
> +	do {
> +		pfn = get_patch_pfn(addr);
> +		__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
> +		/* See ptesync comment in radix__set_pte_at() */
> +		if (radix_enabled())
> +			asm volatile("ptesync": : :"memory");
> +
> +		patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
> +		pend = (void *)addr + PAGE_SIZE - offset_in_page(addr);
> +		if (end < pend)
> +			pend = end;
> +
> +		while (addr < pend) {
> +			instr = ppc_inst_read(code);
> +			ilen = ppc_inst_len(instr);
> +			err = __patch_instruction(addr, instr, patch_addr);
> +			if (err)
> +				break;
> +
> +			patch_addr = (void *)patch_addr + ilen;
> +			addr = (void *)addr + ilen;
> +			if (!fill_inst)
> +				code = (void *)code + ilen;
> +		}
>   
> -	pte_clear(&init_mm, text_poke_addr, pte);
> -	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
> +		pte_clear(&init_mm, text_poke_addr, pte);
> +		flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
> +		if (err)
> +			break;
> +	} while (addr < end);
>   
>   	return err;
>   }
> @@ -369,15 +409,34 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
>   
>   	local_irq_save(flags);
>   	if (mm_patch_enabled())
> -		err = __do_patch_instruction_mm(addr, instr);
> +		err = __do_patch_instructions_mm(addr, (u32 *)&instr, false, ppc_inst_len(instr));

Do not mix-up u32* and ppc_inst_t, they are not equivalent, do not cast 
one to the other.

>   	else
> -		err = __do_patch_instruction(addr, instr);
> +		err = __do_patch_instructions(addr, (u32 *)&instr, false, ppc_inst_len(instr));

Same.

>   	local_irq_restore(flags);
>   
>   	return err;
>   }
>   NOKPROBE_SYMBOL(patch_instruction);
>   
> +/*
> + * Patch 'addr' with 'len' bytes of instructions from 'code'.
> + */
> +int patch_instructions(u32 *addr, u32 *code, bool fill_inst, size_t len)
> +{
> +	unsigned long flags;
> +	int err;
> +
> +	local_irq_save(flags);

Won't the irq lock be a bit long ?

> +	if (mm_patch_enabled())
> +		err = __do_patch_instructions_mm(addr, code, fill_inst, len);
> +	else
> +		err = __do_patch_instructions(addr, code, fill_inst, len);
> +	local_irq_restore(flags);
> +
> +	return err;
> +}
> +NOKPROBE_SYMBOL(patch_instructions);
> +
>   int patch_branch(u32 *addr, unsigned long target, int flags)
>   {
>   	ppc_inst_t instr;

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 4/4] powerpc/bpf: use bpf_jit_binary_pack_[alloc|finalize|free]
  2023-03-09 18:02 ` [PATCH v2 4/4] powerpc/bpf: use bpf_jit_binary_pack_[alloc|finalize|free] Hari Bathini
  2023-03-10 22:35   ` Song Liu
@ 2023-03-11 10:16   ` Christophe Leroy
  2023-08-25 15:29     ` Hari Bathini
  1 sibling, 1 reply; 15+ messages in thread
From: Christophe Leroy @ 2023-03-11 10:16 UTC (permalink / raw)
  To: Hari Bathini, linuxppc-dev, bpf@vger.kernel.org
  Cc: Song Liu, Daniel Borkmann, Alexei Starovoitov, Andrii Nakryiko,
	Naveen N. Rao



Le 09/03/2023 à 19:02, Hari Bathini a écrit :
> Use bpf_jit_binary_pack_alloc in powerpc jit. The jit engine first
> writes the program to the rw buffer. When the jit is done, the program
> is copied to the final location with bpf_jit_binary_pack_finalize.
> With multiple jit_subprogs, bpf_jit_free is called on some subprograms
> that haven't got bpf_jit_binary_pack_finalize() yet. Implement custom
> bpf_jit_free() like in commit 1d5f82d9dd47 ("bpf, x86: fix freeing of
> not-finalized bpf_prog_pack") to call bpf_jit_binary_pack_finalize(),
> if necessary. While here, correct the misnomer powerpc64_jit_data to
> powerpc_jit_data as it is meant for both ppc32 and ppc64.

root@vgoip:~# echo 1 > /proc/sys/net/core/bpf_jit_enable
root@vgoip:~# insmod test_bpf.ko
[  570.270983] kernel tried to execute exec-protected page (bd42c198) - 
exploit attempt? (uid: 0)
[  570.279414] BUG: Unable to handle kernel instruction fetch
[  570.284822] Faulting instruction address: 0xbd42c198
[  570.289734] Oops: Kernel access of bad area, sig: 11 [#1]
[  570.295062] BE PAGE_SIZE=16K PREEMPT CMPC885
[  570.302811] Modules linked in: test_bpf(+) test_module
[  570.307891] CPU: 0 PID: 559 Comm: insmod Not tainted 
6.3.0-rc1-s3k-dev-g4ae0418b3500 #258
[  570.315975] Hardware name: MIAE 8xx 0x500000 CMPC885
[  570.320882] NIP:  bd42c198 LR: be8180ec CTR: be818010
[  570.325873] REGS: cae2bc40 TRAP: 0400   Not tainted 
(6.3.0-rc1-s3k-dev-g4ae0418b3500)
[  570.333704] MSR:  40009032 <EE,ME,IR,DR,RI>  CR: 88008222  XER: 00000000
[  570.340503]
[  570.340503] GPR00: be806eac cae2bd00 c2977340 00000000 c2c40900 
00000000 c1a18a80 00000000
[  570.340503] GPR08: 00000002 00000001 00000000 00000000 ffffffff 
100d815e ca6a0000 00000001
[  570.340503] GPR16: 1234aaaa ca242250 c1180000 00000001 1234aaab 
c9050030 00000000 00000000
[  570.340503] GPR24: c2c40900 00000000 ffffffff 00000000 c1a18a80 
00000000 00000002 ca24225c
[  570.376819] NIP [bd42c198] 0xbd42c198
[  570.380436] LR [be8180ec] 0xbe8180ec
[  570.383965] Call Trace:
[  570.386373] [cae2bd00] [0000000b] 0xb (unreliable)
[  570.391107] [cae2bd50] [be806eac] __run_one+0x58/0x224 [test_bpf]
[  570.397390] [cae2bd90] [be80ca94] test_bpf_init+0x8d8/0x1010 [test_bpf]
[  570.404189] [cae2be20] [c00049f0] do_one_initcall+0x38/0x1e4
[  570.409782] [cae2be80] [c0090aa8] do_init_module+0x50/0x234
[  570.415291] [cae2bea0] [c0092e08] sys_finit_module+0xb4/0xf8
[  570.420884] [cae2bf20] [c000e344] system_call_exception+0x94/0x150
[  570.426995] [cae2bf30] [c00120a8] ret_from_syscall+0x0/0x28
[  570.432502] --- interrupt: c00 at 0xfd5fca0
[  570.436632] NIP:  0fd5fca0 LR: 10014568 CTR: 10013294
[  570.441625] REGS: cae2bf40 TRAP: 0c00   Not tainted 
(6.3.0-rc1-s3k-dev-g4ae0418b3500)
[  570.449455] MSR:  0000d032 <EE,PR,ME,IR,DR,RI>  CR: 44002224  XER: 
00000000
[  570.456513]
[  570.456513] GPR00: 00000161 7f868d30 77ed34d0 00000003 100bc4ef 
00000000 0fd51868 0000d032
[  570.456513] GPR08: 000007b1 10013294 00000000 00000002 52454753 
100d815e 100a44b8 00000000
[  570.456513] GPR16: 100d167c 100b0000 1198426c 119854cd 100d0000 
100d0000 00000000 100a4498
[  570.456513] GPR24: ffffffa2 ffffffff 11984244 00000003 1198426c 
100bc4ef 11984288 1198426c
[  570.492828] NIP [0fd5fca0] 0xfd5fca0
[  570.496358] LR [10014568] 0x10014568
[  570.499887] --- interrupt: c00
[  570.502902] Code: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX 
XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX 
XXXXXXXX XXXXXXXX XXXXXXXX
[  570.517973] ---[ end trace 0000000000000000 ]---
[  570.522523]
[  570.523986] note: insmod[559] exited with irqs disabled
Segmentation fault

Christophe

> 
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>   arch/powerpc/net/bpf_jit.h        |   7 +-
>   arch/powerpc/net/bpf_jit_comp.c   | 104 +++++++++++++++++++++---------
>   arch/powerpc/net/bpf_jit_comp32.c |   4 +-
>   arch/powerpc/net/bpf_jit_comp64.c |   6 +-
>   4 files changed, 83 insertions(+), 38 deletions(-)
> 
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index d767e39d5645..a8b7480c4d43 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -168,15 +168,16 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
>   
>   void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
>   int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
> -int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
> +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
>   		       u32 *addrs, int pass, bool extra_pass);
>   void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
>   void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
>   void bpf_jit_realloc_regs(struct codegen_context *ctx);
>   int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
>   
> -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
> -			  int insn_idx, int jmp_off, int dst_reg);
> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
> +			  struct codegen_context *ctx, int insn_idx,
> +			  int jmp_off, int dst_reg);
>   
>   #endif
>   
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index d1794d9f0154..ece75c829499 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -42,10 +42,11 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg,
>   	return 0;
>   }
>   
> -struct powerpc64_jit_data {
> -	struct bpf_binary_header *header;
> +struct powerpc_jit_data {
> +	struct bpf_binary_header *hdr;
> +	struct bpf_binary_header *fhdr;
>   	u32 *addrs;
> -	u8 *image;
> +	u8 *fimage;
>   	u32 proglen;
>   	struct codegen_context ctx;
>   };
> @@ -62,15 +63,18 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   	u8 *image = NULL;
>   	u32 *code_base;
>   	u32 *addrs;
> -	struct powerpc64_jit_data *jit_data;
> +	struct powerpc_jit_data *jit_data;
>   	struct codegen_context cgctx;
>   	int pass;
>   	int flen;
> -	struct bpf_binary_header *bpf_hdr;
> +	struct bpf_binary_header *fhdr = NULL;
> +	struct bpf_binary_header *hdr = NULL;
>   	struct bpf_prog *org_fp = fp;
>   	struct bpf_prog *tmp_fp;
>   	bool bpf_blinded = false;
>   	bool extra_pass = false;
> +	u8 *fimage = NULL;
> +	u32 *fcode_base;
>   	u32 extable_len;
>   	u32 fixup_len;
>   
> @@ -100,9 +104,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   	addrs = jit_data->addrs;
>   	if (addrs) {
>   		cgctx = jit_data->ctx;
> -		image = jit_data->image;
> -		bpf_hdr = jit_data->header;
> +		fimage = jit_data->fimage;
> +		fhdr = jit_data->fhdr;
>   		proglen = jit_data->proglen;
> +		hdr = jit_data->hdr;
> +		image = (void *)hdr + ((void *)fimage - (void *)fhdr);
>   		extra_pass = true;
>   		goto skip_init_ctx;
>   	}
> @@ -120,7 +126,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
>   
>   	/* Scouting faux-generate pass 0 */
> -	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
> +	if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
>   		/* We hit something illegal or unsupported. */
>   		fp = org_fp;
>   		goto out_addrs;
> @@ -135,7 +141,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   	 */
>   	if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
>   		cgctx.idx = 0;
> -		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
> +		if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
>   			fp = org_fp;
>   			goto out_addrs;
>   		}
> @@ -157,17 +163,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   	proglen = cgctx.idx * 4;
>   	alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
>   
> -	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
> -	if (!bpf_hdr) {
> +	fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
> +					      bpf_jit_fill_ill_insns);
> +	if (!fhdr) {
>   		fp = org_fp;
>   		goto out_addrs;
>   	}
>   
>   	if (extable_len)
> -		fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len;
> +		fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;
>   
>   skip_init_ctx:
>   	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
> +	fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
>   
>   	/* Code generation passes 1-2 */
>   	for (pass = 1; pass < 3; pass++) {
> @@ -175,8 +183,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   		cgctx.idx = 0;
>   		cgctx.alt_exit_addr = 0;
>   		bpf_jit_build_prologue(code_base, &cgctx);
> -		if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass, extra_pass)) {
> -			bpf_jit_binary_free(bpf_hdr);
> +		if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass, extra_pass)) {
> +			bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
> +			bpf_jit_binary_pack_free(fhdr, hdr);
>   			fp = org_fp;
>   			goto out_addrs;
>   		}
> @@ -192,21 +201,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   		 * Note that we output the base address of the code_base
>   		 * rather than image, since opcodes are in code_base.
>   		 */
> -		bpf_jit_dump(flen, proglen, pass, code_base);
> +		bpf_jit_dump(flen, proglen, pass, fcode_base);
>   
>   #ifdef CONFIG_PPC64_ELF_ABI_V1
>   	/* Function descriptor nastiness: Address + TOC */
> -	((u64 *)image)[0] = (u64)code_base;
> +	((u64 *)image)[0] = (u64)fcode_base;
>   	((u64 *)image)[1] = local_paca->kernel_toc;
>   #endif
>   
> -	fp->bpf_func = (void *)image;
> +	fp->bpf_func = (void *)fimage;
>   	fp->jited = 1;
>   	fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
>   
> -	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size);
>   	if (!fp->is_func || extra_pass) {
> -		bpf_jit_binary_lock_ro(bpf_hdr);
> +		if (bpf_jit_binary_pack_finalize(fp, fhdr, hdr)) {
> +			fp = org_fp;
> +			goto out_addrs;
> +		}
>   		bpf_prog_fill_jited_linfo(fp, addrs);
>   out_addrs:
>   		kfree(addrs);
> @@ -216,8 +227,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>   		jit_data->addrs = addrs;
>   		jit_data->ctx = cgctx;
>   		jit_data->proglen = proglen;
> -		jit_data->image = image;
> -		jit_data->header = bpf_hdr;
> +		jit_data->fimage = fimage;
> +		jit_data->fhdr = fhdr;
> +		jit_data->hdr = hdr;
>   	}
>   
>   out:
> @@ -231,12 +243,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>    * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
>    * this function, as this only applies to BPF_PROBE_MEM, for now.
>    */
> -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
> -			  int insn_idx, int jmp_off, int dst_reg)
> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
> +			  struct codegen_context *ctx, int insn_idx, int jmp_off,
> +			  int dst_reg)
>   {
>   	off_t offset;
>   	unsigned long pc;
> -	struct exception_table_entry *ex;
> +	struct exception_table_entry *ex, *ex_entry;
>   	u32 *fixup;
>   
>   	/* Populate extable entries only in the last pass */
> @@ -247,9 +260,16 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>   	    WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
>   		return -EINVAL;
>   
> +	/*
> +	 * Program is first written to image before copying to the
> +	 * final location (fimage). Accordingly, update in the image first.
> +	 * As all offsets used are relative, copying as is to the
> +	 * final location should be alright.
> +	 */
>   	pc = (unsigned long)&image[insn_idx];
> +	ex = (void *)fp->aux->extable - (void *)fimage + (void *)image;
>   
> -	fixup = (void *)fp->aux->extable -
> +	fixup = (void *)ex -
>   		(fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
>   		(ctx->exentry_idx * BPF_FIXUP_LEN * 4);
>   
> @@ -260,17 +280,17 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>   	fixup[BPF_FIXUP_LEN - 1] =
>   		PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
>   
> -	ex = &fp->aux->extable[ctx->exentry_idx];
> +	ex_entry = &ex[ctx->exentry_idx];
>   
> -	offset = pc - (long)&ex->insn;
> +	offset = pc - (long)&ex_entry->insn;
>   	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
>   		return -ERANGE;
> -	ex->insn = offset;
> +	ex_entry->insn = offset;
>   
> -	offset = (long)fixup - (long)&ex->fixup;
> +	offset = (long)fixup - (long)&ex_entry->fixup;
>   	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
>   		return -ERANGE;
> -	ex->fixup = offset;
> +	ex_entry->fixup = offset;
>   
>   	ctx->exentry_idx++;
>   	return 0;
> @@ -308,3 +328,27 @@ int bpf_arch_text_invalidate(void *dst, size_t len)
>   
>   	return ret;
>   }
> +
> +void bpf_jit_free(struct bpf_prog *fp)
> +{
> +	if (fp->jited) {
> +		struct powerpc_jit_data *jit_data = fp->aux->jit_data;
> +		struct bpf_binary_header *hdr;
> +
> +		/*
> +		 * If we fail the final pass of JIT (from jit_subprogs),
> +		 * the program may not be finalized yet. Call finalize here
> +		 * before freeing it.
> +		 */
> +		if (jit_data) {
> +			bpf_jit_binary_pack_finalize(fp, jit_data->fhdr, jit_data->hdr);
> +			kvfree(jit_data->addrs);
> +			kfree(jit_data);
> +		}
> +		hdr = bpf_jit_binary_pack_hdr(fp);
> +		bpf_jit_binary_pack_free(hdr, NULL);
> +		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
> +	}
> +
> +	bpf_prog_unlock_free(fp);
> +}
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index 7f91ea064c08..fb2761b54d64 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -278,7 +278,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
>   }
>   
>   /* Assemble the body code between the prologue & epilogue */
> -int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
> +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
>   		       u32 *addrs, int pass, bool extra_pass)
>   {
>   	const struct bpf_insn *insn = fp->insnsi;
> @@ -997,7 +997,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   					jmp_off += 4;
>   				}
>   
> -				ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx,
> +				ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, insn_idx,
>   							    jmp_off, dst_reg);
>   				if (ret)
>   					return ret;
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 8dd3cabaa83a..37a8970a7065 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -343,7 +343,7 @@ asm (
>   );
>   
>   /* Assemble the body code between the prologue & epilogue */
> -int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
> +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
>   		       u32 *addrs, int pass, bool extra_pass)
>   {
>   	enum stf_barrier_type stf_barrier = stf_barrier_type_get();
> @@ -922,8 +922,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
>   				addrs[++i] = ctx->idx * 4;
>   
>   			if (BPF_MODE(code) == BPF_PROBE_MEM) {
> -				ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1,
> -							    4, dst_reg);
> +				ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx,
> +							    ctx->idx - 1, 4, dst_reg);
>   				if (ret)
>   					return ret;
>   			}

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 4/4] powerpc/bpf: use bpf_jit_binary_pack_[alloc|finalize|free]
  2023-03-11 10:16   ` Christophe Leroy
@ 2023-08-25 15:29     ` Hari Bathini
  0 siblings, 0 replies; 15+ messages in thread
From: Hari Bathini @ 2023-08-25 15:29 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev, bpf@vger.kernel.org
  Cc: Naveen N. Rao, Song Liu, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko



On 11/03/23 3:46 pm, Christophe Leroy wrote:
> 
> 
> Le 09/03/2023 à 19:02, Hari Bathini a écrit :
>> Use bpf_jit_binary_pack_alloc in powerpc jit. The jit engine first
>> writes the program to the rw buffer. When the jit is done, the program
>> is copied to the final location with bpf_jit_binary_pack_finalize.
>> With multiple jit_subprogs, bpf_jit_free is called on some subprograms
>> that haven't got bpf_jit_binary_pack_finalize() yet. Implement custom
>> bpf_jit_free() like in commit 1d5f82d9dd47 ("bpf, x86: fix freeing of
>> not-finalized bpf_prog_pack") to call bpf_jit_binary_pack_finalize(),
>> if necessary. While here, correct the misnomer powerpc64_jit_data to
>> powerpc_jit_data as it is meant for both ppc32 and ppc64.
> 
> root@vgoip:~# echo 1 > /proc/sys/net/core/bpf_jit_enable
> root@vgoip:~# insmod test_bpf.ko
> [  570.270983] kernel tried to execute exec-protected page (bd42c198) -
> exploit attempt? (uid: 0)
> [  570.279414] BUG: Unable to handle kernel instruction fetch
> [  570.284822] Faulting instruction address: 0xbd42c198
> [  570.289734] Oops: Kernel access of bad area, sig: 11 [#1]
> [  570.295062] BE PAGE_SIZE=16K PREEMPT CMPC885
> [  570.302811] Modules linked in: test_bpf(+) test_module
> [  570.307891] CPU: 0 PID: 559 Comm: insmod Not tainted
> 6.3.0-rc1-s3k-dev-g4ae0418b3500 #258
> [  570.315975] Hardware name: MIAE 8xx 0x500000 CMPC885
> [  570.320882] NIP:  bd42c198 LR: be8180ec CTR: be818010
> [  570.325873] REGS: cae2bc40 TRAP: 0400   Not tainted
> (6.3.0-rc1-s3k-dev-g4ae0418b3500)
> [  570.333704] MSR:  40009032 <EE,ME,IR,DR,RI>  CR: 88008222  XER: 00000000
> [  570.340503]
> [  570.340503] GPR00: be806eac cae2bd00 c2977340 00000000 c2c40900
> 00000000 c1a18a80 00000000
> [  570.340503] GPR08: 00000002 00000001 00000000 00000000 ffffffff
> 100d815e ca6a0000 00000001
> [  570.340503] GPR16: 1234aaaa ca242250 c1180000 00000001 1234aaab
> c9050030 00000000 00000000
> [  570.340503] GPR24: c2c40900 00000000 ffffffff 00000000 c1a18a80
> 00000000 00000002 ca24225c
> [  570.376819] NIP [bd42c198] 0xbd42c198
> [  570.380436] LR [be8180ec] 0xbe8180ec
> [  570.383965] Call Trace:
> [  570.386373] [cae2bd00] [0000000b] 0xb (unreliable)
> [  570.391107] [cae2bd50] [be806eac] __run_one+0x58/0x224 [test_bpf]
> [  570.397390] [cae2bd90] [be80ca94] test_bpf_init+0x8d8/0x1010 [test_bpf]
> [  570.404189] [cae2be20] [c00049f0] do_one_initcall+0x38/0x1e4
> [  570.409782] [cae2be80] [c0090aa8] do_init_module+0x50/0x234
> [  570.415291] [cae2bea0] [c0092e08] sys_finit_module+0xb4/0xf8
> [  570.420884] [cae2bf20] [c000e344] system_call_exception+0x94/0x150
> [  570.426995] [cae2bf30] [c00120a8] ret_from_syscall+0x0/0x28
> [  570.432502] --- interrupt: c00 at 0xfd5fca0
> [  570.436632] NIP:  0fd5fca0 LR: 10014568 CTR: 10013294
> [  570.441625] REGS: cae2bf40 TRAP: 0c00   Not tainted
> (6.3.0-rc1-s3k-dev-g4ae0418b3500)
> [  570.449455] MSR:  0000d032 <EE,PR,ME,IR,DR,RI>  CR: 44002224  XER:
> 00000000
> [  570.456513]
> [  570.456513] GPR00: 00000161 7f868d30 77ed34d0 00000003 100bc4ef
> 00000000 0fd51868 0000d032
> [  570.456513] GPR08: 000007b1 10013294 00000000 00000002 52454753
> 100d815e 100a44b8 00000000
> [  570.456513] GPR16: 100d167c 100b0000 1198426c 119854cd 100d0000
> 100d0000 00000000 100a4498
> [  570.456513] GPR24: ffffffa2 ffffffff 11984244 00000003 1198426c
> 100bc4ef 11984288 1198426c
> [  570.492828] NIP [0fd5fca0] 0xfd5fca0
> [  570.496358] LR [10014568] 0x10014568
> [  570.499887] --- interrupt: c00
> [  570.502902] Code: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
> XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
> XXXXXXXX XXXXXXXX XXXXXXXX
> [  570.517973] ---[ end trace 0000000000000000 ]---
> [  570.522523]
> [  570.523986] note: insmod[559] exited with irqs disabled
> Segmentation fault

Thanks a lot for reviewing v2, Christophe.
Posted v3 for review.

Thanks
Hari

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 4/4] powerpc/bpf: use bpf_jit_binary_pack_[alloc|finalize|free]
  2023-03-10 22:35   ` Song Liu
@ 2023-08-25 15:40     ` Hari Bathini
  0 siblings, 0 replies; 15+ messages in thread
From: Hari Bathini @ 2023-08-25 15:40 UTC (permalink / raw)
  To: Song Liu
  Cc: Song Liu, Daniel Borkmann, Alexei Starovoitov, Andrii Nakryiko,
	Naveen N. Rao, bpf, linuxppc-dev



On 11/03/23 4:05 am, Song Liu wrote:
> On Thu, Mar 9, 2023 at 10:03 AM Hari Bathini <hbathini@linux.ibm.com> wrote:
>>
>> Use bpf_jit_binary_pack_alloc in powerpc jit. The jit engine first
>> writes the program to the rw buffer. When the jit is done, the program
>> is copied to the final location with bpf_jit_binary_pack_finalize.
>> With multiple jit_subprogs, bpf_jit_free is called on some subprograms
>> that haven't got bpf_jit_binary_pack_finalize() yet. Implement custom
>> bpf_jit_free() like in commit 1d5f82d9dd47 ("bpf, x86: fix freeing of
>> not-finalized bpf_prog_pack") to call bpf_jit_binary_pack_finalize(),
>> if necessary. While here, correct the misnomer powerpc64_jit_data to
>> powerpc_jit_data as it is meant for both ppc32 and ppc64.
>>
>> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
>> ---
>>   arch/powerpc/net/bpf_jit.h        |   7 +-
>>   arch/powerpc/net/bpf_jit_comp.c   | 104 +++++++++++++++++++++---------
>>   arch/powerpc/net/bpf_jit_comp32.c |   4 +-
>>   arch/powerpc/net/bpf_jit_comp64.c |   6 +-
>>   4 files changed, 83 insertions(+), 38 deletions(-)
>>
>> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
>> index d767e39d5645..a8b7480c4d43 100644
>> --- a/arch/powerpc/net/bpf_jit.h
>> +++ b/arch/powerpc/net/bpf_jit.h
>> @@ -168,15 +168,16 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
>>
>>   void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
>>   int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
>> -int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
>> +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
>>                         u32 *addrs, int pass, bool extra_pass);
>>   void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
>>   void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
>>   void bpf_jit_realloc_regs(struct codegen_context *ctx);
>>   int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
>>
>> -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
>> -                         int insn_idx, int jmp_off, int dst_reg);
>> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
>> +                         struct codegen_context *ctx, int insn_idx,
>> +                         int jmp_off, int dst_reg);
>>
>>   #endif
>>
>> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
>> index d1794d9f0154..ece75c829499 100644
>> --- a/arch/powerpc/net/bpf_jit_comp.c
>> +++ b/arch/powerpc/net/bpf_jit_comp.c
>> @@ -42,10 +42,11 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg,
>>          return 0;
>>   }
>>
>> -struct powerpc64_jit_data {
>> -       struct bpf_binary_header *header;
>> +struct powerpc_jit_data {
>> +       struct bpf_binary_header *hdr;
>> +       struct bpf_binary_header *fhdr;
>>          u32 *addrs;
>> -       u8 *image;
>> +       u8 *fimage;
>>          u32 proglen;
>>          struct codegen_context ctx;
>>   };
> 
> Some comments about the f- prefix will be helpful. (Yes, I should have done
> better job adding comments for the x86 counterpart..)
> 
>> @@ -62,15 +63,18 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>          u8 *image = NULL;
>>          u32 *code_base;
>>          u32 *addrs;
>> -       struct powerpc64_jit_data *jit_data;
>> +       struct powerpc_jit_data *jit_data;
>>          struct codegen_context cgctx;
>>          int pass;
>>          int flen;
>> -       struct bpf_binary_header *bpf_hdr;
>> +       struct bpf_binary_header *fhdr = NULL;
>> +       struct bpf_binary_header *hdr = NULL;
>>          struct bpf_prog *org_fp = fp;
>>          struct bpf_prog *tmp_fp;
>>          bool bpf_blinded = false;
>>          bool extra_pass = false;
>> +       u8 *fimage = NULL;
>> +       u32 *fcode_base;
>>          u32 extable_len;
>>          u32 fixup_len;
>>
>> @@ -100,9 +104,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>          addrs = jit_data->addrs;
>>          if (addrs) {
>>                  cgctx = jit_data->ctx;
>> -               image = jit_data->image;
>> -               bpf_hdr = jit_data->header;
>> +               fimage = jit_data->fimage;
>> +               fhdr = jit_data->fhdr;
>>                  proglen = jit_data->proglen;
>> +               hdr = jit_data->hdr;
>> +               image = (void *)hdr + ((void *)fimage - (void *)fhdr);
>>                  extra_pass = true;
>>                  goto skip_init_ctx;
>>          }
>> @@ -120,7 +126,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>          cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
>>
>>          /* Scouting faux-generate pass 0 */
>> -       if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
>> +       if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
>>                  /* We hit something illegal or unsupported. */
>>                  fp = org_fp;
>>                  goto out_addrs;
>> @@ -135,7 +141,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>           */
>>          if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
>>                  cgctx.idx = 0;
>> -               if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
>> +               if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
>>                          fp = org_fp;
>>                          goto out_addrs;
>>                  }
>> @@ -157,17 +163,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>          proglen = cgctx.idx * 4;
>>          alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
>>
>> -       bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
>> -       if (!bpf_hdr) {
>> +       fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
>> +                                             bpf_jit_fill_ill_insns);
>> +       if (!fhdr) {
>>                  fp = org_fp;
>>                  goto out_addrs;
>>          }
>>
>>          if (extable_len)
>> -               fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len;
>> +               fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;
>>
>>   skip_init_ctx:
>>          code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
>> +       fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
>>
>>          /* Code generation passes 1-2 */
>>          for (pass = 1; pass < 3; pass++) {
>> @@ -175,8 +183,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>                  cgctx.idx = 0;
>>                  cgctx.alt_exit_addr = 0;
>>                  bpf_jit_build_prologue(code_base, &cgctx);
>> -               if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass, extra_pass)) {
>> -                       bpf_jit_binary_free(bpf_hdr);
>> +               if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass, extra_pass)) {
>> +                       bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
>> +                       bpf_jit_binary_pack_free(fhdr, hdr);
>>                          fp = org_fp;
>>                          goto out_addrs;
>>                  }
>> @@ -192,21 +201,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>                   * Note that we output the base address of the code_base
>>                   * rather than image, since opcodes are in code_base.
>>                   */
> Maybe update the comment above with fcode_base to avoid
> confusion.
> 
>> -               bpf_jit_dump(flen, proglen, pass, code_base);
>> +               bpf_jit_dump(flen, proglen, pass, fcode_base);
>>
>>   #ifdef CONFIG_PPC64_ELF_ABI_V1
>>          /* Function descriptor nastiness: Address + TOC */
>> -       ((u64 *)image)[0] = (u64)code_base;
>> +       ((u64 *)image)[0] = (u64)fcode_base;
>>          ((u64 *)image)[1] = local_paca->kernel_toc;
>>   #endif
>>
>> -       fp->bpf_func = (void *)image;
>> +       fp->bpf_func = (void *)fimage;
>>          fp->jited = 1;
>>          fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
>>
>> -       bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size);
>>          if (!fp->is_func || extra_pass) {
>> -               bpf_jit_binary_lock_ro(bpf_hdr);
>> +               if (bpf_jit_binary_pack_finalize(fp, fhdr, hdr)) {
>> +                       fp = org_fp;
>> +                       goto out_addrs;
>> +               }
>>                  bpf_prog_fill_jited_linfo(fp, addrs);
>>   out_addrs:
>>                  kfree(addrs);
>> @@ -216,8 +227,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>                  jit_data->addrs = addrs;
>>                  jit_data->ctx = cgctx;
>>                  jit_data->proglen = proglen;
>> -               jit_data->image = image;
>> -               jit_data->header = bpf_hdr;
>> +               jit_data->fimage = fimage;
>> +               jit_data->fhdr = fhdr;
>> +               jit_data->hdr = hdr;
>>          }
>>
>>   out:
>> @@ -231,12 +243,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>    * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
>>    * this function, as this only applies to BPF_PROBE_MEM, for now.
>>    */
>> -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
>> -                         int insn_idx, int jmp_off, int dst_reg)
>> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
>> +                         struct codegen_context *ctx, int insn_idx, int jmp_off,
>> +                         int dst_reg)
>>   {
>>          off_t offset;
>>          unsigned long pc;
>> -       struct exception_table_entry *ex;
>> +       struct exception_table_entry *ex, *ex_entry;
>>          u32 *fixup;
>>
>>          /* Populate extable entries only in the last pass */
>> @@ -247,9 +260,16 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>>              WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
>>                  return -EINVAL;
>>
>> +       /*
>> +        * Program is firt written to image before copying to the
> s/firt/first/
> 
>> +        * final location (fimage). Accordingly, update in the image first.
>> +        * As all offsets used are relative, copying as is to the
>> +        * final location should be alright.
>> +        */
>>          pc = (unsigned long)&image[insn_idx];
>> +       ex = (void *)fp->aux->extable - (void *)fimage + (void *)image;
>>
>> -       fixup = (void *)fp->aux->extable -
>> +       fixup = (void *)ex -
>>                  (fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
>>                  (ctx->exentry_idx * BPF_FIXUP_LEN * 4);
>>
>> @@ -260,17 +280,17 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>>          fixup[BPF_FIXUP_LEN - 1] =
>>                  PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
>>
>> -       ex = &fp->aux->extable[ctx->exentry_idx];
>> +       ex_entry = &ex[ctx->exentry_idx];
>>
>> -       offset = pc - (long)&ex->insn;
>> +       offset = pc - (long)&ex_entry->insn;
>>          if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
>>                  return -ERANGE;
>> -       ex->insn = offset;
>> +       ex_entry->insn = offset;
>>
>> -       offset = (long)fixup - (long)&ex->fixup;
>> +       offset = (long)fixup - (long)&ex_entry->fixup;
>>          if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
>>                  return -ERANGE;
>> -       ex->fixup = offset;
>> +       ex_entry->fixup = offset;
>>
>>          ctx->exentry_idx++;
>>          return 0;
>> @@ -308,3 +328,27 @@ int bpf_arch_text_invalidate(void *dst, size_t len)
>>
>>          return ret;
>>   }
>> +
>> +void bpf_jit_free(struct bpf_prog *fp)
>> +{
>> +       if (fp->jited) {
>> +               struct powerpc_jit_data *jit_data = fp->aux->jit_data;
>> +               struct bpf_binary_header *hdr;
>> +
>> +               /*
>> +                * If we fail the final pass of JIT (from jit_subprogs),
>> +                * the program may not be finalized yet. Call finalize here
>> +                * before freeing it.
>> +                */
>> +               if (jit_data) {
>> +                       bpf_jit_binary_pack_finalize(fp, jit_data->fhdr, jit_data->hdr);
> 
> I just realized x86 is the same. But I think we only need the following
> here?
> 
> bpf_arch_text_copy(&jit_data->fhdr->size, &jit_data->hdr->size,
> sizeof(jit_data->hdr->size));
> 
> Right?

Thanks for reviewing.
Better off with bpf_jit_binary_pack_finalize, probably?
Kept it that way for v3. Posted v3. Please review.

Thanks
Hari

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2023-08-25 15:42 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-09 18:02 [PATCH v2 0/4] enable bpf_prog_pack allocator for powerpc Hari Bathini
2023-03-09 18:02 ` [PATCH v2 1/4] powerpc/code-patching: introduce patch_instructions() Hari Bathini
2023-03-10 18:26   ` Christophe Leroy
2023-03-11  7:42     ` Christophe Leroy
2023-03-11  8:02   ` Christophe Leroy
2023-03-09 18:02 ` [PATCH v2 2/4] powerpc/bpf: implement bpf_arch_text_copy Hari Bathini
2023-03-10 22:04   ` Song Liu
2023-03-09 18:02 ` [PATCH v2 3/4] powerpc/bpf: implement bpf_arch_text_invalidate for bpf_prog_pack Hari Bathini
2023-03-10 22:06   ` Song Liu
2023-03-09 18:02 ` [PATCH v2 4/4] powerpc/bpf: use bpf_jit_binary_pack_[alloc|finalize|free] Hari Bathini
2023-03-10 22:35   ` Song Liu
2023-08-25 15:40     ` Hari Bathini
2023-03-11 10:16   ` Christophe Leroy
2023-08-25 15:29     ` Hari Bathini
  -- strict thread matches above, loose matches on Subject: below --
2023-03-09 18:00 [PATCH v2 0/4] enable bpf_prog_pack allocator for powerpc Hari Bathini
2023-03-09 18:00 ` [PATCH v2 4/4] powerpc/bpf: use bpf_jit_binary_pack_[alloc|finalize|free] Hari Bathini

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).