* [PATCH v1 1/2] powerpc/code-patching: Introduce open_patch_window()/close_patch_window()
@ 2024-03-15 2:59 Benjamin Gray
2024-03-15 2:59 ` [PATCH v1 2/2] powerpc/code-patching: Convert to open_patch_window()/close_patch_window() Benjamin Gray
0 siblings, 1 reply; 6+ messages in thread
From: Benjamin Gray @ 2024-03-15 2:59 UTC (permalink / raw)
To: linuxppc-dev, mpe; +Cc: Benjamin Gray
The code patching capabilities have grown, and the specific quirks for
setting up and tearing down the patching window are getting duplicated.
This commit introduces an abstraction for working with this patching
window. It defines open_patch_window() to set up the writable alias
page, and close_patch_window() to remove it and flush the TLB. The
actual mechanism for providing this patching window is an implementation
detail that consumers don't need to worry about. Consumers are still
responsible for flushing/syncing any changes they make through this
alias page though.
Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
---
This design can be readily extended to remap the writable page to
another physical page without incurring all of the entry and exit
overhead. But that might have problems with spending too long in
an interrupts disabled context, so I've left it out for now.
---
arch/powerpc/lib/code-patching.c | 113 +++++++++++++++++++++++++++++++
1 file changed, 113 insertions(+)
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index ed450a32918c..d1b812f84154 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -276,6 +276,119 @@ static void unmap_patch_area(unsigned long addr)
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
}
+/*
+ * Represents an active patching window.
+ */
+struct patch_window {
+ /*
+ * Page aligned patching window address. The page is a writable alias of
+ * the configured target page.
+ */
+ unsigned long text_poke_addr;
+ /*
+ * Pointer to the PTE for the patching window page.
+ */
+ pte_t *ptep;
+ /*
+ * (Temporary MM patching only) The original MM before creating the
+ * patching window.
+ */
+ struct mm_struct *orig_mm;
+ /*
+ * (Temporary MM patching only) The patching window MM.
+ */
+ struct mm_struct *patch_mm;
+ /*
+ * (Temporary MM patching only) Lock for the patching window PTE.
+ */
+ spinlock_t *ptl;
+};
+
+/*
+ * Calling this function opens a 'patching window' that maps a
+ * page starting at ctx->text_poke_addr (which is set by this function)
+ * as a writable alias to the page starting at addr.
+ *
+ * Upon success, callers must invoke the corresponding close_patch_window()
+ * function to return to the original state. Callers are also responsible
+ * for syncing any changes they make inside the window.
+ *
+ * Interrupts must be disabled for the entire duration of the patching. The PIDR
+ * is potentially changed during this time.
+ */
+static int open_patch_window(void *addr, struct patch_window *ctx)
+{
+ unsigned long pfn = get_patch_pfn(addr);
+
+ lockdep_assert_irqs_disabled();
+
+ ctx->text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr);
+
+ if (!mm_patch_enabled()) {
+ ctx->ptep = __this_cpu_read(cpu_patching_context.pte);
+ __set_pte_at(&init_mm, ctx->text_poke_addr,
+ ctx->ptep, pfn_pte(pfn, PAGE_KERNEL), 0);
+
+ /* See ptesync comment in radix__set_pte_at() */
+ if (radix_enabled())
+ asm volatile("ptesync" ::: "memory");
+
+ return 0;
+ }
+
+ ctx->orig_mm = current->active_mm;
+ ctx->patch_mm = __this_cpu_read(cpu_patching_context.mm);
+
+ ctx->ptep = get_locked_pte(ctx->patch_mm, ctx->text_poke_addr, &ctx->ptl);
+ if (!ctx->ptep)
+ return -ENOMEM;
+
+ __set_pte_at(ctx->patch_mm, ctx->text_poke_addr,
+ ctx->ptep, pfn_pte(pfn, PAGE_KERNEL), 0);
+
+ /* order PTE update before use, also serves as the hwsync */
+ asm volatile("ptesync" ::: "memory");
+
+ /*
+ * Changing mm requires context synchronising instructions on both sides of
+ * the context switch, as well as a hwsync between the last instruction for
+ * which the address of an associated storage access was translated using
+ * the current context. switch_mm_irqs_off() performs an isync after the
+ * context switch.
+ */
+ isync();
+ switch_mm_irqs_off(ctx->orig_mm, ctx->patch_mm, current);
+
+ WARN_ON(!mm_is_thread_local(ctx->patch_mm));
+
+ suspend_breakpoints();
+ return 0;
+}
+
+static void close_patch_window(struct patch_window *ctx)
+{
+ lockdep_assert_irqs_disabled();
+
+ if (!mm_patch_enabled()) {
+ pte_clear(&init_mm, ctx->text_poke_addr, ctx->ptep);
+ flush_tlb_kernel_range(ctx->text_poke_addr, ctx->text_poke_addr + PAGE_SIZE);
+ return;
+ }
+
+ isync();
+ switch_mm_irqs_off(ctx->patch_mm, ctx->orig_mm, current);
+ restore_breakpoints();
+
+ pte_clear(ctx->patch_mm, ctx->text_poke_addr, ctx->ptep);
+ /*
+ * ptesync to order PTE update before TLB invalidation done
+ * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
+ */
+ local_flush_tlb_page_psize(ctx->patch_mm, ctx->text_poke_addr, mmu_virtual_psize);
+
+ pte_unmap_unlock(ctx->ptep, ctx->ptl);
+}
+
static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
{
int err;
--
2.44.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v1 2/2] powerpc/code-patching: Convert to open_patch_window()/close_patch_window()
2024-03-15 2:59 [PATCH v1 1/2] powerpc/code-patching: Introduce open_patch_window()/close_patch_window() Benjamin Gray
@ 2024-03-15 2:59 ` Benjamin Gray
2024-03-15 8:38 ` Christophe Leroy
0 siblings, 1 reply; 6+ messages in thread
From: Benjamin Gray @ 2024-03-15 2:59 UTC (permalink / raw)
To: linuxppc-dev, mpe; +Cc: Benjamin Gray
The existing patching alias page setup and teardown sections can be
simplified to make use of the new open_patch_window() abstraction.
This eliminates the _mm variants of the helpers, consumers no longer
need to check mm_patch_enabled(), and consumers no longer need to worry
about synchronization and flushing beyond the changes they make in the
patching window.
Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
---
arch/powerpc/lib/code-patching.c | 180 +++----------------------------
1 file changed, 16 insertions(+), 164 deletions(-)
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index d1b812f84154..fd6f8576033a 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -66,40 +66,6 @@ static bool mm_patch_enabled(void)
return IS_ENABLED(CONFIG_SMP) && radix_enabled();
}
-/*
- * The following applies for Radix MMU. Hash MMU has different requirements,
- * and so is not supported.
- *
- * Changing mm requires context synchronising instructions on both sides of
- * the context switch, as well as a hwsync between the last instruction for
- * which the address of an associated storage access was translated using
- * the current context.
- *
- * switch_mm_irqs_off() performs an isync after the context switch. It is
- * the responsibility of the caller to perform the CSI and hwsync before
- * starting/stopping the temp mm.
- */
-static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm)
-{
- struct mm_struct *orig_mm = current->active_mm;
-
- lockdep_assert_irqs_disabled();
- switch_mm_irqs_off(orig_mm, temp_mm, current);
-
- WARN_ON(!mm_is_thread_local(temp_mm));
-
- suspend_breakpoints();
- return orig_mm;
-}
-
-static void stop_using_temp_mm(struct mm_struct *temp_mm,
- struct mm_struct *orig_mm)
-{
- lockdep_assert_irqs_disabled();
- switch_mm_irqs_off(temp_mm, orig_mm, current);
- restore_breakpoints();
-}
-
static int text_area_cpu_up(unsigned int cpu)
{
struct vm_struct *area;
@@ -389,73 +355,20 @@ static void close_patch_window(struct patch_window *ctx)
pte_unmap_unlock(ctx->ptep, ctx->ptl);
}
-static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
-{
- int err;
- u32 *patch_addr;
- unsigned long text_poke_addr;
- pte_t *pte;
- unsigned long pfn = get_patch_pfn(addr);
- struct mm_struct *patching_mm;
- struct mm_struct *orig_mm;
- spinlock_t *ptl;
-
- patching_mm = __this_cpu_read(cpu_patching_context.mm);
- text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
- patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
-
- pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
- if (!pte)
- return -ENOMEM;
-
- __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
-
- /* order PTE update before use, also serves as the hwsync */
- asm volatile("ptesync": : :"memory");
-
- /* order context switch after arbitrary prior code */
- isync();
-
- orig_mm = start_using_temp_mm(patching_mm);
-
- err = __patch_instruction(addr, instr, patch_addr);
-
- /* context synchronisation performed by __patch_instruction (isync or exception) */
- stop_using_temp_mm(patching_mm, orig_mm);
-
- pte_clear(patching_mm, text_poke_addr, pte);
- /*
- * ptesync to order PTE update before TLB invalidation done
- * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
- */
- local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
-
- pte_unmap_unlock(pte, ptl);
-
- return err;
-}
-
static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
{
- int err;
+ struct patch_window ctx;
u32 *patch_addr;
- unsigned long text_poke_addr;
- pte_t *pte;
- unsigned long pfn = get_patch_pfn(addr);
-
- text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
- patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+ int err;
- pte = __this_cpu_read(cpu_patching_context.pte);
- __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
- /* See ptesync comment in radix__set_pte_at() */
- if (radix_enabled())
- asm volatile("ptesync": : :"memory");
+ err = open_patch_window(addr, &ctx);
+ if (err)
+ return err;
+ patch_addr = (u32 *)(ctx.text_poke_addr + offset_in_page(addr));
err = __patch_instruction(addr, instr, patch_addr);
- pte_clear(&init_mm, text_poke_addr, pte);
- flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
+ close_patch_window(&ctx);
return err;
}
@@ -475,10 +388,7 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
return raw_patch_instruction(addr, instr);
local_irq_save(flags);
- if (mm_patch_enabled())
- err = __do_patch_instruction_mm(addr, instr);
- else
- err = __do_patch_instruction(addr, instr);
+ err = __do_patch_instruction(addr, instr);
local_irq_restore(flags);
return err;
@@ -545,80 +455,25 @@ static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool rep
return err;
}
-/*
- * A page is mapped and instructions that fit the page are patched.
- * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
- */
-static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool repeat_instr)
-{
- struct mm_struct *patching_mm, *orig_mm;
- unsigned long pfn = get_patch_pfn(addr);
- unsigned long text_poke_addr;
- spinlock_t *ptl;
- u32 *patch_addr;
- pte_t *pte;
- int err;
-
- patching_mm = __this_cpu_read(cpu_patching_context.mm);
- text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
- patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
-
- pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
- if (!pte)
- return -ENOMEM;
-
- __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
-
- /* order PTE update before use, also serves as the hwsync */
- asm volatile("ptesync" ::: "memory");
-
- /* order context switch after arbitrary prior code */
- isync();
-
- orig_mm = start_using_temp_mm(patching_mm);
-
- err = __patch_instructions(patch_addr, code, len, repeat_instr);
-
- /* context synchronisation performed by __patch_instructions */
- stop_using_temp_mm(patching_mm, orig_mm);
-
- pte_clear(patching_mm, text_poke_addr, pte);
- /*
- * ptesync to order PTE update before TLB invalidation done
- * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
- */
- local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
-
- pte_unmap_unlock(pte, ptl);
-
- return err;
-}
-
/*
* A page is mapped and instructions that fit the page are patched.
* Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
*/
static int __do_patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
{
- unsigned long pfn = get_patch_pfn(addr);
- unsigned long text_poke_addr;
+ struct patch_window ctx;
u32 *patch_addr;
- pte_t *pte;
int err;
- text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
- patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
-
- pte = __this_cpu_read(cpu_patching_context.pte);
- __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
- /* See ptesync comment in radix__set_pte_at() */
- if (radix_enabled())
- asm volatile("ptesync" ::: "memory");
+ err = open_patch_window(addr, &ctx);
+ if (err)
+ return err;
+ patch_addr = (u32 *)(ctx.text_poke_addr + offset_in_page(addr));
err = __patch_instructions(patch_addr, code, len, repeat_instr);
- pte_clear(&init_mm, text_poke_addr, pte);
- flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
+ /* context synchronisation performed by __patch_instructions */
+ close_patch_window(&ctx);
return err;
}
@@ -639,10 +494,7 @@ int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len);
local_irq_save(flags);
- if (mm_patch_enabled())
- err = __do_patch_instructions_mm(addr, code, plen, repeat_instr);
- else
- err = __do_patch_instructions(addr, code, plen, repeat_instr);
+ err = __do_patch_instructions(addr, code, plen, repeat_instr);
local_irq_restore(flags);
if (err)
return err;
--
2.44.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH v1 2/2] powerpc/code-patching: Convert to open_patch_window()/close_patch_window()
2024-03-15 2:59 ` [PATCH v1 2/2] powerpc/code-patching: Convert to open_patch_window()/close_patch_window() Benjamin Gray
@ 2024-03-15 8:38 ` Christophe Leroy
2024-03-16 9:50 ` Christophe Leroy
2024-03-16 10:10 ` Christophe Leroy
0 siblings, 2 replies; 6+ messages in thread
From: Christophe Leroy @ 2024-03-15 8:38 UTC (permalink / raw)
To: Benjamin Gray, linuxppc-dev@lists.ozlabs.org, mpe@ellerman.id.au
Le 15/03/2024 à 03:59, Benjamin Gray a écrit :
> The existing patching alias page setup and teardown sections can be
> simplified to make use of the new open_patch_window() abstraction.
>
> This eliminates the _mm variants of the helpers, consumers no longer
> need to check mm_patch_enabled(), and consumers no longer need to worry
> about synchronization and flushing beyond the changes they make in the
> patching window.
With this patch, the time needed to activate or de-activate function
tracer is approx 10% longer on powerpc 8xx.
Christophe
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v1 2/2] powerpc/code-patching: Convert to open_patch_window()/close_patch_window()
2024-03-15 8:38 ` Christophe Leroy
@ 2024-03-16 9:50 ` Christophe Leroy
2024-03-16 10:10 ` Christophe Leroy
1 sibling, 0 replies; 6+ messages in thread
From: Christophe Leroy @ 2024-03-16 9:50 UTC (permalink / raw)
To: Benjamin Gray, linuxppc-dev@lists.ozlabs.org, mpe@ellerman.id.au
Le 15/03/2024 à 09:38, Christophe Leroy a écrit :
>
>
> Le 15/03/2024 à 03:59, Benjamin Gray a écrit :
>> The existing patching alias page setup and teardown sections can be
>> simplified to make use of the new open_patch_window() abstraction.
>>
>> This eliminates the _mm variants of the helpers, consumers no longer
>> need to check mm_patch_enabled(), and consumers no longer need to worry
>> about synchronization and flushing beyond the changes they make in the
>> patching window.
>
> With this patch, the time needed to activate or de-activate function
> tracer is approx 10% longer on powerpc 8xx.
See below difference of patch_instruction() before and after your patch,
both for 4k pages and 16k pages:
16k pages, before your patch:
00000278 <patch_instruction>:
278: 48 00 00 84 nop
27c: 7c e0 00 a6 mfmsr r7
280: 7c 51 13 a6 mtspr 81,r2
284: 3d 00 00 00 lis r8,0
286: R_PPC_ADDR16_HA .data
288: 39 08 00 00 addi r8,r8,0
28a: R_PPC_ADDR16_LO .data
28c: 7c 69 1b 78 mr r9,r3
290: 3d 29 40 00 addis r9,r9,16384
294: 81 48 00 08 lwz r10,8(r8)
298: 55 29 00 22 clrrwi r9,r9,14
29c: 81 08 00 04 lwz r8,4(r8)
2a0: 61 29 01 2d ori r9,r9,301
2a4: 55 06 00 22 clrrwi r6,r8,14
2a8: 91 2a 00 00 stw r9,0(r10)
2ac: 91 2a 00 04 stw r9,4(r10)
2b0: 91 2a 00 08 stw r9,8(r10)
2b4: 91 2a 00 0c stw r9,12(r10)
2b8: 50 68 04 be rlwimi r8,r3,0,18,31
2bc: 90 88 00 00 stw r4,0(r8)
2c0: 7c 00 40 6c dcbst 0,r8
2c4: 7c 00 04 ac hwsync
2c8: 7c 00 1f ac icbi 0,r3
2cc: 7c 00 04 ac hwsync
2d0: 4c 00 01 2c isync
2d4: 38 60 00 00 li r3,0
2d8: 39 20 00 00 li r9,0
2dc: 91 2a 00 00 stw r9,0(r10)
2e0: 91 2a 00 04 stw r9,4(r10)
2e4: 91 2a 00 08 stw r9,8(r10)
2e8: 91 2a 00 0c stw r9,12(r10)
2ec: 7c 00 32 64 tlbie r6,r0
2f0: 7c 00 04 ac hwsync
2f4: 7c e0 01 24 mtmsr r7
2f8: 4e 80 00 20 blr
16k pages, after your patch. Now we have a stack frame for the call to
close_patch_window(). And the branch in close_patch_window() is
unexpected as patch_instruction() works on single pages.
0000024c <close_patch_window>:
24c: 81 23 00 04 lwz r9,4(r3)
250: 39 40 00 00 li r10,0
254: 91 49 00 00 stw r10,0(r9)
258: 91 49 00 04 stw r10,4(r9)
25c: 91 49 00 08 stw r10,8(r9)
260: 91 49 00 0c stw r10,12(r9)
264: 81 23 00 00 lwz r9,0(r3)
268: 55 2a 00 22 clrrwi r10,r9,14
26c: 39 29 40 00 addi r9,r9,16384
270: 7d 2a 48 50 subf r9,r10,r9
274: 28 09 40 00 cmplwi r9,16384
278: 41 81 00 10 bgt 288 <close_patch_window+0x3c>
27c: 7c 00 52 64 tlbie r10,r0
280: 7c 00 04 ac hwsync
284: 4e 80 00 20 blr
288: 7c 00 04 ac hwsync
28c: 7c 00 02 e4 tlbia
290: 4c 00 01 2c isync
294: 4e 80 00 20 blr
000002c4 <patch_instruction>:
2c4: 94 21 ff d0 stwu r1,-48(r1)
2c8: 93 c1 00 28 stw r30,40(r1)
2cc: 48 00 00 ac nop
2d0: 7c 08 02 a6 mflr r0
2d4: 90 01 00 34 stw r0,52(r1)
2d8: 93 e1 00 2c stw r31,44(r1)
2dc: 7f e0 00 a6 mfmsr r31
2e0: 7c 51 13 a6 mtspr 81,r2
2e4: 3d 40 00 00 lis r10,0
2e6: R_PPC_ADDR16_HA .data
2e8: 39 4a 00 00 addi r10,r10,0
2ea: R_PPC_ADDR16_LO .data
2ec: 7c 69 1b 78 mr r9,r3
2f0: 3d 29 40 00 addis r9,r9,16384
2f4: 81 0a 00 08 lwz r8,8(r10)
2f8: 80 ca 00 04 lwz r6,4(r10)
2fc: 55 29 00 22 clrrwi r9,r9,14
300: 61 29 01 2d ori r9,r9,301
304: 38 e0 00 00 li r7,0
308: 54 6a 04 be clrlwi r10,r3,18
30c: 91 28 00 00 stw r9,0(r8)
310: 91 28 00 04 stw r9,4(r8)
314: 91 28 00 08 stw r9,8(r8)
318: 91 28 00 0c stw r9,12(r8)
31c: 91 01 00 0c stw r8,12(r1)
320: 90 c1 00 08 stw r6,8(r1)
324: 7d 4a 32 14 add r10,r10,r6
328: 90 e1 00 10 stw r7,16(r1)
32c: 90 e1 00 14 stw r7,20(r1)
330: 90 e1 00 18 stw r7,24(r1)
334: 90 8a 00 00 stw r4,0(r10)
338: 7c 00 50 6c dcbst 0,r10
33c: 7c 00 04 ac hwsync
340: 7c 00 1f ac icbi 0,r3
344: 7c 00 04 ac hwsync
348: 4c 00 01 2c isync
34c: 3b c0 00 00 li r30,0
350: 38 61 00 08 addi r3,r1,8
354: 4b ff fe f9 bl 24c <close_patch_window>
358: 7f e0 01 24 mtmsr r31
35c: 80 01 00 34 lwz r0,52(r1)
360: 83 e1 00 2c lwz r31,44(r1)
364: 7c 08 03 a6 mtlr r0
368: 7f c3 f3 78 mr r3,r30
36c: 83 c1 00 28 lwz r30,40(r1)
370: 38 21 00 30 addi r1,r1,48
374: 4e 80 00 20 blr
4k pages before your patch:
00000268 <patch_instruction>:
268: 48 00 00 6c nop
26c: 7c e0 00 a6 mfmsr r7
270: 7c 51 13 a6 mtspr 81,r2
274: 3d 40 00 00 lis r10,0
276: R_PPC_ADDR16_HA .data
278: 39 4a 00 00 addi r10,r10,0
27a: R_PPC_ADDR16_LO .data
27c: 7c 69 1b 78 mr r9,r3
280: 3d 29 40 00 addis r9,r9,16384
284: 81 0a 00 08 lwz r8,8(r10)
288: 55 29 00 26 clrrwi r9,r9,12
28c: 81 4a 00 04 lwz r10,4(r10)
290: 61 29 01 25 ori r9,r9,293
294: 91 28 00 00 stw r9,0(r8)
298: 55 49 00 26 clrrwi r9,r10,12
29c: 50 6a 05 3e rlwimi r10,r3,0,20,31
2a0: 90 8a 00 00 stw r4,0(r10)
2a4: 7c 00 50 6c dcbst 0,r10
2a8: 7c 00 04 ac hwsync
2ac: 7c 00 1f ac icbi 0,r3
2b0: 7c 00 04 ac hwsync
2b4: 4c 00 01 2c isync
2b8: 38 60 00 00 li r3,0
2bc: 39 40 00 00 li r10,0
2c0: 91 48 00 00 stw r10,0(r8)
2c4: 7c 00 4a 64 tlbie r9,r0
2c8: 7c 00 04 ac hwsync
2cc: 7c e0 01 24 mtmsr r7
2d0: 4e 80 00 20 blr
4k pages after your patch. Here close_patch_window() is inlined but the
test at the end is crazy, we've been patching one instruction with the
assumption it is properly aligned, so why is there a branch with a tlbia
whereas only one page needs to be invalidated ?:
00000268 <patch_instruction>:
268: 48 00 00 84 nop
26c: 7c c0 00 a6 mfmsr r6
270: 7c 51 13 a6 mtspr 81,r2
274: 3d 40 00 00 lis r10,0
276: R_PPC_ADDR16_HA .data
278: 39 4a 00 00 addi r10,r10,0
27a: R_PPC_ADDR16_LO .data
27c: 7c 69 1b 78 mr r9,r3
280: 3d 29 40 00 addis r9,r9,16384
284: 80 ea 00 08 lwz r7,8(r10)
288: 55 29 00 26 clrrwi r9,r9,12
28c: 81 4a 00 04 lwz r10,4(r10)
290: 61 29 01 25 ori r9,r9,293
294: 54 68 05 3e clrlwi r8,r3,20
298: 91 27 00 00 stw r9,0(r7)
29c: 7d 08 52 14 add r8,r8,r10
2a0: 90 88 00 00 stw r4,0(r8)
2a4: 7c 00 40 6c dcbst 0,r8
2a8: 7c 00 04 ac hwsync
2ac: 7c 00 1f ac icbi 0,r3
2b0: 7c 00 04 ac hwsync
2b4: 4c 00 01 2c isync
2b8: 38 60 00 00 li r3,0
2bc: 55 49 00 26 clrrwi r9,r10,12
2c0: 39 4a 10 00 addi r10,r10,4096
2c4: 7d 49 50 50 subf r10,r9,r10
2c8: 28 0a 10 00 cmplwi r10,4096
2cc: 39 40 00 00 li r10,0
2d0: 91 47 00 00 stw r10,0(r7)
2d4: 40 81 00 38 ble 30c <patch_instruction+0xa4>
2d8: 7c 00 04 ac hwsync
2dc: 7c 00 02 e4 tlbia
2e0: 4c 00 01 2c isync
2e4: 7c c0 01 24 mtmsr r6
2e8: 4e 80 00 20 blr
...
30c: 7c 00 4a 64 tlbie r9,r0
310: 7c 00 04 ac hwsync
314: 7c c0 01 24 mtmsr r6
318: 4e 80 00 20 blr
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v1 2/2] powerpc/code-patching: Convert to open_patch_window()/close_patch_window()
2024-03-15 8:38 ` Christophe Leroy
2024-03-16 9:50 ` Christophe Leroy
@ 2024-03-16 10:10 ` Christophe Leroy
2024-03-17 21:34 ` Benjamin Gray
1 sibling, 1 reply; 6+ messages in thread
From: Christophe Leroy @ 2024-03-16 10:10 UTC (permalink / raw)
To: Benjamin Gray, linuxppc-dev@lists.ozlabs.org, mpe@ellerman.id.au
Le 15/03/2024 à 09:38, Christophe Leroy a écrit :
>
>
> Le 15/03/2024 à 03:59, Benjamin Gray a écrit :
>> The existing patching alias page setup and teardown sections can be
>> simplified to make use of the new open_patch_window() abstraction.
>>
>> This eliminates the _mm variants of the helpers, consumers no longer
>> need to check mm_patch_enabled(), and consumers no longer need to worry
>> about synchronization and flushing beyond the changes they make in the
>> patching window.
>
> With this patch, the time needed to activate or de-activate function
> tracer is approx 10% longer on powerpc 8xx.
With the following changes, the performance is restored:
diff --git a/arch/powerpc/lib/code-patching.c
b/arch/powerpc/lib/code-patching.c
index fd6f8576033a..bc92b85913d8 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -282,13 +282,13 @@ struct patch_window {
* Interrupts must be disabled for the entire duration of the
patching. The PIDR
* is potentially changed during this time.
*/
-static int open_patch_window(void *addr, struct patch_window *ctx)
+static __always_inline int open_patch_window(void *addr, struct
patch_window *ctx)
{
unsigned long pfn = get_patch_pfn(addr);
lockdep_assert_irqs_disabled();
- ctx->text_poke_addr = (unsigned
long)__this_cpu_read(cpu_patching_context.addr);
+ ctx->text_poke_addr = (unsigned
long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
if (!mm_patch_enabled()) {
ctx->ptep = __this_cpu_read(cpu_patching_context.pte);
@@ -331,7 +331,7 @@ static int open_patch_window(void *addr, struct
patch_window *ctx)
return 0;
}
-static void close_patch_window(struct patch_window *ctx)
+static __always_inline void close_patch_window(struct patch_window *ctx)
{
lockdep_assert_irqs_disabled();
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH v1 2/2] powerpc/code-patching: Convert to open_patch_window()/close_patch_window()
2024-03-16 10:10 ` Christophe Leroy
@ 2024-03-17 21:34 ` Benjamin Gray
0 siblings, 0 replies; 6+ messages in thread
From: Benjamin Gray @ 2024-03-17 21:34 UTC (permalink / raw)
To: Christophe Leroy, linuxppc-dev@lists.ozlabs.org,
mpe@ellerman.id.au
On Sat, 2024-03-16 at 10:10 +0000, Christophe Leroy wrote:
>
>
> Le 15/03/2024 à 09:38, Christophe Leroy a écrit :
> >
> >
> > Le 15/03/2024 à 03:59, Benjamin Gray a écrit :
> > > The existing patching alias page setup and teardown sections can
> > > be
> > > simplified to make use of the new open_patch_window()
> > > abstraction.
> > >
> > > This eliminates the _mm variants of the helpers, consumers no
> > > longer
> > > need to check mm_patch_enabled(), and consumers no longer need to
> > > worry
> > > about synchronization and flushing beyond the changes they make
> > > in the
> > > patching window.
> >
> > With this patch, the time needed to activate or de-activate
> > function
> > tracer is approx 10% longer on powerpc 8xx.
>
> With the following changes, the performance is restored:
>
> diff --git a/arch/powerpc/lib/code-patching.c
> b/arch/powerpc/lib/code-patching.c
> index fd6f8576033a..bc92b85913d8 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -282,13 +282,13 @@ struct patch_window {
> * Interrupts must be disabled for the entire duration of the
> patching. The PIDR
> * is potentially changed during this time.
> */
> -static int open_patch_window(void *addr, struct patch_window *ctx)
> +static __always_inline int open_patch_window(void *addr, struct
> patch_window *ctx)
> {
> unsigned long pfn = get_patch_pfn(addr);
>
> lockdep_assert_irqs_disabled();
>
> - ctx->text_poke_addr = (unsigned
> long)__this_cpu_read(cpu_patching_context.addr);
> + ctx->text_poke_addr = (unsigned
> long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
>
> if (!mm_patch_enabled()) {
> ctx->ptep =
> __this_cpu_read(cpu_patching_context.pte);
> @@ -331,7 +331,7 @@ static int open_patch_window(void *addr, struct
> patch_window *ctx)
> return 0;
> }
>
> -static void close_patch_window(struct patch_window *ctx)
> +static __always_inline void close_patch_window(struct patch_window
> *ctx)
> {
> lockdep_assert_irqs_disabled();
>
>
Thanks for checking that. I did restore the page mask optimisation in a
later patch while still developing, but the 64-bit assembly looked
slightly worse for it. I didn't check the 32-bit; no way to benchmark
it anyway.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2024-03-17 21:35 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-03-15 2:59 [PATCH v1 1/2] powerpc/code-patching: Introduce open_patch_window()/close_patch_window() Benjamin Gray
2024-03-15 2:59 ` [PATCH v1 2/2] powerpc/code-patching: Convert to open_patch_window()/close_patch_window() Benjamin Gray
2024-03-15 8:38 ` Christophe Leroy
2024-03-16 9:50 ` Christophe Leroy
2024-03-16 10:10 ` Christophe Leroy
2024-03-17 21:34 ` Benjamin Gray
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).