LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH] powerpc/code-patching: Return error on patch_branch() out-of-range failure
From: Naveen N. Rao @ 2021-09-22 12:06 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Christophe Leroy, Michael Ellerman,
	Paul Mackerras
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <4940b03de220d1dfe2c6b47a41e60925497ce125.1630657331.git.christophe.leroy@csgroup.eu>

Christophe Leroy wrote:
> Do not silentely ignore a failure of create_branch() in
> patch_branch(). Return -ERANGE.
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
>  arch/powerpc/lib/code-patching.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)

Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>


> 
> diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
> index f9a3019e37b4..0bc9cc0416b8 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -202,7 +202,9 @@ int patch_branch(u32 *addr, unsigned long target, int flags)
>  {
>  	struct ppc_inst instr;
>  
> -	create_branch(&instr, addr, target, flags);
> +	if (create_branch(&instr, addr, target, flags))
> +		return -ERANGE;
> +
>  	return patch_instruction(addr, instr);
>  }
>  
> -- 
> 2.25.0
> 
> 

^ permalink raw reply

* [RESEND PATCH 2/2] powerpc/powermac: constify device_node in of_irq_parse_oldworld()
From: Krzysztof Kozlowski @ 2021-09-22  8:44 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
	Rob Herring, Frank Rowand, Krzysztof Kozlowski, linuxppc-dev,
	linux-kernel, devicetree
In-Reply-To: <20210922084415.18269-1-krzysztof.kozlowski@canonical.com>

The of_irq_parse_oldworld() does not modify passed device_node so make
it a pointer to const for safety.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
---
 arch/powerpc/platforms/powermac/pic.c | 2 +-
 include/linux/of_irq.h                | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 4921bccf0376..af5ca1f41bb1 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -384,7 +384,7 @@ static void __init pmac_pic_probe_oldstyle(void)
 #endif
 }
 
-int of_irq_parse_oldworld(struct device_node *device, int index,
+int of_irq_parse_oldworld(const struct device_node *device, int index,
 			struct of_phandle_args *out_irq)
 {
 	const u32 *ints = NULL;
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index aaf219bd0354..6074fdf51f0c 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -20,12 +20,12 @@ typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *);
 #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
 extern unsigned int of_irq_workarounds;
 extern struct device_node *of_irq_dflt_pic;
-extern int of_irq_parse_oldworld(struct device_node *device, int index,
+extern int of_irq_parse_oldworld(const struct device_node *device, int index,
 			       struct of_phandle_args *out_irq);
 #else /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
 #define of_irq_workarounds (0)
 #define of_irq_dflt_pic (NULL)
-static inline int of_irq_parse_oldworld(struct device_node *device, int index,
+static inline int of_irq_parse_oldworld(const struct device_node *device, int index,
 				      struct of_phandle_args *out_irq)
 {
 	return -EINVAL;
-- 
2.30.2


^ permalink raw reply related

* [RESEND PATCH 1/2] powerpc/powermac: add missing g5_phy_disable_cpu1() declaration
From: Krzysztof Kozlowski @ 2021-09-22  8:44 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
	Rob Herring, Frank Rowand, Krzysztof Kozlowski, linuxppc-dev,
	linux-kernel, devicetree

g5_phy_disable_cpu1() is used outside of platforms/powermac/feature.c,
so it should have a declaration to fix W=1 warning:

  arch/powerpc/platforms/powermac/feature.c:1533:6:
    error: no previous prototype for ‘g5_phy_disable_cpu1’ [-Werror=missing-prototypes]

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
---
 arch/powerpc/include/asm/pmac_feature.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h
index e08e829261b6..7703e5bf1203 100644
--- a/arch/powerpc/include/asm/pmac_feature.h
+++ b/arch/powerpc/include/asm/pmac_feature.h
@@ -143,6 +143,10 @@
  */
 struct device_node;
 
+#ifdef CONFIG_PPC64
+void g5_phy_disable_cpu1(void);
+#endif /* CONFIG_PPC64 */
+
 static inline long pmac_call_feature(int selector, struct device_node* node,
 					long param, long value)
 {
-- 
2.30.2


^ permalink raw reply related

* [PATCH] powerpc/breakpoint: Cleanup
From: Christophe Leroy @ 2021-09-22 13:37 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

cache_op_size() does exactly the same as l1_dcache_bytes().

Remove it.

MSR_64BIT already exists, no need to enclode the check
around #ifdef __powerpc64__

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/hw_breakpoint_constraints.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c
index 675d1f66ab72..42b967e3d85c 100644
--- a/arch/powerpc/kernel/hw_breakpoint_constraints.c
+++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c
@@ -127,15 +127,6 @@ bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,
 	return false;
 }
 
-static int cache_op_size(void)
-{
-#ifdef __powerpc64__
-	return ppc64_caches.l1d.block_size;
-#else
-	return L1_CACHE_BYTES;
-#endif
-}
-
 void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
 			 int *type, int *size, unsigned long *ea)
 {
@@ -147,14 +138,14 @@ void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
 	analyse_instr(&op, regs, *instr);
 	*type = GETTYPE(op.type);
 	*ea = op.ea;
-#ifdef __powerpc64__
+
 	if (!(regs->msr & MSR_64BIT))
 		*ea &= 0xffffffffUL;
-#endif
+
 
 	*size = GETSIZE(op.type);
 	if (*type == CACHEOP) {
-		*size = cache_op_size();
+		*size = l1_dcache_bytes();
 		*ea &= ~(*size - 1);
 	} else if (*type == LOAD_VMX || *type == STORE_VMX) {
 		*ea &= ~(*size - 1);
-- 
2.31.1


^ permalink raw reply related

* Re: [PATCH v3 5/8] x86/sme: Replace occurrences of sme_active() with cc_platform_has()
From: Tom Lendacky @ 2021-09-22 13:40 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Sathyanarayanan Kuppuswamy, linux-efi, Brijesh Singh, kvm,
	Peter Zijlstra, Dave Hansen, dri-devel, platform-driver-x86,
	Will Deacon, linux-s390, Andi Kleen, Joerg Roedel, x86, amd-gfx,
	Christoph Hellwig, Ingo Molnar, linux-graphics-maintainer,
	Tianyu Lan, Borislav Petkov, Andy Lutomirski, Thomas Gleixner,
	kexec, linux-kernel, iommu, linux-fsdevel, linuxppc-dev
In-Reply-To: <20210921215830.vqxd75r4eyau6cxy@box.shutemov.name>

On 9/21/21 4:58 PM, Kirill A. Shutemov wrote:
> On Tue, Sep 21, 2021 at 04:43:59PM -0500, Tom Lendacky wrote:
>> On 9/21/21 4:34 PM, Kirill A. Shutemov wrote:
>>> On Tue, Sep 21, 2021 at 11:27:17PM +0200, Borislav Petkov wrote:
>>>> On Wed, Sep 22, 2021 at 12:20:59AM +0300, Kirill A. Shutemov wrote:
>>>>> I still believe calling cc_platform_has() from __startup_64() is totally
>>>>> broken as it lacks proper wrapping while accessing global variables.
>>>>
>>>> Well, one of the issues on the AMD side was using boot_cpu_data too
>>>> early and the Intel side uses it too. Can you replace those checks with
>>>> is_tdx_guest() or whatever was the helper's name which would check
>>>> whether the the kernel is running as a TDX guest, and see if that helps?
>>>
>>> There's no need in Intel check this early. Only AMD need it. Maybe just
>>> opencode them?
>>
>> Any way you can put a gzipped/bzipped copy of your vmlinux file somewhere I
>> can grab it from and take a look at it?
> 
> You can find broken vmlinux and bzImage here:
> 
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fdrive.google.com%2Fdrive%2Ffolders%2F1n74vUQHOGebnF70Im32qLFY8iS3wvjIs%3Fusp%3Dsharing&amp;data=04%7C01%7Cthomas.lendacky%40amd.com%7C1c7adf380cbe4c1a6bb708d97d4af6ff%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637678583935705530%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=gA30x%2Bfu97tUx0p2UqI8HgjiL8bxDbK1GqgJBbUrUE4%3D&amp;reserved=0
> 
> Let me know when I can remove it.

Looking at everything, it is all RIP relative addressing, so those
accesses should be fine. Your image has the intel_cc_platform_has()
function, does it work if you remove that call? Because I think it may be
the early call into that function which looks like it has instrumentation
that uses %gs in __sanitizer_cov_trace_pc and %gs is not setup properly
yet. And since boot_cpu_data.x86_vendor will likely be zero this early it
will match X86_VENDOR_INTEL and call into that function.

ffffffff8124f880 <intel_cc_platform_has>:
ffffffff8124f880:       e8 bb 64 06 00          callq  ffffffff812b5d40 <__fentry__>
ffffffff8124f885:       e8 36 ca 42 00          callq  ffffffff8167c2c0 <__sanitizer_cov_trace_pc>
ffffffff8124f88a:       31 c0                   xor    %eax,%eax
ffffffff8124f88c:       c3                      retq


ffffffff8167c2c0 <__sanitizer_cov_trace_pc>:
ffffffff8167c2c0:       65 8b 05 39 ad 9a 7e    mov    %gs:0x7e9aad39(%rip),%eax        # 27000 <__preempt_count>
ffffffff8167c2c7:       89 c6                   mov    %eax,%esi
ffffffff8167c2c9:       48 8b 0c 24             mov    (%rsp),%rcx
ffffffff8167c2cd:       81 e6 00 01 00 00       and    $0x100,%esi
ffffffff8167c2d3:       65 48 8b 14 25 40 70    mov    %gs:0x27040,%rdx

Thanks,
Tom

> 

^ permalink raw reply

* Re: [RESEND PATCH 1/2] powerpc/powermac: add missing g5_phy_disable_cpu1() declaration
From: Christophe Leroy @ 2021-09-22 13:52 UTC (permalink / raw)
  To: Krzysztof Kozlowski, Michael Ellerman, Benjamin Herrenschmidt,
	Paul Mackerras, Rob Herring, Frank Rowand, linuxppc-dev,
	linux-kernel, devicetree
In-Reply-To: <20210922084415.18269-1-krzysztof.kozlowski@canonical.com>



Le 22/09/2021 à 10:44, Krzysztof Kozlowski a écrit :
> g5_phy_disable_cpu1() is used outside of platforms/powermac/feature.c,
> so it should have a declaration to fix W=1 warning:
> 
>    arch/powerpc/platforms/powermac/feature.c:1533:6:
>      error: no previous prototype for ‘g5_phy_disable_cpu1’ [-Werror=missing-prototypes]


While you are at it, can you clean it up completely, that is remove the 
declaration in arch/powerpc/platforms/powermac/smp.c ?


> 
> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> ---
>   arch/powerpc/include/asm/pmac_feature.h | 4 ++++
>   1 file changed, 4 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h
> index e08e829261b6..7703e5bf1203 100644
> --- a/arch/powerpc/include/asm/pmac_feature.h
> +++ b/arch/powerpc/include/asm/pmac_feature.h
> @@ -143,6 +143,10 @@
>    */
>   struct device_node;
>   
> +#ifdef CONFIG_PPC64
> +void g5_phy_disable_cpu1(void);
> +#endif /* CONFIG_PPC64 */
> +
>   static inline long pmac_call_feature(int selector, struct device_node* node,
>   					long param, long value)
>   {
> 

^ permalink raw reply

* Re: [RESEND PATCH 2/2] powerpc/powermac: constify device_node in of_irq_parse_oldworld()
From: Christophe Leroy @ 2021-09-22 13:55 UTC (permalink / raw)
  To: Krzysztof Kozlowski, Michael Ellerman, Benjamin Herrenschmidt,
	Paul Mackerras, Rob Herring, Frank Rowand, linuxppc-dev,
	linux-kernel, devicetree
In-Reply-To: <20210922084415.18269-2-krzysztof.kozlowski@canonical.com>



Le 22/09/2021 à 10:44, Krzysztof Kozlowski a écrit :
> The of_irq_parse_oldworld() does not modify passed device_node so make
> it a pointer to const for safety.

AFAIKS this patch is unrelated to previous one so you should send them 
out separately instead of sending as a series.

> 
> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
> ---
>   arch/powerpc/platforms/powermac/pic.c | 2 +-
>   include/linux/of_irq.h                | 4 ++--
>   2 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
> index 4921bccf0376..af5ca1f41bb1 100644
> --- a/arch/powerpc/platforms/powermac/pic.c
> +++ b/arch/powerpc/platforms/powermac/pic.c
> @@ -384,7 +384,7 @@ static void __init pmac_pic_probe_oldstyle(void)
>   #endif
>   }
>   
> -int of_irq_parse_oldworld(struct device_node *device, int index,
> +int of_irq_parse_oldworld(const struct device_node *device, int index,
>   			struct of_phandle_args *out_irq)
>   {
>   	const u32 *ints = NULL;
> diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
> index aaf219bd0354..6074fdf51f0c 100644
> --- a/include/linux/of_irq.h
> +++ b/include/linux/of_irq.h
> @@ -20,12 +20,12 @@ typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *);
>   #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
>   extern unsigned int of_irq_workarounds;
>   extern struct device_node *of_irq_dflt_pic;
> -extern int of_irq_parse_oldworld(struct device_node *device, int index,
> +extern int of_irq_parse_oldworld(const struct device_node *device, int index,
>   			       struct of_phandle_args *out_irq);

Please remove 'extern' which is useless for prototypes.

>   #else /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
>   #define of_irq_workarounds (0)
>   #define of_irq_dflt_pic (NULL)
> -static inline int of_irq_parse_oldworld(struct device_node *device, int index,
> +static inline int of_irq_parse_oldworld(const struct device_node *device, int index,
>   				      struct of_phandle_args *out_irq)
>   {
>   	return -EINVAL;
> 

^ permalink raw reply

* Re: [RESEND PATCH 1/2] powerpc/powermac: add missing g5_phy_disable_cpu1() declaration
From: Krzysztof Kozlowski @ 2021-09-22 14:10 UTC (permalink / raw)
  To: Christophe Leroy, Michael Ellerman, Benjamin Herrenschmidt,
	Paul Mackerras, Rob Herring, Frank Rowand, linuxppc-dev,
	linux-kernel, devicetree
In-Reply-To: <ee9fc44e-daab-10e6-f293-fb45b43ff5b1@csgroup.eu>

On 22/09/2021 15:52, Christophe Leroy wrote:
> 
> 
> Le 22/09/2021 à 10:44, Krzysztof Kozlowski a écrit :
>> g5_phy_disable_cpu1() is used outside of platforms/powermac/feature.c,
>> so it should have a declaration to fix W=1 warning:
>>
>>    arch/powerpc/platforms/powermac/feature.c:1533:6:
>>      error: no previous prototype for ‘g5_phy_disable_cpu1’ [-Werror=missing-prototypes]
> 
> 
> While you are at it, can you clean it up completely, that is remove the 
> declaration in arch/powerpc/platforms/powermac/smp.c ?
> 

Sure, I'll send a v2. Thanks for pointing this out.


Best regards,
Krzysztof

^ permalink raw reply

* Re: [RESEND PATCH 2/2] powerpc/powermac: constify device_node in of_irq_parse_oldworld()
From: Krzysztof Kozlowski @ 2021-09-22 14:12 UTC (permalink / raw)
  To: Christophe Leroy, Michael Ellerman, Benjamin Herrenschmidt,
	Paul Mackerras, Rob Herring, Frank Rowand, linuxppc-dev,
	linux-kernel, devicetree
In-Reply-To: <a33f0978-b617-6a07-7240-ec011f894680@csgroup.eu>

On 22/09/2021 15:55, Christophe Leroy wrote:
> 
> 
> Le 22/09/2021 à 10:44, Krzysztof Kozlowski a écrit :
>> The of_irq_parse_oldworld() does not modify passed device_node so make
>> it a pointer to const for safety.
> 
> AFAIKS this patch is unrelated to previous one so you should send them 
> out separately instead of sending as a series.

The relation it's a series of bugfixes. Although they can be applied
independently, having a series is actually very useful - you run "b4 am"
on one message ID and get everything. The same with patchwork, if you
use that one.

> 
>>
>> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
>> ---
>>   arch/powerpc/platforms/powermac/pic.c | 2 +-
>>   include/linux/of_irq.h                | 4 ++--
>>   2 files changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
>> index 4921bccf0376..af5ca1f41bb1 100644
>> --- a/arch/powerpc/platforms/powermac/pic.c
>> +++ b/arch/powerpc/platforms/powermac/pic.c
>> @@ -384,7 +384,7 @@ static void __init pmac_pic_probe_oldstyle(void)
>>   #endif
>>   }
>>   
>> -int of_irq_parse_oldworld(struct device_node *device, int index,
>> +int of_irq_parse_oldworld(const struct device_node *device, int index,
>>   			struct of_phandle_args *out_irq)
>>   {
>>   	const u32 *ints = NULL;
>> diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
>> index aaf219bd0354..6074fdf51f0c 100644
>> --- a/include/linux/of_irq.h
>> +++ b/include/linux/of_irq.h
>> @@ -20,12 +20,12 @@ typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *);
>>   #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
>>   extern unsigned int of_irq_workarounds;
>>   extern struct device_node *of_irq_dflt_pic;
>> -extern int of_irq_parse_oldworld(struct device_node *device, int index,
>> +extern int of_irq_parse_oldworld(const struct device_node *device, int index,
>>   			       struct of_phandle_args *out_irq);
> 
> Please remove 'extern' which is useless for prototypes.

OK


Best regards,
Krzysztof

^ permalink raw reply

* Re: [PATCH 0/5] KVM: rseq: Fix and a test for a KVM+rseq bug
From: Paolo Bonzini @ 2021-09-22 14:12 UTC (permalink / raw)
  To: Sean Christopherson, Russell King, Catalin Marinas, Will Deacon,
	Guo Ren, Thomas Bogendoerfer, Michael Ellerman, Heiko Carstens,
	Vasily Gorbik, Christian Borntraeger, Oleg Nesterov,
	Steven Rostedt, Ingo Molnar, Thomas Gleixner, Peter Zijlstra,
	Andy Lutomirski, Mathieu Desnoyers, Paul E. McKenney, Boqun Feng,
	Shuah Khan
  Cc: linux-s390, kvm, linux-kernel, linux-csky, linux-mips,
	Peter Foley, Paul Mackerras, linux-kselftest, Ben Gardon,
	Shakeel Butt, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20210818001210.4073390-1-seanjc@google.com>

On 18/08/21 02:12, Sean Christopherson wrote:
> Patch 1 fixes a KVM+rseq bug where KVM's handling of TIF_NOTIFY_RESUME,
> e.g. for task migration, clears the flag without informing rseq and leads
> to stale data in userspace's rseq struct.
> 
> Patch 2 is a cleanup to try and make future bugs less likely.  It's also
> a baby step towards moving and renaming tracehook_notify_resume() since
> it has nothing to do with tracing.  It kills me to not do the move/rename
> as part of this series, but having a dedicated series/discussion seems
> more appropriate given the sheer number of architectures that call
> tracehook_notify_resume() and the lack of an obvious home for the code.
> 
> Patch 3 is a fix/cleanup to stop overriding x86's unistd_{32,64}.h when
> the include path (intentionally) omits tools' uapi headers.  KVM's
> selftests do exactly that so that they can pick up the uapi headers from
> the installed kernel headers, and still use various tools/ headers that
> mirror kernel code, e.g. linux/types.h.  This allows the new test in
> patch 4 to reference __NR_rseq without having to manually define it.
> 
> Patch 4 is a regression test for the KVM+rseq bug.
> 
> Patch 5 is a cleanup made possible by patch 3.
> 
> 
> Sean Christopherson (5):
>    KVM: rseq: Update rseq when processing NOTIFY_RESUME on xfer to KVM
>      guest
>    entry: rseq: Call rseq_handle_notify_resume() in
>      tracehook_notify_resume()
>    tools: Move x86 syscall number fallbacks to .../uapi/
>    KVM: selftests: Add a test for KVM_RUN+rseq to detect task migration
>      bugs
>    KVM: selftests: Remove __NR_userfaultfd syscall fallback
> 
>   arch/arm/kernel/signal.c                      |   1 -
>   arch/arm64/kernel/signal.c                    |   1 -
>   arch/csky/kernel/signal.c                     |   4 +-
>   arch/mips/kernel/signal.c                     |   4 +-
>   arch/powerpc/kernel/signal.c                  |   4 +-
>   arch/s390/kernel/signal.c                     |   1 -
>   include/linux/tracehook.h                     |   2 +
>   kernel/entry/common.c                         |   4 +-
>   kernel/rseq.c                                 |   4 +-
>   .../x86/include/{ => uapi}/asm/unistd_32.h    |   0
>   .../x86/include/{ => uapi}/asm/unistd_64.h    |   3 -
>   tools/testing/selftests/kvm/.gitignore        |   1 +
>   tools/testing/selftests/kvm/Makefile          |   3 +
>   tools/testing/selftests/kvm/rseq_test.c       | 131 ++++++++++++++++++
>   14 files changed, 143 insertions(+), 20 deletions(-)
>   rename tools/arch/x86/include/{ => uapi}/asm/unistd_32.h (100%)
>   rename tools/arch/x86/include/{ => uapi}/asm/unistd_64.h (83%)
>   create mode 100644 tools/testing/selftests/kvm/rseq_test.c
> 

Queued v3, thanks.  I'll send it in a separate pull request to Linus 
since it touches stuff outside my usual turf.

Thanks,

Paolo


^ permalink raw reply

* Re: [PATCH v2 01/16] ASoC: eureka-tlv320: Update to modern clocking terminology
From: Mark Brown @ 2021-09-22 14:21 UTC (permalink / raw)
  To: Fabio Estevam, Liam Girdwood, Mark Brown, Shengjiu Wang,
	Nicolin Chen, Xiubo Li
  Cc: alsa-devel, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20210921213542.31688-1-broonie@kernel.org>

On Tue, 21 Sep 2021 22:35:27 +0100, Mark Brown wrote:
> As part of moving to remove the old style defines for the bus clocks update
> the eureka-tlv320 driver to use more modern terminology for clocking.
> 
> 

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-next

Thanks!

[01/16] ASoC: eureka-tlv320: Update to modern clocking terminology
        commit: 4348be6330a18b123fa82494df9f5a134feecb7f
[02/16] ASoC: fsl-asoc-card: Update to modern clocking terminology
        commit: 8fcfd3493426c229f4f28bc5757dd3359e02cee8
[03/16] ASoC: fsl-audmix: Update to modern clocking terminology
        commit: 2757b340b25dd2cb3afc748d48c1dff6c9689f80
[04/16] ASoC: fsl-esai: Update to modern clocking terminology
        commit: e0b64fa34c7f444908549c32dd68f81ac436299e
[05/16] ASoC: fsl-mqs: Update to modern clocking terminology
        commit: a51da9dc9b3a844460a355cd10d0db4320f4d726
[06/16] ASoC: fsl_sai: Update to modern clocking terminology
        commit: 361284a4eb598eaf28e8458c542f214d3689b134
[07/16] ASoC: fsl_ssi: Update to modern clocking terminology
        commit: 89efbdaaa444d63346bf1bdf3b58dfb421de91f1
[08/16] ASoC: imx-audmix: Update to modern clocking terminology
        commit: bf101022487091032fd8102c835b1157b8283c43
[09/16] ASoC: imx-card: Update to modern clocking terminology
        commit: d689e280121abf1cdf0d37734b0b306098a774ed
[10/16] ASoC: imx-es8328: Update to modern clocking terminology
        commit: 56b69e4e4bc24c732b68ff6df54be83226a3b4e6
[11/16] ASoC: imx-hdmi: Update to modern clocking terminology
        commit: a90f847ad2f1c8575f6a7980e5ee9937d1a5eeb4
[12/16] ASoC: imx-rpmsg: Update to modern clocking terminology
        commit: caa0a6075a6e9239e49690a40a131496398602ab
[13/16] ASoC: imx-sgtl5000: Update to modern clocking terminology
        commit: 419099b4c3318a3c486f9f65b015760e71d53f0a
[14/16] ASoC: mpc8610_hpcd: Update to modern clocking terminology
        commit: 8a7f299b857b81a10566fe19c585fae4d1c1f8ef
[15/16] ASoC: pl1022_ds: Update to modern clocking terminology
        commit: fcd444bf6a29a22e529510de07c72555b7e46224
[16/16] ASoC: pl1022_rdk: Update to modern clocking terminology
        commit: 39e178a4cc7d042cd6353e73f3024d87e79a86ca

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark

^ permalink raw reply

* Re: [PATCH 01/16] ASoC: eureka-tlv320: Update to modern clocking terminology
From: Mark Brown @ 2021-09-22 14:21 UTC (permalink / raw)
  To: Fabio Estevam, Liam Girdwood, Mark Brown, Shengjiu Wang,
	Nicolin Chen, Xiubo Li
  Cc: alsa-devel, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20210921211040.11624-1-broonie@kernel.org>

On Tue, 21 Sep 2021 22:10:25 +0100, Mark Brown wrote:
> As part of moving to remove the old style defines for the bus clocks update
> the eureka-tlv320 driver to use more modern terminology for clocking.
> 
> 

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-next

Thanks!

[01/16] ASoC: eureka-tlv320: Update to modern clocking terminology
        commit: 4348be6330a18b123fa82494df9f5a134feecb7f
[02/16] ASoC: fsl-asoc-card: Update to modern clocking terminology
        commit: 8fcfd3493426c229f4f28bc5757dd3359e02cee8
[03/16] ASoC: fsl-audmix: Update to modern clocking terminology
        commit: 2757b340b25dd2cb3afc748d48c1dff6c9689f80
[04/16] ASoC: fsl-esai: Update to modern clocking terminology
        commit: e0b64fa34c7f444908549c32dd68f81ac436299e
[05/16] ASoC: fsl-mqs: Update to modern clocking terminology
        commit: a51da9dc9b3a844460a355cd10d0db4320f4d726
[06/16] ASoC: fsl_sai: Update to modern clocking terminology
        commit: 361284a4eb598eaf28e8458c542f214d3689b134
[07/16] ASoC: fsl_ssi: Update to modern clocking terminology
        commit: 89efbdaaa444d63346bf1bdf3b58dfb421de91f1
[08/16] ASoC: imx-audmix: Update to modern clocking terminology
        commit: bf101022487091032fd8102c835b1157b8283c43
[09/16] ASoC: imx-card: Update to modern clocking terminology
        commit: d689e280121abf1cdf0d37734b0b306098a774ed
[10/16] ASoC: imx-es8328: Update to modern clocking terminology
        commit: 56b69e4e4bc24c732b68ff6df54be83226a3b4e6
[11/16] ASoC: imx-hdmi: Update to modern clocking terminology
        commit: a90f847ad2f1c8575f6a7980e5ee9937d1a5eeb4
[12/16] ASoC: imx-rpmsg: Update to modern clocking terminology
        commit: caa0a6075a6e9239e49690a40a131496398602ab
[13/16] ASoC: imx-sgtl5000: Update to modern clocking terminology
        commit: 419099b4c3318a3c486f9f65b015760e71d53f0a
[14/16] ASoC: mpc8610_hpcd: Update to modern clocking terminology
        commit: 8a7f299b857b81a10566fe19c585fae4d1c1f8ef
[15/16] ASoC: pl1022_ds: Update to modern clocking terminology
        commit: fcd444bf6a29a22e529510de07c72555b7e46224
[16/16] ASoC: pl1022_rdk: Update to modern clocking terminology
        commit: 39e178a4cc7d042cd6353e73f3024d87e79a86ca

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark

^ permalink raw reply

* Re: [PATCH v3 5/8] x86/sme: Replace occurrences of sme_active() with cc_platform_has()
From: Kirill A. Shutemov @ 2021-09-22 14:30 UTC (permalink / raw)
  To: Tom Lendacky
  Cc: Sathyanarayanan Kuppuswamy, linux-efi, Brijesh Singh, kvm,
	Peter Zijlstra, Dave Hansen, dri-devel, platform-driver-x86,
	Will Deacon, linux-s390, Andi Kleen, Joerg Roedel, x86, amd-gfx,
	Christoph Hellwig, Ingo Molnar, linux-graphics-maintainer,
	Tianyu Lan, Borislav Petkov, Andy Lutomirski, Thomas Gleixner,
	kexec, linux-kernel, iommu, linux-fsdevel, linuxppc-dev
In-Reply-To: <01891f59-7ec3-cf62-a8fc-79f79ca76587@amd.com>

On Wed, Sep 22, 2021 at 08:40:43AM -0500, Tom Lendacky wrote:
> On 9/21/21 4:58 PM, Kirill A. Shutemov wrote:
> > On Tue, Sep 21, 2021 at 04:43:59PM -0500, Tom Lendacky wrote:
> > > On 9/21/21 4:34 PM, Kirill A. Shutemov wrote:
> > > > On Tue, Sep 21, 2021 at 11:27:17PM +0200, Borislav Petkov wrote:
> > > > > On Wed, Sep 22, 2021 at 12:20:59AM +0300, Kirill A. Shutemov wrote:
> > > > > > I still believe calling cc_platform_has() from __startup_64() is totally
> > > > > > broken as it lacks proper wrapping while accessing global variables.
> > > > > 
> > > > > Well, one of the issues on the AMD side was using boot_cpu_data too
> > > > > early and the Intel side uses it too. Can you replace those checks with
> > > > > is_tdx_guest() or whatever was the helper's name which would check
> > > > > whether the the kernel is running as a TDX guest, and see if that helps?
> > > > 
> > > > There's no need in Intel check this early. Only AMD need it. Maybe just
> > > > opencode them?
> > > 
> > > Any way you can put a gzipped/bzipped copy of your vmlinux file somewhere I
> > > can grab it from and take a look at it?
> > 
> > You can find broken vmlinux and bzImage here:
> > 
> > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fdrive.google.com%2Fdrive%2Ffolders%2F1n74vUQHOGebnF70Im32qLFY8iS3wvjIs%3Fusp%3Dsharing&amp;data=04%7C01%7Cthomas.lendacky%40amd.com%7C1c7adf380cbe4c1a6bb708d97d4af6ff%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637678583935705530%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&amp;sdata=gA30x%2Bfu97tUx0p2UqI8HgjiL8bxDbK1GqgJBbUrUE4%3D&amp;reserved=0
> > 
> > Let me know when I can remove it.
> 
> Looking at everything, it is all RIP relative addressing, so those
> accesses should be fine.

Not fine, but waiting to blowup with random build environment change.

> Your image has the intel_cc_platform_has()
> function, does it work if you remove that call? Because I think it may be
> the early call into that function which looks like it has instrumentation
> that uses %gs in __sanitizer_cov_trace_pc and %gs is not setup properly
> yet. And since boot_cpu_data.x86_vendor will likely be zero this early it
> will match X86_VENDOR_INTEL and call into that function.

Right removing call to intel_cc_platform_has() or moving it to
cc_platform.c fixes the issue.

-- 
 Kirill A. Shutemov

^ permalink raw reply

* [PATCH v1] powerpc/64/interrupt: Reconcile soft-mask state in NMI and fix false BUG
From: Nicholas Piggin @ 2021-09-22 14:49 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

If a NMI hits early in an interrupt handler before the irq soft-mask
state is reconciled, that can cause a false-positive BUG with a
CONFIG_PPC_IRQ_SOFT_MASK_DEBUG assertion.

Remove that assertion and instead check the case that if regs->msr has
EE clear, then regs->softe should be marked as disabled so the irq state
looks correct to NMI handlers, the same as how it's fixed up in the
case it was implicit soft-masked.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/interrupt.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index b32ed910a8cf..b76ab848aa0d 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -265,13 +265,16 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
 	local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
 	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 
-	if (is_implicit_soft_masked(regs)) {
-		// Adjust regs->softe soft implicit soft-mask, so
-		// arch_irq_disabled_regs(regs) behaves as expected.
+	if (!(regs->msr & MSR_EE) || is_implicit_soft_masked(regs)) {
+		/*
+		 * Adjust regs->softe to be soft-masked if it had not been
+		 * reconcied (e.g., interrupt entry with MSR[EE]=0 but softe
+		 * not yet set disabled), or if it was in an implicit soft
+		 * masked state. This makes arch_irq_disabled_regs(regs)
+		 * behave as expected.
+		 */
 		regs->softe = IRQS_ALL_DISABLED;
 	}
-	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
-		BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE));
 
 	/* Don't do any per-CPU operations until interrupt state is fixed */
 
-- 
2.23.0


^ permalink raw reply related

* [PATCH v3 0/6] powerpc/64s: interrupt speedups
From: Nicholas Piggin @ 2021-09-22 14:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

Here's a few stragglers. The first patch was submitted already but had
some bugs with unrecoverable exceptions on HPT (current->blah being
accessed before MSR[RI] was enabled). Those should be fixed now.

The others are generally for helping asynch interrupts, which are a bit
harder to measure well but important for IO and IPIs.

After this series, the SPR accesses of the interrupt handlers for radix
are becoming pretty optimal except for PPR which we could improve on,
and virt CPU accounting which is very costly -- we might disable that
by default unless someone comes up with a good reason to keep it.

Since v1:
- Compile fixes for 64e.
- Fixed a SOFT_MASK_DEBUG false positive.
- Improve function name and comments explaining why patch 2 does not
  need to hard enable when PMU is enabled via sysfs.

Since v2:
- Split first patch into patch 1 and 2, improve on the changelogs.
- More compile fixes.
- Fixed several review comments from Daniel.
- Added patch 5.

Thanks,
Nick

Nicholas Piggin (6):
  powerpc/64/interrupt: make normal synchronous interrupts enable
    MSR[EE] if possible
  powerpc/64s/interrupt: handle MSR EE and RI in interrupt entry wrapper
  powerpc/64s/perf: add power_pmu_wants_prompt_pmi to say whether perf
    wants PMIs to be soft-NMI
  powerpc/64s/interrupt: Don't enable MSR[EE] in irq handlers unless
    perf is in use
  powerpc/64/interrupt: reduce expensive debug tests
  powerpc/64s/interrupt: avoid saving CFAR in some asynchronous
    interrupts

 arch/powerpc/include/asm/hw_irq.h    |  59 +++++++++++++---
 arch/powerpc/include/asm/interrupt.h |  58 ++++++++++++---
 arch/powerpc/kernel/dbell.c          |   3 +-
 arch/powerpc/kernel/exceptions-64s.S | 101 ++++++++++++++++++---------
 arch/powerpc/kernel/fpu.S            |   5 ++
 arch/powerpc/kernel/irq.c            |   3 +-
 arch/powerpc/kernel/time.c           |  31 ++++----
 arch/powerpc/kernel/vector.S         |  10 +++
 arch/powerpc/perf/core-book3s.c      |  31 ++++++++
 9 files changed, 232 insertions(+), 69 deletions(-)

-- 
2.23.0


^ permalink raw reply

* [PATCH v3 1/6] powerpc/64/interrupt: make normal synchronous interrupts enable MSR[EE] if possible
From: Nicholas Piggin @ 2021-09-22 14:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210922145452.352571-1-npiggin@gmail.com>

Make synchronous interrupt handler entry wrappers enable MSR[EE] if
MSR[EE] was enabled in the interrupted context. IRQs are soft-disabled
at this point so there is no change to high level code, but it's a
masked interrupt could fire.

This is a performance disadvantage for interrupts which do not later
call interrupt_cond_local_irq_enable(), because an an additional mtmsrd
or wrtee instruction is executed. However the important synchronous
interrupts (e.g., page fault) do enable interrupts, so the performance
disadvantage is mostly avoided.

In the next patch, MSR[RI] enabling can be combined with MSR[EE]
enabling, which mitigates the performance drop for the former and gives
a performance advanage for the latter interrupts, on 64s machines. 64e
is coming along for the ride for now to avoid divergences with 64s in
this tricky code.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/interrupt.h | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index b76ab848aa0d..3802390d8eea 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -150,7 +150,20 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
 #ifdef CONFIG_PPC64
 	if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
 		trace_hardirqs_off();
-	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+	/*
+	 * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE].
+	 * Asynchronous interrupts get here with HARD_DIS set (see below), so
+	 * this enables MSR[EE] for synchronous interrupts. IRQs remain
+	 * soft-masked. The interrupt handler may later call
+	 * interrupt_cond_local_irq_enable() to achieve a regular process
+	 * context.
+	 */
+	if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) {
+		if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+			BUG_ON(!(regs->msr & MSR_EE));
+		__hard_irq_enable();
+	}
 
 	if (user_mode(regs)) {
 		CT_WARN_ON(ct_state() != CONTEXT_USER);
@@ -200,6 +213,10 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt
 
 static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
 {
+#ifdef CONFIG_PPC64
+	/* Ensure interrupt_enter_prepare does not enable MSR[EE] */
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+#endif
 #ifdef CONFIG_PPC_BOOK3S_64
 	if (cpu_has_feature(CPU_FTR_CTRL) &&
 	    !test_thread_local_flags(_TLF_RUNLATCH))
-- 
2.23.0


^ permalink raw reply related

* [PATCH v3 2/6] powerpc/64s/interrupt: handle MSR EE and RI in interrupt entry wrapper
From: Nicholas Piggin @ 2021-09-22 14:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210922145452.352571-1-npiggin@gmail.com>

The mtmsrd to enable MSR[RI] can be combined with the mtmsrd to enable
MSR[EE] in interrupt entry code, for those interrupts which enable EE.
This helps performance of important synchronous interrupts (e.g., page
faults).

This is similar to what commit dd152f70bdc1 ("powerpc/64s: system call
avoid setting MSR[RI] until we set MSR[EE]") does for system calls.

Do this by enabling EE and RI together at the beginning of the entry
wrapper if PACA_IRQ_HARD_DIS is clear, and only enabling RI if it is
set.

Asynchronous interrupts set PACA_IRQ_HARD_DIS, but synchronous ones
leave it unchanged, so by default they always get EE=1 unless they have
interrupted a caller that is hard disabled. When the sync interrupt
later calls interrupt_cond_local_irq_enable(), it will not require
another mtmsrd because MSR[EE] was already enabled here.

This avoids one mtmsrd L=1 for synchronous interrupts on 64s, which
saves about 20 cycles on POWER9. And for kernel-mode interrupts, both
synchronous and asynchronous, this saves an additional 40 cycles due to
the mtmsrd being moved ahead of mfspr SPRN_AMR, which prevents a SPR
scoreboard stall.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/interrupt.h | 27 +++++++++++++++++---
 arch/powerpc/kernel/exceptions-64s.S | 38 +++-------------------------
 arch/powerpc/kernel/fpu.S            |  5 ++++
 arch/powerpc/kernel/vector.S         | 10 ++++++++
 4 files changed, 42 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 3802390d8eea..e178d143671a 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -148,8 +148,14 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
 #endif
 
 #ifdef CONFIG_PPC64
-	if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
-		trace_hardirqs_off();
+	bool trace_enable = false;
+
+	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS)) {
+		if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED)
+			trace_enable = true;
+	} else {
+		irq_soft_mask_set(IRQS_ALL_DISABLED);
+	}
 
 	/*
 	 * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE].
@@ -163,8 +169,14 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
 		if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
 			BUG_ON(!(regs->msr & MSR_EE));
 		__hard_irq_enable();
+	} else {
+		__hard_RI_enable();
 	}
 
+	/* Do this when RI=1 because it can cause SLB faults */
+	if (trace_enable)
+		trace_hardirqs_off();
+
 	if (user_mode(regs)) {
 		CT_WARN_ON(ct_state() != CONTEXT_USER);
 		user_exit_irqoff();
@@ -217,13 +229,16 @@ static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct in
 	/* Ensure interrupt_enter_prepare does not enable MSR[EE] */
 	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 #endif
+	interrupt_enter_prepare(regs, state);
 #ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * RI=1 is set by interrupt_enter_prepare, so this thread flags access
+	 * has to come afterward (it can cause SLB faults).
+	 */
 	if (cpu_has_feature(CPU_FTR_CTRL) &&
 	    !test_thread_local_flags(_TLF_RUNLATCH))
 		__ppc64_runlatch_on();
 #endif
-
-	interrupt_enter_prepare(regs, state);
 	irq_enter();
 }
 
@@ -293,6 +308,8 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
 		regs->softe = IRQS_ALL_DISABLED;
 	}
 
+	__hard_RI_enable();
+
 	/* Don't do any per-CPU operations until interrupt state is fixed */
 
 	if (nmi_disables_ftrace(regs)) {
@@ -390,6 +407,8 @@ interrupt_handler long func(struct pt_regs *regs)			\
 {									\
 	long ret;							\
 									\
+	__hard_RI_enable();						\
+									\
 	ret = ____##func (regs);					\
 									\
 	return ret;							\
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 37859e62a8dc..4dcc76206f8e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -113,7 +113,6 @@ name:
 #define IISIDE		.L_IISIDE_\name\()	/* Uses SRR0/1 not DAR/DSISR */
 #define IDAR		.L_IDAR_\name\()	/* Uses DAR (or SRR0) */
 #define IDSISR		.L_IDSISR_\name\()	/* Uses DSISR (or SRR1) */
-#define ISET_RI		.L_ISET_RI_\name\()	/* Run common code w/ MSR[RI]=1 */
 #define IBRANCH_TO_COMMON	.L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
 #define IREALMODE_COMMON	.L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
 #define IMASK		.L_IMASK_\name\()	/* IRQ soft-mask bit */
@@ -157,9 +156,6 @@ do_define_int n
 	.ifndef IDSISR
 		IDSISR=0
 	.endif
-	.ifndef ISET_RI
-		ISET_RI=1
-	.endif
 	.ifndef IBRANCH_TO_COMMON
 		IBRANCH_TO_COMMON=1
 	.endif
@@ -512,11 +508,6 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
 	stb	r10,PACASRR_VALID(r13)
 	.endif
 
-	.if ISET_RI
-	li	r10,MSR_RI
-	mtmsrd	r10,1			/* Set MSR_RI */
-	.endif
-
 	.if ISTACK
 	.if IKUAP
 	kuap_save_amr_and_lock r9, r10, cr1, cr0
@@ -902,11 +893,6 @@ INT_DEFINE_BEGIN(system_reset)
 	IVEC=0x100
 	IAREA=PACA_EXNMI
 	IVIRT=0 /* no virt entry point */
-	/*
-	 * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
-	 * being used, so a nested NMI exception would corrupt it.
-	 */
-	ISET_RI=0
 	ISTACK=0
 	IKVM_REAL=1
 INT_DEFINE_END(system_reset)
@@ -979,16 +965,14 @@ TRAMP_REAL_BEGIN(system_reset_fwnmi)
 EXC_COMMON_BEGIN(system_reset_common)
 	__GEN_COMMON_ENTRY system_reset
 	/*
-	 * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
-	 * to recover, but nested NMI will notice in_nmi and not recover
-	 * because of the use of the NMI stack. in_nmi reentrancy is tested in
-	 * system_reset_exception.
+	 * Increment paca->in_nmi. When the interrupt entry wrapper later
+	 * enable MSR_RI, then SLB or MCE will be able to recover, but a nested
+	 * NMI will notice in_nmi and not recover because of the use of the NMI
+	 * stack. in_nmi reentrancy is tested in system_reset_exception.
 	 */
 	lhz	r10,PACA_IN_NMI(r13)
 	addi	r10,r10,1
 	sth	r10,PACA_IN_NMI(r13)
-	li	r10,MSR_RI
-	mtmsrd 	r10,1
 
 	mr	r10,r1
 	ld	r1,PACA_NMI_EMERG_SP(r13)
@@ -1062,12 +1046,6 @@ INT_DEFINE_BEGIN(machine_check_early)
 	IAREA=PACA_EXMC
 	IVIRT=0 /* no virt entry point */
 	IREALMODE_COMMON=1
-	/*
-	 * MSR_RI is not enabled, because PACA_EXMC is being used, so a
-	 * nested machine check corrupts it. machine_check_common enables
-	 * MSR_RI.
-	 */
-	ISET_RI=0
 	ISTACK=0
 	IDAR=1
 	IDSISR=1
@@ -1078,7 +1056,6 @@ INT_DEFINE_BEGIN(machine_check)
 	IVEC=0x200
 	IAREA=PACA_EXMC
 	IVIRT=0 /* no virt entry point */
-	ISET_RI=0
 	IDAR=1
 	IDSISR=1
 	IKVM_REAL=1
@@ -1148,9 +1125,6 @@ EXC_COMMON_BEGIN(machine_check_early_common)
 BEGIN_FTR_SECTION
 	bl	enable_machine_check
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-	li	r10,MSR_RI
-	mtmsrd	r10,1
-
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	machine_check_early
 	std	r3,RESULT(r1)	/* Save result */
@@ -1238,10 +1212,6 @@ EXC_COMMON_BEGIN(machine_check_common)
 	 * save area: PACA_EXMC instead of PACA_EXGEN.
 	 */
 	GEN_COMMON machine_check
-
-	/* Enable MSR_RI when finished with PACA_EXMC */
-	li	r10,MSR_RI
-	mtmsrd 	r10,1
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	machine_check_exception
 	b	interrupt_return_srr
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index ba4afe3b5a9c..f71f2bbd4de6 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -81,7 +81,12 @@ EXPORT_SYMBOL(store_fp_state)
  */
 _GLOBAL(load_up_fpu)
 	mfmsr	r5
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+	ori	r5,r5,MSR_FP|MSR_RI
+#else
 	ori	r5,r5,MSR_FP
+#endif
 #ifdef CONFIG_VSX
 BEGIN_FTR_SECTION
 	oris	r5,r5,MSR_VSX@h
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index ba03eedfdcd8..5cc24d8cce94 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -47,6 +47,10 @@ EXPORT_SYMBOL(store_vr_state)
  */
 _GLOBAL(load_up_altivec)
 	mfmsr	r5			/* grab the current MSR */
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+	ori	r5,r5,MSR_RI
+#endif
 	oris	r5,r5,MSR_VEC@h
 	MTMSRD(r5)			/* enable use of AltiVec now */
 	isync
@@ -126,6 +130,12 @@ _GLOBAL(load_up_vsx)
 	andis.	r5,r12,MSR_VEC@h
 	beql+	load_up_altivec		/* skip if already loaded */
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+	li	r5,MSR_RI
+	mtmsrd	r5,1
+#endif
+
 	ld	r4,PACACURRENT(r13)
 	addi	r4,r4,THREAD		/* Get THREAD */
 	li	r6,1
-- 
2.23.0


^ permalink raw reply related

* [PATCH v3 3/6] powerpc/64s/perf: add power_pmu_wants_prompt_pmi to say whether perf wants PMIs to be soft-NMI
From: Nicholas Piggin @ 2021-09-22 14:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Athira Rajeev, Madhavan Srinivasan, Nicholas Piggin
In-Reply-To: <20210922145452.352571-1-npiggin@gmail.com>

Interrupt code enables MSR[EE] in some irq handlers while keeping local
irqs disabled via soft-mask, allowing PMI interrupts to be taken as
soft-NMI to improve profiling of irq handlers.

When perf is not enabled, there is no point to doing this, it's
additional overhead. So provide a function that can say if PMIs should
be taken promptly if possible.

Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/hw_irq.h |  2 ++
 arch/powerpc/perf/core-book3s.c   | 31 +++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 21cc571ea9c2..b987822e552e 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -306,6 +306,8 @@ static inline bool lazy_irq_pending_nocheck(void)
 	return __lazy_irq_pending(local_paca->irq_happened);
 }
 
+bool power_pmu_wants_prompt_pmi(void);
+
 /*
  * This is called by asynchronous interrupts to conditionally
  * re-enable hard interrupts after having cleared the source
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 73e62e9b179b..773d07d68b6d 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -17,6 +17,7 @@
 #include <asm/firmware.h>
 #include <asm/ptrace.h>
 #include <asm/code-patching.h>
+#include <asm/hw_irq.h>
 #include <asm/interrupt.h>
 
 #ifdef CONFIG_PPC64
@@ -2381,6 +2382,36 @@ static void perf_event_interrupt(struct pt_regs *regs)
 	perf_sample_event_took(sched_clock() - start_clock);
 }
 
+/*
+ * If the perf subsystem wants performance monitor interrupts as soon as
+ * possible (e.g., to sample the instruction address and stack chain),
+ * this should return true. The IRQ masking code can then enable MSR[EE]
+ * in some places (e.g., interrupt handlers) that allows PMI interrupts
+ * though to improve accuracy of profiles, at the cost of some performance.
+ *
+ * The PMU counters can be enabled by other means (e.g., sysfs raw SPR
+ * access), but in that case there is no need for prompt PMI handling.
+ *
+ * This currently returns true if any perf counter is being used. It
+ * could possibly return false if only events are being counted rather than
+ * samples being taken, but for now this is good enough.
+ */
+bool power_pmu_wants_prompt_pmi(void)
+{
+	struct cpu_hw_events *cpuhw;
+
+	/*
+	 * This could simply test local_paca->pmcregs_in_use if that were not
+	 * under ifdef KVM.
+	 */
+
+	if (!ppmu)
+		return false;
+
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
+	return cpuhw->n_events;
+}
+
 static int power_pmu_prepare_cpu(unsigned int cpu)
 {
 	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
-- 
2.23.0


^ permalink raw reply related

* [PATCH v3 4/6] powerpc/64s/interrupt: Don't enable MSR[EE] in irq handlers unless perf is in use
From: Nicholas Piggin @ 2021-09-22 14:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210922145452.352571-1-npiggin@gmail.com>

Enabling MSR[EE] in interrupt handlers while interrupts are still soft
masked allows PMIs to profile interrupt handlers to some degree, beyond
what SIAR latching allows.

When perf is not being used, this is almost useless work. It requires an
extra mtmsrd in the irq handler, and it also opens the door to masked
interrupts hitting and requiring replay, which is more expensive than
just taking them directly. This effect can be noticable in high IRQ
workloads.

Avoid enabling MSR[EE] unless perf is currently in use. This saves about
60 cycles (or 8%) on a simple decrementer interrupt microbenchmark.
Replayed interrupts drop from 1.4% of all interrupts taken, to 0.003%.

This does prevent the soft-nmi interrupt being taken in these handlers,
but that's not too reliable anyway. The SMP watchdog will continue to be
the reliable way to catch lockups.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/hw_irq.h | 57 +++++++++++++++++++++++++------
 arch/powerpc/kernel/dbell.c       |  3 +-
 arch/powerpc/kernel/irq.c         |  3 +-
 arch/powerpc/kernel/time.c        | 31 +++++++++--------
 4 files changed, 67 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index b987822e552e..55e3fa44f280 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -309,17 +309,54 @@ static inline bool lazy_irq_pending_nocheck(void)
 bool power_pmu_wants_prompt_pmi(void);
 
 /*
- * This is called by asynchronous interrupts to conditionally
- * re-enable hard interrupts after having cleared the source
- * of the interrupt. They are kept disabled if there is a different
- * soft-masked interrupt pending that requires hard masking.
+ * This is called by asynchronous interrupts to check whether to
+ * conditionally re-enable hard interrupts after having cleared
+ * the source of the interrupt. They are kept disabled if there
+ * is a different soft-masked interrupt pending that requires hard
+ * masking.
  */
-static inline void may_hard_irq_enable(void)
+static inline bool should_hard_irq_enable(void)
 {
-	if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)) {
-		get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
-		__hard_irq_enable();
-	}
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+	WARN_ON(irq_soft_mask_return() == IRQS_ENABLED);
+	WARN_ON(mfmsr() & MSR_EE);
+#endif
+#ifdef CONFIG_PERF_EVENTS
+	/*
+	 * If the PMU is not running, there is not much reason to enable
+	 * MSR[EE] in irq handlers because any interrupts would just be
+	 * soft-masked.
+	 *
+	 * TODO: Add test for 64e
+	 */
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !power_pmu_wants_prompt_pmi())
+		return false;
+
+	if (get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)
+		return false;
+
+	return true;
+#else
+	return false;
+#endif
+}
+
+/*
+ * Do the hard enabling, only call this if should_hard_irq_enable is true.
+ */
+static inline void do_hard_irq_enable(void)
+{
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+	WARN_ON(irq_soft_mask_return() == IRQS_ENABLED);
+	WARN_ON(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK);
+	WARN_ON(mfmsr() & MSR_EE);
+#endif
+	/*
+	 * This allows PMI interrupts (and watchdog soft-NMIs) through.
+	 * There is no other reason to enable this way.
+	 */
+	get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
+	__hard_irq_enable();
 }
 
 static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
@@ -400,7 +437,7 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
 	return !(regs->msr & MSR_EE);
 }
 
-static inline bool may_hard_irq_enable(void)
+static inline bool should_hard_irq_enable(void)
 {
 	return false;
 }
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 5545c9cd17c1..f55c6fb34a3a 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -27,7 +27,8 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
 
 	ppc_msgsync();
 
-	may_hard_irq_enable();
+	if (should_hard_irq_enable())
+		do_hard_irq_enable();
 
 	kvmppc_clear_host_ipi(smp_processor_id());
 	__this_cpu_inc(irq_stat.doorbell_irqs);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 551b653228c4..f658aa22a21e 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -739,7 +739,8 @@ void __do_irq(struct pt_regs *regs)
 	irq = ppc_md.get_irq();
 
 	/* We can hard enable interrupts now to allow perf interrupts */
-	may_hard_irq_enable();
+	if (should_hard_irq_enable())
+		do_hard_irq_enable();
 
 	/* And finally process it */
 	if (unlikely(!irq))
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 934d8ae66cc6..c3c663ff7c48 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -566,22 +566,23 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
 		return;
 	}
 
-	/* Ensure a positive value is written to the decrementer, or else
-	 * some CPUs will continue to take decrementer exceptions. When the
-	 * PPC_WATCHDOG (decrementer based) is configured, keep this at most
-	 * 31 bits, which is about 4 seconds on most systems, which gives
-	 * the watchdog a chance of catching timer interrupt hard lockups.
-	 */
-	if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
-		set_dec(0x7fffffff);
-	else
-		set_dec(decrementer_max);
-
-	/* Conditionally hard-enable interrupts now that the DEC has been
-	 * bumped to its maximum value
-	 */
-	may_hard_irq_enable();
+	/* Conditionally hard-enable interrupts. */
+	if (should_hard_irq_enable()) {
+		/*
+		 * Ensure a positive value is written to the decrementer, or
+		 * else some CPUs will continue to take decrementer exceptions.
+		 * When the PPC_WATCHDOG (decrementer based) is configured,
+		 * keep this at most 31 bits, which is about 4 seconds on most
+		 * systems, which gives the watchdog a chance of catching timer
+		 * interrupt hard lockups.
+		 */
+		if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
+			set_dec(0x7fffffff);
+		else
+			set_dec(decrementer_max);
 
+		do_hard_irq_enable();
+	}
 
 #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
 	if (atomic_read(&ppc_n_lost_interrupts) != 0)
-- 
2.23.0


^ permalink raw reply related

* [PATCH v3 5/6] powerpc/64/interrupt: reduce expensive debug tests
From: Nicholas Piggin @ 2021-09-22 14:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210922145452.352571-1-npiggin@gmail.com>

Move the assertions requiring restart table searches under
CONFIG_PPC_IRQ_SOFT_MASK_DEBUG.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/interrupt.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index e178d143671a..0e84e99af37b 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -97,6 +97,11 @@ static inline void srr_regs_clobbered(void)
 	local_paca->hsrr_valid = 0;
 }
 #else
+static inline unsigned long search_kernel_restart_table(unsigned long addr)
+{
+	return 0;
+}
+
 static inline bool is_implicit_soft_masked(struct pt_regs *regs)
 {
 	return false;
@@ -190,13 +195,14 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
 		 */
 		if (TRAP(regs) != INTERRUPT_PROGRAM) {
 			CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
-			BUG_ON(is_implicit_soft_masked(regs));
+			if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+				BUG_ON(is_implicit_soft_masked(regs));
 		}
-#ifdef CONFIG_PPC_BOOK3S
+
 		/* Move this under a debugging check */
-		if (arch_irq_disabled_regs(regs))
+		if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) &&
+				arch_irq_disabled_regs(regs))
 			BUG_ON(search_kernel_restart_table(regs->nip));
-#endif
 	}
 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
 		BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE));
-- 
2.23.0


^ permalink raw reply related

* [PATCH v3 6/6] powerpc/64s/interrupt: avoid saving CFAR in some asynchronous interrupts
From: Nicholas Piggin @ 2021-09-22 14:54 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin
In-Reply-To: <20210922145452.352571-1-npiggin@gmail.com>

Reading the CFAR register is quite costly (~20 cycles on POWER9). It is
a good idea to have for most synchronous interrupts, but for async ones
it is much less important.

Doorbell, external, and decrementer interrupts are the important
asynchronous ones. HV interrupts can't skip CFAR if KVM HV is possible,
because it might be a guest exit that requires CFAR preserved. But the
important pseries interrupts can avoid loading CFAR.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/exceptions-64s.S | 63 ++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 4dcc76206f8e..eb3af5abdd37 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -111,6 +111,8 @@ name:
 #define IAREA		.L_IAREA_\name\()	/* PACA save area */
 #define IVIRT		.L_IVIRT_\name\()	/* Has virt mode entry point */
 #define IISIDE		.L_IISIDE_\name\()	/* Uses SRR0/1 not DAR/DSISR */
+#define ICFAR		.L_ICFAR_\name\()	/* Uses CFAR */
+#define ICFAR_IF_HVMODE	.L_ICFAR_IF_HVMODE_\name\() /* Uses CFAR if HV */
 #define IDAR		.L_IDAR_\name\()	/* Uses DAR (or SRR0) */
 #define IDSISR		.L_IDSISR_\name\()	/* Uses DSISR (or SRR1) */
 #define IBRANCH_TO_COMMON	.L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
@@ -150,6 +152,12 @@ do_define_int n
 	.ifndef IISIDE
 		IISIDE=0
 	.endif
+	.ifndef ICFAR
+		ICFAR=1
+	.endif
+	.ifndef ICFAR_IF_HVMODE
+		ICFAR_IF_HVMODE=0
+	.endif
 	.ifndef IDAR
 		IDAR=0
 	.endif
@@ -287,9 +295,21 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	HMT_MEDIUM
 	std	r10,IAREA+EX_R10(r13)		/* save r10 - r12 */
+	.if ICFAR
 BEGIN_FTR_SECTION
 	mfspr	r10,SPRN_CFAR
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+	.elseif ICFAR_IF_HVMODE
+BEGIN_FTR_SECTION
+  BEGIN_FTR_SECTION_NESTED(69)
+	mfspr	r10,SPRN_CFAR
+  END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+FTR_SECTION_ELSE
+  BEGIN_FTR_SECTION_NESTED(69)
+	li	r10,0
+  END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+	.endif
 	.if \ool
 	.if !\virt
 	b	tramp_real_\name
@@ -305,9 +325,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 BEGIN_FTR_SECTION
 	std	r9,IAREA+EX_PPR(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+	.if ICFAR || ICFAR_IF_HVMODE
 BEGIN_FTR_SECTION
 	std	r10,IAREA+EX_CFAR(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+	.endif
 	INTERRUPT_TO_KERNEL
 	mfctr	r10
 	std	r10,IAREA+EX_CTR(r13)
@@ -559,7 +581,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	.endif
 
 BEGIN_FTR_SECTION
+	.if ICFAR || ICFAR_IF_HVMODE
 	ld	r10,IAREA+EX_CFAR(r13)
+	.else
+	li	r10,0
+	.endif
 	std	r10,ORIG_GPR3(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	ld	r10,IAREA+EX_CTR(r13)
@@ -1502,6 +1528,12 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
  *
  * If soft masked, the masked handler will note the pending interrupt for
  * replay, and clear MSR[EE] in the interrupted context.
+ *
+ * CFAR is not required because this is an asynchronous interrupt that in
+ * general won't have much bearing on the state of the CPU, with the possible
+ * exception of crash/debug IPIs, but those are generally moving to use SRESET
+ * IPIs. Unless this is an HV interrupt and KVM HV is possible, in which case
+ * it may be exiting the guest and need CFAR to be saved.
  */
 INT_DEFINE_BEGIN(hardware_interrupt)
 	IVEC=0x500
@@ -1509,6 +1541,10 @@ INT_DEFINE_BEGIN(hardware_interrupt)
 	IMASK=IRQS_DISABLED
 	IKVM_REAL=1
 	IKVM_VIRT=1
+	ICFAR=0
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	ICFAR_IF_HVMODE=1
+#endif
 INT_DEFINE_END(hardware_interrupt)
 
 EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
@@ -1727,6 +1763,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
  * If PPC_WATCHDOG is configured, the soft masked handler will actually set
  * things back up to run soft_nmi_interrupt as a regular interrupt handler
  * on the emergency stack.
+ *
+ * CFAR is not required because this is asynchronous (see hardware_interrupt).
+ * A watchdog interrupt may like to have CFAR, but usually the interesting
+ * branch is long gone by that point (e.g., infinite loop).
  */
 INT_DEFINE_BEGIN(decrementer)
 	IVEC=0x900
@@ -1734,6 +1774,7 @@ INT_DEFINE_BEGIN(decrementer)
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	IKVM_REAL=1
 #endif
+	ICFAR=0
 INT_DEFINE_END(decrementer)
 
 EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
@@ -1809,6 +1850,8 @@ EXC_COMMON_BEGIN(hdecrementer_common)
  * If soft masked, the masked handler will note the pending interrupt for
  * replay, leaving MSR[EE] enabled in the interrupted context because the
  * doorbells are edge triggered.
+ *
+ * CFAR is not required, similarly to hardware_interrupt.
  */
 INT_DEFINE_BEGIN(doorbell_super)
 	IVEC=0xa00
@@ -1816,6 +1859,7 @@ INT_DEFINE_BEGIN(doorbell_super)
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	IKVM_REAL=1
 #endif
+	ICFAR=0
 INT_DEFINE_END(doorbell_super)
 
 EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)
@@ -1867,6 +1911,7 @@ INT_DEFINE_BEGIN(system_call)
 	IVEC=0xc00
 	IKVM_REAL=1
 	IKVM_VIRT=1
+	ICFAR=0
 INT_DEFINE_END(system_call)
 
 .macro SYSTEM_CALL virt
@@ -2165,6 +2210,11 @@ EXC_COMMON_BEGIN(hmi_exception_common)
  * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
  * This is an asynchronous interrupt in response to a msgsnd doorbell.
  * Similar to the 0xa00 doorbell but for host rather than guest.
+ *
+ * CFAR is not required (similar to doorbell_interrupt), unless KVM HV
+ * is enabled, in which case it may be a guest exit. Most PowerNV kernels
+ * include KVM support so it would be nice if this could be dynamically
+ * patched out if KVM was not currently running any guests.
  */
 INT_DEFINE_BEGIN(h_doorbell)
 	IVEC=0xe80
@@ -2172,6 +2222,9 @@ INT_DEFINE_BEGIN(h_doorbell)
 	IMASK=IRQS_DISABLED
 	IKVM_REAL=1
 	IKVM_VIRT=1
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	ICFAR=0
+#endif
 INT_DEFINE_END(h_doorbell)
 
 EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20)
@@ -2195,6 +2248,9 @@ EXC_COMMON_BEGIN(h_doorbell_common)
  * Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
  * This is an asynchronous interrupt in response to an "external exception".
  * Similar to 0x500 but for host only.
+ *
+ * Like h_doorbell, CFAR is only required for KVM HV because this can be
+ * a guest exit.
  */
 INT_DEFINE_BEGIN(h_virt_irq)
 	IVEC=0xea0
@@ -2202,6 +2258,9 @@ INT_DEFINE_BEGIN(h_virt_irq)
 	IMASK=IRQS_DISABLED
 	IKVM_REAL=1
 	IKVM_VIRT=1
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	ICFAR=0
+#endif
 INT_DEFINE_END(h_virt_irq)
 
 EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20)
@@ -2238,6 +2297,8 @@ EXC_VIRT_NONE(0x4ee0, 0x20)
  *
  * If soft masked, the masked handler will note the pending interrupt for
  * replay, and clear MSR[EE] in the interrupted context.
+ *
+ * CFAR is not used by perf interrupts so not required.
  */
 INT_DEFINE_BEGIN(performance_monitor)
 	IVEC=0xf00
@@ -2245,6 +2306,7 @@ INT_DEFINE_BEGIN(performance_monitor)
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	IKVM_REAL=1
 #endif
+	ICFAR=0
 INT_DEFINE_END(performance_monitor)
 
 EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20)
@@ -2669,6 +2731,7 @@ EXC_VIRT_NONE(0x5800, 0x100)
 INT_DEFINE_BEGIN(soft_nmi)
 	IVEC=0x900
 	ISTACK=0
+	ICFAR=0
 INT_DEFINE_END(soft_nmi)
 
 /*
-- 
2.23.0


^ permalink raw reply related

* Re: [PATCH] powerpc/paravirt: correct preempt debug splat in vcpu_is_preempted()
From: Nathan Lynch @ 2021-09-22 15:28 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: srikar, npiggin
In-Reply-To: <87fstxi0hc.fsf@mpe.ellerman.id.au>

Michael Ellerman <mpe@ellerman.id.au> writes:
> Nathan Lynch <nathanl@linux.ibm.com> writes:
>> --- a/arch/powerpc/include/asm/paravirt.h
>> +++ b/arch/powerpc/include/asm/paravirt.h
>> @@ -97,7 +97,14 @@ static inline bool vcpu_is_preempted(int cpu)
>>  
>>  #ifdef CONFIG_PPC_SPLPAR
>>  	if (!is_kvm_guest()) {
>> -		int first_cpu = cpu_first_thread_sibling(smp_processor_id());
>> +		int first_cpu;
>> +
>> +		/*
>> +		 * This is only a guess at best, and this function may be
>> +		 * called with preemption enabled. Using raw_smp_processor_id()
>> +		 * does not damage accuracy.
>> +		 */
>> +		first_cpu = cpu_first_thread_sibling(raw_smp_processor_id());
>
> This change seems good, except I think the comment needs to be a lot
> more explicit about what it's doing and why.
>
> A casual reader is going to be confused about vcpu preemption vs
> "preemption", which are basically unrelated yet use the same word.
>
> It's not clear how raw_smp_processor_id() is related to (Linux)
> preemption, unless you know that smp_processor_id() is the alternative
> and it contains a preemption check.
>
> And "this is only a guess" is not clear on what *this* is, you're
> referring to the result of the whole function, but that's not obvious.

You're right.

>
>>  		/*
>>  		 * Preemption can only happen at core granularity. This CPU
>                    ^^^^^^^^^^
>                    Means something different to "preemption" above.
>
> I know you didn't write that comment, and maybe we need to rewrite some
> of those existing comments to make it clear they're not talking about
> Linux preemption.

Thanks, agreed on all points. I'll rework the existing comments and any
new ones to clearly distinguish between the two senses of preemption
here.

^ permalink raw reply

* [Bug 213837] "Kernel panic - not syncing: corrupted stack end detected inside scheduler" at building via distcc on a G5
From: bugzilla-daemon @ 2021-09-22 15:33 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <bug-213837-206035@https.bugzilla.kernel.org/>

https://bugzilla.kernel.org/show_bug.cgi?id=213837

--- Comment #7 from Erhard F. (erhard_f@mailbox.org) ---
Created attachment 298919
  --> https://bugzilla.kernel.org/attachment.cgi?id=298919&action=edit
dmesg (5.15-rc2 + patch, PowerMac G5 11,2)

(In reply to mpe from comment #6)
> Can you try this patch, it might help us work out what is corrupting the
> stack.
With your patch applied to recent v5.15-rc2 the output looks like this:

[...]
stack corrupted? stack end = 0xc000000029fdc000
stack: c000000029fdbc00: 5a5a5a5a 5a5a5a5a cccccccc cccccccc  ZZZZZZZZ........
stack: c000000029fdbc10: 00000ddc 7c000010 cccccccc cccccccc  ....|...........
stack: c000000029fdbc20: 29fc4e41 673d4bb3 5a5a5a5a 5a5a5a5a  ).NAg=K.ZZZZZZZZ
stack: c000000029fdbc30: cccccccc cccccccc 00000ddc 8e000010  ................
stack: c000000029fdbc40: cccccccc cccccccc 41fc4e41 673d41a3  ........A.NAg=A.
stack: c000000029fdbc50: 5a5a5a5a 5a5a5a5a cccccccc cccccccc  ZZZZZZZZ........
stack: c000000029fdbc60: 00000ddc 8e00000c cccccccc cccccccc  ................
stack: c000000029fdbc70: 79fc4e41 673d4dab 5a5a5a5a 5a5a5a5a  y.NAg=M.ZZZZZZZZ
stack: c000000029fdbc80: cccccccc cccccccc 00000ddc 90000008  ................
stack: c000000029fdbc90: cccccccc cccccccc 91fc4e41 673d4573  ..........NAg=Es
stack: c000000029fdbca0: 5a5a5a5a 5a5a5a5a cccccccc cccccccc  ZZZZZZZZ........
stack: c000000029fdbcb0: 00000dd7 ac000016 cccccccc cccccccc  ................
stack: c000000029fdbcc0: c9fc4e41 673d4203 5a5a5a5a 5a5a5a5a  ..NAg=B.ZZZZZZZZ
stack: c000000029fdbcd0: cccccccc cccccccc 00000ddc 6c000004  ............l...
stack: c000000029fdbce0: cccccccc cccccccc e1fc4e41 673d474b  ..........NAg=GK
stack: c000000029fdbcf0: 5a5a5a5a 5a5a5a5a cccccccc cccccccc  ZZZZZZZZ........
stack: c000000029fdbd00: 00000ddc 88000000 cccccccc cccccccc  ................
stack: c000000029fdbd10: 19fd4e41 673d4143 5a5a5a5a 5a5a5a5a  ..NAg=ACZZZZZZZZ
[...]
stack: c000000029fdffd0: 00000000 00000000 00000000 00000000  ................
stack: c000000029fdffe0: 00000000 00000000 00000000 00000000  ................
stack: c000000029fdfff0: 00000000 00000000 00000000 00000000  ................
Kernel panic - not syncing: corrupted stack end detected inside scheduler
CPU: 0 PID: 686 Comm: kworker/u4:0 Tainted: G        W        
5.15.0-rc2-PowerMacG5+ #2
Workqueue: writeback .wb_workfn (flush-254:1)
Call Trace:
[c000000029fdf400] [c0000000005532c8] .dump_stack_lvl+0x98/0xe0 (unreliable)
[c000000029fdf490] [c000000000069534] .panic+0x14c/0x3e8
[c000000029fdf540] [c00000000081d598] .__schedule+0xc0/0x874
[c000000029fdf610] [c00000000081de98] .preempt_schedule_common+0x28/0x48
[c000000029fdf690] [c00000000081dee4] .__cond_resched+0x2c/0x50
[c000000029fdf700] [c0000000002b31b8] .writeback_sb_inodes+0x328/0x4c8
[c000000029fdf880] [c0000000002b33e8] .__writeback_inodes_wb+0x90/0xcc
[c000000029fdf930] [c0000000002b3650] .wb_writeback+0x22c/0x3c8
[c000000029fdfa50] [c0000000002b45a8] .wb_workfn+0x380/0x460
[c000000029fdfbb0] [c00000000008b300] .process_one_work+0x31c/0x4ec
[c000000029fdfca0] [c00000000008b950] .worker_thread+0x1d4/0x290
[c000000029fdfd60] [c000000000093b0c] .kthread+0x124/0x12c
[c000000029fdfe10] [c00000000000bce0] .ret_from_kernel_thread+0x58/0x60
Rebooting in 40 seconds..

Can't make much sense out of it but hopefully you can. ;) For the full trace
please have a look at the attached kernel dmesg (via netconsole).

-- 
You may reply to this email to add a comment.

You are receiving this mail because:
You are watching the assignee of the bug.
You are watching someone on the CC list of the bug.

^ permalink raw reply

* Re: [PATCH] powerpc/paravirt: correct preempt debug splat in vcpu_is_preempted()
From: Nathan Lynch @ 2021-09-22 16:01 UTC (permalink / raw)
  To: Srikar Dronamraju; +Cc: linuxppc-dev, npiggin
In-Reply-To: <20210922075718.GA2004@linux.vnet.ibm.com>

Srikar Dronamraju <srikar@linux.vnet.ibm.com> writes:
> * Nathan Lynch <nathanl@linux.ibm.com> [2021-09-20 22:12:13]:
>
>> vcpu_is_preempted() can be used outside of preempt-disabled critical
>> sections, yielding warnings such as:
>> 
>> BUG: using smp_processor_id() in preemptible [00000000] code: systemd-udevd/185
>> caller is rwsem_spin_on_owner+0x1cc/0x2d0
>> CPU: 1 PID: 185 Comm: systemd-udevd Not tainted 5.15.0-rc2+ #33
>> Call Trace:
>> [c000000012907ac0] [c000000000aa30a8] dump_stack_lvl+0xac/0x108 (unreliable)
>> [c000000012907b00] [c000000001371f70] check_preemption_disabled+0x150/0x160
>> [c000000012907b90] [c0000000001e0e8c] rwsem_spin_on_owner+0x1cc/0x2d0
>> [c000000012907be0] [c0000000001e1408] rwsem_down_write_slowpath+0x478/0x9a0
>> [c000000012907ca0] [c000000000576cf4] filename_create+0x94/0x1e0
>> [c000000012907d10] [c00000000057ac08] do_symlinkat+0x68/0x1a0
>> [c000000012907d70] [c00000000057ae18] sys_symlink+0x58/0x70
>> [c000000012907da0] [c00000000002e448] system_call_exception+0x198/0x3c0
>> [c000000012907e10] [c00000000000c54c] system_call_common+0xec/0x250
>> 
>> The result of vcpu_is_preempted() is always subject to invalidation by
>> events inside and outside of Linux; it's just a best guess at a point in
>> time. Use raw_smp_processor_id() to avoid such warnings.
>
> Typically smp_processor_id() and raw_smp_processor_id() except for the
> CONFIG_DEBUG_PREEMPT.

Sorry, I don't follow...

> In the CONFIG_DEBUG_PREEMPT case, smp_processor_id()
> is actually debug_smp_processor_id(), which does all the checks.

Yes, OK.

> I believe these checks in debug_smp_processor_id() are only valid for x86
> case (aka cases were they have __smp_processor_id() defined.)

Hmm, I am under the impression that the checks in
debug_smp_processor_id() are valid regardless of whether the arch
overrides __smp_processor_id().

I think the stack trace here correctly identifies an incorrect use of
smp_processor_id(), and the call site needs to be changed. Do you
disagree?

^ permalink raw reply

* [PATCH v2 2/2] powerpc/powermac: constify device_node in of_irq_parse_oldworld()
From: Krzysztof Kozlowski @ 2021-09-22 16:04 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
	Rob Herring, Frank Rowand, Krzysztof Kozlowski, Christophe Leroy,
	Aneesh Kumar K.V, linuxppc-dev, linux-kernel, devicetree
In-Reply-To: <20210922160436.130931-1-krzysztof.kozlowski@canonical.com>

The of_irq_parse_oldworld() does not modify passed device_node so make
it a pointer to const for safety.  Drop the extern while modifying the
line.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>

---

Changes since v1:
1. Drop extern.
---
 arch/powerpc/platforms/powermac/pic.c | 2 +-
 include/linux/of_irq.h                | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 4921bccf0376..af5ca1f41bb1 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -384,7 +384,7 @@ static void __init pmac_pic_probe_oldstyle(void)
 #endif
 }
 
-int of_irq_parse_oldworld(struct device_node *device, int index,
+int of_irq_parse_oldworld(const struct device_node *device, int index,
 			struct of_phandle_args *out_irq)
 {
 	const u32 *ints = NULL;
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index aaf219bd0354..83fccd0c9bba 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -20,12 +20,12 @@ typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *);
 #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
 extern unsigned int of_irq_workarounds;
 extern struct device_node *of_irq_dflt_pic;
-extern int of_irq_parse_oldworld(struct device_node *device, int index,
-			       struct of_phandle_args *out_irq);
+int of_irq_parse_oldworld(const struct device_node *device, int index,
+			  struct of_phandle_args *out_irq);
 #else /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
 #define of_irq_workarounds (0)
 #define of_irq_dflt_pic (NULL)
-static inline int of_irq_parse_oldworld(struct device_node *device, int index,
+static inline int of_irq_parse_oldworld(const struct device_node *device, int index,
 				      struct of_phandle_args *out_irq)
 {
 	return -EINVAL;
-- 
2.30.2


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox