LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v2] powerpc/powermac: Fix low_sleep_handler with CONFIG_VMAP_STACK
From: Christophe Leroy @ 2020-12-08  5:24 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	giuseppe
  Cc: linuxppc-dev, linux-kernel

low_sleep_handler() can't restore the context from standard
stack because the stack can hardly be accessed with MMU OFF.

Store everything in a global storage area instead of storing
a pointer to the stack in that global storage area.

To avoid a complete churn of the function, still use r1 as
the pointer to the storage area during restore.

Reported-by: Giuseppe Sacco <giuseppe@sguazz.it>
Fixes: cd08f109e262 ("powerpc/32s: Enable CONFIG_VMAP_STACK")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
This is only build tested. Giuseppe can you test it ? Thanks.

v2: Changed an erroneous 'addis' to 'addi' ; Using bss instead of data section
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/platforms/Kconfig.cputype  |   2 +-
 arch/powerpc/platforms/powermac/sleep.S | 132 +++++++++++-------------
 2 files changed, 60 insertions(+), 74 deletions(-)

diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index c194c4ae8bc7..32a9c4c09b98 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -36,7 +36,7 @@ config PPC_BOOK3S_6xx
 	select PPC_HAVE_PMU_SUPPORT
 	select PPC_HAVE_KUEP
 	select PPC_HAVE_KUAP
-	select HAVE_ARCH_VMAP_STACK if !ADB_PMU
+	select HAVE_ARCH_VMAP_STACK
 
 config PPC_85xx
 	bool "Freescale 85xx"
diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S
index 7e0f8ba6e54a..d497a60003d2 100644
--- a/arch/powerpc/platforms/powermac/sleep.S
+++ b/arch/powerpc/platforms/powermac/sleep.S
@@ -44,7 +44,8 @@
 #define SL_TB		0xa0
 #define SL_R2		0xa8
 #define SL_CR		0xac
-#define SL_R12		0xb0	/* r12 to r31 */
+#define SL_LR		0xb0
+#define SL_R12		0xb4	/* r12 to r31 */
 #define SL_SIZE		(SL_R12 + 80)
 
 	.section .text
@@ -63,105 +64,107 @@ _GLOBAL(low_sleep_handler)
 	blr
 #else
 	mflr	r0
-	stw	r0,4(r1)
-	stwu	r1,-SL_SIZE(r1)
+	lis	r11,sleep_storage@ha
+	addi	r11,r11,sleep_storage@l
+	stw	r0,SL_LR(r11)
 	mfcr	r0
-	stw	r0,SL_CR(r1)
-	stw	r2,SL_R2(r1)
-	stmw	r12,SL_R12(r1)
+	stw	r0,SL_CR(r11)
+	stw	r1,SL_SP(r11)
+	stw	r2,SL_R2(r11)
+	stmw	r12,SL_R12(r11)
 
 	/* Save MSR & SDR1 */
 	mfmsr	r4
-	stw	r4,SL_MSR(r1)
+	stw	r4,SL_MSR(r11)
 	mfsdr1	r4
-	stw	r4,SL_SDR1(r1)
+	stw	r4,SL_SDR1(r11)
 
 	/* Get a stable timebase and save it */
 1:	mftbu	r4
-	stw	r4,SL_TB(r1)
+	stw	r4,SL_TB(r11)
 	mftb	r5
-	stw	r5,SL_TB+4(r1)
+	stw	r5,SL_TB+4(r11)
 	mftbu	r3
 	cmpw	r3,r4
 	bne	1b
 
 	/* Save SPRGs */
 	mfsprg	r4,0
-	stw	r4,SL_SPRG0(r1)
+	stw	r4,SL_SPRG0(r11)
 	mfsprg	r4,1
-	stw	r4,SL_SPRG0+4(r1)
+	stw	r4,SL_SPRG0+4(r11)
 	mfsprg	r4,2
-	stw	r4,SL_SPRG0+8(r1)
+	stw	r4,SL_SPRG0+8(r11)
 	mfsprg	r4,3
-	stw	r4,SL_SPRG0+12(r1)
+	stw	r4,SL_SPRG0+12(r11)
 
 	/* Save BATs */
 	mfdbatu	r4,0
-	stw	r4,SL_DBAT0(r1)
+	stw	r4,SL_DBAT0(r11)
 	mfdbatl	r4,0
-	stw	r4,SL_DBAT0+4(r1)
+	stw	r4,SL_DBAT0+4(r11)
 	mfdbatu	r4,1
-	stw	r4,SL_DBAT1(r1)
+	stw	r4,SL_DBAT1(r11)
 	mfdbatl	r4,1
-	stw	r4,SL_DBAT1+4(r1)
+	stw	r4,SL_DBAT1+4(r11)
 	mfdbatu	r4,2
-	stw	r4,SL_DBAT2(r1)
+	stw	r4,SL_DBAT2(r11)
 	mfdbatl	r4,2
-	stw	r4,SL_DBAT2+4(r1)
+	stw	r4,SL_DBAT2+4(r11)
 	mfdbatu	r4,3
-	stw	r4,SL_DBAT3(r1)
+	stw	r4,SL_DBAT3(r11)
 	mfdbatl	r4,3
-	stw	r4,SL_DBAT3+4(r1)
+	stw	r4,SL_DBAT3+4(r11)
 	mfibatu	r4,0
-	stw	r4,SL_IBAT0(r1)
+	stw	r4,SL_IBAT0(r11)
 	mfibatl	r4,0
-	stw	r4,SL_IBAT0+4(r1)
+	stw	r4,SL_IBAT0+4(r11)
 	mfibatu	r4,1
-	stw	r4,SL_IBAT1(r1)
+	stw	r4,SL_IBAT1(r11)
 	mfibatl	r4,1
-	stw	r4,SL_IBAT1+4(r1)
+	stw	r4,SL_IBAT1+4(r11)
 	mfibatu	r4,2
-	stw	r4,SL_IBAT2(r1)
+	stw	r4,SL_IBAT2(r11)
 	mfibatl	r4,2
-	stw	r4,SL_IBAT2+4(r1)
+	stw	r4,SL_IBAT2+4(r11)
 	mfibatu	r4,3
-	stw	r4,SL_IBAT3(r1)
+	stw	r4,SL_IBAT3(r11)
 	mfibatl	r4,3
-	stw	r4,SL_IBAT3+4(r1)
+	stw	r4,SL_IBAT3+4(r11)
 
 BEGIN_MMU_FTR_SECTION
 	mfspr	r4,SPRN_DBAT4U
-	stw	r4,SL_DBAT4(r1)
+	stw	r4,SL_DBAT4(r11)
 	mfspr	r4,SPRN_DBAT4L
-	stw	r4,SL_DBAT4+4(r1)
+	stw	r4,SL_DBAT4+4(r11)
 	mfspr	r4,SPRN_DBAT5U
-	stw	r4,SL_DBAT5(r1)
+	stw	r4,SL_DBAT5(r11)
 	mfspr	r4,SPRN_DBAT5L
-	stw	r4,SL_DBAT5+4(r1)
+	stw	r4,SL_DBAT5+4(r11)
 	mfspr	r4,SPRN_DBAT6U
-	stw	r4,SL_DBAT6(r1)
+	stw	r4,SL_DBAT6(r11)
 	mfspr	r4,SPRN_DBAT6L
-	stw	r4,SL_DBAT6+4(r1)
+	stw	r4,SL_DBAT6+4(r11)
 	mfspr	r4,SPRN_DBAT7U
-	stw	r4,SL_DBAT7(r1)
+	stw	r4,SL_DBAT7(r11)
 	mfspr	r4,SPRN_DBAT7L
-	stw	r4,SL_DBAT7+4(r1)
+	stw	r4,SL_DBAT7+4(r11)
 	mfspr	r4,SPRN_IBAT4U
-	stw	r4,SL_IBAT4(r1)
+	stw	r4,SL_IBAT4(r11)
 	mfspr	r4,SPRN_IBAT4L
-	stw	r4,SL_IBAT4+4(r1)
+	stw	r4,SL_IBAT4+4(r11)
 	mfspr	r4,SPRN_IBAT5U
-	stw	r4,SL_IBAT5(r1)
+	stw	r4,SL_IBAT5(r11)
 	mfspr	r4,SPRN_IBAT5L
-	stw	r4,SL_IBAT5+4(r1)
+	stw	r4,SL_IBAT5+4(r11)
 	mfspr	r4,SPRN_IBAT6U
-	stw	r4,SL_IBAT6(r1)
+	stw	r4,SL_IBAT6(r11)
 	mfspr	r4,SPRN_IBAT6L
-	stw	r4,SL_IBAT6+4(r1)
+	stw	r4,SL_IBAT6+4(r11)
 	mfspr	r4,SPRN_IBAT7U
-	stw	r4,SL_IBAT7(r1)
+	stw	r4,SL_IBAT7(r11)
 	mfspr	r4,SPRN_IBAT7L
-	stw	r4,SL_IBAT7+4(r1)
+	stw	r4,SL_IBAT7+4(r11)
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
 
 	/* Backup various CPU config stuffs */
@@ -180,9 +183,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
 	lis	r5,grackle_wake_up@ha
 	addi	r5,r5,grackle_wake_up@l
 	tophys(r5,r5)
-	stw	r5,SL_PC(r1)
+	stw	r5,SL_PC(r11)
 	lis	r4,KERNELBASE@h
-	tophys(r5,r1)
+	tophys(r5,r11)
 	addi	r5,r5,SL_PC
 	lis	r6,MAGIC@ha
 	addi	r6,r6,MAGIC@l
@@ -194,12 +197,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
 	tophys(r3,r3)
 	stw	r3,0x80(r4)
 	stw	r5,0x84(r4)
-	/* Store a pointer to our backup storage into
-	 * a kernel global
-	 */
-	lis r3,sleep_storage@ha
-	addi r3,r3,sleep_storage@l
-	stw r5,0(r3)
 
 	.globl	low_cpu_offline_self
 low_cpu_offline_self:
@@ -279,7 +276,7 @@ _GLOBAL(core99_wake_up)
 	lis	r3,sleep_storage@ha
 	addi	r3,r3,sleep_storage@l
 	tophys(r3,r3)
-	lwz	r1,0(r3)
+	addi	r1,r3,SL_PC
 
 	/* Pass thru to older resume code ... */
 _ASM_NOKPROBE_SYMBOL(core99_wake_up)
@@ -399,13 +396,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
 	blt	1b
 	sync
 
-	/* restore the MSR and turn on the MMU */
-	lwz	r3,SL_MSR(r1)
-	bl	turn_on_mmu
-
-	/* get back the stack pointer */
-	tovirt(r1,r1)
-
 	/* Restore TB */
 	li	r3,0
 	mttbl	r3
@@ -419,28 +409,24 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
 	mtcr	r0
 	lwz	r2,SL_R2(r1)
 	lmw	r12,SL_R12(r1)
-	addi	r1,r1,SL_SIZE
-	lwz	r0,4(r1)
-	mtlr	r0
-	blr
-_ASM_NOKPROBE_SYMBOL(grackle_wake_up)
 
-turn_on_mmu:
-	mflr	r4
-	tovirt(r4,r4)
+	/* restore the MSR and SP and turn on the MMU and return */
+	lwz	r3,SL_MSR(r1)
+	lwz	r4,SL_LR(r1)
+	lwz	r1,SL_SP(r1)
 	mtsrr0	r4
 	mtsrr1	r3
 	sync
 	isync
 	rfi
-_ASM_NOKPROBE_SYMBOL(turn_on_mmu)
+_ASM_NOKPROBE_SYMBOL(grackle_wake_up)
 
 #endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */
 
-	.section .data
+	.section .bss
 	.balign	L1_CACHE_BYTES
 sleep_storage:
-	.long 0
+	.space SL_SIZE
 	.balign	L1_CACHE_BYTES, 0
 
 #endif /* CONFIG_PPC_BOOK3S_32 */
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH 00/20] ethernet: ucc_geth: assorted fixes and simplifications
From: Rasmus Villemoes @ 2020-12-08  8:13 UTC (permalink / raw)
  To: Qiang Zhao, Jakub Kicinski
  Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org, Leo Li,
	Vladimir Oltean, linuxppc-dev@lists.ozlabs.org, David S. Miller,
	linux-arm-kernel@lists.infradead.org
In-Reply-To: <VE1PR04MB676805F3EEDF86A8BE370F8691CD0@VE1PR04MB6768.eurprd04.prod.outlook.com>

On 08/12/2020 04.07, Qiang Zhao wrote:
> On 06/12/2020 05:12, Rasmus Villemoes <rasmus.villemoes@prevas.dk> wrote:
> 

>> I think patch 2 is a bug fix as well, but I'd like someone from NXP to comment.
> 
> It 's ok for me.

I was hoping for something a bit more than that. Can you please go check
with the people who made the hardware and those who wrote the manual
(probably not the same ones) what is actually up and down, and then
report on what they said.

It's fairly obvious that allocating 192 bytes instead of 128 should
never hurt (unless we run out of muram), but it would be nice with an
official "Yes, table 8-111 is wrong, it should say 192", or
alternatively, "No, table 8-53 is wrong, those MTU etc. fields don't
really exist". Extra points for providing details such as "first
revision of the IP had $foo, but that was never shipped in real
products, then $bar was changed", etc.

Thanks,
Rasmus

^ permalink raw reply

* [PATCH v3 4/5] powerpc/fault: Avoid heavy search_exception_tables() verification
From: Christophe Leroy @ 2020-12-08  8:37 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin,
	aneesh.kumar
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <0d37490a067840f53fc5b118869917c0aec9ab87.1607416578.git.christophe.leroy@csgroup.eu>

search_exception_tables() is an heavy operation, we have to avoid it.
When KUAP is selected, we'll know the fault has been blocked by KUAP.
Otherwise, it behaves just as if the address was already in the TLBs
and no fault was generated.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
---
v3: rebased
v2: Squashed with the preceeding patch which was re-ordering tests that get removed in this patch.
---
 arch/powerpc/mm/fault.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 3fcd34c28e10..1770b41e4730 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -210,28 +210,19 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
 		return true;
 	}
 
-	if (!is_exec && address < TASK_SIZE && (error_code & (DSISR_PROTFAULT | DSISR_KEYFAULT)) &&
-	    !search_exception_tables(regs->nip)) {
-		pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
-				    address,
-				    from_kuid(&init_user_ns, current_uid()));
-	}
-
 	// Kernel fault on kernel address is bad
 	if (address >= TASK_SIZE)
 		return true;
 
-	// Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
-	if (!search_exception_tables(regs->nip))
-		return true;
-
-	// Read/write fault in a valid region (the exception table search passed
-	// above), but blocked by KUAP is bad, it can never succeed.
-	if (bad_kuap_fault(regs, address, is_write))
+	// Read/write fault blocked by KUAP is bad, it can never succeed.
+	if (bad_kuap_fault(regs, address, is_write)) {
+		pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
+				    is_write ? "write" : "read", address,
+				    from_kuid(&init_user_ns, current_uid()));
 		return true;
+	}
 
-	// What's left? Kernel fault on user in well defined regions (extable
-	// matched), and allowed by KUAP in the faulting context.
+	// What's left? Kernel fault on user and allowed by KUAP in the faulting context.
 	return false;
 }
 
-- 
2.25.0


^ permalink raw reply related

* [PATCH v3 2/5] powerpc/mm: sanity_check_fault() should work for all,  not only BOOK3S
From: Christophe Leroy @ 2020-12-08  8:36 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin,
	aneesh.kumar
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <0d37490a067840f53fc5b118869917c0aec9ab87.1607416578.git.christophe.leroy@csgroup.eu>

The verification and message introduced by commit 374f3f5979f9
("powerpc/mm/hash: Handle user access of kernel address gracefully")
applies to all platforms, it should not be limited to BOOK3S.

Make the BOOK3S version of sanity_check_fault() the one for all,
and bail out earlier if not BOOK3S.

Fixes: 374f3f5979f9 ("powerpc/mm/hash: Handle user access of kernel address gracefully")
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/mm/fault.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index b12595102525..f6ae56a0d7a3 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -303,7 +303,6 @@ static inline void cmo_account_page_fault(void)
 static inline void cmo_account_page_fault(void) { }
 #endif /* CONFIG_PPC_SMLPAR */
 
-#ifdef CONFIG_PPC_BOOK3S
 static void sanity_check_fault(bool is_write, bool is_user,
 			       unsigned long error_code, unsigned long address)
 {
@@ -320,6 +319,9 @@ static void sanity_check_fault(bool is_write, bool is_user,
 		return;
 	}
 
+	if (!IS_ENABLED(CONFIG_PPC_BOOK3S))
+		return;
+
 	/*
 	 * For hash translation mode, we should never get a
 	 * PROTFAULT. Any update to pte to reduce access will result in us
@@ -354,10 +356,6 @@ static void sanity_check_fault(bool is_write, bool is_user,
 
 	WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
 }
-#else
-static void sanity_check_fault(bool is_write, bool is_user,
-			       unsigned long error_code, unsigned long address) { }
-#endif /* CONFIG_PPC_BOOK3S */
 
 /*
  * Define the correct "is_write" bit in error_code based
-- 
2.25.0


^ permalink raw reply related

* [PATCH v3 5/5] powerpc/fault: Perform exception fixup in do_page_fault()
From: Christophe Leroy @ 2020-12-08  8:37 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin,
	aneesh.kumar
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <0d37490a067840f53fc5b118869917c0aec9ab87.1607416578.git.christophe.leroy@csgroup.eu>

Exception fixup doesn't require the heady full regs saving,
do it from do_page_fault() directly.

For that, split bad_page_fault() in two parts.

As bad_page_fault() can also be called from other places than
handle_page_fault(), it will still perform exception fixup and
fallback on __bad_page_fault().

handle_page_fault() directly calls __bad_page_fault() as the
exception fixup will now be done by do_page_fault()

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
v2: Add prototype of __bad_page_fault() in asm/bug.h
---
 arch/powerpc/include/asm/bug.h       |  1 +
 arch/powerpc/kernel/entry_32.S       |  2 +-
 arch/powerpc/kernel/exceptions-64e.S |  2 +-
 arch/powerpc/kernel/exceptions-64s.S |  2 +-
 arch/powerpc/mm/fault.c              | 33 ++++++++++++++++++++--------
 5 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index ba0500872cce..464f8ca8a5c9 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -113,6 +113,7 @@
 struct pt_regs;
 extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
 extern void bad_page_fault(struct pt_regs *, unsigned long, int);
+void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig);
 extern void _exception(int, struct pt_regs *, int, unsigned long);
 extern void _exception_pkey(struct pt_regs *, unsigned long, int);
 extern void die(const char *, struct pt_regs *, long);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 58177c71dfd4..1c9b0ccc2172 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -684,7 +684,7 @@ handle_page_fault:
 	mr	r5,r3
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	lwz	r4,_DAR(r1)
-	bl	bad_page_fault
+	bl	__bad_page_fault
 	b	ret_from_except_full
 
 #ifdef CONFIG_PPC_BOOK3S_32
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index f579ce46eef2..74d07dc0bb48 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -1023,7 +1023,7 @@ storage_fault_common:
 	mr	r5,r3
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r4,_DAR(r1)
-	bl	bad_page_fault
+	bl	__bad_page_fault
 	b	ret_from_except
 
 /*
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1c8f1b90e174..e02ad6fefa46 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -3259,7 +3259,7 @@ handle_page_fault:
 	mr	r5,r3
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r4,_DAR(r1)
-	bl	bad_page_fault
+	bl	__bad_page_fault
 	b	interrupt_return
 
 /* We have a data breakpoint exception - handle it */
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 1770b41e4730..2e50bc1c3783 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -538,10 +538,20 @@ NOKPROBE_SYMBOL(__do_page_fault);
 int do_page_fault(struct pt_regs *regs, unsigned long address,
 		  unsigned long error_code)
 {
+	const struct exception_table_entry *entry;
 	enum ctx_state prev_state = exception_enter();
 	int rc = __do_page_fault(regs, address, error_code);
 	exception_exit(prev_state);
-	return rc;
+	if (likely(!rc))
+		return 0;
+
+	entry = search_exception_tables(regs->nip);
+	if (unlikely(!entry))
+		return rc;
+
+	instruction_pointer_set(regs, extable_fixup(entry));
+
+	return 0;
 }
 NOKPROBE_SYMBOL(do_page_fault);
 
@@ -550,17 +560,10 @@ NOKPROBE_SYMBOL(do_page_fault);
  * It is called from the DSI and ISI handlers in head.S and from some
  * of the procedures in traps.c.
  */
-void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
+void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 {
-	const struct exception_table_entry *entry;
 	int is_write = page_fault_is_write(regs->dsisr);
 
-	/* Are we prepared to handle this fault?  */
-	if ((entry = search_exception_tables(regs->nip)) != NULL) {
-		regs->nip = extable_fixup(entry);
-		return;
-	}
-
 	/* kernel has accessed a bad area */
 
 	switch (TRAP(regs)) {
@@ -594,3 +597,15 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 
 	die("Kernel access of bad area", regs, sig);
 }
+
+void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
+{
+	const struct exception_table_entry *entry;
+
+	/* Are we prepared to handle this fault?  */
+	entry = search_exception_tables(instruction_pointer(regs));
+	if (entry)
+		instruction_pointer_set(regs, extable_fixup(entry));
+	else
+		__bad_page_fault(regs, address, sig);
+}
-- 
2.25.0


^ permalink raw reply related

* [PATCH v3 3/5] powerpc/fault: Unnest definition of page_fault_is_write() and page_fault_is_bad()
From: Christophe Leroy @ 2020-12-08  8:37 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin,
	aneesh.kumar
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <0d37490a067840f53fc5b118869917c0aec9ab87.1607416578.git.christophe.leroy@csgroup.eu>

To make it more readable, separate page_fault_is_write() and page_fault_is_bad()
to avoir several levels of #ifdefs

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/mm/fault.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index f6ae56a0d7a3..3fcd34c28e10 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -363,17 +363,19 @@ static void sanity_check_fault(bool is_write, bool is_user,
  */
 #if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
 #define page_fault_is_write(__err)	((__err) & ESR_DST)
-#define page_fault_is_bad(__err)	(0)
 #else
 #define page_fault_is_write(__err)	((__err) & DSISR_ISSTORE)
-#if defined(CONFIG_PPC_8xx)
+#endif
+
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+#define page_fault_is_bad(__err)	(0)
+#elif defined(CONFIG_PPC_8xx)
 #define page_fault_is_bad(__err)	((__err) & DSISR_NOEXEC_OR_G)
 #elif defined(CONFIG_PPC64)
 #define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_64S)
 #else
 #define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_32S)
 #endif
-#endif
 
 /*
  * For 600- and 800-family processors, the error_code parameter is DSISR
-- 
2.25.0


^ permalink raw reply related

* [PATCH v3 1/5] powerpc/book3s64/kuap: Improve error reporting with KUAP
From: Christophe Leroy @ 2020-12-08  8:36 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin,
	aneesh.kumar
  Cc: linuxppc-dev, linux-kernel

From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

This partially reverts commit eb232b162446 ("powerpc/book3s64/kuap: Improve
error reporting with KUAP") and update the fault handler to print

[   55.022514] Kernel attempted to access user page (7e6725b70000) - exploit attempt? (uid: 0)
[   55.022528] BUG: Unable to handle kernel data access on read at 0x7e6725b70000
[   55.022533] Faulting instruction address: 0xc000000000e8b9bc
[   55.022540] Oops: Kernel access of bad area, sig: 11 [#1]
....

when the kernel access userspace address without unlocking AMR.

bad_kuap_fault() is added as part of commit 5e5be3aed230 ("powerpc/mm: Detect
bad KUAP faults") to catch userspace access incorrectly blocked by AMR. Hence
retain the full stack dump there even with hash translation. Also, add a comment
explaining the difference between hash and radix.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/book3s/32/kup.h     |  4 +--
 arch/powerpc/include/asm/book3s/64/kup.h     | 34 ++++++++++----------
 arch/powerpc/include/asm/kup.h               |  4 +--
 arch/powerpc/include/asm/nohash/32/kup-8xx.h |  4 +--
 arch/powerpc/mm/fault.c                      |  4 +--
 5 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index b18cd931e325..32fd4452e960 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -177,8 +177,8 @@ static inline void restore_user_access(unsigned long flags)
 		allow_user_access(to, to, end - addr, KUAP_READ_WRITE);
 }
 
-static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
-				  bool is_write, unsigned long error_code)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
 	unsigned long begin = regs->kuap & 0xf0000000;
 	unsigned long end = regs->kuap << 28;
diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
index f2e6dd78d5e2..7075c92c320c 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -353,29 +353,29 @@ static inline void set_kuap(unsigned long value)
 	isync();
 }
 
-#define RADIX_KUAP_BLOCK_READ	UL(0x4000000000000000)
-#define RADIX_KUAP_BLOCK_WRITE	UL(0x8000000000000000)
-
 static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
-				  bool is_write, unsigned long error_code)
+				  bool is_write)
 {
 	if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
 		return false;
-
-	if (radix_enabled()) {
-		/*
-		 * Will be a storage protection fault.
-		 * Only check the details of AMR[0]
-		 */
-		return WARN((regs->kuap & (is_write ? RADIX_KUAP_BLOCK_WRITE : RADIX_KUAP_BLOCK_READ)),
-			    "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
-	}
 	/*
-	 * We don't want to WARN here because userspace can setup
-	 * keys such that a kernel access to user address can cause
-	 * fault
+	 * For radix this will be a storage protection fault (DSISR_PROTFAULT).
+	 * For hash this will be a key fault (DSISR_KEYFAULT)
 	 */
-	return !!(error_code & DSISR_KEYFAULT);
+	/*
+	 * We do have exception table entry, but accessing the
+	 * userspace results in fault.  This could be because we
+	 * didn't unlock the AMR or access is denied by userspace
+	 * using a key value that blocks access. We are only interested
+	 * in catching the use case of accessing without unlocking
+	 * the AMR. Hence check for BLOCK_WRITE/READ against AMR.
+	 */
+	if (is_write) {
+		return WARN(((regs->amr & AMR_KUAP_BLOCK_WRITE) == AMR_KUAP_BLOCK_WRITE),
+			    "Bug: Write fault blocked by AMR!");
+	}
+	return WARN(((regs->amr & AMR_KUAP_BLOCK_READ) == AMR_KUAP_BLOCK_READ),
+		    "Bug: Read fault blocked by AMR!");
 }
 
 static __always_inline void allow_user_access(void __user *to, const void __user *from,
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index f8ec679bd2de..5a9820c54da9 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -62,8 +62,8 @@ void setup_kuap(bool disabled);
 #else
 static inline void setup_kuap(bool disabled) { }
 
-static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
-				  bool is_write, unsigned long error_code)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
 	return false;
 }
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 7bdd9e5b63ed..567cdc557402 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -60,8 +60,8 @@ static inline void restore_user_access(unsigned long flags)
 	mtspr(SPRN_MD_AP, flags);
 }
 
-static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
-				  bool is_write, unsigned long error_code)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
 	return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff000000),
 		    "Bug: fault blocked by AP register !");
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index c91621df0c61..b12595102525 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -210,7 +210,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
 		return true;
 	}
 
-	if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) &&
+	if (!is_exec && address < TASK_SIZE && (error_code & (DSISR_PROTFAULT | DSISR_KEYFAULT)) &&
 	    !search_exception_tables(regs->nip)) {
 		pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
 				    address,
@@ -227,7 +227,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
 
 	// Read/write fault in a valid region (the exception table search passed
 	// above), but blocked by KUAP is bad, it can never succeed.
-	if (bad_kuap_fault(regs, address, is_write, error_code))
+	if (bad_kuap_fault(regs, address, is_write))
 		return true;
 
 	// What's left? Kernel fault on user in well defined regions (extable
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH v2] powerpc/powermac: Fix low_sleep_handler with CONFIG_VMAP_STACK
From: Giuseppe Sacco @ 2020-12-08  8:30 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <e3e0d8042a3ba75cb4a9546c19c408b5b5b28994.1607404931.git.christophe.leroy@csgroup.eu>

Hello Christophe,

Il giorno mar, 08/12/2020 alle 05.24 +0000, Christophe Leroy ha
scritto:
> low_sleep_handler() can't restore the context from standard
> stack because the stack can hardly be accessed with MMU OFF.
> 
> Store everything in a global storage area instead of storing
> a pointer to the stack in that global storage area.
> 
> To avoid a complete churn of the function, still use r1 as
> the pointer to the storage area during restore.
> 
> Reported-by: Giuseppe Sacco <giuseppe@sguazz.it>
> Fixes: cd08f109e262 ("powerpc/32s: Enable CONFIG_VMAP_STACK")
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
> This is only build tested. Giuseppe can you test it ? Thanks.
> 
> v2: Changed an erroneous 'addis' to 'addi' ; Using bss instead of
> data section
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
>  arch/powerpc/platforms/Kconfig.cputype  |   2 +-
>  arch/powerpc/platforms/powermac/sleep.S | 132 +++++++++++-----------
> --
>  2 files changed, 60 insertions(+), 74 deletions(-)
[...]

I just tested the v2 patch against latest kernel. Please note that
yesterday patch was on a kernel named 5.10.0-rc6+, while today's patch
is on a kernel named 5.10.0-rc7+. Even with the latest kernel updates,
the patch apply correctly and machine boots normally, fixing the
problem.

$ uname -a
Linux titanium4 5.10.0-rc7+ #3 Tue Dec 8 09:11:20 CET 2020 ppc GNU/Linux
$ grep VMAP /boot/config-5.10.0-rc7+
CONFIG_HAVE_ARCH_VMAP_STACK=y
CONFIG_VMAP_STACK=y

Thank you again.

Bye,
Giuseppe



^ permalink raw reply

* Re: [PATCH] EDAC/mv64x60: Remove orphan mv64x60 driver
From: Michael Ellerman @ 2020-12-08  9:02 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: tony.luck, rric, linux-kernel, linuxppc-dev, james.morse, mchehab,
	linux-edac
In-Reply-To: <20201207111727.GC20489@zn.tnic>

Borislav Petkov <bp@alien8.de> writes:
> On Mon, Dec 07, 2020 at 03:02:53PM +1100, Michael Ellerman wrote:
>> The mv64x60 EDAC driver depends on CONFIG_MV64X60. But that symbol is
>> not user-selectable, and the last code that selected it was removed
>> with the C2K board support in 2018, see:
>> 
>>   92c8c16f3457 ("powerpc/embedded6xx: Remove C2K board support")
>> 
>> That means the driver is now dead code, so remove it.
>> 
>> Suggested-by: Borislav Petkov <bp@alien8.de>
>> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
>> ---
>>  drivers/edac/Kconfig        |   7 -
>>  drivers/edac/Makefile       |   1 -
>>  drivers/edac/mv64x60_edac.c | 883 ------------------------------------
>>  drivers/edac/mv64x60_edac.h | 114 -----
>>  4 files changed, 1005 deletions(-)
>>  delete mode 100644 drivers/edac/mv64x60_edac.c
>>  delete mode 100644 drivers/edac/mv64x60_edac.h
>
> Gladly taken and applied, thanks!

Thanks.

cheers

^ permalink raw reply

* linux-next: manual merge of the akpm-current tree with the powerpc tree
From: Stephen Rothwell @ 2020-12-08  9:40 UTC (permalink / raw)
  To: Andrew Morton, Michael Ellerman, PowerPC
  Cc: Ganesh Goudar, Linux Next Mailing List, Francis Laniel,
	Linux Kernel Mailing List, Mahesh Salgaonkar

[-- Attachment #1: Type: text/plain, Size: 2145 bytes --]

Hi all,

Today's linux-next merge of the akpm-current tree got conflicts in:

  drivers/misc/lkdtm/Makefile
  drivers/misc/lkdtm/lkdtm.h
  tools/testing/selftests/lkdtm/tests.txt

between commit:

  3ba150fb2120 ("lkdtm/powerpc: Add SLB multihit test")

from the powerpc tree and commit:

  014a486edd8a ("drivers/misc/lkdtm: add new file in LKDTM to test fortified strscpy")

from the akpm-current tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/misc/lkdtm/Makefile
index 5a92c74eca92,d898f7b22045..000000000000
--- a/drivers/misc/lkdtm/Makefile
+++ b/drivers/misc/lkdtm/Makefile
@@@ -10,7 -10,7 +10,8 @@@ lkdtm-$(CONFIG_LKDTM)		+= rodata_objcop
  lkdtm-$(CONFIG_LKDTM)		+= usercopy.o
  lkdtm-$(CONFIG_LKDTM)		+= stackleak.o
  lkdtm-$(CONFIG_LKDTM)		+= cfi.o
+ lkdtm-$(CONFIG_LKDTM)		+= fortify.o
 +lkdtm-$(CONFIG_PPC_BOOK3S_64)	+= powerpc.o
  
  KASAN_SANITIZE_stackleak.o	:= n
  KCOV_INSTRUMENT_rodata.o	:= n
diff --cc drivers/misc/lkdtm/lkdtm.h
index 79ec05c18dd1,6aa6d6a1a839..000000000000
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@@ -102,7 -104,7 +104,10 @@@ void lkdtm_STACKLEAK_ERASING(void)
  /* cfi.c */
  void lkdtm_CFI_FORWARD_PROTO(void);
  
+ /* fortify.c */
+ void lkdtm_FORTIFIED_STRSCPY(void);
+ 
 +/* powerpc.c */
 +void lkdtm_PPC_SLB_MULTIHIT(void);
 +
  #endif
diff --cc tools/testing/selftests/lkdtm/tests.txt
index 18e4599863c0,92ba4cc41314..000000000000
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@@ -68,4 -68,4 +68,5 @@@ USERCOPY_STACK_BEYON
  USERCOPY_KERNEL
  STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
  CFI_FORWARD_PROTO
+ FORTIFIED_STRSCPY
 +PPC_SLB_MULTIHIT Recovered

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH] powerpc/mce: Remove per cpu variables from MCE handlers
From: Michael Ellerman @ 2020-12-08 10:31 UTC (permalink / raw)
  To: Ganesh Goudar, linuxppc-dev; +Cc: Ganesh Goudar, mahesh, npiggin
In-Reply-To: <20201204102310.76213-1-ganeshgr@linux.ibm.com>

Ganesh Goudar <ganeshgr@linux.ibm.com> writes:
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index 9454d29ff4b4..4769954efa7d 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -273,6 +274,17 @@ struct paca_struct {
>  #ifdef CONFIG_MMIOWB
>  	struct mmiowb_state mmiowb_state;
>  #endif
> +#ifdef CONFIG_PPC_BOOK3S_64
> +	int mce_nest_count;
> +	struct machine_check_event mce_event[MAX_MC_EVT];
> +	/* Queue for delayed MCE events. */
> +	int mce_queue_count;
> +	struct machine_check_event mce_event_queue[MAX_MC_EVT];
> +
> +	/* Queue for delayed MCE UE events. */
> +	int mce_ue_count;
> +	struct machine_check_event  mce_ue_event_queue[MAX_MC_EVT];
> +#endif /* CONFIG_PPC_BOOK3S_64 */
>  } ____cacheline_aligned;

How much does this expand the paca by?

pahole can tell you.

cheers

^ permalink raw reply

* Re: [PATCH] powerpc/mce: Remove per cpu variables from MCE handlers
From: Ganesh @ 2020-12-08 10:46 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: mahesh, npiggin
In-Reply-To: <871rg0twpw.fsf@mpe.ellerman.id.au>


On 12/8/20 4:01 PM, Michael Ellerman wrote:
> Ganesh Goudar <ganeshgr@linux.ibm.com> writes:
>> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
>> index 9454d29ff4b4..4769954efa7d 100644
>> --- a/arch/powerpc/include/asm/paca.h
>> +++ b/arch/powerpc/include/asm/paca.h
>> @@ -273,6 +274,17 @@ struct paca_struct {
>>   #ifdef CONFIG_MMIOWB
>>   	struct mmiowb_state mmiowb_state;
>>   #endif
>> +#ifdef CONFIG_PPC_BOOK3S_64
>> +	int mce_nest_count;
>> +	struct machine_check_event mce_event[MAX_MC_EVT];
>> +	/* Queue for delayed MCE events. */
>> +	int mce_queue_count;
>> +	struct machine_check_event mce_event_queue[MAX_MC_EVT];
>> +
>> +	/* Queue for delayed MCE UE events. */
>> +	int mce_ue_count;
>> +	struct machine_check_event  mce_ue_event_queue[MAX_MC_EVT];
>> +#endif /* CONFIG_PPC_BOOK3S_64 */
>>   } ____cacheline_aligned;
> How much does this expand the paca by?

Size of paca is 4480 bytes, these add up another 2160 bytes, so expands it by 48%.


^ permalink raw reply

* Re: linux-next: build warning after merge of the akpm tree
From: Stephen Rothwell @ 2020-12-08 12:01 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Kees Cook, Mathieu Malaterre, Linux Kernel Mailing List,
	Nicholas Piggin, Linux Next Mailing List, PowerPC
In-Reply-To: <20201204210000.660293c6@canb.auug.org.au>

[-- Attachment #1: Type: text/plain, Size: 2154 bytes --]

Hi Stephen,

On Fri, 4 Dec 2020 21:00:00 +1100 Stephen Rothwell <sfr@canb.auug.org.au> wrote:
>
> Hi all,
> 
> After merging the akpm tree, today's linux-next build (powerpc
> allyesconfig) produced warnings like this:
> 
> ld: warning: orphan section `.data..Lubsan_data177' from `arch/powerpc/oprofile/op_model_pa6t.o' being placed in section `.data..Lubsan_data177'
> 
> (lots of these latter ones)

781584 of them today!

> I don't know what produced these, but it is in the akpm-current or
> akpm trees.

Presumably the result of commit

  186c3e18dba3 ("ubsan: enable for all*config builds")

from the akpm-current tree.

arch/powerpc/kernel/vmlinux.lds.S has:

#ifdef CONFIG_PPC32
        .data : AT(ADDR(.data) - LOAD_OFFSET) {
                DATA_DATA
#ifdef CONFIG_UBSAN
                *(.data..Lubsan_data*)
                *(.data..Lubsan_type*)
#endif
                *(.data.rel*)
                *(SDATA_MAIN)

added by commit

  beba24ac5913 ("powerpc/32: Add .data..Lubsan_data*/.data..Lubsan_type* sections explicitly")

in 2018, but no equivalent for 64 bit.

I will try the following patch tomorrow:

From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 8 Dec 2020 22:58:24 +1100
Subject: [PATCH] powerpc: Add .data..Lubsan_data*/.data..Lubsan_type* sections explicitly

Similarly to commit

  beba24ac5913 ("powerpc/32: Add .data..Lubsan_data*/.data..Lubsan_type* sections explicitly")

since CONFIG_UBSAN bits can now be enabled for all*config.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/kernel/vmlinux.lds.S | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 3b4c26e94328..0318ba436f34 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -296,6 +296,10 @@ SECTIONS
 #else
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {
 		DATA_DATA
+#ifdef CONFIG_UBSAN
+		*(.data..Lubsan_data*)
+		*(.data..Lubsan_type*)
+#endif
 		*(.data.rel*)
 		*(.toc1)
 		*(.branch_lt)
-- 
2.29.2

-- 
Cheers,
Stephen Rothwell

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply related

* Re: [PATCH] powerpc/mce: Remove per cpu variables from MCE handlers
From: Mahesh Jagannath Salgaonkar @ 2020-12-08 12:04 UTC (permalink / raw)
  To: Ganesh, Michael Ellerman, linuxppc-dev; +Cc: npiggin
In-Reply-To: <a514db98-6090-467a-74ae-9c7b4337d0c1@linux.ibm.com>

On 12/8/20 4:16 PM, Ganesh wrote:
> 
> On 12/8/20 4:01 PM, Michael Ellerman wrote:
>> Ganesh Goudar <ganeshgr@linux.ibm.com> writes:
>>> diff --git a/arch/powerpc/include/asm/paca.h
>>> b/arch/powerpc/include/asm/paca.h
>>> index 9454d29ff4b4..4769954efa7d 100644
>>> --- a/arch/powerpc/include/asm/paca.h
>>> +++ b/arch/powerpc/include/asm/paca.h
>>> @@ -273,6 +274,17 @@ struct paca_struct {
>>>   #ifdef CONFIG_MMIOWB
>>>       struct mmiowb_state mmiowb_state;
>>>   #endif
>>> +#ifdef CONFIG_PPC_BOOK3S_64
>>> +    int mce_nest_count;
>>> +    struct machine_check_event mce_event[MAX_MC_EVT];
>>> +    /* Queue for delayed MCE events. */
>>> +    int mce_queue_count;
>>> +    struct machine_check_event mce_event_queue[MAX_MC_EVT];
>>> +
>>> +    /* Queue for delayed MCE UE events. */
>>> +    int mce_ue_count;
>>> +    struct machine_check_event  mce_ue_event_queue[MAX_MC_EVT];
>>> +#endif /* CONFIG_PPC_BOOK3S_64 */
>>>   } ____cacheline_aligned;
>> How much does this expand the paca by?
> 
> Size of paca is 4480 bytes, these add up another 2160 bytes, so expands
> it by 48%.
> 

Should we dynamically allocate the array sizes early as similar to that
of paca->mce_faulty_slbs so that we don't bump up paca size ?

Thanks,
-Mahesh.

^ permalink raw reply

* Re: [RFC][PATCH 1/2] libnvdimm: Introduce ND_CMD_GET_STAT to retrieve nvdimm statistics
From: Peter Zijlstra @ 2020-12-08 13:00 UTC (permalink / raw)
  To: Dan Williams
  Cc: Santosh Sivaraj, Ira Weiny, linux-nvdimm,
	Arnaldo Carvalho de Melo, Ingo Molnar, Aneesh Kumar K . V,
	Vaibhav Jain, linuxppc-dev
In-Reply-To: <CAPcyv4h0PAPyYoea2oxqw_mOZ-Ec-o1MwcdSN0gf5UXqZqjafQ@mail.gmail.com>

On Mon, Dec 07, 2020 at 04:54:21PM -0800, Dan Williams wrote:
> [ add perf maintainers ]
> 
> On Sun, Nov 8, 2020 at 1:16 PM Vaibhav Jain <vaibhav@linux.ibm.com> wrote:
> >
> > Implement support for exposing generic nvdimm statistics via newly
> > introduced dimm-command ND_CMD_GET_STAT that can be handled by nvdimm
> > command handler function and provide values for these statistics back
> > to libnvdimm. Following generic nvdimm statistics are defined as an
> > enumeration in 'uapi/ndctl.h':
> >
> > * "media_reads" : Number of media reads that have occurred since reboot.
> > * "media_writes" : Number of media writes that have occurred since reboot.
> > * "read_requests" : Number of read requests that have occurred since reboot.
> > * "write_requests" : Number of write requests that have occurred since reboot.
> 
> Perhaps document these as "since device reset"? As I can imagine some
> devices might have a mechanism to reset the count outside of "reboot"
> which is a bit ambiguous.
> 
> > * "total_media_reads" : Total number of media reads that have occurred.
> > * "total_media_writes" : Total number of media writes that have occurred.
> > * "total_read_requests" : Total number of read requests that have occurred.
> > * "total_write_requests" : Total number of write requests that have occurred.
> >
> > Apart from ND_CMD_GET_STAT ioctl these nvdimm statistics are also
> > exposed via sysfs '<nvdimm-device>/stats' directory for easy user-space
> > access like below:
> >
> > /sys/class/nd/ndctl0/device/nmem0/stats # tail -n +1 *
> > ==> media_reads <==
> > 252197707602
> > ==> media_writes <==
> > 20684685172
> > ==> read_requests <==
> > 658810924962
> > ==> write_requests <==
> > 404464081574
> 
> Hmm, I haven't looked but how hard would it be to plumb these to be
> perf counter-events. So someone could combine these with other perf
> counters?
> 
> > In case a specific nvdimm-statistic is not supported than nvdimm
> > command handler function can simply return an error (e.g -ENOENT) for
> > request to read that nvdimm-statistic.
> 
> Makes sense, but I expect the perf route also has a way to enumerate
> which statistics / counters are supported. I'm not opposed to also
> having them in sysfs, but I think perf support should be a first class
> citizen.

arch/x86/events/msr.c might be a good starting point for a software pmu
delivering pure counters.

^ permalink raw reply

* Re: [PATCH v3 4/5] powerpc/fault: Avoid heavy search_exception_tables() verification
From: Aneesh Kumar K.V @ 2020-12-08 13:00 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <731bdee26a5a5c81cd815ed624a6fb3bdef8b4db.1607416578.git.christophe.leroy@csgroup.eu>

On 12/8/20 2:07 PM, Christophe Leroy wrote:
> search_exception_tables() is an heavy operation, we have to avoid it.
> When KUAP is selected, we'll know the fault has been blocked by KUAP.
> Otherwise, it behaves just as if the address was already in the TLBs
> and no fault was generated.
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> v3: rebased
> v2: Squashed with the preceeding patch which was re-ordering tests that get removed in this patch.
> ---
>   arch/powerpc/mm/fault.c | 23 +++++++----------------
>   1 file changed, 7 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index 3fcd34c28e10..1770b41e4730 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -210,28 +210,19 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
>   		return true;
>   	}
>   
> -	if (!is_exec && address < TASK_SIZE && (error_code & (DSISR_PROTFAULT | DSISR_KEYFAULT)) &&
> -	    !search_exception_tables(regs->nip)) {
> -		pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
> -				    address,
> -				    from_kuid(&init_user_ns, current_uid()));
> -	}
> -
>   	// Kernel fault on kernel address is bad
>   	if (address >= TASK_SIZE)
>   		return true;
>   
> -	// Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
> -	if (!search_exception_tables(regs->nip))
> -		return true;
> -
> -	// Read/write fault in a valid region (the exception table search passed
> -	// above), but blocked by KUAP is bad, it can never succeed.
> -	if (bad_kuap_fault(regs, address, is_write))
> +	// Read/write fault blocked by KUAP is bad, it can never succeed.
> +	if (bad_kuap_fault(regs, address, is_write)) {
> +		pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
> +				    is_write ? "write" : "read", address,
> +				    from_kuid(&init_user_ns, current_uid()));
>   		return true;
> +	}


Should we update bad_kuap_fault to check for !is_kernel_addr() and 
error_code & (DSISIR_PROT_FAULT | DSISIR_KEYFAULT). I am wondering 
whether we can take another fault w.r.t kernel address/user address and 
end up reporting that as KUAP fault?

>   
> -	// What's left? Kernel fault on user in well defined regions (extable
> -	// matched), and allowed by KUAP in the faulting context.
> +	// What's left? Kernel fault on user and allowed by KUAP in the faulting context.
>   	return false;
>   }
>   
> 


-aneesh

^ permalink raw reply

* Re: [PATCH] powerpc/book3s_hv_uvmem: Check for failed page migration
From: Ram Pai @ 2020-12-08 14:20 UTC (permalink / raw)
  To: Alistair Popple; +Cc: linuxppc-dev, bharata
In-Reply-To: <20201203050812.5234-1-alistair@popple.id.au>

On Thu, Dec 03, 2020 at 04:08:12PM +1100, Alistair Popple wrote:
> migrate_vma_pages() may still clear MIGRATE_PFN_MIGRATE on pages which
> are not able to be migrated. Drivers may safely copy data prior to
> calling migrate_vma_pages() however a remote mapping must not be
> established until after migrate_vma_pages() has returned as the
> migration could still fail.
> 
> UV_PAGE_IN_in both copies and maps the data page, therefore it should
> only be called after checking the results of migrate_vma_pages().
> 
> Signed-off-by: Alistair Popple <alistair@popple.id.au>
> ---
>  arch/powerpc/kvm/book3s_hv_uvmem.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
> index 84e5a2dc8be5..08aa6a90c525 100644
> --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
> +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
> @@ -762,7 +762,10 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma,
>  		goto out_finalize;
>  	}
> 
> -	if (pagein) {
> +	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
> +	migrate_vma_pages(&mig);
> +
> +	if ((*mig.src & MIGRATE_PFN_MIGRATE) && pagein) {
>  		pfn = *mig.src >> MIGRATE_PFN_SHIFT;
>  		spage = migrate_pfn_to_page(*mig.src);
>  		if (spage) {
> @@ -773,8 +776,6 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma,
>  		}
>  	}
> 
> -	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
> -	migrate_vma_pages(&mig);
>  out_finalize:
>  	migrate_vma_finalize(&mig);
>  	return ret;

Though this patch did not solve the specific problem, I am running into,
my tests did not expose any regression.

Tested-by: Ram Pai <linuxram@us.ibm.com>


Reviewed-by: Ram Pai <linuxram@us.ibm.com>

^ permalink raw reply

* Re: [PATCH v3 4/5] powerpc/fault: Avoid heavy search_exception_tables() verification
From: Christophe Leroy @ 2020-12-08 14:26 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <0e25f03d-9f59-b963-312c-c3ae1d7953a2@linux.ibm.com>



Le 08/12/2020 à 14:00, Aneesh Kumar K.V a écrit :
> On 12/8/20 2:07 PM, Christophe Leroy wrote:
>> search_exception_tables() is an heavy operation, we have to avoid it.
>> When KUAP is selected, we'll know the fault has been blocked by KUAP.
>> Otherwise, it behaves just as if the address was already in the TLBs
>> and no fault was generated.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> v3: rebased
>> v2: Squashed with the preceeding patch which was re-ordering tests that get removed in this patch.
>> ---
>>   arch/powerpc/mm/fault.c | 23 +++++++----------------
>>   1 file changed, 7 insertions(+), 16 deletions(-)
>>
>> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
>> index 3fcd34c28e10..1770b41e4730 100644
>> --- a/arch/powerpc/mm/fault.c
>> +++ b/arch/powerpc/mm/fault.c
>> @@ -210,28 +210,19 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
>>           return true;
>>       }
>> -    if (!is_exec && address < TASK_SIZE && (error_code & (DSISR_PROTFAULT | DSISR_KEYFAULT)) &&
>> -        !search_exception_tables(regs->nip)) {
>> -        pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: 
>> %d)\n",
>> -                    address,
>> -                    from_kuid(&init_user_ns, current_uid()));
>> -    }
>> -
>>       // Kernel fault on kernel address is bad
>>       if (address >= TASK_SIZE)
>>           return true;
>> -    // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
>> -    if (!search_exception_tables(regs->nip))
>> -        return true;
>> -
>> -    // Read/write fault in a valid region (the exception table search passed
>> -    // above), but blocked by KUAP is bad, it can never succeed.
>> -    if (bad_kuap_fault(regs, address, is_write))
>> +    // Read/write fault blocked by KUAP is bad, it can never succeed.
>> +    if (bad_kuap_fault(regs, address, is_write)) {
>> +        pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
>> +                    is_write ? "write" : "read", address,
>> +                    from_kuid(&init_user_ns, current_uid()));
>>           return true;
>> +    }
> 
> 
> Should we update bad_kuap_fault to check for !is_kernel_addr() and error_code & (DSISIR_PROT_FAULT | 
> DSISIR_KEYFAULT). I am wondering whether we can take another fault w.r.t kernel address/user address 
> and end up reporting that as KUAP fault?

Just before this we do:

	if (address >= TASK_SIZE)
		return true;

About the error code, I don't know. Can we take a fault that is not a DSISR_PROT_FAULT |
  DSISR_KEYFAULT and that is not a KUAP fault ?

Previously (before this patch), the error code was taken into account for the call to 
search_exception_tables(), but has never been for the bad_kuap_fault().

> 
>> -    // What's left? Kernel fault on user in well defined regions (extable
>> -    // matched), and allowed by KUAP in the faulting context.
>> +    // What's left? Kernel fault on user and allowed by KUAP in the faulting context.
>>       return false;
>>   }
>>
> 
> 
> -aneesh

Christophe

^ permalink raw reply

* Re: [PATCH v3 4/5] powerpc/fault: Avoid heavy search_exception_tables() verification
From: Aneesh Kumar K.V @ 2020-12-08 14:31 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <d66f9706-9e36-5b92-5a87-90ebd05587e9@csgroup.eu>

Christophe Leroy <christophe.leroy@csgroup.eu> writes:

> Le 08/12/2020 à 14:00, Aneesh Kumar K.V a écrit :
>> On 12/8/20 2:07 PM, Christophe Leroy wrote:
>>> search_exception_tables() is an heavy operation, we have to avoid it.
>>> When KUAP is selected, we'll know the fault has been blocked by KUAP.
>>> Otherwise, it behaves just as if the address was already in the TLBs
>>> and no fault was generated.
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
>>> ---
>>> v3: rebased
>>> v2: Squashed with the preceeding patch which was re-ordering tests that get removed in this patch.
>>> ---
>>>   arch/powerpc/mm/fault.c | 23 +++++++----------------
>>>   1 file changed, 7 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
>>> index 3fcd34c28e10..1770b41e4730 100644
>>> --- a/arch/powerpc/mm/fault.c
>>> +++ b/arch/powerpc/mm/fault.c
>>> @@ -210,28 +210,19 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
>>>           return true;
>>>       }
>>> -    if (!is_exec && address < TASK_SIZE && (error_code & (DSISR_PROTFAULT | DSISR_KEYFAULT)) &&
>>> -        !search_exception_tables(regs->nip)) {
>>> -        pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: 
>>> %d)\n",
>>> -                    address,
>>> -                    from_kuid(&init_user_ns, current_uid()));
>>> -    }
>>> -
>>>       // Kernel fault on kernel address is bad
>>>       if (address >= TASK_SIZE)
>>>           return true;
>>> -    // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
>>> -    if (!search_exception_tables(regs->nip))
>>> -        return true;
>>> -
>>> -    // Read/write fault in a valid region (the exception table search passed
>>> -    // above), but blocked by KUAP is bad, it can never succeed.
>>> -    if (bad_kuap_fault(regs, address, is_write))
>>> +    // Read/write fault blocked by KUAP is bad, it can never succeed.
>>> +    if (bad_kuap_fault(regs, address, is_write)) {
>>> +        pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
>>> +                    is_write ? "write" : "read", address,
>>> +                    from_kuid(&init_user_ns, current_uid()));
>>>           return true;
>>> +    }
>> 
>> 
>> Should we update bad_kuap_fault to check for !is_kernel_addr() and error_code & (DSISIR_PROT_FAULT | 
>> DSISIR_KEYFAULT). I am wondering whether we can take another fault w.r.t kernel address/user address 
>> and end up reporting that as KUAP fault?
>
> Just before this we do:
>
> 	if (address >= TASK_SIZE)
> 		return true;
>
> About the error code, I don't know. Can we take a fault that is not a DSISR_PROT_FAULT |
>   DSISR_KEYFAULT and that is not a KUAP fault ?
>
> Previously (before this patch), the error code was taken into account for the call to 
> search_exception_tables(), but has never been for the bad_kuap_fault().
>

a KUAP fault on radix will result in PROTFAULT and on hash it will
generate KEYFAULT. ie, something like below

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 32fd4452e960..b18cd931e325 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -177,8 +177,8 @@ static inline void restore_user_access(unsigned long flags)
 		allow_user_access(to, to, end - addr, KUAP_READ_WRITE);
 }
 
-static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
+				  bool is_write, unsigned long error_code)
 {
 	unsigned long begin = regs->kuap & 0xf0000000;
 	unsigned long end = regs->kuap << 28;
diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
index 91af50e9b09a..16bb6aee9fcd 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -392,14 +392,24 @@ static inline void set_kuap(unsigned long value)
 }
 
 static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
-				  bool is_write)
+				  bool is_write, unsigned long error_code)
 {
+	unsigned long fault;
+
 	if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
 		return false;
+
+	if (radix_enabled())
+		fault = DSISR_PROTFAULT;
+	else
+		fault = DSISR_KEYFAULT;
+
 	/*
 	 * For radix this will be a storage protection fault (DSISR_PROTFAULT).
 	 * For hash this will be a key fault (DSISR_KEYFAULT)
 	 */
+	if (!(error_code & fault))
+		return false;
 	/*
 	 * We do have exception table entry, but accessing the
 	 * userspace results in fault.  This could be because we
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 49fe4b4a9434..bdffa2664bf0 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -62,8 +62,8 @@ void setup_kuap(bool disabled);
 #else
 static inline void setup_kuap(bool disabled) { }
 
-static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
+				  bool is_write, unsigned long error_code)
 {
 	return false;
 }
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 567cdc557402..7bdd9e5b63ed 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -60,8 +60,8 @@ static inline void restore_user_access(unsigned long flags)
 	mtspr(SPRN_MD_AP, flags);
 }
 
-static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
+				  bool is_write, unsigned long error_code)
 {
 	return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff000000),
 		    "Bug: fault blocked by AP register !");
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index b12595102525..03c3414bdc79 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -218,7 +218,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
 	}
 
 	// Kernel fault on kernel address is bad
-	if (address >= TASK_SIZE)
+	if (is_kernel_addr(address))
 		return true;
 
 	// Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
@@ -227,7 +227,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
 
 	// Read/write fault in a valid region (the exception table search passed
 	// above), but blocked by KUAP is bad, it can never succeed.
-	if (bad_kuap_fault(regs, address, is_write))
+	if (bad_kuap_fault(regs, address, is_write, error_code))
 		return true;
 
 	// What's left? Kernel fault on user in well defined regions (extable


^ permalink raw reply related

* Re: [PATCH] arch: fix 'unexpected IRQ trap at vector' warnings
From: Helge Deller @ 2020-12-08 14:42 UTC (permalink / raw)
  To: Michael Ellerman, Enrico Weigelt, metux IT consult, linux-kernel
  Cc: linux-s390, hpa, linux-parisc, jdike, x86, linux-um,
	James.Bottomley, mingo, paulus, richard, bp, tglx, linuxppc-dev,
	anton.ivanov
In-Reply-To: <877dptt5av.fsf@mpe.ellerman.id.au>

On 12/8/20 3:11 AM, Michael Ellerman wrote:
> "Enrico Weigelt, metux IT consult" <info@metux.net> writes:
>> All archs, except Alpha, print out the irq number in hex, but the message
>> looks like it was a decimal number, which is quite confusing. Fixing this
>> by adding "0x" prefix.
>
> Arguably decimal would be better, /proc/interrupts and /proc/irq/ both
> use decimal.

I agree.

> The whole message is very dated IMO, these days the number it prints is
> (possibly) virtualised via IRQ domains, ie. it's not necessarily a
> "vector" if that even makes sense on all arches). Arguably "trap" is the
> wrong term on some arches too.
>
> So it would be better reworded entirely IMO, and also switched to
> decimal to match other sources of information on interrupts.
>
> Perhaps:
> 	"Unexpected Linux IRQ %d."

Yes.

and while cleaning it up, introducing a default weak implementation of ack_bad_irq()
which adds and increases irq_err_count for all platforms would be a nice cleanup.

Helge

> If anyone else is having deja vu like me, yes this has come up before:
>   https://lore.kernel.org/lkml/20150712220211.7166.42035.stgit@bhelgaas-glaptop2.roam.corp.google.com/
>
> cheers
>
>
>
>> diff --git a/arch/arm/include/asm/hw_irq.h b/arch/arm/include/asm/hw_irq.h
>> index cecc13214ef1..2749f19271d9 100644
>> --- a/arch/arm/include/asm/hw_irq.h
>> +++ b/arch/arm/include/asm/hw_irq.h
>> @@ -9,7 +9,7 @@ static inline void ack_bad_irq(int irq)
>>  {
>>  	extern unsigned long irq_err_count;
>>  	irq_err_count++;
>> -	pr_crit("unexpected IRQ trap at vector %02x\n", irq);
>> +	pr_crit("unexpected IRQ trap at vector 0x%02x\n", irq);
>>  }
>>
>>  #define ARCH_IRQ_INIT_FLAGS	(IRQ_NOREQUEST | IRQ_NOPROBE)
>> diff --git a/arch/parisc/include/asm/hardirq.h b/arch/parisc/include/asm/hardirq.h
>> index 7f7039516e53..c3348af88d3f 100644
>> --- a/arch/parisc/include/asm/hardirq.h
>> +++ b/arch/parisc/include/asm/hardirq.h
>> @@ -35,6 +35,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
>>  #define __IRQ_STAT(cpu, member) (irq_stat[cpu].member)
>>  #define inc_irq_stat(member)	this_cpu_inc(irq_stat.member)
>>  #define __inc_irq_stat(member)	__this_cpu_inc(irq_stat.member)
>> -#define ack_bad_irq(irq) WARN(1, "unexpected IRQ trap at vector %02x\n", irq)
>> +#define ack_bad_irq(irq) WARN(1, "unexpected IRQ trap at vector 0x%02x\n", irq)
>>
>>  #endif /* _PARISC_HARDIRQ_H */
>> diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h
>> index f133b5930ae1..ec8cf3cf6e49 100644
>> --- a/arch/powerpc/include/asm/hardirq.h
>> +++ b/arch/powerpc/include/asm/hardirq.h
>> @@ -29,7 +29,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
>>
>>  static inline void ack_bad_irq(unsigned int irq)
>>  {
>> -	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
>> +	printk(KERN_CRIT "unexpected IRQ trap at vector 0x%02x\n", irq);
>>  }
>>
>>  extern u64 arch_irq_stat_cpu(unsigned int cpu);
>> diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
>> index dfbc3c6c0674..aaaec5cdd4fe 100644
>> --- a/arch/s390/include/asm/hardirq.h
>> +++ b/arch/s390/include/asm/hardirq.h
>> @@ -23,7 +23,7 @@
>>
>>  static inline void ack_bad_irq(unsigned int irq)
>>  {
>> -	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
>> +	printk(KERN_CRIT "unexpected IRQ trap at vector 0x%02x\n", irq);
>>  }
>>
>>  #endif /* __ASM_HARDIRQ_H */
>> diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h
>> index b426796d26fd..2a2e6eae034b 100644
>> --- a/arch/um/include/asm/hardirq.h
>> +++ b/arch/um/include/asm/hardirq.h
>> @@ -15,7 +15,7 @@ typedef struct {
>>  #ifndef ack_bad_irq
>>  static inline void ack_bad_irq(unsigned int irq)
>>  {
>> -	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
>> +	printk(KERN_CRIT "unexpected IRQ trap at vector 0x%02x\n", irq);
>>  }
>>  #endif
>>
>> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
>> index c5dd50369e2f..957c716f2df7 100644
>> --- a/arch/x86/kernel/irq.c
>> +++ b/arch/x86/kernel/irq.c
>> @@ -37,7 +37,7 @@ atomic_t irq_err_count;
>>  void ack_bad_irq(unsigned int irq)
>>  {
>>  	if (printk_ratelimit())
>> -		pr_err("unexpected IRQ trap at vector %02x\n", irq);
>> +		pr_err("unexpected IRQ trap at vector 0x%02x\n", irq);
>>
>>  	/*
>>  	 * Currently unexpected vectors happen only on SMP and APIC.
>> --
>> 2.11.0


^ permalink raw reply

* Re: [PATCH v3 4/5] powerpc/fault: Avoid heavy search_exception_tables() verification
From: Aneesh Kumar K.V @ 2020-12-08 14:52 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <731bdee26a5a5c81cd815ed624a6fb3bdef8b4db.1607416578.git.christophe.leroy@csgroup.eu>

Christophe Leroy <christophe.leroy@csgroup.eu> writes:

> search_exception_tables() is an heavy operation, we have to avoid it.
> When KUAP is selected, we'll know the fault has been blocked by KUAP.
> Otherwise, it behaves just as if the address was already in the TLBs
> and no fault was generated.
>
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> v3: rebased
> v2: Squashed with the preceeding patch which was re-ordering tests that get removed in this patch.
> ---
>  arch/powerpc/mm/fault.c | 23 +++++++----------------
>  1 file changed, 7 insertions(+), 16 deletions(-)
>
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index 3fcd34c28e10..1770b41e4730 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -210,28 +210,19 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
>  		return true;
>  	}
>  
> -	if (!is_exec && address < TASK_SIZE && (error_code & (DSISR_PROTFAULT | DSISR_KEYFAULT)) &&
> -	    !search_exception_tables(regs->nip)) {
> -		pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
> -				    address,
> -				    from_kuid(&init_user_ns, current_uid()));
> -	}
> -
>  	// Kernel fault on kernel address is bad
>  	if (address >= TASK_SIZE)
>  		return true;
>  
> -	// Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
> -	if (!search_exception_tables(regs->nip))
> -		return true;
> -
> -	// Read/write fault in a valid region (the exception table search passed
> -	// above), but blocked by KUAP is bad, it can never succeed.
> -	if (bad_kuap_fault(regs, address, is_write))
> +	// Read/write fault blocked by KUAP is bad, it can never succeed.
> +	if (bad_kuap_fault(regs, address, is_write)) {
> +		pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
> +				    is_write ? "write" : "read", address,
> +				    from_kuid(&init_user_ns, current_uid()));
>  		return true;
> +	}


With this I am wondering whether the WARN() in bad_kuap_fault() is
needed. A direct access of userspace address will trigger this, whereas
previously we used bad_kuap_fault() only to identify incorrect restore
of AMR register (ie, to identify kernel bugs). Hence a WARN() there was
useful. We loose that differentiation now?


>  
> -	// What's left? Kernel fault on user in well defined regions (extable
> -	// matched), and allowed by KUAP in the faulting context.
> +	// What's left? Kernel fault on user and allowed by KUAP in the faulting context.
>  	return false;
>  }
>  
> -- 
> 2.25.0

^ permalink raw reply

* [PATCH] drivers: usb: gadget: prefer pr_*() functions over raw printk()
From: Enrico Weigelt, metux IT consult @ 2020-12-08 14:44 UTC (permalink / raw)
  To: linux-kernel; +Cc: balbi, linux-usb, linuxppc-dev, laurent.pinchart, leoyang.li

Reduce a bit logging boilerplate by using the preferred pr_*()
macros instead of raw printk().

Signed-off-by: Enrico Weigelt, metux IT consult <info@metux.net>
---
 drivers/usb/gadget/function/uvc.h       |  2 +-
 drivers/usb/gadget/udc/atmel_usba_udc.c |  2 +-
 drivers/usb/gadget/udc/fsl_udc_core.c   |  4 +--
 drivers/usb/gadget/udc/fsl_usb2_udc.h   |  4 +--
 drivers/usb/gadget/udc/fusb300_udc.c    | 64 ++++++++++++++++-----------------
 drivers/usb/gadget/udc/goku_udc.c       |  2 +-
 drivers/usb/gadget/udc/r8a66597-udc.h   |  2 +-
 7 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index 23ee25383c1f..d546eb7c348c 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -49,7 +49,7 @@ extern unsigned int uvc_gadget_trace_param;
 #define uvc_trace(flag, msg...) \
 	do { \
 		if (uvc_gadget_trace_param & flag) \
-			printk(KERN_DEBUG "uvcvideo: " msg); \
+			pr_debug("uvcvideo: " msg); \
 	} while (0)
 
 #define uvcg_dbg(f, fmt, args...) \
diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
index 2b893bceea45..4834fafb3f70 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
@@ -1573,7 +1573,7 @@ static void usba_control_irq(struct usba_udc *udc, struct usba_ep *ep)
 		 * generate or receive a reply right away. */
 		usba_ep_writel(ep, CLR_STA, USBA_RX_SETUP);
 
-		/* printk(KERN_DEBUG "setup: %d: %02x.%02x\n",
+		/* pr_debug("setup: %d: %02x.%02x\n",
 			ep->state, crq.crq.bRequestType,
 			crq.crq.bRequest); */
 
diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index ad6ff9c4188e..cab4def04f9f 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -1474,7 +1474,7 @@ __acquires(udc->lock)
 			mdelay(10);
 			tmp = fsl_readl(&dr_regs->portsc1) | (ptc << 16);
 			fsl_writel(tmp, &dr_regs->portsc1);
-			printk(KERN_INFO "udc: switch to test mode %d.\n", ptc);
+			pr_info("udc: switch to test mode %d.\n", ptc);
 		}
 
 		return;
@@ -1952,7 +1952,7 @@ static int fsl_udc_start(struct usb_gadget *g,
 	if (!IS_ERR_OR_NULL(udc_controller->transceiver)) {
 		/* Suspend the controller until OTG enable it */
 		udc_controller->stopped = 1;
-		printk(KERN_INFO "Suspend udc for OTG auto detect\n");
+		pr_info("Suspend udc for OTG auto detect\n");
 
 		/* connect to bus through transceiver */
 		if (!IS_ERR_OR_NULL(udc_controller->transceiver)) {
diff --git a/drivers/usb/gadget/udc/fsl_usb2_udc.h b/drivers/usb/gadget/udc/fsl_usb2_udc.h
index 4ba651ae9048..b180bf14dd0c 100644
--- a/drivers/usb/gadget/udc/fsl_usb2_udc.h
+++ b/drivers/usb/gadget/udc/fsl_usb2_udc.h
@@ -509,7 +509,7 @@ struct fsl_udc {
 /*-------------------------------------------------------------------------*/
 
 #ifdef DEBUG
-#define DBG(fmt, args...) 	printk(KERN_DEBUG "[%s]  " fmt "\n", \
+#define DBG(fmt, args...) 	pr_debug("[%s]  " fmt "\n", \
 				__func__, ## args)
 #else
 #define DBG(fmt, args...)	do{}while(0)
@@ -535,7 +535,7 @@ static void dump_msg(const char *label, const u8 * buf, unsigned int length)
 			p += 3;
 		}
 		*p = 0;
-		printk(KERN_DEBUG "%6x: %s\n", start, line);
+		pr_debug("%6x: %s\n", start, line);
 		buf += num;
 		start += num;
 		length -= num;
diff --git a/drivers/usb/gadget/udc/fusb300_udc.c b/drivers/usb/gadget/udc/fusb300_udc.c
index 9af8b415f303..c4e7e4b8e46f 100644
--- a/drivers/usb/gadget/udc/fusb300_udc.c
+++ b/drivers/usb/gadget/udc/fusb300_udc.c
@@ -352,24 +352,24 @@ static void fusb300_wrcxf(struct fusb300_ep *ep,
 		for (i = length >> 2; i > 0; i--) {
 			data = *tmp | *(tmp + 1) << 8 | *(tmp + 2) << 16 |
 				*(tmp + 3) << 24;
-			printk(KERN_DEBUG "    0x%x\n", data);
+			pr_debug("    0x%x\n", data);
 			iowrite32(data, fusb300->reg + FUSB300_OFFSET_CXPORT);
 			tmp = tmp + 4;
 		}
 		switch (length % 4) {
 		case 1:
 			data = *tmp;
-			printk(KERN_DEBUG "    0x%x\n", data);
+			pr_debug("    0x%x\n", data);
 			iowrite32(data, fusb300->reg + FUSB300_OFFSET_CXPORT);
 			break;
 		case 2:
 			data = *tmp | *(tmp + 1) << 8;
-			printk(KERN_DEBUG "    0x%x\n", data);
+			pr_debug("    0x%x\n", data);
 			iowrite32(data, fusb300->reg + FUSB300_OFFSET_CXPORT);
 			break;
 		case 3:
 			data = *tmp | *(tmp + 1) << 8 | *(tmp + 2) << 16;
-			printk(KERN_DEBUG "    0x%x\n", data);
+			pr_debug("    0x%x\n", data);
 			iowrite32(data, fusb300->reg + FUSB300_OFFSET_CXPORT);
 			break;
 		default:
@@ -390,7 +390,7 @@ static void fusb300_clear_epnstall(struct fusb300 *fusb300, u8 ep)
 	u32 reg = ioread32(fusb300->reg + FUSB300_OFFSET_EPSET0(ep));
 
 	if (reg & FUSB300_EPSET0_STL) {
-		printk(KERN_DEBUG "EP%d stall... Clear!!\n", ep);
+		pr_debug("EP%d stall... Clear!!\n", ep);
 		reg |= FUSB300_EPSET0_STL_CLR;
 		iowrite32(reg, fusb300->reg + FUSB300_OFFSET_EPSET0(ep));
 	}
@@ -402,7 +402,7 @@ static void ep0_queue(struct fusb300_ep *ep, struct fusb300_request *req)
 		if (req->req.length) {
 			fusb300_wrcxf(ep, req);
 		} else
-			printk(KERN_DEBUG "%s : req->req.length = 0x%x\n",
+			pr_debug("%s : req->req.length = 0x%x\n",
 				__func__, req->req.length);
 		if ((req->req.length == req->req.actual) ||
 		    (req->req.actual < ep->ep.maxpacket))
@@ -565,7 +565,7 @@ static void fusb300_rdcxf(struct fusb300 *fusb300,
 
 	for (i = (length >> 2); i > 0; i--) {
 		data = ioread32(fusb300->reg + FUSB300_OFFSET_CXPORT);
-		printk(KERN_DEBUG "    0x%x\n", data);
+		pr_debug("    0x%x\n", data);
 		*tmp = data & 0xFF;
 		*(tmp + 1) = (data >> 8) & 0xFF;
 		*(tmp + 2) = (data >> 16) & 0xFF;
@@ -576,18 +576,18 @@ static void fusb300_rdcxf(struct fusb300 *fusb300,
 	switch (length % 4) {
 	case 1:
 		data = ioread32(fusb300->reg + FUSB300_OFFSET_CXPORT);
-		printk(KERN_DEBUG "    0x%x\n", data);
+		pr_debug("    0x%x\n", data);
 		*tmp = data & 0xFF;
 		break;
 	case 2:
 		data = ioread32(fusb300->reg + FUSB300_OFFSET_CXPORT);
-		printk(KERN_DEBUG "    0x%x\n", data);
+		pr_debug("    0x%x\n", data);
 		*tmp = data & 0xFF;
 		*(tmp + 1) = (data >> 8) & 0xFF;
 		break;
 	case 3:
 		data = ioread32(fusb300->reg + FUSB300_OFFSET_CXPORT);
-		printk(KERN_DEBUG "    0x%x\n", data);
+		pr_debug("    0x%x\n", data);
 		*tmp = data & 0xFF;
 		*(tmp + 1) = (data >> 8) & 0xFF;
 		*(tmp + 2) = (data >> 16) & 0xFF;
@@ -610,7 +610,7 @@ static void fusb300_rdfifo(struct fusb300_ep *ep,
 	req->req.actual += length;
 
 	if (req->req.actual > req->req.length)
-		printk(KERN_DEBUG "req->req.actual > req->req.length\n");
+		pr_debug("req->req.actual > req->req.length\n");
 
 	for (i = (length >> 2); i > 0; i--) {
 		data = ioread32(fusb300->reg +
@@ -649,7 +649,7 @@ static void fusb300_rdfifo(struct fusb300_ep *ep,
 		reg = ioread32(fusb300->reg + FUSB300_OFFSET_IGR1);
 		reg &= FUSB300_IGR1_SYNF0_EMPTY_INT;
 		if (i)
-			printk(KERN_INFO "sync fifo is not empty!\n");
+			pr_info("sync fifo is not empty!\n");
 		i++;
 	} while (!reg);
 }
@@ -677,7 +677,7 @@ static u8 fusb300_get_cxstall(struct fusb300 *fusb300)
 static void request_error(struct fusb300 *fusb300)
 {
 	fusb300_set_cxstall(fusb300);
-	printk(KERN_DEBUG "request error!!\n");
+	pr_debug("request error!!\n");
 }
 
 static void get_status(struct fusb300 *fusb300, struct usb_ctrlrequest *ctrl)
@@ -999,7 +999,7 @@ static void check_device_mode(struct fusb300 *fusb300)
 		fusb300->gadget.speed = USB_SPEED_UNKNOWN;
 		break;
 	}
-	printk(KERN_INFO "dev_mode = %d\n", (reg & FUSB300_GCR_DEVEN_MSK));
+	pr_info("dev_mode = %d\n", (reg & FUSB300_GCR_DEVEN_MSK));
 }
 
 
@@ -1076,14 +1076,14 @@ static irqreturn_t fusb300_irq(int irq, void *_fusb300)
 	if (int_grp1 & FUSB300_IGR1_WARM_RST_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_WARM_RST_INT);
-		printk(KERN_INFO"fusb300_warmreset\n");
+		pr_info("fusb300_warmreset\n");
 		fusb300_reset();
 	}
 
 	if (int_grp1 & FUSB300_IGR1_HOT_RST_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_HOT_RST_INT);
-		printk(KERN_INFO"fusb300_hotreset\n");
+		pr_info("fusb300_hotreset\n");
 		fusb300_reset();
 	}
 
@@ -1097,13 +1097,13 @@ static irqreturn_t fusb300_irq(int irq, void *_fusb300)
 	if (int_grp1 & FUSB300_IGR1_CX_COMABT_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_CX_COMABT_INT);
-		printk(KERN_INFO"fusb300_ep0abt\n");
+		pr_info("fusb300_ep0abt\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_VBUS_CHG_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_VBUS_CHG_INT);
-		printk(KERN_INFO"fusb300_vbus_change\n");
+		pr_info("fusb300_vbus_change\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_U3_EXIT_FAIL_INT) {
@@ -1134,25 +1134,25 @@ static irqreturn_t fusb300_irq(int irq, void *_fusb300)
 	if (int_grp1 & FUSB300_IGR1_U3_EXIT_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_U3_EXIT_INT);
-		printk(KERN_INFO "FUSB300_IGR1_U3_EXIT_INT\n");
+		pr_info("FUSB300_IGR1_U3_EXIT_INT\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_U2_EXIT_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_U2_EXIT_INT);
-		printk(KERN_INFO "FUSB300_IGR1_U2_EXIT_INT\n");
+		pr_info("FUSB300_IGR1_U2_EXIT_INT\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_U1_EXIT_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_U1_EXIT_INT);
-		printk(KERN_INFO "FUSB300_IGR1_U1_EXIT_INT\n");
+		pr_info("FUSB300_IGR1_U1_EXIT_INT\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_U3_ENTRY_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_U3_ENTRY_INT);
-		printk(KERN_INFO "FUSB300_IGR1_U3_ENTRY_INT\n");
+		pr_info("FUSB300_IGR1_U3_ENTRY_INT\n");
 		fusb300_enable_bit(fusb300, FUSB300_OFFSET_SSCR1,
 				   FUSB300_SSCR1_GO_U3_DONE);
 	}
@@ -1160,31 +1160,31 @@ static irqreturn_t fusb300_irq(int irq, void *_fusb300)
 	if (int_grp1 & FUSB300_IGR1_U2_ENTRY_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_U2_ENTRY_INT);
-		printk(KERN_INFO "FUSB300_IGR1_U2_ENTRY_INT\n");
+		pr_info("FUSB300_IGR1_U2_ENTRY_INT\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_U1_ENTRY_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_U1_ENTRY_INT);
-		printk(KERN_INFO "FUSB300_IGR1_U1_ENTRY_INT\n");
+		pr_info("FUSB300_IGR1_U1_ENTRY_INT\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_RESM_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_RESM_INT);
-		printk(KERN_INFO "fusb300_resume\n");
+		pr_info("fusb300_resume\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_SUSP_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_SUSP_INT);
-		printk(KERN_INFO "fusb300_suspend\n");
+		pr_info("fusb300_suspend\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_HS_LPM_INT) {
 		fusb300_clear_int(fusb300, FUSB300_OFFSET_IGR1,
 				  FUSB300_IGR1_HS_LPM_INT);
-		printk(KERN_INFO "fusb300_HS_LPM_INT\n");
+		pr_info("fusb300_HS_LPM_INT\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_DEV_MODE_CHG_INT) {
@@ -1195,11 +1195,11 @@ static irqreturn_t fusb300_irq(int irq, void *_fusb300)
 
 	if (int_grp1 & FUSB300_IGR1_CX_COMFAIL_INT) {
 		fusb300_set_cxstall(fusb300);
-		printk(KERN_INFO "fusb300_ep0fail\n");
+		pr_info("fusb300_ep0fail\n");
 	}
 
 	if (int_grp1 & FUSB300_IGR1_CX_SETUP_INT) {
-		printk(KERN_INFO "fusb300_ep0setup\n");
+		pr_info("fusb300_ep0setup\n");
 		if (setup_packet(fusb300, &ctrl)) {
 			spin_unlock(&fusb300->lock);
 			if (fusb300->driver->setup(&fusb300->gadget, &ctrl) < 0)
@@ -1209,16 +1209,16 @@ static irqreturn_t fusb300_irq(int irq, void *_fusb300)
 	}
 
 	if (int_grp1 & FUSB300_IGR1_CX_CMDEND_INT)
-		printk(KERN_INFO "fusb300_cmdend\n");
+		pr_info("fusb300_cmdend\n");
 
 
 	if (int_grp1 & FUSB300_IGR1_CX_OUT_INT) {
-		printk(KERN_INFO "fusb300_cxout\n");
+		pr_info("fusb300_cxout\n");
 		fusb300_ep0out(fusb300);
 	}
 
 	if (int_grp1 & FUSB300_IGR1_CX_IN_INT) {
-		printk(KERN_INFO "fusb300_cxin\n");
+		pr_info("fusb300_cxin\n");
 		fusb300_ep0in(fusb300);
 	}
 
diff --git a/drivers/usb/gadget/udc/goku_udc.c b/drivers/usb/gadget/udc/goku_udc.c
index 3e1267d38774..4f225552861a 100644
--- a/drivers/usb/gadget/udc/goku_udc.c
+++ b/drivers/usb/gadget/udc/goku_udc.c
@@ -1748,7 +1748,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	int			retval;
 
 	if (!pdev->irq) {
-		printk(KERN_ERR "Check PCI %s IRQ setup!\n", pci_name(pdev));
+		pr_err("Check PCI %s IRQ setup!\n", pci_name(pdev));
 		retval = -ENODEV;
 		goto err;
 	}
diff --git a/drivers/usb/gadget/udc/r8a66597-udc.h b/drivers/usb/gadget/udc/r8a66597-udc.h
index 9a115caba661..fa4d62c32ea1 100644
--- a/drivers/usb/gadget/udc/r8a66597-udc.h
+++ b/drivers/usb/gadget/udc/r8a66597-udc.h
@@ -247,7 +247,7 @@ static inline u16 get_xtal_from_pdata(struct r8a66597_platdata *pdata)
 		clock = XTAL48;
 		break;
 	default:
-		printk(KERN_ERR "r8a66597: platdata clock is wrong.\n");
+		pr_err("r8a66597: platdata clock is wrong.\n");
 		break;
 	}
 
-- 
2.11.0


^ permalink raw reply related

* Re: [PATCH v3 4/5] powerpc/fault: Avoid heavy search_exception_tables() verification
From: Christophe Leroy @ 2020-12-08 15:07 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Benjamin Herrenschmidt, Paul Mackerras,
	Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <87lfe8qrik.fsf@linux.ibm.com>



Le 08/12/2020 à 15:52, Aneesh Kumar K.V a écrit :
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> 
>> search_exception_tables() is an heavy operation, we have to avoid it.
>> When KUAP is selected, we'll know the fault has been blocked by KUAP.
>> Otherwise, it behaves just as if the address was already in the TLBs
>> and no fault was generated.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>> v3: rebased
>> v2: Squashed with the preceeding patch which was re-ordering tests that get removed in this patch.
>> ---
>>   arch/powerpc/mm/fault.c | 23 +++++++----------------
>>   1 file changed, 7 insertions(+), 16 deletions(-)
>>
>> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
>> index 3fcd34c28e10..1770b41e4730 100644
>> --- a/arch/powerpc/mm/fault.c
>> +++ b/arch/powerpc/mm/fault.c
>> @@ -210,28 +210,19 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
>>   		return true;
>>   	}
>>   
>> -	if (!is_exec && address < TASK_SIZE && (error_code & (DSISR_PROTFAULT | DSISR_KEYFAULT)) &&
>> -	    !search_exception_tables(regs->nip)) {
>> -		pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
>> -				    address,
>> -				    from_kuid(&init_user_ns, current_uid()));
>> -	}
>> -
>>   	// Kernel fault on kernel address is bad
>>   	if (address >= TASK_SIZE)
>>   		return true;
>>   
>> -	// Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
>> -	if (!search_exception_tables(regs->nip))
>> -		return true;
>> -
>> -	// Read/write fault in a valid region (the exception table search passed
>> -	// above), but blocked by KUAP is bad, it can never succeed.
>> -	if (bad_kuap_fault(regs, address, is_write))
>> +	// Read/write fault blocked by KUAP is bad, it can never succeed.
>> +	if (bad_kuap_fault(regs, address, is_write)) {
>> +		pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
>> +				    is_write ? "write" : "read", address,
>> +				    from_kuid(&init_user_ns, current_uid()));
>>   		return true;
>> +	}
> 
> 
> With this I am wondering whether the WARN() in bad_kuap_fault() is
> needed. A direct access of userspace address will trigger this, whereas
> previously we used bad_kuap_fault() only to identify incorrect restore
> of AMR register (ie, to identify kernel bugs). Hence a WARN() there was
> useful. We loose that differentiation now?

Yes, I wanted to remove the WARN(), see 
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/cc9129bdda1dbc2f0a09cf45fece7d0b0e690784.1605541983.git.christophe.leroy@csgroup.eu/
but I understood from Michael that maybe it was not a good idea, so I left it aside for now when 
rebasing to v3.

Yes previously we were able to differentiate between a direct access of userspace and a valid access 
triggering a KUAP fault, but at the cost of the heavy search_exception_tables().
The issue was reported by Nick through https://github.com/linuxppc/issues/issues/317

Should be perform the search_exception_tables() once we have hit the KUAP fault and WARN() only in 
that case ?

I was wondering also if we should keep the WARN() only when CONFIG_PPC_KUAP_DEBUG is set ?

> 
> 
>>   
>> -	// What's left? Kernel fault on user in well defined regions (extable
>> -	// matched), and allowed by KUAP in the faulting context.
>> +	// What's left? Kernel fault on user and allowed by KUAP in the faulting context.
>>   	return false;
>>   }
>>   
>> -- 
>> 2.25.0

^ permalink raw reply

* [PATCH 02/13] powerpc/xive: Rename XIVE_IRQ_NO_EOI to show its a flag
From: Cédric Le Goater @ 2020-12-08 15:11 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20201208151124.1329942-1-clg@kaod.org>

This is a simple cleanup to identify easily all flags of the XIVE
interrupt structure. The interrupts flagged with XIVE_IRQ_FLAG_NO_EOI
are the escalations used to wake up vCPUs in KVM. They are handled
very differently from the rest.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/include/asm/xive.h   | 2 +-
 arch/powerpc/kvm/book3s_xive.c    | 4 ++--
 arch/powerpc/sysdev/xive/common.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 309b4d65b74f..d332dd9a18de 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -66,7 +66,7 @@ struct xive_irq_data {
 #define XIVE_IRQ_FLAG_H_INT_ESB	0x20
 
 /* Special flag set by KVM for excalation interrupts */
-#define XIVE_IRQ_NO_EOI		0x80
+#define XIVE_IRQ_FLAG_NO_EOI	0x80
 
 #define XIVE_INVALID_CHIP_ID	-1
 
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 18a6b75a3bfd..fae1c2e8da29 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -219,7 +219,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
 	/* In single escalation mode, we grab the ESB MMIO of the
 	 * interrupt and mask it. Also populate the VCPU v/raddr
 	 * of the ESB page for use by asm entry/exit code. Finally
-	 * set the XIVE_IRQ_NO_EOI flag which will prevent the
+	 * set the XIVE_IRQ_FLAG_NO_EOI flag which will prevent the
 	 * core code from performing an EOI on the escalation
 	 * interrupt, thus leaving it effectively masked after
 	 * it fires once.
@@ -231,7 +231,7 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
 		xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
 		vcpu->arch.xive_esc_raddr = xd->eoi_page;
 		vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
-		xd->flags |= XIVE_IRQ_NO_EOI;
+		xd->flags |= XIVE_IRQ_FLAG_NO_EOI;
 	}
 
 	return 0;
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index a80440af491a..65af34ac1fa2 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -416,7 +416,7 @@ static void xive_irq_eoi(struct irq_data *d)
 	 * been passed-through to a KVM guest
 	 */
 	if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
-	    !(xd->flags & XIVE_IRQ_NO_EOI))
+	    !(xd->flags & XIVE_IRQ_FLAG_NO_EOI))
 		xive_do_source_eoi(irqd_to_hwirq(d), xd);
 	else
 		xd->stale_p = true;
-- 
2.26.2


^ permalink raw reply related

* [PATCH 04/13] powerpc/xive: Use cpu_to_node() instead of ibm, chip-id property
From: Cédric Le Goater @ 2020-12-08 15:11 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20201208151124.1329942-1-clg@kaod.org>

The 'chip_id' field of the XIVE CPU structure is used to choose a
target for a source located on the same chip when possible. This field
is assigned on the PowerNV platform using the "ibm,chip-id" property
on pSeries under KVM when NUMA nodes are defined but it is undefined
under PowerVM. The XIVE source structure has a similar field
'src_chip' which is only assigned on the PowerNV platform.

cpu_to_node() returns a compatible value on all platforms, 0 being the
default node. It will also give us the opportunity to set the affinity
of a source on pSeries when we can localize them.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/sysdev/xive/common.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index ee375daf8114..605238ca65e4 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1342,16 +1342,11 @@ static int xive_prepare_cpu(unsigned int cpu)

 	xc = per_cpu(xive_cpu, cpu);
 	if (!xc) {
-		struct device_node *np;
-
 		xc = kzalloc_node(sizeof(struct xive_cpu),
 				  GFP_KERNEL, cpu_to_node(cpu));
 		if (!xc)
 			return -ENOMEM;
-		np = of_get_cpu_node(cpu, NULL);
-		if (np)
-			xc->chip_id = of_get_ibm_chip_id(np);
-		of_node_put(np);
+		xc->chip_id = cpu_to_node(cpu);
 		xc->hw_ipi = XIVE_BAD_IRQ;

 		per_cpu(xive_cpu, cpu) = xc;
-- 
2.26.2

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox