LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v1 11/41] powerpc/32: Use fast instruction to set MSR RI in exception prolog on 8xx
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

8xx has registers SPRN_NRI, SPRN_EID and SPRN_EIE for changing
MSR EE and RI.

Use SPRN_EID in exception prolog to set RI.

On an 8xx, it reduces the null_syscall test by 3 cycles.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_32.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index ac6b391f1493..25ee6b26ef5a 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -107,6 +107,8 @@
 #endif
 #ifdef CONFIG_40x
 	rlwinm	r9,r9,0,14,12		/* clear MSR_WE (necessary?) */
+#elif defined(CONFIG_PPC_8xx)
+	mtspr	SPRN_EID, r2		/* Set MSR_RI */
 #else
 #ifdef CONFIG_VMAP_STACK
 	li	r10, MSR_KERNEL & ~MSR_IR /* can take exceptions */
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 10/41] powerpc/32: Handle bookE debugging in C in exception entry
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

The handling of SPRN_DBCR0 and other registers can easily
be done in C instead of ASM.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/interrupt.h |  7 +++++++
 arch/powerpc/kernel/entry_32.S       | 23 -----------------------
 2 files changed, 7 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index e794111d7c03..d70c761edc00 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -52,6 +52,13 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
 	if (user_mode(regs))
 		account_cpu_user_entry();
 #endif
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+	if (IS_ENABLED(CONFIG_PPC32) && unlikely(ts->debug.dbcr0 & DBCR0_IDM)) {
+		mtspr(SPRN_DBSR, -1);
+		mtspr(SPRN_DBCR0, global_dbcr0[smp_processor_id()]);
+	}
+#endif
 }
 
 /*
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 87c06e241fc2..971980d71b04 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -146,32 +146,9 @@ transfer_to_handler:
 	addi	r2, r12, -THREAD
 	addi	r11,r1,STACK_FRAME_OVERHEAD
 	stw	r11,PT_REGS(r12)
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
-	/* Check to see if the dbcr0 register is set up to debug.  Use the
-	   internal debug mode bit to do this. */
-	lwz	r12,THREAD_DBCR0(r12)
-	andis.	r12,r12,DBCR0_IDM@h
-#endif
 #ifdef CONFIG_PPC_BOOK3S_32
 	kuep_lock r11, r12
 #endif
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
-	beq+	3f
-	/* From user and task is ptraced - load up global dbcr0 */
-	li	r12,-1			/* clear all pending debug events */
-	mtspr	SPRN_DBSR,r12
-	lis	r11,global_dbcr0@ha
-	tophys_novmstack r11,r11
-	addi	r11,r11,global_dbcr0@l
-#ifdef CONFIG_SMP
-	lwz	r9,TASK_CPU(r2)
-	slwi	r9,r9,2
-	add	r11,r11,r9
-#endif
-	lwz	r12,0(r11)
-	mtspr	SPRN_DBCR0,r12
-#endif
-
 	b	3f
 
 2:	/* if from kernel, check interrupted DOZE/NAP mode and
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 09/41] powerpc/32: Entry cpu time accounting in C
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

There is no need for this to be in asm,
use the new interrupt entry wrapper.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/interrupt.h |  3 +++
 arch/powerpc/include/asm/ppc_asm.h   | 10 ----------
 arch/powerpc/kernel/entry_32.S       |  1 -
 3 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 24671c43f930..e794111d7c03 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -19,6 +19,9 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
 #ifdef CONFIG_PPC32
 	if (regs->msr & MSR_EE)
 		trace_hardirqs_off();
+
+	if (user_mode(regs))
+		account_cpu_user_entry();
 #endif
 	/*
 	 * Book3E reconciles irq soft mask in asm
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 3dceb64fc9af..8998122fc7e2 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -23,18 +23,8 @@
  */
 
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb)
 #define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb)
 #else
-#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb)				\
-	MFTB(ra);			/* get timebase */		\
-	PPC_LL	rb, ACCOUNT_STARTTIME_USER(ptr);			\
-	PPC_STL	ra, ACCOUNT_STARTTIME(ptr);				\
-	subf	rb,rb,ra;		/* subtract start value */	\
-	PPC_LL	ra, ACCOUNT_USER_TIME(ptr);				\
-	add	ra,ra,rb;		/* add on to user time */	\
-	PPC_STL	ra, ACCOUNT_USER_TIME(ptr);				\
-
 #define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb)				\
 	MFTB(ra);			/* get timebase */		\
 	PPC_LL	rb, ACCOUNT_STARTTIME(ptr);				\
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 53fb16f21b07..87c06e241fc2 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -152,7 +152,6 @@ transfer_to_handler:
 	lwz	r12,THREAD_DBCR0(r12)
 	andis.	r12,r12,DBCR0_IDM@h
 #endif
-	ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
 #ifdef CONFIG_PPC_BOOK3S_32
 	kuep_lock r11, r12
 #endif
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 06/41] powerpc/40x: Prepare for enabling MMU in critical exception prolog
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

In order the enable MMU early in exception prolog, implement
CONFIG_VMAP_STACK principles in critical exception prolog.

There is no intention to use CONFIG_VMAP_STACK on 40x,
but related code will be used to enable MMU early in exception
in a later patch.

Also address (critirq_ctx-PAGE_OFFSET) directly instead of
using tophys() in order to win one instruction.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_40x.S | 40 +++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 5b337bf49bcb..1468f38c3860 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -89,6 +89,12 @@ _ENTRY(crit_srr0)
 	.space	4
 _ENTRY(crit_srr1)
 	.space	4
+_ENTRY(crit_r1)
+	.space	4
+_ENTRY(crit_dear)
+	.space	4
+_ENTRY(crit_esr)
+	.space	4
 _ENTRY(saved_ksp_limit)
 	.space	4
 
@@ -107,32 +113,60 @@ _ENTRY(saved_ksp_limit)
 	mfspr	r11,SPRN_SRR1
 	stw	r10,crit_srr0@l(0)
 	stw	r11,crit_srr1@l(0)
+#ifdef CONFIG_VMAP_STACK
+	mfspr	r10,SPRN_DEAR
+	mfspr	r11,SPRN_ESR
+	stw	r10,crit_dear@l(0)
+	stw	r11,crit_esr@l(0)
+#endif
 	mfcr	r10			/* save CR in r10 for now	   */
 	mfspr	r11,SPRN_SRR3		/* check whether user or kernel    */
 	andi.	r11,r11,MSR_PR
-	lis	r11,critirq_ctx@ha
-	tophys(r11,r11)
-	lwz	r11,critirq_ctx@l(r11)
+	lis	r11,(critirq_ctx-PAGE_OFFSET)@ha
+	lwz	r11,(critirq_ctx-PAGE_OFFSET)@l(r11)
 	beq	1f
 	/* COMING FROM USER MODE */
 	mfspr	r11,SPRN_SPRG_THREAD	/* if from user, start at top of   */
 	lwz	r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */
+#ifdef CONFIG_VMAP_STACK
+1:	stw	r1,crit_r1@l(0)
+	addi	r1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm  */
+	LOAD_REG_IMMEDIATE(r11,MSR_KERNEL & ~(MSR_IR | MSR_RI))
+	mtmsr	r11
+	isync
+	lwz	r11,crit_r1@l(0)
+	stw	r11,GPR1(r1)
+	stw	r11,0(r1)
+	mr	r11,r1
+#else
 1:	addi	r11,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm  */
 	tophys(r11,r11)
 	stw	r1,GPR1(r11)
 	stw	r1,0(r11)
 	tovirt(r1,r11)
+#endif
 	stw	r10,_CCR(r11)		/* save various registers	   */
 	stw	r12,GPR12(r11)
 	stw	r9,GPR9(r11)
 	mflr	r10
 	stw	r10,_LINK(r11)
+#ifdef CONFIG_VMAP_STACK
+	lis	r9,PAGE_OFFSET@ha
+	lwz	r10,crit_r10@l(r9)
+	lwz	r12,crit_r11@l(r9)
+#else
 	lwz	r10,crit_r10@l(0)
 	lwz	r12,crit_r11@l(0)
+#endif
 	stw	r10,GPR10(r11)
 	stw	r12,GPR11(r11)
+#ifdef CONFIG_VMAP_STACK
+	lwz	r12,crit_dear@l(r9)
+	lwz	r9,crit_esr@l(r9)
+#else
 	mfspr	r12,SPRN_DEAR		/* save DEAR and ESR in the frame  */
 	mfspr	r9,SPRN_ESR		/* in them at the point where the  */
+#endif
 	stw	r12,_DEAR(r11)		/* since they may have had stuff   */
 	stw	r9,_ESR(r11)		/* exception was taken		   */
 	mfspr	r12,SPRN_SRR2
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 05/41] powerpc/40x: Reorder a few instructions in critical exception prolog
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

In order to ease preparation for CONFIG_VMAP_STACK, reorder
a few instruction, especially save r1 into stack frame earlier.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_40x.S | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 067ae1302c1c..5b337bf49bcb 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -119,6 +119,9 @@ _ENTRY(saved_ksp_limit)
 	lwz	r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */
 1:	addi	r11,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm  */
 	tophys(r11,r11)
+	stw	r1,GPR1(r11)
+	stw	r1,0(r11)
+	tovirt(r1,r11)
 	stw	r10,_CCR(r11)		/* save various registers	   */
 	stw	r12,GPR12(r11)
 	stw	r9,GPR9(r11)
@@ -129,14 +132,11 @@ _ENTRY(saved_ksp_limit)
 	stw	r10,GPR10(r11)
 	stw	r12,GPR11(r11)
 	mfspr	r12,SPRN_DEAR		/* save DEAR and ESR in the frame  */
-	stw	r12,_DEAR(r11)		/* since they may have had stuff   */
 	mfspr	r9,SPRN_ESR		/* in them at the point where the  */
+	stw	r12,_DEAR(r11)		/* since they may have had stuff   */
 	stw	r9,_ESR(r11)		/* exception was taken		   */
 	mfspr	r12,SPRN_SRR2
-	stw	r1,GPR1(r11)
 	mfspr	r9,SPRN_SRR3
-	stw	r1,0(r11)
-	tovirt(r1,r11)
 	rlwinm	r9,r9,0,14,12		/* clear MSR_WE (necessary?)	   */
 	stw	r0,GPR0(r11)
 	lis	r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 07/41] powerpc/40x: Prepare normal exception handler for enabling MMU early
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

Ensure normal exception handler are able to manage stuff with
MMU enabled. For that we use CONFIG_VMAP_STACK related code
allthough there is no intention to really activate CONFIG_VMAP_STACK
on powerpc 40x for the moment.

40x uses SPRN_DEAR instead of SPRN_DAR and SPRN_ESR instead of
SPRN_DSISR. Take it into account in common macros.

40x MSR value doesn't fit on 15 bits, use LOAD_REG_IMMEDIATE() in
common macros that will be used also with 40x.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/entry_32.S |  2 +-
 arch/powerpc/kernel/head_32.h  | 15 ++++++++++++++-
 arch/powerpc/kernel/head_40x.S | 17 ++++++-----------
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 1e59d0bb1a6f..9312634085ba 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -162,7 +162,7 @@ transfer_to_handler:
 	li	r12,-1			/* clear all pending debug events */
 	mtspr	SPRN_DBSR,r12
 	lis	r11,global_dbcr0@ha
-	tophys(r11,r11)
+	tophys_novmstack r11,r11
 	addi	r11,r11,global_dbcr0@l
 #ifdef CONFIG_SMP
 	lwz	r9,TASK_CPU(r2)
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 5d4706c14572..ac6b391f1493 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -22,9 +22,17 @@
 #ifdef CONFIG_VMAP_STACK
 	mfspr	r10, SPRN_SPRG_THREAD
 	.if	\handle_dar_dsisr
+#ifdef CONFIG_40x
+	mfspr	r11, SPRN_DEAR
+#else
 	mfspr	r11, SPRN_DAR
+#endif
 	stw	r11, DAR(r10)
+#ifdef CONFIG_40x
+	mfspr	r11, SPRN_ESR
+#else
 	mfspr	r11, SPRN_DSISR
+#endif
 	stw	r11, DSISR(r10)
 	.endif
 	mfspr	r11, SPRN_SRR0
@@ -61,7 +69,7 @@
 
 .macro EXCEPTION_PROLOG_2 handle_dar_dsisr=0
 #ifdef CONFIG_VMAP_STACK
-	li	r11, MSR_KERNEL & ~(MSR_IR | MSR_RI) /* can take DTLB miss */
+	LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR | MSR_RI)) /* can take DTLB miss */
 	mtmsr	r11
 	isync
 	mfspr	r11, SPRN_SPRG_SCRATCH2
@@ -158,8 +166,13 @@
 
 .macro save_dar_dsisr_on_stack reg1, reg2, sp
 #ifndef CONFIG_VMAP_STACK
+#ifdef CONFIG_40x
+	mfspr	\reg1, SPRN_DEAR
+	mfspr	\reg2, SPRN_ESR
+#else
 	mfspr	\reg1, SPRN_DAR
 	mfspr	\reg2, SPRN_DSISR
+#endif
 	stw	\reg1, _DAR(\sp)
 	stw	\reg2, _DSISR(\sp)
 #endif
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 1468f38c3860..4bf0aee858eb 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -221,11 +221,8 @@ _ENTRY(saved_ksp_limit)
  * if they can't resolve the lightweight TLB fault.
  */
 	START_EXCEPTION(0x0300,	DataStorage)
-	EXCEPTION_PROLOG
-	mfspr	r5, SPRN_ESR		/* Grab the ESR, save it */
-	stw	r5, _ESR(r11)
-	mfspr	r4, SPRN_DEAR		/* Grab the DEAR, save it */
-	stw	r4, _DEAR(r11)
+	EXCEPTION_PROLOG handle_dar_dsisr=1
+	save_dar_dsisr_on_stack	r4, r5, r11
 	EXC_XFER_LITE(0x300, handle_page_fault)
 
 /*
@@ -244,17 +241,15 @@ _ENTRY(saved_ksp_limit)
 
 /* 0x0600 - Alignment Exception */
 	START_EXCEPTION(0x0600, Alignment)
-	EXCEPTION_PROLOG
-	mfspr	r4,SPRN_DEAR		/* Grab the DEAR and save it */
-	stw	r4,_DEAR(r11)
+	EXCEPTION_PROLOG handle_dar_dsisr=1
+	save_dar_dsisr_on_stack r4, r5, r11
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	EXC_XFER_STD(0x600, alignment_exception)
 
 /* 0x0700 - Program Exception */
 	START_EXCEPTION(0x0700, ProgramCheck)
-	EXCEPTION_PROLOG
-	mfspr	r4,SPRN_ESR		/* Grab the ESR and save it */
-	stw	r4,_ESR(r11)
+	EXCEPTION_PROLOG handle_dar_dsisr=1
+	save_dar_dsisr_on_stack r4, r5, r11
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	EXC_XFER_STD(0x700, program_check_exception)
 
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 08/41] powerpc/32: Reconcile interrupts in C
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

There is no need for this to be in asm anymore,
use the new interrupt entry wrapper.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/interrupt.h |  4 ++
 arch/powerpc/kernel/entry_32.S       | 58 ----------------------------
 2 files changed, 4 insertions(+), 58 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 4badb3e51c19..24671c43f930 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -16,6 +16,10 @@ struct interrupt_state {
 
 static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
 {
+#ifdef CONFIG_PPC32
+	if (regs->msr & MSR_EE)
+		trace_hardirqs_off();
+#endif
 	/*
 	 * Book3E reconciles irq soft mask in asm
 	 */
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 9312634085ba..53fb16f21b07 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -202,22 +202,6 @@ transfer_to_handler_cont:
 	lwz	r9,4(r9)		/* where to go when done */
 #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
 	mtspr	SPRN_NRI, r0
-#endif
-#ifdef CONFIG_TRACE_IRQFLAGS
-	/*
-	 * When tracing IRQ state (lockdep) we enable the MMU before we call
-	 * the IRQ tracing functions as they might access vmalloc space or
-	 * perform IOs for console output.
-	 *
-	 * To speed up the syscall path where interrupts stay on, let's check
-	 * first if we are changing the MSR value at all.
-	 */
-	tophys_novmstack r12, r1
-	lwz	r12,_MSR(r12)
-	andi.	r12,r12,MSR_EE
-	bne	1f
-
-	/* MSR isn't changing, just transition directly */
 #endif
 	mtspr	SPRN_SRR0,r11
 	mtspr	SPRN_SRR1,r10
@@ -244,48 +228,6 @@ transfer_to_handler_cont:
 _ASM_NOKPROBE_SYMBOL(transfer_to_handler)
 _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont)
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-1:	/* MSR is changing, re-enable MMU so we can notify lockdep. We need to
-	 * keep interrupts disabled at this point otherwise we might risk
-	 * taking an interrupt before we tell lockdep they are enabled.
-	 */
-	lis	r12,reenable_mmu@h
-	ori	r12,r12,reenable_mmu@l
-	LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
-	mtspr	SPRN_SRR0,r12
-	mtspr	SPRN_SRR1,r0
-	rfi
-#ifdef CONFIG_40x
-	b .	/* Prevent prefetch past rfi */
-#endif
-
-reenable_mmu:
-	/*
-	 * We save a bunch of GPRs,
-	 * r3 can be different from GPR3(r1) at this point, r9 and r11
-	 * contains the old MSR and handler address respectively,
-	 * r0, r4-r8, r12, CCR, CTR, XER etc... are left
-	 * clobbered as they aren't useful past this point.
-	 */
-
-	stwu	r1,-32(r1)
-	stw	r9,8(r1)
-	stw	r11,12(r1)
-	stw	r3,16(r1)
-
-	/* If we are disabling interrupts (normal case), simply log it with
-	 * lockdep
-	 */
-1:	bl	trace_hardirqs_off
-	lwz	r3,16(r1)
-	lwz	r11,12(r1)
-	lwz	r9,8(r1)
-	addi	r1,r1,32
-	mtctr	r11
-	mtlr	r9
-	bctr				/* jump to handler */
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
 #ifndef CONFIG_VMAP_STACK
 /*
  * On kernel stack overflow, load up an initial stack pointer
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 02/41] powerpc/40x: Don't use SPRN_SPRG_SCRATCH0/1 in TLB miss handlers
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

SPRN_SPRG_SCRATCH5 is used to save SPRN_PID.
SPRN_SPRG_SCRATCH6 is already available.

SPRN_PID is only 8 bits. We have r12 that contains CR.
We only need to preserve CR0, so we have space available in r12
to save PID.

Keep PID in r12 and free up SPRN_SPRG_SCRATCH5.

Then In TLB miss handlers, instead of using SPRN_SPRG_SCRATCH0 and
SPRN_SPRG_SCRATCH1, use SPRN_SPRG_SCRATCH5 and SPRN_SPRG_SCRATCH6
to avoid future conflicts with normal exception prologs.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_40x.S | 39 ++++++++++++++++------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 24724a7dad49..383238a98f77 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -249,13 +249,13 @@ _ENTRY(saved_ksp_limit)
  * load TLB entries from the page table if they exist.
  */
 	START_EXCEPTION(0x1100,	DTLBMiss)
-	mtspr	SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
-	mtspr	SPRN_SPRG_SCRATCH1, r11
+	mtspr	SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */
+	mtspr	SPRN_SPRG_SCRATCH6, r11
 	mtspr	SPRN_SPRG_SCRATCH3, r12
 	mtspr	SPRN_SPRG_SCRATCH4, r9
 	mfcr	r12
 	mfspr	r9, SPRN_PID
-	mtspr	SPRN_SPRG_SCRATCH5, r9
+	rlwimi	r12, r9, 0, 0xff
 	mfspr	r10, SPRN_DEAR		/* Get faulting address */
 
 	/* If we are faulting a kernel address, we have to use the
@@ -316,13 +316,12 @@ _ENTRY(saved_ksp_limit)
 	/* The bailout.  Restore registers to pre-exception conditions
 	 * and call the heavyweights to help us out.
 	 */
-	mfspr	r9, SPRN_SPRG_SCRATCH5
-	mtspr	SPRN_PID, r9
-	mtcr	r12
+	mtspr	SPRN_PID, r12
+	mtcrf	0x80, r12
 	mfspr	r9, SPRN_SPRG_SCRATCH4
 	mfspr	r12, SPRN_SPRG_SCRATCH3
-	mfspr	r11, SPRN_SPRG_SCRATCH1
-	mfspr	r10, SPRN_SPRG_SCRATCH0
+	mfspr	r11, SPRN_SPRG_SCRATCH6
+	mfspr	r10, SPRN_SPRG_SCRATCH5
 	b	DataStorage
 
 /* 0x1200 - Instruction TLB Miss Exception
@@ -330,13 +329,13 @@ _ENTRY(saved_ksp_limit)
  * registers and bailout to a different point.
  */
 	START_EXCEPTION(0x1200,	ITLBMiss)
-	mtspr	SPRN_SPRG_SCRATCH0, r10	 /* Save some working registers */
-	mtspr	SPRN_SPRG_SCRATCH1, r11
+	mtspr	SPRN_SPRG_SCRATCH5, r10	 /* Save some working registers */
+	mtspr	SPRN_SPRG_SCRATCH6, r11
 	mtspr	SPRN_SPRG_SCRATCH3, r12
 	mtspr	SPRN_SPRG_SCRATCH4, r9
 	mfcr	r12
 	mfspr	r9, SPRN_PID
-	mtspr	SPRN_SPRG_SCRATCH5, r9
+	rlwimi	r12, r9, 0, 0xff
 	mfspr	r10, SPRN_SRR0		/* Get faulting address */
 
 	/* If we are faulting a kernel address, we have to use the
@@ -397,13 +396,12 @@ _ENTRY(saved_ksp_limit)
 	/* The bailout.  Restore registers to pre-exception conditions
 	 * and call the heavyweights to help us out.
 	 */
-	mfspr	r9, SPRN_SPRG_SCRATCH5
-	mtspr	SPRN_PID, r9
-	mtcr	r12
+	mtspr	SPRN_PID, r12
+	mtcrf	0x80, r12
 	mfspr	r9, SPRN_SPRG_SCRATCH4
 	mfspr	r12, SPRN_SPRG_SCRATCH3
-	mfspr	r11, SPRN_SPRG_SCRATCH1
-	mfspr	r10, SPRN_SPRG_SCRATCH0
+	mfspr	r11, SPRN_SPRG_SCRATCH6
+	mfspr	r10, SPRN_SPRG_SCRATCH5
 	b	InstructionAccess
 
 	EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD)
@@ -543,13 +541,12 @@ finish_tlb_load:
 
 	/* Done...restore registers and get out of here.
 	*/
-	mfspr	r9, SPRN_SPRG_SCRATCH5
-	mtspr	SPRN_PID, r9
-	mtcr	r12
+	mtspr	SPRN_PID, r12
+	mtcrf	0x80, r12
 	mfspr	r9, SPRN_SPRG_SCRATCH4
 	mfspr	r12, SPRN_SPRG_SCRATCH3
-	mfspr	r11, SPRN_SPRG_SCRATCH1
-	mfspr	r10, SPRN_SPRG_SCRATCH0
+	mfspr	r11, SPRN_SPRG_SCRATCH6
+	mfspr	r10, SPRN_SPRG_SCRATCH5
 	rfi			/* Should sync shadow TLBs */
 	b	.		/* prevent prefetch past rfi */
 
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 03/41] powerpc/40x: Change CRITICAL_EXCEPTION_PROLOG macro to a gas macro
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

Change CRITICAL_EXCEPTION_PROLOG macro to a gas macro to
remove the ugly ; and \ on each line.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_40x.S | 71 +++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 383238a98f77..9cef423d574b 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -100,42 +100,43 @@ _ENTRY(saved_ksp_limit)
  * Instead we use a couple of words of memory at low physical addresses.
  * This is OK since we don't support SMP on these processors.
  */
-#define CRITICAL_EXCEPTION_PROLOG					     \
-	stw	r10,crit_r10@l(0);	/* save two registers to work with */\
-	stw	r11,crit_r11@l(0);					     \
-	mfcr	r10;			/* save CR in r10 for now	   */\
-	mfspr	r11,SPRN_SRR3;		/* check whether user or kernel    */\
-	andi.	r11,r11,MSR_PR;						     \
-	lis	r11,critirq_ctx@ha;					     \
-	tophys(r11,r11);						     \
-	lwz	r11,critirq_ctx@l(r11);					     \
-	beq	1f;							     \
-	/* COMING FROM USER MODE */					     \
-	mfspr	r11,SPRN_SPRG_THREAD;	/* if from user, start at top of   */\
-	lwz	r11,TASK_STACK-THREAD(r11); /* this thread's kernel stack */\
-1:	addi	r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm  */\
-	tophys(r11,r11);						     \
-	stw	r10,_CCR(r11);          /* save various registers	   */\
-	stw	r12,GPR12(r11);						     \
-	stw	r9,GPR9(r11);						     \
-	mflr	r10;							     \
-	stw	r10,_LINK(r11);						     \
-	mfspr	r12,SPRN_DEAR;		/* save DEAR and ESR in the frame  */\
-	stw	r12,_DEAR(r11);		/* since they may have had stuff   */\
-	mfspr	r9,SPRN_ESR;		/* in them at the point where the  */\
-	stw	r9,_ESR(r11);		/* exception was taken		   */\
-	mfspr	r12,SPRN_SRR2;						     \
-	stw	r1,GPR1(r11);						     \
-	mfspr	r9,SPRN_SRR3;						     \
-	stw	r1,0(r11);						     \
-	tovirt(r1,r11);							     \
-	rlwinm	r9,r9,0,14,12;		/* clear MSR_WE (necessary?)	   */\
-	stw	r0,GPR0(r11);						     \
-	lis	r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\
-	addi	r10, r10, STACK_FRAME_REGS_MARKER@l;			     \
-	stw	r10, 8(r11);						     \
-	SAVE_4GPRS(3, r11);						     \
+.macro CRITICAL_EXCEPTION_PROLOG
+	stw	r10,crit_r10@l(0)	/* save two registers to work with */
+	stw	r11,crit_r11@l(0)
+	mfcr	r10			/* save CR in r10 for now	   */
+	mfspr	r11,SPRN_SRR3		/* check whether user or kernel    */
+	andi.	r11,r11,MSR_PR
+	lis	r11,critirq_ctx@ha
+	tophys(r11,r11)
+	lwz	r11,critirq_ctx@l(r11)
+	beq	1f
+	/* COMING FROM USER MODE */
+	mfspr	r11,SPRN_SPRG_THREAD	/* if from user, start at top of   */
+	lwz	r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */
+1:	addi	r11,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm  */
+	tophys(r11,r11)
+	stw	r10,_CCR(r11)		/* save various registers	   */
+	stw	r12,GPR12(r11)
+	stw	r9,GPR9(r11)
+	mflr	r10
+	stw	r10,_LINK(r11)
+	mfspr	r12,SPRN_DEAR		/* save DEAR and ESR in the frame  */
+	stw	r12,_DEAR(r11)		/* since they may have had stuff   */
+	mfspr	r9,SPRN_ESR		/* in them at the point where the  */
+	stw	r9,_ESR(r11)		/* exception was taken		   */
+	mfspr	r12,SPRN_SRR2
+	stw	r1,GPR1(r11)
+	mfspr	r9,SPRN_SRR3
+	stw	r1,0(r11)
+	tovirt(r1,r11)
+	rlwinm	r9,r9,0,14,12		/* clear MSR_WE (necessary?)	   */
+	stw	r0,GPR0(r11)
+	lis	r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+	addi	r10, r10, STACK_FRAME_REGS_MARKER@l
+	stw	r10, 8(r11)
+	SAVE_4GPRS(3, r11)
 	SAVE_2GPRS(7, r11)
+.endm
 
 	/*
 	 * State at this point:
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 00/41] powerpc/32: Switch to interrupt entry/exit in C
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel

This series aims at porting interrupt entry/exit in C on PPC32, using
the work already merged for PPC64.

First part do minimal changes in 40x in order to be able to enable MMU
earlier in exception entry.

Second part prepares and switches interrupt exit in C.

Third part moves more and more things in C, ending with KUAP management.

v1 is boot tested on 8xx and 83xx, releasing it as an RFC to get early feedback.

This series applies on top of the one switching ppc32 syscall entry/exit in C.

First patch is a bug fix already submitted but not yet merged that interracts with the series.

Christophe Leroy (41):
  powerpc/32: Preserve cr1 in exception prolog stack check to fix build
    error
  powerpc/40x: Don't use SPRN_SPRG_SCRATCH0/1 in TLB miss handlers
  powerpc/40x: Change CRITICAL_EXCEPTION_PROLOG macro to a gas macro
  powerpc/40x: Save SRR0/SRR1 and r10/r11 earlier in critical exception
  powerpc/40x: Reorder a few instructions in critical exception prolog
  powerpc/40x: Prepare for enabling MMU in critical exception prolog
  powerpc/40x: Prepare normal exception handler for enabling MMU early
  powerpc/32: Reconcile interrupts in C
  powerpc/32: Entry cpu time accounting in C
  powerpc/32: Handle bookE debugging in C in exception entry
  powerpc/32: Use fast instruction to set MSR RI in exception prolog on
    8xx
  powerpc/32: Remove ksp_limit
  powerpc/32: Always enable data translation in exception prolog
  powerpc/32: Tag DAR in EXCEPTION_PROLOG_2 for the 8xx
  powerpc/32: Enable instruction translation at the same time as data
    translation
  powerpc/32: Statically initialise first emergency context
  powerpc/32: Add vmap_stack_overflow label inside the macro
  powerpc/32: Use START_EXCEPTION() as much as possible
  powerpc/32: Move exception prolog code into .text once MMU is back on
  powerpc/32: Provide a name to exception prolog continuation in virtual
    mode
  powerpc/32: Refactor booke critical registers saving
  powerpc/32: Perform normal function call in exception entry
  powerpc/32: Always save non volatile registers on exception entry
  powerpc/32: Replace ASM exception exit by C exception exit from ppc64
  powerpc/32: Set regs parameter in r3 in transfer_to_handler
  powerpc/32: Remove handle_page_fault()
  powerpc/32: Save trap number on stack in exception prolog
  powerpc/32: Add a prepare_transfer_to_handler macro for exception
    prologs
  powerpc/32: Only restore non volatile registers when required
  powerpc/32: Dismantle EXC_XFER_STD/LITE/TEMPLATE
  powerpc/32: Remove the xfer parameter in EXCEPTION() macro
  powerpc/32: Refactor saving of volatile registers in exception prologs
  powerpc/32: Save remaining registers in exception prolog
  powerpc/32: Set current->thread.regs in C interrupt entry
  powerpc/32: Return directly from power_save_ppc32_restore()
  powerpc/32: Only use prepare_transfer_to_handler function on book3s/32
    and e500
  powerpc/32s: Move KUEP locking/unlocking in C
  powerpc/64s: Make kuap_check_amr() and kuap_get_and_check_amr()
    generic
  powerpc/32s: Create C version of kuap save/restore/check helpers
  powerpc/8xx: Create C version of kuap save/restore/check helpers
  powerpc/32: Manage KUAP in C

 arch/powerpc/include/asm/book3s/32/kup.h     | 126 ++-
 arch/powerpc/include/asm/book3s/64/kup.h     |  24 +-
 arch/powerpc/include/asm/interrupt.h         |  21 +
 arch/powerpc/include/asm/kup.h               |  37 +-
 arch/powerpc/include/asm/nohash/32/kup-8xx.h |  58 +-
 arch/powerpc/include/asm/ppc_asm.h           |  10 -
 arch/powerpc/include/asm/processor.h         |   6 +-
 arch/powerpc/include/asm/ptrace.h            |  13 +-
 arch/powerpc/kernel/asm-offsets.c            |   4 -
 arch/powerpc/kernel/entry_32.S               | 810 ++++---------------
 arch/powerpc/kernel/fpu.S                    |   2 -
 arch/powerpc/kernel/head_32.h                | 197 ++---
 arch/powerpc/kernel/head_40x.S               | 271 ++++---
 arch/powerpc/kernel/head_44x.S               |  10 +-
 arch/powerpc/kernel/head_8xx.S               | 151 ++--
 arch/powerpc/kernel/head_book3s_32.S         | 239 +++---
 arch/powerpc/kernel/head_booke.h             | 188 +++--
 arch/powerpc/kernel/head_fsl_booke.S         |  64 +-
 arch/powerpc/kernel/idle_6xx.S               |  14 +-
 arch/powerpc/kernel/idle_e500.S              |  14 +-
 arch/powerpc/kernel/interrupt.c              |  35 +-
 arch/powerpc/kernel/misc_32.S                |  14 -
 arch/powerpc/kernel/process.c                |   6 +-
 arch/powerpc/kernel/setup_32.c               |   2 +-
 arch/powerpc/kernel/traps.c                  |   9 -
 arch/powerpc/kernel/vector.S                 |   2 -
 arch/powerpc/lib/sstep.c                     |   9 -
 arch/powerpc/mm/book3s32/Makefile            |   1 +
 arch/powerpc/mm/book3s32/hash_low.S          |  14 -
 arch/powerpc/mm/book3s32/kuep.c              |  38 +
 arch/powerpc/mm/fault.c                      |   4 +-
 31 files changed, 875 insertions(+), 1518 deletions(-)
 create mode 100644 arch/powerpc/mm/book3s32/kuep.c

-- 
2.25.0


^ permalink raw reply

* [RFC PATCH v1 04/41] powerpc/40x: Save SRR0/SRR1 and r10/r11 earlier in critical exception
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

In order to be able to switch MMU on in exception prolog, save
SRR0 and SRR1 earlier.

Also save r10 and r11 into stack earlier to better match with the
normal exception prolog.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/entry_32.S | 9 ---------
 arch/powerpc/kernel/head_40x.S | 8 ++++++++
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 8dea4d3b1d06..1e59d0bb1a6f 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -107,15 +107,6 @@ _ASM_NOKPROBE_SYMBOL(crit_transfer_to_handler)
 #ifdef CONFIG_40x
 	.globl	crit_transfer_to_handler
 crit_transfer_to_handler:
-	lwz	r0,crit_r10@l(0)
-	stw	r0,GPR10(r11)
-	lwz	r0,crit_r11@l(0)
-	stw	r0,GPR11(r11)
-	mfspr	r0,SPRN_SRR0
-	stw	r0,crit_srr0@l(0)
-	mfspr	r0,SPRN_SRR1
-	stw	r0,crit_srr1@l(0)
-
 	/* set the stack limit to the current stack */
 	mfspr	r8,SPRN_SPRG_THREAD
 	lwz	r0,KSP_LIMIT(r8)
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 9cef423d574b..067ae1302c1c 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -103,6 +103,10 @@ _ENTRY(saved_ksp_limit)
 .macro CRITICAL_EXCEPTION_PROLOG
 	stw	r10,crit_r10@l(0)	/* save two registers to work with */
 	stw	r11,crit_r11@l(0)
+	mfspr	r10,SPRN_SRR0
+	mfspr	r11,SPRN_SRR1
+	stw	r10,crit_srr0@l(0)
+	stw	r11,crit_srr1@l(0)
 	mfcr	r10			/* save CR in r10 for now	   */
 	mfspr	r11,SPRN_SRR3		/* check whether user or kernel    */
 	andi.	r11,r11,MSR_PR
@@ -120,6 +124,10 @@ _ENTRY(saved_ksp_limit)
 	stw	r9,GPR9(r11)
 	mflr	r10
 	stw	r10,_LINK(r11)
+	lwz	r10,crit_r10@l(0)
+	lwz	r12,crit_r11@l(0)
+	stw	r10,GPR10(r11)
+	stw	r12,GPR11(r11)
 	mfspr	r12,SPRN_DEAR		/* save DEAR and ESR in the frame  */
 	stw	r12,_DEAR(r11)		/* since they may have had stuff   */
 	mfspr	r9,SPRN_ESR		/* in them at the point where the  */
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 01/41] powerpc/32: Preserve cr1 in exception prolog stack check to fix build error
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

THREAD_ALIGN_SHIFT = THREAD_SHIFT + 1 = PAGE_SHIFT + 1
Maximum PAGE_SHIFT is 18 for 256k pages so
THREAD_ALIGN_SHIFT is 19 at the maximum.

No need to clobber cr1, it can be preserved when moving r1
into CR when we check stack overflow.

This reduces the number of instructions in Machine Check Exception
prolog and fixes a build failure reported by the kernel test robot
on v5.10 stable when building with RTAS + VMAP_STACK + KVM. That
build failure is due to too many instructions in the prolog hence
not fitting between 0x200 and 0x300. Allthough the problem doesn't
show up in mainline, it is still worth the change.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 98bf2d3f4970 ("powerpc/32s: Fix RTAS machine check with VMAP stack")
Cc: stable@vger.kernel.org
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/head_32.h        | 2 +-
 arch/powerpc/kernel/head_book3s_32.S | 6 ------
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 961b1ce3b6bf..5d4706c14572 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -47,7 +47,7 @@
 	lwz	r1,TASK_STACK-THREAD(r1)
 	addi	r1, r1, THREAD_SIZE - INT_FRAME_SIZE
 1:
-	mtcrf	0x7f, r1
+	mtcrf	0x3f, r1
 	bt	32 - THREAD_ALIGN_SHIFT, stack_overflow
 #else
 	subi	r11, r1, INT_FRAME_SIZE		/* use r1 if kernel */
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 086970bec32c..727fdab557c9 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -278,12 +278,6 @@ MachineCheck:
 7:	EXCEPTION_PROLOG_2
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 #ifdef CONFIG_PPC_CHRP
-#ifdef CONFIG_VMAP_STACK
-	mfspr	r4, SPRN_SPRG_THREAD
-	tovirt(r4, r4)
-	lwz	r4, RTAS_SP(r4)
-	cmpwi	cr1, r4, 0
-#endif
 	beq	cr1, machine_check_tramp
 	twi	31, 0, 0
 #else
-- 
2.25.0


^ permalink raw reply related

* [PATCH V3] powerpc/perf: Adds support for programming of Thresholding in P10
From: Kajol Jain @ 2021-02-09  9:52 UTC (permalink / raw)
  To: mpe; +Cc: kjain, atrajeev, maddy, linuxppc-dev

Thresholding, a performance monitoring unit feature, can be
used to identify marked instructions which take more than
expected cycles between start event and end event.
Threshold compare (thresh_cmp) bits are programmed in MMCRA
register. In Power9, thresh_cmp bits were part of the
event code. But in case of P10, thresh_cmp are not part of
event code due to inclusion of MMCR3 bits.

Patch here adds an option to use attr.config1 variable
to be used to pass thresh_cmp value to be programmed in
MMCRA register. A new ppmu flag called PPMU_HAS_ATTR_CONFIG1
has been added and this flag is used to notify the use of
attr.config1 variable.

Patch has extended the parameter list of 'compute_mmcr',
to include power_pmu's 'flags' element and parameter list of
get_constraint to include attr.config1 value. It also extend
parameter list of power_check_constraints inorder to pass
perf_event list.

As stated by commit ef0e3b650f8d ("powerpc/perf: Fix Threshold
Event Counter Multiplier width for P10"), constraint bits for
thresh_cmp is also needed to be increased to 11 bits, which is
handled as part of this patch. We added bit number 53 as part
of constraint bits of thresh_cmp for power10 to make it an
11 bit field.

Updated layout for p10:

/*
 * Layout of constraint bits:
 *
 *        60        56        52        48        44        40        36        32
 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 *   [   fab_match   ]         [       thresh_cmp      ] [   thresh_ctl    ] [   ]
 *                                          |                                  |
 *                           [  thresh_cmp bits for p10]           thresh_sel -*
 *
 *        28        24        20        16        12         8         4         0
 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
 *               [ ] |   [ ] |  [  sample ]   [     ]   [6] [5]   [4] [3]   [2] [1]
 *                |  |    |  |                  |
 *      BHRB IFM -*  |    |  |*radix_scope      |      Count of events for each PMC.
 *              EBB -*    |                     |        p1, p2, p3, p4, p5, p6.
 *      L1 I/D qualifier -*                     |
 *                     nc - number of counters -*
 *
 * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints
 * we want the low bit of each field to be added to any existing value.
 *
 * Everything else is a value field.
 */

Result:
command#: cat /sys/devices/cpu/format/thresh_cmp
config1:0-17

ex. usage:

command#: perf record -I --weight -d  -e
	 cpu/event=0x67340101EC,thresh_cmp=500/ ./ebizzy -S 2 -t 1 -s 4096
1826636 records/s
real  2.00 s
user  2.00 s
sys   0.00 s
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.038 MB perf.data (61 samples) ]

Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
---
 arch/powerpc/include/asm/perf_event_server.h |  5 +-
 arch/powerpc/perf/core-book3s.c              | 15 +++--
 arch/powerpc/perf/isa207-common.c            | 67 +++++++++++++++++---
 arch/powerpc/perf/isa207-common.h            | 15 +++--
 arch/powerpc/perf/mpc7450-pmu.c              |  5 +-
 arch/powerpc/perf/power10-pmu.c              |  4 +-
 arch/powerpc/perf/power5+-pmu.c              |  5 +-
 arch/powerpc/perf/power5-pmu.c               |  5 +-
 arch/powerpc/perf/power6-pmu.c               |  5 +-
 arch/powerpc/perf/power7-pmu.c               |  5 +-
 arch/powerpc/perf/ppc970-pmu.c               |  5 +-
 11 files changed, 102 insertions(+), 34 deletions(-)

---
Changelog
v2 -> v3
- Removed field 'events_config1' from struct cpu_hw_events
  and directly using attr.config1 field instead as suggested
  by Michael Ellerman.
- Extended the parameter list of 'power_check_constraints' function
  to also pass perf_event struct array as we need to access
  event's attr.config1 value. And since when we call
  'power_check_constraints' from 'power_pmu_event_init' the
  cpu_hw_events structure not get updated.

v1 -> v2
- Add new function 'p10_thresh_cmp_val' to evaluate thresh_cmp
  value.
- Extended the parameter list of get_constraint function
  to include attr.config1 value.
- Added bit number 53 as part of constraint bits of thresh_cmp
  to make it a 11 bit field for power10.
- Updated PPMU_HAS_ATTR_CONFIG1 value to 0x00000800
- Add new field 'events_config1' in struct cpu_hw_events and also
  update this field in all required functions accordingly.
---

diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 3b7baba01c92..00e7e671bb4b 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -36,9 +36,9 @@ struct power_pmu {
 	unsigned long	test_adder;
 	int		(*compute_mmcr)(u64 events[], int n_ev,
 				unsigned int hwc[], struct mmcr_regs *mmcr,
-				struct perf_event *pevents[]);
+				struct perf_event *pevents[], u32 flags);
 	int		(*get_constraint)(u64 event_id, unsigned long *mskp,
-				unsigned long *valp);
+				unsigned long *valp, u64 event_config1);
 	int		(*get_alternatives)(u64 event_id, unsigned int flags,
 				u64 alt[]);
 	void		(*get_mem_data_src)(union perf_mem_data_src *dsrc,
@@ -83,6 +83,7 @@ struct power_pmu {
 #define PPMU_NO_SIAR		0x00000100 /* Do not use SIAR */
 #define PPMU_ARCH_31		0x00000200 /* Has MMCR3, SIER2 and SIER3 */
 #define PPMU_P10_DD1		0x00000400 /* Is power10 DD1 processor version */
+#define PPMU_HAS_ATTR_CONFIG1	0x00000800 /* Using config1 attribute */
 
 /*
  * Values for flags to get_alternatives()
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 28206b1fe172..85073d9e79fe 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -915,7 +915,7 @@ void perf_event_print_debug(void)
  */
 static int power_check_constraints(struct cpu_hw_events *cpuhw,
 				   u64 event_id[], unsigned int cflags[],
-				   int n_ev)
+				   int n_ev, struct perf_event **event)
 {
 	unsigned long mask, value, nv;
 	unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS];
@@ -938,7 +938,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
 			event_id[i] = cpuhw->alternatives[i][0];
 		}
 		if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0],
-					 &cpuhw->avalues[i][0]))
+					 &cpuhw->avalues[i][0], event[i]->attr.config1))
 			return -1;
 	}
 	value = mask = 0;
@@ -973,7 +973,8 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
 		for (j = 1; j < n_alt[i]; ++j)
 			ppmu->get_constraint(cpuhw->alternatives[i][j],
 					     &cpuhw->amasks[i][j],
-					     &cpuhw->avalues[i][j]);
+					     &cpuhw->avalues[i][j],
+					     event[i]->attr.config1);
 	}
 
 	/* enumerate all possibilities and see if any will work */
@@ -1391,7 +1392,7 @@ static void power_pmu_enable(struct pmu *pmu)
 	memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
 
 	if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
-			       &cpuhw->mmcr, cpuhw->event)) {
+			       &cpuhw->mmcr, cpuhw->event, ppmu->flags)) {
 		/* shouldn't ever get here */
 		printk(KERN_ERR "oops compute_mmcr failed\n");
 		goto out;
@@ -1579,7 +1580,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
 
 	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
 		goto out;
-	if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
+	if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1, cpuhw->event))
 		goto out;
 	event->hw.config = cpuhw->events[n0];
 
@@ -1789,7 +1790,7 @@ static int power_pmu_commit_txn(struct pmu *pmu)
 	n = cpuhw->n_events;
 	if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
 		return -EAGAIN;
-	i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
+	i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n, cpuhw->event);
 	if (i < 0)
 		return -EAGAIN;
 
@@ -2027,7 +2028,7 @@ static int power_pmu_event_init(struct perf_event *event)
 	local_irq_save(irq_flags);
 	cpuhw = this_cpu_ptr(&cpu_hw_events);
 
-	err = power_check_constraints(cpuhw, events, cflags, n + 1);
+	err = power_check_constraints(cpuhw, events, cflags, n + 1, ctrs);
 
 	if (has_branch_stack(event)) {
 		u64 bhrb_filter = -1;
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 6ab5b272090a..e4f577da33d8 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -108,12 +108,57 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
 		*mmcra |= MMCRA_SDAR_MODE_TLB;
 }
 
+static u64 p10_thresh_cmp_val(u64 value)
+{
+	int exp = 0;
+	u64 result = value;
+
+	if (!value)
+		return value;
+
+	/*
+	 * Incase of P10, thresh_cmp value is not part of raw event code
+	 * and provided via attr.config1 parameter. To program threshold in MMCRA,
+	 * take a 18 bit number N and shift right 2 places and increment
+	 * the exponent E by 1 until the upper 10 bits of N are zero.
+	 * Write E to the threshold exponent and write the lower 8 bits of N
+	 * to the threshold mantissa.
+	 * The max threshold that can be written is 261120.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		if (value > 261120)
+			value = 261120;
+		while ((64 - __builtin_clzl(value)) > 8) {
+			exp++;
+			value >>= 2;
+		}
+
+		/*
+		 * Note that it is invalid to write a mantissa with the
+		 * upper 2 bits of mantissa being zero, unless the
+		 * exponent is also zero.
+		 */
+		if (!(value & 0xC0) && exp)
+			result = 0;
+		else
+			result = (exp << 8) | value;
+	}
+	return result;
+}
+
 static u64 thresh_cmp_val(u64 value)
 {
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		value = p10_thresh_cmp_val(value);
+
+	/*
+	 * Since location of threshold compare bits in MMCRA
+	 * is different for p8, using different shift value.
+	 */
 	if (cpu_has_feature(CPU_FTR_ARCH_300))
 		return value << p9_MMCRA_THR_CMP_SHIFT;
-
-	return value << MMCRA_THR_CMP_SHIFT;
+	else
+		return value << MMCRA_THR_CMP_SHIFT;
 }
 
 static unsigned long combine_from_event(u64 event)
@@ -141,13 +186,13 @@ static bool is_thresh_cmp_valid(u64 event)
 {
 	unsigned int cmp, exp;
 
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return p10_thresh_cmp_val(event) != 0;
+
 	/*
 	 * Check the mantissa upper two bits are not zero, unless the
 	 * exponent is also zero. See the THRESH_CMP_MANTISSA doc.
-	 * Power10: thresh_cmp is replaced by l2_l3 event select.
 	 */
-	if (cpu_has_feature(CPU_FTR_ARCH_31))
-		return false;
 
 	cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
 	exp = cmp >> 7;
@@ -256,7 +301,7 @@ void isa207_get_mem_weight(u64 *weight)
 		*weight = mantissa << (2 * exp);
 }
 
-int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
+int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1)
 {
 	unsigned int unit, pmc, cache, ebb;
 	unsigned long mask, value;
@@ -355,9 +400,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
 	}
 
 	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
-		if (event_is_threshold(event)) {
+		if (event_is_threshold(event) && is_thresh_cmp_valid(event_config1)) {
 			mask  |= CNST_THRESH_CTL_SEL_MASK;
 			value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT);
+			mask  |= p10_CNST_THRESH_CMP_MASK;
+			value |= p10_CNST_THRESH_CMP_VAL(p10_thresh_cmp_val(event_config1));
 		}
 	} else if (cpu_has_feature(CPU_FTR_ARCH_300))  {
 		if (event_is_threshold(event) && is_thresh_cmp_valid(event)) {
@@ -411,7 +458,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
 
 int isa207_compute_mmcr(u64 event[], int n_ev,
 			       unsigned int hwc[], struct mmcr_regs *mmcr,
-			       struct perf_event *pevents[])
+			       struct perf_event *pevents[], u32 flags)
 {
 	unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
 	unsigned long mmcr3;
@@ -504,6 +551,10 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
 				val = (event[i] >> EVENT_THR_CMP_SHIFT) &
 					EVENT_THR_CMP_MASK;
 				mmcra |= thresh_cmp_val(val);
+			} else if (flags & PPMU_HAS_ATTR_CONFIG1) {
+				val = (pevents[i]->attr.config1 >> p10_EVENT_THR_CMP_SHIFT) &
+					p10_EVENT_THR_CMP_MASK;
+				mmcra |= thresh_cmp_val(val);
 			}
 		}
 
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 454b32c31440..1af0e8c97ac7 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -105,6 +105,10 @@
 #define p10_EVENT_RADIX_SCOPE_QUAL_MASK	0x1
 #define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT	45
 
+/* Event Threshold Compare bit constant for power10 in config1 attribute */
+#define p10_EVENT_THR_CMP_SHIFT        0
+#define p10_EVENT_THR_CMP_MASK 0x3FFFFull
+
 #define p10_EVENT_VALID_MASK		\
 	((p10_SDAR_MODE_MASK   << p10_SDAR_MODE_SHIFT		|	\
 	(p10_EVENT_THRESH_MASK  << EVENT_THRESH_SHIFT)		|	\
@@ -124,8 +128,8 @@
  *        60        56        52        48        44        40        36        32
  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
  *   [   fab_match   ]         [       thresh_cmp      ] [   thresh_ctl    ] [   ]
- *                                                                             |
- *                                                                 thresh_sel -*
+ *                                          |                                  |
+ *                           [  thresh_cmp bits for p10]           thresh_sel -*
  *
  *        28        24        20        16        12         8         4         0
  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
@@ -152,6 +156,9 @@
 #define CNST_THRESH_CTL_SEL_VAL(v)	(((v) & 0x7ffull) << 32)
 #define CNST_THRESH_CTL_SEL_MASK	CNST_THRESH_CTL_SEL_VAL(0x7ff)
 
+#define p10_CNST_THRESH_CMP_VAL(v) (((v) & 0x7ffull) << 43)
+#define p10_CNST_THRESH_CMP_MASK   p10_CNST_THRESH_CMP_VAL(0x7ff)
+
 #define CNST_EBB_VAL(v)		(((v) & EVENT_EBB_MASK) << 24)
 #define CNST_EBB_MASK		CNST_EBB_VAL(EVENT_EBB_MASK)
 
@@ -262,10 +269,10 @@
 #define PH(a, b)			(P(LVL, HIT) | P(a, b))
 #define PM(a, b)			(P(LVL, MISS) | P(a, b))
 
-int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp);
+int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1);
 int isa207_compute_mmcr(u64 event[], int n_ev,
 				unsigned int hwc[], struct mmcr_regs *mmcr,
-				struct perf_event *pevents[]);
+				struct perf_event *pevents[], u32 flags);
 void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr);
 int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
 					const unsigned int ev_alt[][MAX_ALT]);
diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c
index 1919e9df9165..e39b15b79a83 100644
--- a/arch/powerpc/perf/mpc7450-pmu.c
+++ b/arch/powerpc/perf/mpc7450-pmu.c
@@ -148,7 +148,7 @@ static u32 classbits[N_CLASSES - 1][2] = {
 };
 
 static int mpc7450_get_constraint(u64 event, unsigned long *maskp,
-				  unsigned long *valp)
+				  unsigned long *valp, u64 event_config1 __maybe_unused)
 {
 	int pmc, class;
 	u32 mask, value;
@@ -258,7 +258,8 @@ static const u32 pmcsel_mask[N_COUNTER] = {
  */
 static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[],
 				struct mmcr_regs *mmcr,
-				struct perf_event *pevents[])
+				struct perf_event *pevents[],
+				u32 flags __maybe_unused)
 {
 	u8 event_index[N_CLASSES][N_COUNTER];
 	int n_classevent[N_CLASSES];
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index 79e0206ca454..a901c1348cad 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -216,6 +216,7 @@ PMU_FORMAT_ATTR(invert_bit,     "config:47");
 PMU_FORMAT_ATTR(src_mask,       "config:48-53");
 PMU_FORMAT_ATTR(src_match,      "config:54-59");
 PMU_FORMAT_ATTR(radix_scope,	"config:9");
+PMU_FORMAT_ATTR(thresh_cmp,     "config1:0-17");
 
 static struct attribute *power10_pmu_format_attr[] = {
 	&format_attr_event.attr,
@@ -236,6 +237,7 @@ static struct attribute *power10_pmu_format_attr[] = {
 	&format_attr_src_mask.attr,
 	&format_attr_src_match.attr,
 	&format_attr_radix_scope.attr,
+	&format_attr_thresh_cmp.attr,
 	NULL,
 };
 
@@ -550,7 +552,7 @@ static struct power_pmu power10_pmu = {
 	.get_mem_weight		= isa207_get_mem_weight,
 	.disable_pmc		= isa207_disable_pmc,
 	.flags			= PPMU_HAS_SIER | PPMU_ARCH_207S |
-				  PPMU_ARCH_31,
+				  PPMU_ARCH_31 | PPMU_HAS_ATTR_CONFIG1,
 	.n_generic		= ARRAY_SIZE(power10_generic_events),
 	.generic_events		= power10_generic_events,
 	.cache_events		= &power10_cache_events,
diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c
index 3e64b4a1511f..18732267993a 100644
--- a/arch/powerpc/perf/power5+-pmu.c
+++ b/arch/powerpc/perf/power5+-pmu.c
@@ -132,7 +132,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
 };
 
 static int power5p_get_constraint(u64 event, unsigned long *maskp,
-				  unsigned long *valp)
+				  unsigned long *valp, u64 event_config1 __maybe_unused)
 {
 	int pmc, byte, unit, sh;
 	int bit, fmask;
@@ -451,7 +451,8 @@ static int power5p_marked_instr_event(u64 event)
 
 static int power5p_compute_mmcr(u64 event[], int n_ev,
 				unsigned int hwc[], struct mmcr_regs *mmcr,
-				struct perf_event *pevents[])
+				struct perf_event *pevents[],
+				u32 flags __maybe_unused)
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = 0;
diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c
index 017bb19b73fb..cb611c1e7abe 100644
--- a/arch/powerpc/perf/power5-pmu.c
+++ b/arch/powerpc/perf/power5-pmu.c
@@ -136,7 +136,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
 };
 
 static int power5_get_constraint(u64 event, unsigned long *maskp,
-				 unsigned long *valp)
+				 unsigned long *valp, u64 event_config1 __maybe_unused)
 {
 	int pmc, byte, unit, sh;
 	int bit, fmask;
@@ -382,7 +382,8 @@ static int power5_marked_instr_event(u64 event)
 
 static int power5_compute_mmcr(u64 event[], int n_ev,
 			       unsigned int hwc[], struct mmcr_regs *mmcr,
-			       struct perf_event *pevents[])
+			       struct perf_event *pevents[],
+			       u32 flags __maybe_unused)
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
index 189974478e9f..69ef38216418 100644
--- a/arch/powerpc/perf/power6-pmu.c
+++ b/arch/powerpc/perf/power6-pmu.c
@@ -173,7 +173,8 @@ static int power6_marked_instr_event(u64 event)
  * Assign PMC numbers and compute MMCR1 value for a set of events
  */
 static int p6_compute_mmcr(u64 event[], int n_ev,
-			   unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[])
+			   unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[],
+			   u32 flags __maybe_unused)
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
@@ -266,7 +267,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
  *	32-34	select field: nest (subunit) event selector
  */
 static int p6_get_constraint(u64 event, unsigned long *maskp,
-			     unsigned long *valp)
+			     unsigned long *valp, u64 event_config1 __maybe_unused)
 {
 	int pmc, byte, sh, subunit;
 	unsigned long mask = 0, value = 0;
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index bacfab104a1a..894c17f9a762 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -81,7 +81,7 @@ enum {
  */
 
 static int power7_get_constraint(u64 event, unsigned long *maskp,
-				 unsigned long *valp)
+				 unsigned long *valp, u64 event_config1 __maybe_unused)
 {
 	int pmc, sh, unit;
 	unsigned long mask = 0, value = 0;
@@ -245,7 +245,8 @@ static int power7_marked_instr_event(u64 event)
 
 static int power7_compute_mmcr(u64 event[], int n_ev,
 			       unsigned int hwc[], struct mmcr_regs *mmcr,
-			       struct perf_event *pevents[])
+			       struct perf_event *pevents[],
+			       u32 flags __maybe_unused)
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c
index 7d78df97f272..1f8263785286 100644
--- a/arch/powerpc/perf/ppc970-pmu.c
+++ b/arch/powerpc/perf/ppc970-pmu.c
@@ -190,7 +190,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
 };
 
 static int p970_get_constraint(u64 event, unsigned long *maskp,
-			       unsigned long *valp)
+			       unsigned long *valp, u64 event_config1 __maybe_unused)
 {
 	int pmc, byte, unit, sh, spcsel;
 	unsigned long mask = 0, value = 0;
@@ -256,7 +256,8 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 
 static int p970_compute_mmcr(u64 event[], int n_ev,
 			     unsigned int hwc[], struct mmcr_regs *mmcr,
-			     struct perf_event *pevents[])
+			     struct perf_event *pevents[],
+			     u32 flags __maybe_unused)
 {
 	unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
 	unsigned int pmc, unit, byte, psel;
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH v2 2/7] ASoC: fsl_rpmsg: Add CPU DAI driver for audio base on rpmsg
From: Shengjiu Wang @ 2021-02-09  9:16 UTC (permalink / raw)
  To: Mark Brown
  Cc: open list:OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS,
	alsa-devel, Timur Tabi, Xiubo Li, Fabio Estevam, Shengjiu Wang,
	Takashi Iwai, Liam Girdwood, linux-kernel, Nicolin Chen,
	Rob Herring, linuxppc-dev
In-Reply-To: <20210208115112.GD8645@sirena.org.uk>

On Mon, Feb 8, 2021 at 7:53 PM Mark Brown <broonie@kernel.org> wrote:
>
> On Sun, Feb 07, 2021 at 06:23:50PM +0800, Shengjiu Wang wrote:
>
> > +static int fsl_rpmsg_hw_params(struct snd_pcm_substream *substream,
> > +                            struct snd_pcm_hw_params *params,
> > +                            struct snd_soc_dai *dai)
> > +{
>
> ...
>
> > +     ret = clk_prepare_enable(rpmsg->mclk);
> > +     if (ret)
> > +             dev_err(dai->dev, "failed to enable mclk: %d\n", ret);
> > +
> > +     return ret;
> > +}
> > +
> > +static int fsl_rpmsg_hw_free(struct snd_pcm_substream *substream,
> > +                          struct snd_soc_dai *dai)
> > +{
> > +     struct fsl_rpmsg *rpmsg = snd_soc_dai_get_drvdata(dai);
> > +
> > +     clk_disable_unprepare(rpmsg->mclk);
>
> hw_params() can be called multiple times and there's no need for it to
> be balanced with hw_free(), I'd move this to a different callback (DAPM
> should work well).

Which callback should I use? Is there an example?

best regards
wang shengjiu

^ permalink raw reply

* Re: [PATCH v4 01/14] swiotlb: Remove external access to io_tlb_start
From: Christoph Hellwig @ 2021-02-09  8:40 UTC (permalink / raw)
  To: Claire Chang
  Cc: heikki.krogerus, peterz, grant.likely, paulus, Frank Rowand,
	mingo, Marek Szyprowski, sstabellini, Saravana Kannan,
	Joerg Roedel, Rafael J . Wysocki, Christoph Hellwig,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	Will Deacon, Konrad Rzeszutek Wilk, Dan Williams, linuxppc-dev,
	Rob Herring, boris.ostrovsky, Andy Shevchenko, jgross,
	Nicolas Boichat, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, xypron.glpk,
	Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-2-tientzu@chromium.org>

On Tue, Feb 09, 2021 at 02:21:18PM +0800, Claire Chang wrote:
> This can be dropped if Christoph's swiotlb cleanups are landed.
> https://lore.kernel.org/linux-iommu/20210207160934.2955931-1-hch@lst.de/T/#m7124f29b6076d462101fcff6433295157621da09 

FYI, I've also started looking into additional cleanups based on your
struct in this branch, but I'd like to wait for all the previous
changes to settle first:

http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/swiotlb-struct

^ permalink raw reply

* Re: [PATCH v5 17/22] powerpc/syscall: Do not check unsupported scv vector on PPC32
From: Nicholas Piggin @ 2021-02-09  7:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Christophe Leroy, Michael Ellerman,
	msuchanek, Paul Mackerras
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <82c4abb1-cb52-e856-b2dd-d7c7d48bd292@csgroup.eu>

Excerpts from Christophe Leroy's message of February 9, 2021 4:13 pm:
> 
> 
> Le 09/02/2021 à 03:00, Nicholas Piggin a écrit :
>> Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
>>> Only PPC64 has scv. No need to check the 0x7ff0 trap on PPC32.
>>> For that, add a helper trap_is_unsupported_scv() similar to
>>> trap_is_scv().
>>>
>>> And ignore the scv parameter in syscall_exit_prepare (Save 14 cycles
>>> 346 => 332 cycles)
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>> ---
>>> v5: Added a helper trap_is_unsupported_scv()
>>> ---
>>>   arch/powerpc/include/asm/ptrace.h | 5 +++++
>>>   arch/powerpc/kernel/entry_32.S    | 1 -
>>>   arch/powerpc/kernel/interrupt.c   | 7 +++++--
>>>   3 files changed, 10 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
>>> index 58f9dc060a7b..2c842b11a924 100644
>>> --- a/arch/powerpc/include/asm/ptrace.h
>>> +++ b/arch/powerpc/include/asm/ptrace.h
>>> @@ -229,6 +229,11 @@ static inline bool trap_is_scv(struct pt_regs *regs)
>>>   	return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x3000);
>>>   }
>>>   
>>> +static inline bool trap_is_unsupported_scv(struct pt_regs *regs)
>>> +{
>>> +	return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x7ff0);
>>> +}
>> 
>> This change is good.
>> 
>>> +
>>>   static inline bool trap_is_syscall(struct pt_regs *regs)
>>>   {
>>>   	return (trap_is_scv(regs) || TRAP(regs) == 0xc00);
>>> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
>>> index cffe58e63356..7c824e8928d0 100644
>>> --- a/arch/powerpc/kernel/entry_32.S
>>> +++ b/arch/powerpc/kernel/entry_32.S
>>> @@ -344,7 +344,6 @@ transfer_to_syscall:
>>>   
>>>   ret_from_syscall:
>>>   	addi    r4,r1,STACK_FRAME_OVERHEAD
>>> -	li	r5,0
>>>   	bl	syscall_exit_prepare
>> 
>> For this one, I think it would be nice to do the "right" thing and make
>> the function prototypes different on !64S. They could then declare a
>> local const bool scv = 0.
>> 
>> We could have syscall_exit_prepare and syscall_exit_prepare_maybe_scv
>> or something like that, 64s can use the latter one and the former can be
>> a wrapper that passes constant 0 for scv. Then we don't have different
>> prototypes for the same function, but you just have to make the 32-bit
>> version static inline and the 64-bit version exported to asm.
> 
> You can't call a static inline function from ASM, I don't understand you.

I mean

#ifdef CONFIG_PPC_BOOK3S_64
notrace unsigned long syscall_exit_prepare_scv(unsigned long r3,
                                           struct pt_regs *regs,
                                           long scv)
#else
static inline long syscall_exit_prepare_scv(unsigned long r3,
                                           struct pt_regs *regs,
                                           long scv)
#endif

#ifndef CONFIG_PPC_BOOK3S_64
notrace unsigned long syscall_exit_prepare(unsigned long r3,
                                           struct pt_regs *regs)
{
	return syscall_exit_prepare_scv(r3, regs, 0);
}
#endif


> 
> What is wrong for you really here ? Is that the fact we leave scv random, or is that the below 
> IS_ENABLED() ?

That scv arg is random. I know generated code essentially would be no 
different and no possibility of tracing, but would just prefer to call 
the C "correctly" if possible.

> I don't mind keeping the 'li r5,0' before calling the function if you find it cleaner, the real 
> performance gain is with setting scv to 0 below for PPC32 (and maybe it should be set to zero for 
> book3e/64 too ?).

Yes 64e would like this optimisation.

Thanks,
Nick

^ permalink raw reply

* Re: [PATCH v5 09/22] powerpc/syscall: Make interrupt.c buildable on PPC32
From: Nicholas Piggin @ 2021-02-09  7:50 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Christophe Leroy, Michael Ellerman,
	msuchanek, Paul Mackerras
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <73fd6e9b-fe99-e804-d681-c0a22b9bef38@csgroup.eu>

Excerpts from Christophe Leroy's message of February 9, 2021 4:02 pm:
> 
> 
> Le 09/02/2021 à 02:27, Nicholas Piggin a écrit :
>> Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
>>> To allow building interrupt.c on PPC32, ifdef out specific PPC64
>>> code or use helpers which are available on both PP32 and PPC64
>>>
>>> Modify Makefile to always build interrupt.o
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>> ---
>>> v5:
>>> - Also for interrupt exit preparation
>>> - Opted out kuap related code, ppc32 keeps it in ASM for the time being
>>> ---
>>>   arch/powerpc/kernel/Makefile    |  4 ++--
>>>   arch/powerpc/kernel/interrupt.c | 31 ++++++++++++++++++++++++-------
>>>   2 files changed, 26 insertions(+), 9 deletions(-)
>>>
> 
>>> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
>>> index d6be4f9a67e5..2dac4d2bb1cf 100644
>>> --- a/arch/powerpc/kernel/interrupt.c
>>> +++ b/arch/powerpc/kernel/interrupt.c
>>> @@ -39,7 +39,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
>>>   		BUG_ON(!(regs->msr & MSR_RI));
>>>   	BUG_ON(!(regs->msr & MSR_PR));
>>>   	BUG_ON(!FULL_REGS(regs));
>>> -	BUG_ON(regs->softe != IRQS_ENABLED);
>>> +	BUG_ON(arch_irq_disabled_regs(regs));
>>>   
>>>   #ifdef CONFIG_PPC_PKEY
>>>   	if (mmu_has_feature(MMU_FTR_PKEY)) {
>>> @@ -65,7 +65,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
>>>   			isync();
>>>   	} else
>>>   #endif
>>> +#ifdef CONFIG_PPC64
>>>   		kuap_check_amr();
>>> +#endif
>> 
>> Wouldn't mind trying to get rid of these ifdefs at some point, but
>> there's some kuap / keys changes going on recently so I'm happy enough
>> to let this settle then look at whether we can refactor.
> 
> I have a follow up series that implements interrupts entries/exits in C and that removes all kuap 
> assembly, I will likely release it as RFC later today.
> 
>> 
>>>   
>>>   	account_cpu_user_entry();
>>>   
>>> @@ -318,7 +323,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
>>>   	return ret;
>>>   }
>>>   
>>> -#ifdef CONFIG_PPC_BOOK3S /* BOOK3E not yet using this */
>>> +#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not yet using this */
>>>   notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
>>>   {
>>>   #ifdef CONFIG_PPC_BOOK3E
>> 
>> Why are you building this for 32? I don't mind if it's just to keep
>> things similar and make it build for now, but you're not using it yet,
>> right?
> 
> The series using that will follow, I thought it would be worth doing this at once.

Yeah that's fine by me then.

Thanks,
Nick

^ permalink raw reply

* Re: [PATCH v5 05/22] powerpc/irq: Add helper to set regs->softe
From: Nicholas Piggin @ 2021-02-09  7:49 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Christophe Leroy, Michael Ellerman,
	msuchanek, Paul Mackerras
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <5987787e-ee80-ed0e-0c34-9884f6aad3c5@csgroup.eu>

Excerpts from Christophe Leroy's message of February 9, 2021 4:18 pm:
> 
> 
> Le 09/02/2021 à 02:11, Nicholas Piggin a écrit :
>> Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
>>> regs->softe doesn't exist on PPC32.
>>>
>>> Add irq_soft_mask_regs_set_state() helper to set regs->softe.
>>> This helper will void on PPC32.
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>> ---
>> 
>> You could do the same with the kuap_ functions to change some ifdefs
>> to IS_ENABLED.
>> 
>> That's just my preference but if you prefer this way I guess that's
>> okay.
>> 
> 
> 
> That's also my preference on the long term.
> 
> Here it is ephemeral, I have a follow up series implementing interrupt exit/entry in C and getting 
> rid of all the assembly kuap hence getting rid of those ifdefs.

I thought it might have been because you hate ifdef more tha most :)
 
> The issue I see when using IS_ENABLED() is that you have to indent to the right, then you interfere 
> with the file history and 'git blame'

Valid point if it's just going to indent back the other way in your next 
series.

> Thanks for reviewing my series and looking forward to your feedback on my series on the interrupt 
> entry/exit that I will likely release later today.

Cool, I'm eager to see them.

Thanks,
Nick

^ permalink raw reply

* Re: [PATCH v5 05/22] powerpc/irq: Add helper to set regs->softe
From: Nicholas Piggin @ 2021-02-09  7:47 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Christophe Leroy, Michael Ellerman,
	msuchanek, Paul Mackerras
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <258ac0c6-ef40-86d4-2ce4-772cfc4a95e5@csgroup.eu>

Excerpts from Christophe Leroy's message of February 9, 2021 3:57 pm:
> 
> 
> Le 09/02/2021 à 02:11, Nicholas Piggin a écrit :
>> Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
>>> regs->softe doesn't exist on PPC32.
>>>
>>> Add irq_soft_mask_regs_set_state() helper to set regs->softe.
>>> This helper will void on PPC32.
>>>
>>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>>> ---
>>>   arch/powerpc/include/asm/hw_irq.h | 11 +++++++++--
>>>   1 file changed, 9 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
>>> index 614957f74cee..ed0c3b049dfd 100644
>>> --- a/arch/powerpc/include/asm/hw_irq.h
>>> +++ b/arch/powerpc/include/asm/hw_irq.h
>>> @@ -38,6 +38,8 @@
>>>   #define PACA_IRQ_MUST_HARD_MASK	(PACA_IRQ_EE)
>>>   #endif
>>>   
>>> +#endif /* CONFIG_PPC64 */
>>> +
>>>   /*
>>>    * flags for paca->irq_soft_mask
>>>    */
>>> @@ -46,8 +48,6 @@
>>>   #define IRQS_PMI_DISABLED	2
>>>   #define IRQS_ALL_DISABLED	(IRQS_DISABLED | IRQS_PMI_DISABLED)
>>>   
>>> -#endif /* CONFIG_PPC64 */
>>> -
>>>   #ifndef __ASSEMBLY__
>>>   
>>>   #ifdef CONFIG_PPC64
>>> @@ -287,6 +287,10 @@ extern void irq_set_pending_from_srr1(unsigned long srr1);
>>>   
>>>   extern void force_external_irq_replay(void);
>>>   
>>> +static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
>>> +{
>>> +	regs->softe = val;
>>> +}
>>>   #else /* CONFIG_PPC64 */
>>>   
>>>   static inline unsigned long arch_local_save_flags(void)
>>> @@ -355,6 +359,9 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
>>>   
>>>   static inline void may_hard_irq_enable(void) { }
>>>   
>>> +static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
>>> +{
>>> +}
>>>   #endif /* CONFIG_PPC64 */
>>>   
>>>   #define ARCH_IRQ_INIT_FLAGS	IRQ_NOREQUEST
>> 
>> What I don't like about this where you use it is it kind of pollutes
>> the ppc32 path with this function which is not valid to use.
>> 
>> I would prefer if you had this purely so it could compile with:
>> 
>>    if (IS_ENABLED(CONFIG_PPC64)))
>>        irq_soft_mask_regs_set_state(regs, blah);
>> 
>> And then you could make the ppc32 cause a link error if it did not
>> get eliminated at compile time (e.g., call an undefined function).
>> 
>> You could do the same with the kuap_ functions to change some ifdefs
>> to IS_ENABLED.
>> 
>> That's just my preference but if you prefer this way I guess that's
>> okay.
> 
> I see you didn't change your mind since last April :)
> 
> I'll see what I can do.

If you have more patches in the works and will do some cleanup passes I 
don't mind so much.

Thanks,
Nick

^ permalink raw reply

* Re: [PATCH v7 32/42] powerpc/64: context tracking move to interrupt wrappers
From: Nicholas Piggin @ 2021-02-09  7:45 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev; +Cc: Athira Rajeev
In-Reply-To: <3cb26edb-c9cc-4f21-0b6d-dcd4bbeed7b3@csgroup.eu>

Excerpts from Christophe Leroy's message of February 9, 2021 3:49 pm:
> 
> 
> Le 30/01/2021 à 14:08, Nicholas Piggin a écrit :
>> This moves exception_enter/exit calls to wrapper functions for
>> synchronous interrupts. More interrupt handlers are covered by
>> this than previously.
> 
> Why did you enclose everything in #ifdef CONFIG_PPC64 ? As far as I understand, before this patch 
> exception_enter() and exception_exit() are called also on PPC32.

PPC32 never selects CONTEXT_TRACKING AFAIKS, but I'm not sure. I worried 
ctx_state would not be no-oped, but if it's all inlined into the same
function then maybe the compiler will eliminate it.

On the other hand I may move some of the wrapper into its own function 
if that helps code size, but we can do something about it then...

Hmm, end result is it shouldn't matter for PPC32 at the moment.

Thanks,
Nick

> 
> Christophe
> 
> 
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>   arch/powerpc/include/asm/interrupt.h  |  9 ++++
>>   arch/powerpc/kernel/traps.c           | 74 ++++++---------------------
>>   arch/powerpc/mm/book3s64/hash_utils.c |  3 --
>>   arch/powerpc/mm/fault.c               |  9 +---
>>   4 files changed, 27 insertions(+), 68 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
>> index 488bdd5bd922..e65ce3e2b071 100644
>> --- a/arch/powerpc/include/asm/interrupt.h
>> +++ b/arch/powerpc/include/asm/interrupt.h
>> @@ -7,10 +7,16 @@
>>   #include <asm/ftrace.h>
>>   
>>   struct interrupt_state {
>> +#ifdef CONFIG_PPC64
>> +	enum ctx_state ctx_state;
>> +#endif
>>   };
>>   
>>   static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
>>   {
>> +#ifdef CONFIG_PPC64
>> +	state->ctx_state = exception_enter();
>> +#endif
>>   }
>>   
>>   /*
>> @@ -29,6 +35,9 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
>>    */
>>   static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
>>   {
>> +#ifdef CONFIG_PPC64
>> +	exception_exit(state->ctx_state);
>> +#endif
>>   }
>>   
>>   static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
>> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
>> index da488e62fb5f..21fd14828827 100644
>> --- a/arch/powerpc/kernel/traps.c
>> +++ b/arch/powerpc/kernel/traps.c
>> @@ -1087,41 +1087,28 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
>>   
>>   DEFINE_INTERRUPT_HANDLER(unknown_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -
>>   	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
>>   	       regs->nip, regs->msr, regs->trap);
>>   
>>   	_exception(SIGTRAP, regs, TRAP_UNK, 0);
>> -
>> -	exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -
>>   	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
>>   	       regs->nip, regs->msr, regs->trap);
>>   
>>   	_exception(SIGTRAP, regs, TRAP_UNK, 0);
>> -
>> -	exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -
>>   	if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
>>   					5, SIGTRAP) == NOTIFY_STOP)
>> -		goto bail;
>> +		return;
>>   	if (debugger_iabr_match(regs))
>> -		goto bail;
>> +		return;
>>   	_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
>> -
>> -bail:
>> -	exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER(RunModeException)
>> @@ -1131,8 +1118,6 @@ DEFINE_INTERRUPT_HANDLER(RunModeException)
>>   
>>   DEFINE_INTERRUPT_HANDLER(single_step_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -
>>   	clear_single_step(regs);
>>   	clear_br_trace(regs);
>>   
>> @@ -1141,14 +1126,11 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception)
>>   
>>   	if (notify_die(DIE_SSTEP, "single_step", regs, 5,
>>   					5, SIGTRAP) == NOTIFY_STOP)
>> -		goto bail;
>> +		return;
>>   	if (debugger_sstep(regs))
>> -		goto bail;
>> +		return;
>>   
>>   	_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
>> -
>> -bail:
>> -	exception_exit(prev_state);
>>   }
>>   NOKPROBE_SYMBOL(single_step_exception);
>>   
>> @@ -1476,7 +1458,6 @@ static inline int emulate_math(struct pt_regs *regs) { return -1; }
>>   
>>   DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>>   	unsigned int reason = get_reason(regs);
>>   
>>   	/* We can now get here via a FP Unavailable exception if the core
>> @@ -1485,22 +1466,22 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   	if (reason & REASON_FP) {
>>   		/* IEEE FP exception */
>>   		parse_fpe(regs);
>> -		goto bail;
>> +		return;
>>   	}
>>   	if (reason & REASON_TRAP) {
>>   		unsigned long bugaddr;
>>   		/* Debugger is first in line to stop recursive faults in
>>   		 * rcu_lock, notify_die, or atomic_notifier_call_chain */
>>   		if (debugger_bpt(regs))
>> -			goto bail;
>> +			return;
>>   
>>   		if (kprobe_handler(regs))
>> -			goto bail;
>> +			return;
>>   
>>   		/* trap exception */
>>   		if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
>>   				== NOTIFY_STOP)
>> -			goto bail;
>> +			return;
>>   
>>   		bugaddr = regs->nip;
>>   		/*
>> @@ -1512,10 +1493,10 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   		if (!(regs->msr & MSR_PR) &&  /* not user-mode */
>>   		    report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
>>   			regs->nip += 4;
>> -			goto bail;
>> +			return;
>>   		}
>>   		_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
>> -		goto bail;
>> +		return;
>>   	}
>>   #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
>>   	if (reason & REASON_TM) {
>> @@ -1536,7 +1517,7 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   		 */
>>   		if (user_mode(regs)) {
>>   			_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
>> -			goto bail;
>> +			return;
>>   		} else {
>>   			printk(KERN_EMERG "Unexpected TM Bad Thing exception "
>>   			       "at %lx (msr 0x%lx) tm_scratch=%llx\n",
>> @@ -1567,7 +1548,7 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   	 * pattern to occurrences etc. -dgibson 31/Mar/2003
>>   	 */
>>   	if (!emulate_math(regs))
>> -		goto bail;
>> +		return;
>>   
>>   	/* Try to emulate it if we should. */
>>   	if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
>> @@ -1575,10 +1556,10 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   		case 0:
>>   			regs->nip += 4;
>>   			emulate_single_step(regs);
>> -			goto bail;
>> +			return;
>>   		case -EFAULT:
>>   			_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
>> -			goto bail;
>> +			return;
>>   		}
>>   	}
>>   
>> @@ -1587,9 +1568,6 @@ DEFINE_INTERRUPT_HANDLER(program_check_exception)
>>   		_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
>>   	else
>>   		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
>> -
>> -bail:
>> -	exception_exit(prev_state);
>>   }
>>   NOKPROBE_SYMBOL(program_check_exception);
>>   
>> @@ -1606,14 +1584,12 @@ NOKPROBE_SYMBOL(emulation_assist_interrupt);
>>   
>>   DEFINE_INTERRUPT_HANDLER(alignment_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>>   	int sig, code, fixed = 0;
>>   	unsigned long  reason;
>>   
>>   	interrupt_cond_local_irq_enable(regs);
>>   
>>   	reason = get_reason(regs);
>> -
>>   	if (reason & REASON_BOUNDARY) {
>>   		sig = SIGBUS;
>>   		code = BUS_ADRALN;
>> @@ -1621,7 +1597,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
>>   	}
>>   
>>   	if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
>> -		goto bail;
>> +		return;
>>   
>>   	/* we don't implement logging of alignment exceptions */
>>   	if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
>> @@ -1631,7 +1607,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
>>   		/* skip over emulated instruction */
>>   		regs->nip += inst_length(reason);
>>   		emulate_single_step(regs);
>> -		goto bail;
>> +		return;
>>   	}
>>   
>>   	/* Operand address was bad */
>> @@ -1647,9 +1623,6 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception)
>>   		_exception(sig, regs, code, regs->dar);
>>   	else
>>   		bad_page_fault(regs, sig);
>> -
>> -bail:
>> -	exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER(StackOverflow)
>> @@ -1663,41 +1636,28 @@ DEFINE_INTERRUPT_HANDLER(StackOverflow)
>>   
>>   DEFINE_INTERRUPT_HANDLER(stack_overflow_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -
>>   	die("Kernel stack overflow", regs, SIGSEGV);
>> -
>> -	exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -
>>   	printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
>>   			  "%lx at %lx\n", regs->trap, regs->nip);
>>   	die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
>> -
>> -	exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -
>>   	if (user_mode(regs)) {
>>   		/* A user program has executed an altivec instruction,
>>   		   but this kernel doesn't support altivec. */
>>   		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
>> -		goto bail;
>> +		return;
>>   	}
>>   
>>   	printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
>>   			"%lx at %lx\n", regs->trap, regs->nip);
>>   	die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
>> -
>> -bail:
>> -	exception_exit(prev_state);
>>   }
>>   
>>   DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception)
>> diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
>> index d681dc5a7b1c..fb7c10524bcd 100644
>> --- a/arch/powerpc/mm/book3s64/hash_utils.c
>> +++ b/arch/powerpc/mm/book3s64/hash_utils.c
>> @@ -1514,7 +1514,6 @@ EXPORT_SYMBOL_GPL(hash_page);
>>   DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault);
>>   DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>>   	unsigned long ea = regs->dar;
>>   	unsigned long dsisr = regs->dsisr;
>>   	unsigned long access = _PAGE_PRESENT | _PAGE_READ;
>> @@ -1563,8 +1562,6 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
>>   		err = 0;
>>   	}
>>   
>> -	exception_exit(prev_state);
>> -
>>   	return err;
>>   }
>>   
>> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
>> index 9c4220efc20f..b26a7643fc6e 100644
>> --- a/arch/powerpc/mm/fault.c
>> +++ b/arch/powerpc/mm/fault.c
>> @@ -564,14 +564,7 @@ NOKPROBE_SYMBOL(__do_page_fault);
>>   
>>   DEFINE_INTERRUPT_HANDLER_RET(do_page_fault)
>>   {
>> -	enum ctx_state prev_state = exception_enter();
>> -	long err;
>> -
>> -	err = __do_page_fault(regs);
>> -
>> -	exception_exit(prev_state);
>> -
>> -	return err;
>> +	return __do_page_fault(regs);
>>   }
>>   NOKPROBE_SYMBOL(do_page_fault);
>>   
>> 
> 

^ permalink raw reply

* Re: [RFC PATCH v3 0/6] Restricted DMA
From: Claire Chang @ 2021-02-09  6:27 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: heikki.krogerus, peterz, grant.likely, paulus, Frank Rowand,
	mingo, Marek Szyprowski, sstabellini, Saravana Kannan,
	Joerg Roedel, Rafael J . Wysocki, Christoph Hellwig,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	Will Deacon, Konrad Rzeszutek Wilk, Dan Williams, linuxppc-dev,
	Rob Herring, boris.ostrovsky, Andy Shevchenko, jgross,
	Nicolas Boichat, Greg KH, Randy Dunlap, lkml, Tomasz Figa,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, xypron.glpk,
	Robin Murphy, bauerman
In-Reply-To: <7fe99ad2-79a7-9c8b-65ce-ce8353e9d9bf@gmail.com>

v4 here: https://lore.kernel.org/patchwork/cover/1378113/

^ permalink raw reply

* [PATCH v4 14/14] of: Add plumbing for restricted DMA pool
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

If a device is not behind an IOMMU, we look up the device node and set
up the restricted DMA when the restricted-dma-pool is presented.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 drivers/of/address.c    | 25 +++++++++++++++++++++++++
 drivers/of/device.c     |  3 +++
 drivers/of/of_private.h |  5 +++++
 3 files changed, 33 insertions(+)

diff --git a/drivers/of/address.c b/drivers/of/address.c
index 73ddf2540f3f..b6093c9b135d 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -8,6 +8,7 @@
 #include <linux/logic_pio.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
+#include <linux/of_reserved_mem.h>
 #include <linux/pci.h>
 #include <linux/pci_regs.h>
 #include <linux/sizes.h>
@@ -1094,3 +1095,27 @@ bool of_dma_is_coherent(struct device_node *np)
 	return false;
 }
 EXPORT_SYMBOL_GPL(of_dma_is_coherent);
+
+int of_dma_set_restricted_buffer(struct device *dev)
+{
+	struct device_node *node;
+	int count, i;
+
+	if (!dev->of_node)
+		return 0;
+
+	count = of_property_count_elems_of_size(dev->of_node, "memory-region",
+						sizeof(phandle));
+	for (i = 0; i < count; i++) {
+		node = of_parse_phandle(dev->of_node, "memory-region", i);
+		/* There might be multiple memory regions, but only one
+		 * restriced-dma-pool region is allowed.
+		 */
+		if (of_device_is_compatible(node, "restricted-dma-pool") &&
+		    of_device_is_available(node))
+			return of_reserved_mem_device_init_by_idx(
+				dev, dev->of_node, i);
+	}
+
+	return 0;
+}
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 1122daa8e273..38c631f1fafa 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -186,6 +186,9 @@ int of_dma_configure_id(struct device *dev, struct device_node *np,
 
 	arch_setup_dma_ops(dev, dma_start, size, iommu, coherent);
 
+	if (!iommu)
+		return of_dma_set_restricted_buffer(dev);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(of_dma_configure_id);
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index d9e6a324de0a..28a2dfa197ba 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -161,12 +161,17 @@ struct bus_dma_region;
 #if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_HAS_DMA)
 int of_dma_get_range(struct device_node *np,
 		const struct bus_dma_region **map);
+int of_dma_set_restricted_buffer(struct device *dev);
 #else
 static inline int of_dma_get_range(struct device_node *np,
 		const struct bus_dma_region **map)
 {
 	return -ENODEV;
 }
+static inline int of_dma_get_restricted_buffer(struct device *dev)
+{
+	return -ENODEV;
+}
 #endif
 
 #endif /* _LINUX_OF_PRIVATE_H */
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related

* [PATCH v4 13/14] dt-bindings: of: Add restricted DMA pool
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

Introduce the new compatible string, restricted-dma-pool, for restricted
DMA. One can specify the address and length of the restricted DMA memory
region by restricted-dma-pool in the reserved-memory node.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 .../reserved-memory/reserved-memory.txt       | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
index e8d3096d922c..fc9a12c2f679 100644
--- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
+++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
@@ -51,6 +51,20 @@ compatible (optional) - standard definition
           used as a shared pool of DMA buffers for a set of devices. It can
           be used by an operating system to instantiate the necessary pool
           management subsystem if necessary.
+        - restricted-dma-pool: This indicates a region of memory meant to be
+          used as a pool of restricted DMA buffers for a set of devices. The
+          memory region would be the only region accessible to those devices.
+          When using this, the no-map and reusable properties must not be set,
+          so the operating system can create a virtual mapping that will be used
+          for synchronization. The main purpose for restricted DMA is to
+          mitigate the lack of DMA access control on systems without an IOMMU,
+          which could result in the DMA accessing the system memory at
+          unexpected times and/or unexpected addresses, possibly leading to data
+          leakage or corruption. The feature on its own provides a basic level
+          of protection against the DMA overwriting buffer contents at
+          unexpected times. However, to protect against general data leakage and
+          system memory corruption, the system needs to provide way to lock down
+          the memory access, e.g., MPU.
         - vendor specific string in the form <vendor>,[<device>-]<usage>
 no-map (optional) - empty property
     - Indicates the operating system must not create a virtual mapping
@@ -120,6 +134,11 @@ one for multimedia processing (named multimedia-memory@77000000, 64MiB).
 			compatible = "acme,multimedia-memory";
 			reg = <0x77000000 0x4000000>;
 		};
+
+		restricted_dma_mem_reserved: restricted_dma_mem_reserved {
+			compatible = "restricted-dma-pool";
+			reg = <0x50000000 0x400000>;
+		};
 	};
 
 	/* ... */
@@ -138,4 +157,9 @@ one for multimedia processing (named multimedia-memory@77000000, 64MiB).
 		memory-region = <&multimedia_reserved>;
 		/* ... */
 	};
+
+	pcie_device: pcie_device@0,0 {
+		memory-region = <&restricted_dma_mem_reserved>;
+		/* ... */
+	};
 };
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related

* [PATCH v4 12/14] swiotlb: Add restricted DMA alloc/free support.
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

Add the functions, dev_swiotlb_{alloc,free} to support the memory
allocation from restricted DMA pool.

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 include/linux/swiotlb.h |  2 ++
 kernel/dma/direct.c     | 30 ++++++++++++++++++++++--------
 kernel/dma/swiotlb.c    | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index b9f2a250c8da..2cd39e102915 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -74,6 +74,8 @@ extern enum swiotlb_force swiotlb_force;
 #ifdef CONFIG_DMA_RESTRICTED_POOL
 bool is_swiotlb_force(struct device *dev);
 bool is_dev_swiotlb_force(struct device *dev);
+struct page *dev_swiotlb_alloc(struct device *dev, size_t size, gfp_t gfp);
+bool dev_swiotlb_free(struct device *dev, struct page *page, size_t size);
 #else
 static inline bool is_swiotlb_force(struct device *dev)
 {
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index a76a1a2f24da..f9a9321f7559 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -12,6 +12,7 @@
 #include <linux/pfn.h>
 #include <linux/vmalloc.h>
 #include <linux/set_memory.h>
+#include <linux/swiotlb.h>
 #include <linux/slab.h>
 #include "direct.h"
 
@@ -77,6 +78,10 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 
 static void __dma_direct_free_pages(struct device *dev, struct page *page, size_t size)
 {
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+	if (dev_swiotlb_free(dev, page, size))
+		return;
+#endif
 	dma_free_contiguous(dev, page, size);
 }
 
@@ -89,6 +94,12 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
 
 	WARN_ON_ONCE(!PAGE_ALIGNED(size));
 
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+	page = dev_swiotlb_alloc(dev, size, gfp);
+	if (page)
+		return page;
+#endif
+
 	gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
 					   &phys_limit);
 	page = dma_alloc_contiguous(dev, size, gfp);
@@ -147,7 +158,7 @@ void *dma_direct_alloc(struct device *dev, size_t size,
 		gfp |= __GFP_NOWARN;
 
 	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
-	    !force_dma_unencrypted(dev)) {
+	    !force_dma_unencrypted(dev) && !is_dev_swiotlb_force(dev)) {
 		page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO);
 		if (!page)
 			return NULL;
@@ -160,8 +171,8 @@ void *dma_direct_alloc(struct device *dev, size_t size,
 	}
 
 	if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
-	    !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
-	    !dev_is_dma_coherent(dev))
+	    !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev) &&
+	    !is_dev_swiotlb_force(dev))
 		return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
 
 	/*
@@ -171,7 +182,9 @@ void *dma_direct_alloc(struct device *dev, size_t size,
 	if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
 	    !gfpflags_allow_blocking(gfp) &&
 	    (force_dma_unencrypted(dev) ||
-	     (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev))))
+	     (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
+	      !dev_is_dma_coherent(dev))) &&
+	    !is_dev_swiotlb_force(dev))
 		return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
 
 	/* we always manually zero the memory once we are done */
@@ -252,15 +265,15 @@ void dma_direct_free(struct device *dev, size_t size,
 	unsigned int page_order = get_order(size);
 
 	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
-	    !force_dma_unencrypted(dev)) {
+	    !force_dma_unencrypted(dev) && !is_dev_swiotlb_force(dev)) {
 		/* cpu_addr is a struct page cookie, not a kernel address */
 		dma_free_contiguous(dev, cpu_addr, size);
 		return;
 	}
 
 	if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
-	    !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
-	    !dev_is_dma_coherent(dev)) {
+	    !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev) &&
+	    !is_dev_swiotlb_force(dev)) {
 		arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
 		return;
 	}
@@ -288,7 +301,8 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
 	void *ret;
 
 	if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
-	    force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp))
+	    force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp) &&
+	    !is_dev_swiotlb_force(dev))
 		return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
 
 	page = __dma_direct_alloc_pages(dev, size, gfp);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index fd9c1bd183ac..8b77fd64199e 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -836,6 +836,40 @@ late_initcall(swiotlb_create_default_debugfs);
 #endif
 
 #ifdef CONFIG_DMA_RESTRICTED_POOL
+struct page *dev_swiotlb_alloc(struct device *dev, size_t size, gfp_t gfp)
+{
+	struct swiotlb *swiotlb;
+	phys_addr_t tlb_addr;
+	unsigned int index;
+
+	/* dev_swiotlb_alloc can be used only in the context which permits sleeping. */
+	if (!dev->dev_swiotlb || !gfpflags_allow_blocking(gfp))
+		return NULL;
+
+	swiotlb = dev->dev_swiotlb;
+	index = swiotlb_tbl_find_free_region(dev, swiotlb->start, size, 0);
+	if (index < 0)
+		return NULL;
+
+	tlb_addr = swiotlb->start + (index << IO_TLB_SHIFT);
+
+	return pfn_to_page(PFN_DOWN(tlb_addr));
+}
+
+bool dev_swiotlb_free(struct device *dev, struct page *page, size_t size)
+{
+	unsigned int index;
+	phys_addr_t tlb_addr = page_to_phys(page);
+
+	if (!is_swiotlb_buffer(dev, tlb_addr))
+		return false;
+
+	index = (tlb_addr - dev->dev_swiotlb->start) >> IO_TLB_SHIFT;
+	swiotlb_tbl_release_region(dev, index, size);
+
+	return true;
+}
+
 bool is_swiotlb_force(struct device *dev)
 {
 	return unlikely(swiotlb_force == SWIOTLB_FORCE) || dev->dev_swiotlb;
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related

* [PATCH v4 11/14] swiotlb: Add is_dev_swiotlb_force()
From: Claire Chang @ 2021-02-09  6:21 UTC (permalink / raw)
  To: Rob Herring, mpe, Joerg Roedel, Will Deacon, Frank Rowand,
	Konrad Rzeszutek Wilk, boris.ostrovsky, jgross, Christoph Hellwig,
	Marek Szyprowski
  Cc: heikki.krogerus, peterz, grant.likely, paulus, mingo, sstabellini,
	Saravana Kannan, xypron.glpk, Rafael J . Wysocki,
	Bartosz Golaszewski, xen-devel, Thierry Reding, linux-devicetree,
	linuxppc-dev, Nicolas Boichat, Claire Chang, Dan Williams,
	Andy Shevchenko, Greg KH, Randy Dunlap, lkml,
	list@263.net:IOMMU DRIVERS, Jim Quinlan, Robin Murphy, bauerman
In-Reply-To: <20210209062131.2300005-1-tientzu@chromium.org>

Add is_dev_swiotlb_force() which returns true if the device has
restricted DMA pool (e.g. dev->dev_swiotlb is set).

Signed-off-by: Claire Chang <tientzu@chromium.org>
---
 include/linux/swiotlb.h | 9 +++++++++
 kernel/dma/swiotlb.c    | 5 +++++
 2 files changed, 14 insertions(+)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 76f86c684524..b9f2a250c8da 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -73,11 +73,16 @@ extern enum swiotlb_force swiotlb_force;
 
 #ifdef CONFIG_DMA_RESTRICTED_POOL
 bool is_swiotlb_force(struct device *dev);
+bool is_dev_swiotlb_force(struct device *dev);
 #else
 static inline bool is_swiotlb_force(struct device *dev)
 {
 	return unlikely(swiotlb_force == SWIOTLB_FORCE);
 }
+static inline bool is_dev_swiotlb_force(struct device *dev)
+{
+	return false;
+}
 #endif /* CONFIG_DMA_RESTRICTED_POOL */
 
 bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr);
@@ -93,6 +98,10 @@ static inline bool is_swiotlb_force(struct device *dev)
 {
 	return false;
 }
+static inline bool is_dev_swiotlb_force(struct device *dev)
+{
+	return false;
+}
 static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
 	return false;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index f64cbe6e84cc..fd9c1bd183ac 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -841,6 +841,11 @@ bool is_swiotlb_force(struct device *dev)
 	return unlikely(swiotlb_force == SWIOTLB_FORCE) || dev->dev_swiotlb;
 }
 
+bool is_dev_swiotlb_force(struct device *dev)
+{
+	return dev->dev_swiotlb;
+}
+
 static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
 				    struct device *dev)
 {
-- 
2.30.0.478.g8a0d178c01-goog


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox