LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v1 34/41] powerpc/32: Set current->thread.regs in C interrupt entry
From: Christophe Leroy @ 2021-02-09  9:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

No need to do that in assembly, do it in C.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/interrupt.h | 4 +++-
 arch/powerpc/kernel/entry_32.S       | 3 +--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index d70c761edc00..550ad1d69a7b 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -20,8 +20,10 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
 	if (regs->msr & MSR_EE)
 		trace_hardirqs_off();
 
-	if (user_mode(regs))
+	if (user_mode(regs)) {
+		current->thread.regs = regs;
 		account_cpu_user_entry();
+	}
 #endif
 	/*
 	 * Book3E reconciles irq soft mask in asm
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index fd24520b2e0c..71ef5ab84b03 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -59,8 +59,7 @@
 prepare_transfer_to_handler:
 	andi.	r0,r9,MSR_PR
 	addi	r12, r2, THREAD
-	beq	2f			/* if from user, fix up THREAD.regs */
-	stw	r3,PT_REGS(r12)
+	beq	2f
 #ifdef CONFIG_PPC_BOOK3S_32
 	kuep_lock r11, r12
 #endif
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 36/41] powerpc/32: Only use prepare_transfer_to_handler function on book3s/32 and e500
From: Christophe Leroy @ 2021-02-09  9:57 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

Only book3s/32 and e500 have significant work to do in
prepare_transfer_to_handler.

Other 32-bit platforms have nothing to do at all.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/kernel/entry_32.S   | 6 ++----
 arch/powerpc/kernel/head_32.h    | 2 ++
 arch/powerpc/kernel/head_booke.h | 2 ++
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 70bc18f18f1a..544a9a2270ff 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -55,6 +55,7 @@
  * Note that we rely on the caller having set cr0.eq iff the exception
  * occurred in kernel mode (i.e. MSR:PR = 0).
  */
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
 	.globl	prepare_transfer_to_handler
 prepare_transfer_to_handler:
 	andi.	r0,r9,MSR_PR
@@ -69,15 +70,12 @@ prepare_transfer_to_handler:
          * check for stack overflow
          */
 	kuap_save_and_lock r11, r12, r9, r5, r6
-#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
 	lwz	r12,TI_LOCAL_FLAGS(r2)
 	mtcrf	0x01,r12
 	bt-	31-TLF_NAPPING,4f
 	bt-	31-TLF_SLEEPING,7f
-#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
 	blr
 
-#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
 4:	rlwinm	r12,r12,0,~_TLF_NAPPING
 	stw	r12,TI_LOCAL_FLAGS(r2)
 	b	power_save_ppc32_restore
@@ -88,9 +86,9 @@ prepare_transfer_to_handler:
 	rlwinm	r9,r9,0,~MSR_EE
 	stw	r9,_MSR(r11)
 	b	fast_exception_return
-#endif
 _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
 _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont)
+#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
 
 	.globl	transfer_to_syscall
 transfer_to_syscall:
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index b29c13221baa..e994ee3575d2 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -132,7 +132,9 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 .endm
 
 .macro prepare_transfer_to_handler
+#ifdef CONFIG_PPC_BOOK3S_32
 	bl	prepare_transfer_to_handler
+#endif
 .endm
 
 .macro SYSCALL_ENTRY trapno
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 7e47b5dfdd9c..750ffc0915df 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -92,7 +92,9 @@ END_BTB_FLUSH_SECTION
 .end
 
 .macro prepare_transfer_to_handler
+#ifdef CONFIG_E500
 	bl	prepare_transfer_to_handler
+#endif
 .endm
 
 .macro SYSCALL_ENTRY trapno intno srr1
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 37/41] powerpc/32s: Move KUEP locking/unlocking in C
From: Christophe Leroy @ 2021-02-09  9:57 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

This can be done in C, do it.

Unrolling the loop gains approx. 15% performance.

From now on, prepare_transfer_to_handler() is only for
interrupts from kernel.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/book3s/32/kup.h | 31 -------------------
 arch/powerpc/include/asm/interrupt.h     |  3 ++
 arch/powerpc/include/asm/kup.h           |  8 +++++
 arch/powerpc/kernel/entry_32.S           | 17 ++---------
 arch/powerpc/kernel/interrupt.c          |  4 +++
 arch/powerpc/mm/book3s32/Makefile        |  1 +
 arch/powerpc/mm/book3s32/kuep.c          | 38 ++++++++++++++++++++++++
 7 files changed, 56 insertions(+), 46 deletions(-)
 create mode 100644 arch/powerpc/mm/book3s32/kuep.c

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 73bc5d2c431d..b97ea60f6fa3 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -7,37 +7,6 @@
 
 #ifdef __ASSEMBLY__
 
-.macro kuep_update_sr	gpr1, gpr2		/* NEVER use r0 as gpr2 due to addis */
-101:	mtsrin	\gpr1, \gpr2
-	addi	\gpr1, \gpr1, 0x111		/* next VSID */
-	rlwinm	\gpr1, \gpr1, 0, 0xf0ffffff	/* clear VSID overflow */
-	addis	\gpr2, \gpr2, 0x1000		/* address of next segment */
-	bdnz	101b
-	isync
-.endm
-
-.macro kuep_lock	gpr1, gpr2
-#ifdef CONFIG_PPC_KUEP
-	li	\gpr1, NUM_USER_SEGMENTS
-	li	\gpr2, 0
-	mtctr	\gpr1
-	mfsrin	\gpr1, \gpr2
-	oris	\gpr1, \gpr1, SR_NX@h		/* set Nx */
-	kuep_update_sr \gpr1, \gpr2
-#endif
-.endm
-
-.macro kuep_unlock	gpr1, gpr2
-#ifdef CONFIG_PPC_KUEP
-	li	\gpr1, NUM_USER_SEGMENTS
-	li	\gpr2, 0
-	mtctr	\gpr1
-	mfsrin	\gpr1, \gpr2
-	rlwinm	\gpr1, \gpr1, 0, ~SR_NX		/* Clear Nx */
-	kuep_update_sr \gpr1, \gpr2
-#endif
-.endm
-
 #ifdef CONFIG_PPC_KUAP
 
 .macro kuap_update_sr	gpr1, gpr2, gpr3	/* NEVER use r0 as gpr2 due to addis */
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 550ad1d69a7b..e5e04e962165 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -21,6 +21,7 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
 		trace_hardirqs_off();
 
 	if (user_mode(regs)) {
+		kuep_lock();
 		current->thread.regs = regs;
 		account_cpu_user_entry();
 	}
@@ -83,6 +84,8 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt
 	exception_exit(state->ctx_state);
 #endif
 
+	if (user_mode(regs))
+		kuep_unlock();
 	/*
 	 * Book3S exits to user via interrupt_exit_user_prepare(), which does
 	 * context tracking, which is a cleaner way to handle PREEMPT=y
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 7ec21af49a45..25671f711ec2 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -55,6 +55,14 @@ void setup_kuep(bool disabled);
 static inline void setup_kuep(bool disabled) { }
 #endif /* CONFIG_PPC_KUEP */
 
+#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32)
+void kuep_lock(void);
+void kuep_unlock(void);
+#else
+static inline void kuep_lock(void) { }
+static inline void kuep_unlock(void) { }
+#endif
+
 #ifdef CONFIG_PPC_KUAP
 void setup_kuap(bool disabled);
 #else
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 544a9a2270ff..3ae790a362d1 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -60,13 +60,9 @@
 prepare_transfer_to_handler:
 	andi.	r0,r9,MSR_PR
 	addi	r12, r2, THREAD
-	beq	2f
-#ifdef CONFIG_PPC_BOOK3S_32
-	kuep_lock r11, r12
-#endif
-	blr
+	bnelr
 
-2:	/* if from kernel, check interrupted DOZE/NAP mode and
+	/* if from kernel, check interrupted DOZE/NAP mode and
          * check for stack overflow
          */
 	kuap_save_and_lock r11, r12, r9, r5, r6
@@ -93,9 +89,6 @@ _ASM_NOKPROBE_SYMBOL(transfer_to_handler_cont)
 	.globl	transfer_to_syscall
 transfer_to_syscall:
 	SAVE_NVGPRS(r1)
-#ifdef CONFIG_PPC_BOOK3S_32
-	kuep_lock r11, r12
-#endif
 
 	/* Calling convention has r9 = orig r0, r10 = regs */
 	addi	r10,r1,STACK_FRAME_OVERHEAD
@@ -112,9 +105,6 @@ ret_from_syscall:
 	cmplwi	cr0,r5,0
 	bne-	2f
 #endif /* CONFIG_PPC_47x */
-#ifdef CONFIG_PPC_BOOK3S_32
-	kuep_unlock r5, r7
-#endif
 	kuap_check r2, r4
 	lwz	r4,_LINK(r1)
 	lwz	r5,_CCR(r1)
@@ -295,9 +285,6 @@ interrupt_return:
 	bne-	.Lrestore_nvgprs
 
 .Lfast_user_interrupt_return:
-#ifdef CONFIG_PPC_BOOK3S_32
-	kuep_unlock	r10, r11
-#endif
 	kuap_check r2, r4
 	lwz	r11,_NIP(r1)
 	lwz	r12,_MSR(r1)
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 6111acf61373..3c2e9b28b05d 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -30,6 +30,8 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	syscall_fn f;
 	unsigned long expected_msr;
 
+	kuep_lock();
+
 	regs->orig_gpr3 = r3;
 
 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
@@ -355,6 +357,8 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 	 */
 	kuap_user_restore(regs);
 #endif
+	kuep_unlock();
+
 	return ret;
 }
 
diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile
index 446d9de88ce4..7f0c8a78ba0c 100644
--- a/arch/powerpc/mm/book3s32/Makefile
+++ b/arch/powerpc/mm/book3s32/Makefile
@@ -9,3 +9,4 @@ endif
 obj-y += mmu.o mmu_context.o
 obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o
 obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o
+obj-$(CONFIG_PPC_KUEP) += kuep.o
diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c
new file mode 100644
index 000000000000..7b3e4c57aaf9
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/kuep.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <asm/reg.h>
+#include <asm/task_size_32.h>
+#include <asm/mmu.h>
+
+#define KUEP_UPDATE_TWO_USER_SEGMENTS(n) do { 		\
+	if (TASK_SIZE > (n << 28))			\
+		mtsr(val1, n << 28);			\
+	if (TASK_SIZE > ((n + 1) << 28))		\
+		mtsr(val2, (n + 1) << 28);		\
+	val1 = (val1 + 0x222) & 0xf0ffffff;		\
+	val2 = (val2 + 0x222) & 0xf0ffffff;		\
+} while (0)
+
+static __always_inline void kuep_update(u32 val)
+{
+	int val1 = val;
+	int val2 = (val + 0x111) & 0xf0ffffff;
+
+	KUEP_UPDATE_TWO_USER_SEGMENTS(0);
+	KUEP_UPDATE_TWO_USER_SEGMENTS(2);
+	KUEP_UPDATE_TWO_USER_SEGMENTS(4);
+	KUEP_UPDATE_TWO_USER_SEGMENTS(6);
+	KUEP_UPDATE_TWO_USER_SEGMENTS(8);
+	KUEP_UPDATE_TWO_USER_SEGMENTS(10);
+	KUEP_UPDATE_TWO_USER_SEGMENTS(12);
+	KUEP_UPDATE_TWO_USER_SEGMENTS(14);
+}
+
+void kuep_lock(void)
+{
+	kuep_update(mfsr(0) | SR_NX);
+}
+
+void kuep_unlock(void)
+{
+	kuep_update(mfsr(0) & ~SR_NX);
+}
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 38/41] powerpc/64s: Make kuap_check_amr() and kuap_get_and_check_amr() generic
From: Christophe Leroy @ 2021-02-09  9:57 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

In preparation for porting powerpc32 to C syscall entry/exit,
rename kuap_check_amr() and kuap_get_and_check_amr() to kuap_check()
and kuap_get_and_check(), and move the stubs used when CONFIG_PPC_KUAP
is not selected into the generic asm/kup.h.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/book3s/64/kup.h | 24 ++----------------------
 arch/powerpc/include/asm/kup.h           | 10 +++++++++-
 arch/powerpc/kernel/interrupt.c          | 12 ++++++------
 3 files changed, 17 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
index 8bd905050896..d9b07e9998be 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -287,7 +287,7 @@ static inline void kuap_kernel_restore(struct pt_regs *regs,
 	 */
 }
 
-static inline unsigned long kuap_get_and_check_amr(void)
+static inline unsigned long kuap_get_and_check(void)
 {
 	if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
 		unsigned long amr = mfspr(SPRN_AMR);
@@ -298,27 +298,7 @@ static inline unsigned long kuap_get_and_check_amr(void)
 	return 0;
 }
 
-#else /* CONFIG_PPC_PKEY */
-
-static inline void kuap_user_restore(struct pt_regs *regs)
-{
-}
-
-static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
-{
-}
-
-static inline unsigned long kuap_get_and_check_amr(void)
-{
-	return 0;
-}
-
-#endif /* CONFIG_PPC_PKEY */
-
-
-#ifdef CONFIG_PPC_KUAP
-
-static inline void kuap_check_amr(void)
+static inline void kuap_check(void)
 {
 	if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
 		WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 25671f711ec2..b7efa46b3109 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -74,7 +74,15 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 	return false;
 }
 
-static inline void kuap_check_amr(void) { }
+static inline void kuap_check(void) { }
+static inline void kuap_save_and_lock(struct pt_regs *regs) { }
+static inline void kuap_user_restore(struct pt_regs *regs) { }
+static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { }
+
+static inline unsigned long kuap_get_and_check(void)
+{
+	return 0;
+}
 
 /*
  * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 3c2e9b28b05d..8180ed261a27 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -75,7 +75,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	} else
 #endif
 #ifdef CONFIG_PPC64
-		kuap_check_amr();
+		kuap_check();
 #endif
 
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -241,7 +241,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 	CT_WARN_ON(ct_state() == CONTEXT_USER);
 
 #ifdef CONFIG_PPC64
-	kuap_check_amr();
+	kuap_check();
 #endif
 
 	regs->result = r3;
@@ -381,7 +381,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
 	 * AMR can only have been unlocked if we interrupted the kernel.
 	 */
 #ifdef CONFIG_PPC64
-	kuap_check_amr();
+	kuap_check();
 #endif
 
 	local_irq_save(flags);
@@ -467,7 +467,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
 	unsigned long flags;
 	unsigned long ret = 0;
 #ifdef CONFIG_PPC64
-	unsigned long amr;
+	unsigned long kuap;
 #endif
 
 	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
@@ -483,7 +483,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
 		CT_WARN_ON(ct_state() == CONTEXT_USER);
 
 #ifdef CONFIG_PPC64
-	amr = kuap_get_and_check_amr();
+	kuap = kuap_get_and_check();
 #endif
 
 	if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
@@ -527,7 +527,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
 	 * value from the check above.
 	 */
 #ifdef CONFIG_PPC64
-	kuap_kernel_restore(regs, amr);
+	kuap_kernel_restore(regs, kuap);
 #endif
 
 	return ret;
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 39/41] powerpc/32s: Create C version of kuap save/restore/check helpers
From: Christophe Leroy @ 2021-02-09  9:57 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

In preparation for porting PPC32 to C syscall entry/exit,
create C versions of kuap_save_and_lock(), kuap_user_restore(),
kuap_kernel_restore(), kuap_check() and kuap_get_and_check()
on book3s/32.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/book3s/32/kup.h | 45 ++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index b97ea60f6fa3..c9d6c28bcd10 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -72,6 +72,51 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
 	isync();	/* Context sync required after mtsr() */
 }
 
+static inline void kuap_save_and_lock(struct pt_regs *regs)
+{
+	unsigned long kuap = current->thread.kuap;
+	u32 addr = kuap & 0xf0000000;
+	u32 end = kuap << 28;
+
+	regs->kuap = kuap;
+	if (unlikely(!kuap))
+		return;
+
+	current->thread.kuap = 0;
+	kuap_update_sr(mfsr(addr) | SR_KS, addr, end);	/* Set Ks */
+}
+
+static inline void kuap_user_restore(struct pt_regs *regs)
+{
+}
+
+static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+{
+	u32 addr = regs->kuap & 0xf0000000;
+	u32 end = regs->kuap << 28;
+
+	current->thread.kuap = regs->kuap;
+
+	if (unlikely(regs->kuap == kuap))
+		return;
+
+	kuap_update_sr(mfsr(addr) & ~SR_KS, addr, end);	/* Clear Ks */
+}
+
+static inline unsigned long kuap_get_and_check(void)
+{
+	unsigned long kuap = current->thread.kuap;
+
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != 0);
+
+	return kuap;
+}
+
+static inline void kuap_check(void)
+{
+	kuap_get_and_check();
+}
+
 static __always_inline void allow_user_access(void __user *to, const void __user *from,
 					      u32 size, unsigned long dir)
 {
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 41/41] powerpc/32: Manage KUAP in C
From: Christophe Leroy @ 2021-02-09  9:57 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

Move all KUAP management to C.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/book3s/32/kup.h     | 50 +-------------------
 arch/powerpc/include/asm/interrupt.h         |  2 +
 arch/powerpc/include/asm/kup.h               | 19 +-------
 arch/powerpc/include/asm/nohash/32/kup-8xx.h | 25 +---------
 arch/powerpc/kernel/entry_32.S               |  6 ---
 arch/powerpc/kernel/interrupt.c              | 19 ++------
 arch/powerpc/kernel/process.c                |  3 ++
 7 files changed, 12 insertions(+), 112 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index c9d6c28bcd10..27991e0d2cf9 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -5,55 +5,7 @@
 #include <asm/bug.h>
 #include <asm/book3s/32/mmu-hash.h>
 
-#ifdef __ASSEMBLY__
-
-#ifdef CONFIG_PPC_KUAP
-
-.macro kuap_update_sr	gpr1, gpr2, gpr3	/* NEVER use r0 as gpr2 due to addis */
-101:	mtsrin	\gpr1, \gpr2
-	addi	\gpr1, \gpr1, 0x111		/* next VSID */
-	rlwinm	\gpr1, \gpr1, 0, 0xf0ffffff	/* clear VSID overflow */
-	addis	\gpr2, \gpr2, 0x1000		/* address of next segment */
-	cmplw	\gpr2, \gpr3
-	blt-	101b
-	isync
-.endm
-
-.macro kuap_save_and_lock	sp, thread, gpr1, gpr2, gpr3
-	lwz	\gpr2, KUAP(\thread)
-	rlwinm.	\gpr3, \gpr2, 28, 0xf0000000
-	stw	\gpr2, STACK_REGS_KUAP(\sp)
-	beq+	102f
-	li	\gpr1, 0
-	stw	\gpr1, KUAP(\thread)
-	mfsrin	\gpr1, \gpr2
-	oris	\gpr1, \gpr1, SR_KS@h	/* set Ks */
-	kuap_update_sr	\gpr1, \gpr2, \gpr3
-102:
-.endm
-
-.macro kuap_restore	sp, current, gpr1, gpr2, gpr3
-	lwz	\gpr2, STACK_REGS_KUAP(\sp)
-	rlwinm.	\gpr3, \gpr2, 28, 0xf0000000
-	stw	\gpr2, THREAD + KUAP(\current)
-	beq+	102f
-	mfsrin	\gpr1, \gpr2
-	rlwinm	\gpr1, \gpr1, 0, ~SR_KS	/* Clear Ks */
-	kuap_update_sr	\gpr1, \gpr2, \gpr3
-102:
-.endm
-
-.macro kuap_check	current, gpr
-#ifdef CONFIG_PPC_KUAP_DEBUG
-	lwz	\gpr, THREAD + KUAP(\current)
-999:	twnei	\gpr, 0
-	EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
-#endif
-.endm
-
-#endif /* CONFIG_PPC_KUAP */
-
-#else /* !__ASSEMBLY__ */
+#ifndef __ASSEMBLY__
 
 #ifdef CONFIG_PPC_KUAP
 
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index e5e04e962165..0e38185d4d90 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -24,6 +24,8 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
 		kuep_lock();
 		current->thread.regs = regs;
 		account_cpu_user_entry();
+	} else {
+		kuap_save_and_lock(regs);
 	}
 #endif
 	/*
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index b7efa46b3109..f463256a57a6 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -26,24 +26,7 @@
 #include <asm/book3s/32/kup.h>
 #endif
 
-#ifdef __ASSEMBLY__
-#ifndef CONFIG_PPC_KUAP
-.macro kuap_save_and_lock	sp, thread, gpr1, gpr2, gpr3
-.endm
-
-.macro kuap_restore	sp, current, gpr1, gpr2, gpr3
-.endm
-
-.macro kuap_check	current, gpr
-.endm
-
-.macro kuap_check_amr	gpr1, gpr2
-.endm
-
-#endif
-
-#else /* !__ASSEMBLY__ */
-
+#ifndef __ASSEMBLY__
 extern bool disable_kuep;
 extern bool disable_kuap;
 
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 6f191fe08a2e..add0413f31d4 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -7,30 +7,7 @@
 
 #ifdef CONFIG_PPC_KUAP
 
-#ifdef __ASSEMBLY__
-
-.macro kuap_save_and_lock	sp, thread, gpr1, gpr2, gpr3
-	lis	\gpr2, MD_APG_KUAP@h	/* only APG0 and APG1 are used */
-	mfspr	\gpr1, SPRN_MD_AP
-	mtspr	SPRN_MD_AP, \gpr2
-	stw	\gpr1, STACK_REGS_KUAP(\sp)
-.endm
-
-.macro kuap_restore	sp, current, gpr1, gpr2, gpr3
-	lwz	\gpr1, STACK_REGS_KUAP(\sp)
-	mtspr	SPRN_MD_AP, \gpr1
-.endm
-
-.macro kuap_check	current, gpr
-#ifdef CONFIG_PPC_KUAP_DEBUG
-	mfspr	\gpr, SPRN_MD_AP
-	rlwinm	\gpr, \gpr, 16, 0xffff
-999:	twnei	\gpr, MD_APG_KUAP@h
-	EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
-#endif
-.endm
-
-#else /* !__ASSEMBLY__ */
+#ifndef __ASSEMBLY__
 
 #include <asm/reg.h>
 
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 3ae790a362d1..2f3b41c8b23f 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -59,13 +59,11 @@
 	.globl	prepare_transfer_to_handler
 prepare_transfer_to_handler:
 	andi.	r0,r9,MSR_PR
-	addi	r12, r2, THREAD
 	bnelr
 
 	/* if from kernel, check interrupted DOZE/NAP mode and
          * check for stack overflow
          */
-	kuap_save_and_lock r11, r12, r9, r5, r6
 	lwz	r12,TI_LOCAL_FLAGS(r2)
 	mtcrf	0x01,r12
 	bt-	31-TLF_NAPPING,4f
@@ -105,7 +103,6 @@ ret_from_syscall:
 	cmplwi	cr0,r5,0
 	bne-	2f
 #endif /* CONFIG_PPC_47x */
-	kuap_check r2, r4
 	lwz	r4,_LINK(r1)
 	lwz	r5,_CCR(r1)
 	mtlr	r4
@@ -217,7 +214,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
 	stw	r10,_CCR(r1)
 	stw	r1,KSP(r3)	/* Set old stack pointer */
 
-	kuap_check r2, r0
 #ifdef CONFIG_SMP
 	/* We need a sync somewhere here to make sure that if the
 	 * previous task gets rescheduled on another CPU, it sees all
@@ -285,7 +281,6 @@ interrupt_return:
 	bne-	.Lrestore_nvgprs
 
 .Lfast_user_interrupt_return:
-	kuap_check r2, r4
 	lwz	r11,_NIP(r1)
 	lwz	r12,_MSR(r1)
 	mtspr	SPRN_SRR0,r11
@@ -330,7 +325,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
 .Lfast_kernel_interrupt_return:
 	cmpwi	cr1,r3,0
-	kuap_restore r1, r2, r3, r4, r5
 	lwz	r11,_NIP(r1)
 	lwz	r12,_MSR(r1)
 	mtspr	SPRN_SRR0,r11
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 8180ed261a27..42769d619f7d 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -31,6 +31,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	unsigned long expected_msr;
 
 	kuep_lock();
+#ifdef CONFIG_PPC32
+	kuap_save_and_lock(regs);
+#endif
 
 	regs->orig_gpr3 = r3;
 
@@ -74,9 +77,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
 			isync();
 	} else
 #endif
-#ifdef CONFIG_PPC64
 		kuap_check();
-#endif
 
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 	if (IS_ENABLED(CONFIG_PPC32) && unlikely(current->thread.debug.dbcr0 & DBCR0_IDM)) {
@@ -240,9 +241,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 
 	CT_WARN_ON(ct_state() == CONTEXT_USER);
 
-#ifdef CONFIG_PPC64
 	kuap_check();
-#endif
 
 	regs->result = r3;
 
@@ -351,7 +350,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
 
 	account_cpu_user_exit();
 
-#ifdef CONFIG_PPC_BOOK3S_64 /* BOOK3E and ppc32 not using this */
+#ifndef CONFIG_PPC_BOOK3E_64 /* BOOK3E not using this */
 	/*
 	 * We do this at the end so that we do context switch with KERNEL AMR
 	 */
@@ -380,9 +379,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
 	 * We don't need to restore AMR on the way back to userspace for KUAP.
 	 * AMR can only have been unlocked if we interrupted the kernel.
 	 */
-#ifdef CONFIG_PPC64
 	kuap_check();
-#endif
 
 	local_irq_save(flags);
 
@@ -453,9 +450,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned
 	/*
 	 * We do this at the end so that we do context switch with KERNEL AMR
 	 */
-#ifdef CONFIG_PPC64
 	kuap_user_restore(regs);
-#endif
 	return ret;
 }
 
@@ -466,9 +461,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
 {
 	unsigned long flags;
 	unsigned long ret = 0;
-#ifdef CONFIG_PPC64
 	unsigned long kuap;
-#endif
 
 	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
 	    unlikely(!(regs->msr & MSR_RI)))
@@ -482,9 +475,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
 	if (TRAP(regs) != 0x700)
 		CT_WARN_ON(ct_state() == CONTEXT_USER);
 
-#ifdef CONFIG_PPC64
 	kuap = kuap_get_and_check();
-#endif
 
 	if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
 		clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
@@ -526,9 +517,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign
 	 * which would cause Read-After-Write stalls. Hence, we take the AMR
 	 * value from the check above.
 	 */
-#ifdef CONFIG_PPC64
 	kuap_kernel_restore(regs, kuap);
-#endif
 
 	return ret;
 }
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 384951381329..dda871dd9bfb 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1255,6 +1255,9 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	 */
 	restore_sprs(old_thread, new_thread);
 
+#ifdef CONFIG_PPC32
+	kuap_check();
+#endif
 	last = _switch(old_thread, new_thread);
 
 #ifdef CONFIG_PPC_BOOK3S_64
-- 
2.25.0


^ permalink raw reply related

* [RFC PATCH v1 40/41] powerpc/8xx: Create C version of kuap save/restore/check helpers
From: Christophe Leroy @ 2021-02-09  9:57 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1612864003.git.christophe.leroy@csgroup.eu>

In preparation for porting PPC32 to C syscall entry/exit,
create C versions of kuap_save_and_lock(), kuap_user_restore(),
kuap_kernel_restore(), kuap_check() and kuap_get_and_check() on 8xx.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/nohash/32/kup-8xx.h | 33 ++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 17a4a616436f..6f191fe08a2e 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -34,6 +34,39 @@
 
 #include <asm/reg.h>
 
+static inline void kuap_save_and_lock(struct pt_regs *regs)
+{
+	regs->kuap = mfspr(SPRN_MD_AP);
+	mtspr(SPRN_MD_AP, MD_APG_KUAP);
+}
+
+static inline void kuap_user_restore(struct pt_regs *regs)
+{
+}
+
+static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+{
+	mtspr(SPRN_MD_AP, regs->kuap);
+}
+
+static inline void kuap_check(void)
+{
+	if (!IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+		return;
+
+	WARN_ON_ONCE(mfspr(SPRN_MD_AP) >> 16 != MD_APG_KUAP >> 16);
+}
+
+static inline unsigned long kuap_get_and_check(void)
+{
+	unsigned long kuap = mfspr(SPRN_MD_AP);
+
+	if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+		WARN_ON_ONCE(kuap >> 16 != MD_APG_KUAP >> 16);
+
+	return kuap;
+}
+
 static inline void allow_user_access(void __user *to, const void __user *from,
 				     unsigned long size, unsigned long dir)
 {
-- 
2.25.0


^ permalink raw reply related

* linux-next: build failure after merge of the powerpc tree
From: Stephen Rothwell @ 2021-02-09 10:19 UTC (permalink / raw)
  To: Michael Ellerman, PowerPC
  Cc: Linux Next Mailing List, Linux Kernel Mailing List,
	Nicholas Piggin

[-- Attachment #1: Type: text/plain, Size: 866 bytes --]

Hi all,

After merging the powerpc tree, today's linux-next build (powerpc
allyesconfig) failed like this:

arch/powerpc/kernel/head_64.o:(__ftr_alt_97+0x0): relocation truncated to fit: R_PPC64_REL24 (OPD) against symbol `do_page_fault' defined in .opd section in arch/powerpc/mm/fault.o
arch/powerpc/kernel/head_64.o:(__ftr_alt_97+0x8): relocation truncated to fit: R_PPC64_REL24 (OPD) against symbol `do_page_fault' defined in .opd section in arch/powerpc/mm/fault.o
arch/powerpc/kernel/head_64.o:(__ftr_alt_97+0x28): relocation truncated to fit: R_PPC64_REL24 (OPD) against symbol `unknown_exception' defined in .opd section in arch/powerpc/kernel/traps.o

Not sure exactly which commit caused this, but it is most likely part
of a series in the powerpc tree.

I have left the allyesconfig build broken for today.

-- 
Cheers,
Stephen Rothwell

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* [PATCH 2/3] powerpc/time: Avoid using get_tbl()
From: Christophe Leroy @ 2021-02-09 10:26 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, broonie
  Cc: linuxppc-dev, linux-kernel, linux-spi
In-Reply-To: <99bf008e2970de7f8ed3225cda69a6d06ae1a644.1612866360.git.christophe.leroy@csgroup.eu>

get_tbl() is confusing as it returns the content of the TBL register
on PPC32 but the concatenation of TBL and TBU on PPC64.

Use mftb() instead.

This will allow the removal of get_tbl() in a following patch.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
index 05e19470d523..b91ebebd9ff2 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
@@ -229,7 +229,7 @@ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id)
 	int dma, write, poll_dma;
 
 	spin_lock_irqsave(&lpbfifo.lock, flags);
-	ts = get_tbl();
+	ts = mftb();
 
 	req = lpbfifo.req;
 	if (!req) {
@@ -307,7 +307,7 @@ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id)
 	if (irq != 0) /* don't increment on polled case */
 		req->irq_count++;
 
-	req->irq_ticks += get_tbl() - ts;
+	req->irq_ticks += mftb() - ts;
 	spin_unlock_irqrestore(&lpbfifo.lock, flags);
 
 	/* Spinlock is released; it is now safe to call the callback */
@@ -330,7 +330,7 @@ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id)
 	u32 ts;
 
 	spin_lock_irqsave(&lpbfifo.lock, flags);
-	ts = get_tbl();
+	ts = mftb();
 
 	req = lpbfifo.req;
 	if (!req || (req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA)) {
@@ -361,7 +361,7 @@ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id)
 	lpbfifo.req = NULL;
 
 	/* Release the lock before calling out to the callback. */
-	req->irq_ticks += get_tbl() - ts;
+	req->irq_ticks += mftb() - ts;
 	spin_unlock_irqrestore(&lpbfifo.lock, flags);
 
 	if (req->callback)
-- 
2.25.0


^ permalink raw reply related

* [PATCH 1/3] spi: mpc52xx: Avoid using get_tbl()
From: Christophe Leroy @ 2021-02-09 10:26 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, broonie
  Cc: linuxppc-dev, linux-kernel, linux-spi

get_tbl() is confusing as it returns the content of the TBL register
on PPC32 but the concatenation of TBL and TBU on PPC64.

Use mftb() instead.

This will allow the removal of get_tbl() in a following patch.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 drivers/spi/spi-mpc52xx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/spi/spi-mpc52xx.c b/drivers/spi/spi-mpc52xx.c
index ef2f24420460..e6a30f232370 100644
--- a/drivers/spi/spi-mpc52xx.c
+++ b/drivers/spi/spi-mpc52xx.c
@@ -120,7 +120,7 @@ static void mpc52xx_spi_start_transfer(struct mpc52xx_spi *ms)
 	ms->cs_change = ms->transfer->cs_change;
 
 	/* Write out the first byte */
-	ms->wcol_tx_timestamp = get_tbl();
+	ms->wcol_tx_timestamp = mftb();
 	if (ms->tx_buf)
 		out_8(ms->regs + SPI_DATA, *ms->tx_buf++);
 	else
@@ -221,8 +221,8 @@ static int mpc52xx_spi_fsmstate_transfer(int irq, struct mpc52xx_spi *ms,
 		 * but it can also be worked around simply by retrying the
 		 * transfer which is what we do here. */
 		ms->wcol_count++;
-		ms->wcol_ticks += get_tbl() - ms->wcol_tx_timestamp;
-		ms->wcol_tx_timestamp = get_tbl();
+		ms->wcol_ticks += mftb() - ms->wcol_tx_timestamp;
+		ms->wcol_tx_timestamp = mftb();
 		data = 0;
 		if (ms->tx_buf)
 			data = *(ms->tx_buf - 1);
@@ -247,14 +247,14 @@ static int mpc52xx_spi_fsmstate_transfer(int irq, struct mpc52xx_spi *ms,
 	/* Is the transfer complete? */
 	ms->len--;
 	if (ms->len == 0) {
-		ms->timestamp = get_tbl();
+		ms->timestamp = mftb();
 		ms->timestamp += ms->transfer->delay_usecs * tb_ticks_per_usec;
 		ms->state = mpc52xx_spi_fsmstate_wait;
 		return FSM_CONTINUE;
 	}
 
 	/* Write out the next byte */
-	ms->wcol_tx_timestamp = get_tbl();
+	ms->wcol_tx_timestamp = mftb();
 	if (ms->tx_buf)
 		out_8(ms->regs + SPI_DATA, *ms->tx_buf++);
 	else
@@ -276,7 +276,7 @@ mpc52xx_spi_fsmstate_wait(int irq, struct mpc52xx_spi *ms, u8 status, u8 data)
 		dev_err(&ms->master->dev, "spurious irq, status=0x%.2x\n",
 			status);
 
-	if (((int)get_tbl()) - ms->timestamp < 0)
+	if (((int)mftb()) - ms->timestamp < 0)
 		return FSM_POLL;
 
 	ms->message->actual_length += ms->transfer->len;
-- 
2.25.0


^ permalink raw reply related

* [PATCH 3/3] powerpc/time: Remove get_tbl()
From: Christophe Leroy @ 2021-02-09 10:26 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, broonie
  Cc: linuxppc-dev, linux-kernel, linux-spi
In-Reply-To: <99bf008e2970de7f8ed3225cda69a6d06ae1a644.1612866360.git.christophe.leroy@csgroup.eu>

There are no more users of get_tbl(). Remove it.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/vdso/timebase.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/powerpc/include/asm/vdso/timebase.h b/arch/powerpc/include/asm/vdso/timebase.h
index 881f655caa0a..891c9d5eaabe 100644
--- a/arch/powerpc/include/asm/vdso/timebase.h
+++ b/arch/powerpc/include/asm/vdso/timebase.h
@@ -43,12 +43,6 @@
 #define mttbl(v)	asm volatile("mttbl %0":: "r"(v))
 #define mttbu(v)	asm volatile("mttbu %0":: "r"(v))
 
-/* For compatibility, get_tbl() is defined as get_tb() on ppc64 */
-static inline unsigned long get_tbl(void)
-{
-	return mftb();
-}
-
 static __always_inline u64 get_tb(void)
 {
 	unsigned int tbhi, tblo, tbhi2;
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH] tools/perf: Fix powerpc gap between kernel end and module start
From: Arnaldo Carvalho de Melo @ 2021-02-09 12:47 UTC (permalink / raw)
  To: Athira Rajeev
  Cc: linuxppc-dev, Madhavan Srinivasan, Jiri Olsa, Jiri Olsa,
	Kajol Jain
In-Reply-To: <20210203153148.GC854763@kernel.org>

Em Wed, Feb 03, 2021 at 12:31:48PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Tue, Feb 02, 2021 at 04:02:36PM +0530, Athira Rajeev escreveu:
> > 
> > 
> >     On 18-Jan-2021, at 3:51 PM, kajoljain <kjain@linux.ibm.com> wrote:
> > 
> > 
> > 
> >     On 1/12/21 3:08 PM, Jiri Olsa wrote:
> > 
> >         On Mon, Dec 28, 2020 at 09:14:14PM -0500, Athira Rajeev wrote:
> > 
> >         SNIP
> > 
> > 
> >             c000000002799370 b backtrace_flag
> >             c000000002799378 B radix_tree_node_cachep
> >             c000000002799380 B __bss_stop
> >             c0000000027a0000 B _end
> >             c008000003890000 t icmp_checkentry      [ip_tables]
> >             c008000003890038 t ipt_alloc_initial_table      [ip_tables]
> >             c008000003890468 T ipt_do_table [ip_tables]
> >             c008000003890de8 T ipt_unregister_table_pre_exit        [ip_tables]
> >             ...
> > 
> >             Perf calls function symbols__fixup_end() which sets the end of
> >             symbol
> >             to 0xc008000003890000, which is the next address and this is the
> >             start
> >             address of first module (icmp_checkentry in above) which will make
> >             the
> >             huge symbol size of 0x80000010f0000.
> > 
> >             After symbols__fixup_end:
> >             symbols__fixup_end: sym->name: _end, sym->start:
> >             0xc0000000027a0000,
> >             sym->end: 0xc008000003890000
> > 
> >             On powerpc, kernel text segment is located at 0xc000000000000000
> >             whereas the modules are located at very high memory addresses,
> >             0xc00800000xxxxxxx. Since the gap between end of kernel text
> >             segment
> >             and beginning of first module's address is high, histogram
> >             allocation
> >             using calloc fails.
> > 
> >             Fix this by detecting the kernel's last symbol and limiting
> >             the range of last kernel symbol to pagesize.
> > 
> > 
> >     Patch looks good to me.
> > 
> >     Tested-By: Kajol Jain<kjain@linux.ibm.com>
> > 
> >     Thanks,
> >     Kajol Jain
> > 
> > 
> >             Signed-off-by: Athira Rajeev<atrajeev@linux.vnet.ibm.com>
> > 
> > 
> >         I can't test, but since the same approach works for arm and s390,
> >         this also looks ok
> > 
> >         Acked-by: Jiri Olsa <jolsa@redhat.com>
> > 
> >         thanks,
> >         jirka
> > 
> > 
> > Hi Arnaldo,
> > 
> > Can you please help review this patch and merge if this looks good..
> 
> Thanks, collected the Tested-by from Kajol and the Acked-by from Jiri
> and applied to my local tree for testing, then up to my perf/core
> branch.

Had to apply this on top.

- Arnaldo

commit 0f000f9c89182950cd3500226729977251529364
Author: Arnaldo Carvalho de Melo <acme@redhat.com>
Date:   Tue Feb 9 09:41:21 2021 -0300

    perf powerpc: Fix printf conversion specifier for IP addresses
    
    We need to use "%#" PRIx64 for u64 values, not "%lx", fixing this build
    problem on powerpc 32-bit:
    
      72    13.69 ubuntu:18.04-x-powerpc        : FAIL powerpc-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
        arch/powerpc/util/machine.c: In function 'arch__symbols__fixup_end':
        arch/powerpc/util/machine.c:23:12: error: format '%lx' expects argument of type 'long unsigned int', but argument 6 has type 'u64 {aka long long unsigned int}' [-Werror=format=]
          pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
                    ^
        /git/linux/tools/perf/util/debug.h:18:21: note: in definition of macro 'pr_fmt'
         #define pr_fmt(fmt) fmt
                             ^~~
        /git/linux/tools/perf/util/debug.h:33:29: note: in expansion of macro 'pr_debugN'
         #define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__)
                                     ^~~~~~~~~
        /git/linux/tools/perf/util/debug.h:33:42: note: in expansion of macro 'pr_fmt'
         #define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__)
                                                  ^~~~~~
        arch/powerpc/util/machine.c:23:2: note: in expansion of macro 'pr_debug4'
          pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
          ^~~~~~~~~
        cc1: all warnings being treated as errors
        /git/linux/tools/build/Makefile.build:139: recipe for target 'util' failed
        make[5]: *** [util] Error 2
        /git/linux/tools/build/Makefile.build:139: recipe for target 'powerpc' failed
        make[4]: *** [powerpc] Error 2
        /git/linux/tools/build/Makefile.build:139: recipe for target 'arch' failed
        make[3]: *** [arch] Error 2
      73    30.47 ubuntu:18.04-x-powerpc64      : Ok   powerpc64-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
    
    Fixes: 557c3eadb7712741 ("perf powerpc: Fix gap between kernel end and module start")
    Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
    Cc: Jiri Olsa <jolsa@redhat.com>
    Cc: Kajol Jain <kjain@linux.ibm.com>
    Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
    Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

diff --git a/tools/perf/arch/powerpc/util/machine.c b/tools/perf/arch/powerpc/util/machine.c
index c30e5cc88c1673d6..e652a1aa8132274f 100644
--- a/tools/perf/arch/powerpc/util/machine.c
+++ b/tools/perf/arch/powerpc/util/machine.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 
+#include <inttypes.h>
 #include <stdio.h>
 #include <string.h>
 #include <internal/lib.h> // page_size
@@ -20,5 +21,5 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
 		p->end += page_size;
 	else
 		p->end = c->start;
-	pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
+	pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
 }

^ permalink raw reply related

* Re: [PATCH v5 20/22] powerpc/syscall: Avoid storing 'current' in another pointer
From: Segher Boessenkool @ 2021-02-09 13:50 UTC (permalink / raw)
  To: Nicholas Piggin; +Cc: linux-kernel, Paul Mackerras, msuchanek, linuxppc-dev
In-Reply-To: <1612838134.rvncv9kzls.astroid@bobo.none>

On Tue, Feb 09, 2021 at 12:36:20PM +1000, Nicholas Piggin wrote:
> What if you did this?

> +static inline struct task_struct *get_current(void)
> +{
> +	register struct task_struct *task asm ("r2");
> +
> +	return task;
> +}

Local register asm variables are *only* guaranteed to live in that
register as operands to an asm.  See
  https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
("The only supported use" etc.)

You can do something like

static inline struct task_struct *get_current(void)
{
	register struct task_struct *task asm ("r2");

	asm("" : "+r"(task));

	return task;
}

which makes sure that "task" actually is in r2 at the point of that asm.


Segher

^ permalink raw reply

* [PATCH v2 3/3] powerpc/uaccess: Merge raw_copy_to_user_allowed() into raw_copy_to_user()
From: Christophe Leroy @ 2021-02-09 14:02 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <99d4ccb58a20d8408d0e19874393655ad5b40822.1612879284.git.christophe.leroy@csgroup.eu>

Since commit 17bc43367fc2 ("powerpc/uaccess: Implement
unsafe_copy_to_user() as a simple loop"), raw_copy_to_user_allowed()
is only used by raw_copy_to_user().

Merge raw_copy_to_user_allowed() into raw_copy_to_user().

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/uaccess.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 2fb1d95f10d3..33b2de642120 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -401,19 +401,13 @@ static inline unsigned long raw_copy_from_user(void *to,
 	return ret;
 }
 
-static inline unsigned long
-raw_copy_to_user_allowed(void __user *to, const void *from, unsigned long n)
-{
-	return __copy_tofrom_user(to, (__force const void __user *)from, n);
-}
-
 static inline unsigned long
 raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	unsigned long ret;
 
 	allow_write_to_user(to, n);
-	ret = raw_copy_to_user_allowed(to, from, n);
+	ret = __copy_tofrom_user(to, (__force const void __user *)from, n);
 	prevent_write_to_user(to, n);
 	return ret;
 }
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 2/3] powerpc/uaccess: Merge __put_user_size_allowed() into __put_user_size()
From: Christophe Leroy @ 2021-02-09 14:02 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <99d4ccb58a20d8408d0e19874393655ad5b40822.1612879284.git.christophe.leroy@csgroup.eu>

__put_user_size_allowed() is only called from __put_user_size() now.

Merge them together.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/uaccess.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index a4d2569173ac..2fb1d95f10d3 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -108,22 +108,18 @@ static inline bool __access_ok(unsigned long addr, unsigned long size)
 
 extern long __put_user_bad(void);
 
-#define __put_user_size_allowed(x, ptr, size, retval)		\
+#define __put_user_size(x, ptr, size, retval)			\
 do {								\
 	__label__ __pu_failed;					\
 								\
 	retval = 0;						\
+	allow_write_to_user(ptr, size);				\
 	__put_user_size_goto(x, ptr, size, __pu_failed);	\
+	prevent_write_to_user(ptr, size);			\
 	break;							\
 								\
 __pu_failed:							\
 	retval = -EFAULT;					\
-} while (0)
-
-#define __put_user_size(x, ptr, size, retval)			\
-do {								\
-	allow_write_to_user(ptr, size);				\
-	__put_user_size_allowed(x, ptr, size, retval);		\
 	prevent_write_to_user(ptr, size);			\
 } while (0)
 
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 1/3] powerpc/uaccess: get rid of small constant size cases in raw_copy_{to,from}_user()
From: Christophe Leroy @ 2021-02-09 14:02 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linuxppc-dev, linux-kernel

Copied from commit 4b842e4e25b1 ("x86: get rid of small
constant size cases in raw_copy_{to,from}_user()")

Very few call sites where that would be triggered remain, and none
of those is anywhere near hot enough to bother.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/uaccess.h | 41 ------------------------------
 1 file changed, 41 deletions(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 93d33f7e8b53..a4d2569173ac 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -398,26 +398,6 @@ static inline unsigned long raw_copy_from_user(void *to,
 		const void __user *from, unsigned long n)
 {
 	unsigned long ret;
-	if (__builtin_constant_p(n) && (n <= 8)) {
-		ret = 1;
-
-		switch (n) {
-		case 1:
-			__get_user_size(*(u8 *)to, from, 1, ret);
-			break;
-		case 2:
-			__get_user_size(*(u16 *)to, from, 2, ret);
-			break;
-		case 4:
-			__get_user_size(*(u32 *)to, from, 4, ret);
-			break;
-		case 8:
-			__get_user_size(*(u64 *)to, from, 8, ret);
-			break;
-		}
-		if (ret == 0)
-			return 0;
-	}
 
 	allow_read_from_user(from, n);
 	ret = __copy_tofrom_user((__force void __user *)to, from, n);
@@ -428,27 +408,6 @@ static inline unsigned long raw_copy_from_user(void *to,
 static inline unsigned long
 raw_copy_to_user_allowed(void __user *to, const void *from, unsigned long n)
 {
-	if (__builtin_constant_p(n) && (n <= 8)) {
-		unsigned long ret = 1;
-
-		switch (n) {
-		case 1:
-			__put_user_size_allowed(*(u8 *)from, (u8 __user *)to, 1, ret);
-			break;
-		case 2:
-			__put_user_size_allowed(*(u16 *)from, (u16 __user *)to, 2, ret);
-			break;
-		case 4:
-			__put_user_size_allowed(*(u32 *)from, (u32 __user *)to, 4, ret);
-			break;
-		case 8:
-			__put_user_size_allowed(*(u64 *)from, (u64 __user *)to, 8, ret);
-			break;
-		}
-		if (ret == 0)
-			return 0;
-	}
-
 	return __copy_tofrom_user(to, (__force const void __user *)from, n);
 }
 
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH v3 1/8] powerpc/uaccess: Add unsafe_copy_from_user
From: Christophe Leroy @ 2021-02-09 14:09 UTC (permalink / raw)
  To: Michael Ellerman, Christopher M. Riedl, linuxppc-dev
In-Reply-To: <87pn21r7yr.fsf@mpe.ellerman.id.au>



Le 19/01/2021 à 03:11, Michael Ellerman a écrit :
> "Christopher M. Riedl" <cmr@codefail.de> writes:
>> On Mon Jan 11, 2021 at 7:22 AM CST, Christophe Leroy wrote:
>>> Le 09/01/2021 à 04:25, Christopher M. Riedl a écrit :
>>>> Implement raw_copy_from_user_allowed() which assumes that userspace read
>>>> access is open. Use this new function to implement raw_copy_from_user().
>>>> Finally, wrap the new function to follow the usual "unsafe_" convention
>>>> of taking a label argument.
>>>
>>> I think there is no point implementing raw_copy_from_user_allowed(), see
>>> https://github.com/linuxppc/linux/commit/4b842e4e25b1 and
>>> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/8c74fc9ce8131cabb10b3e95dc0e430f396ee83e.1610369143.git.christophe.leroy@csgroup.eu/
>>>
>>> You should simply do:
>>>
>>> #define unsafe_copy_from_user(d, s, l, e) \
>>> unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
>>>
>>
>> I gave this a try and the signal ops decreased by ~8K. Now, to be
>> honest, I am not sure what an "acceptable" benchmark number here
>> actually is - so maybe this is ok? Same loss with both radix and hash:
>>
>> 	|                                      | hash   | radix  |
>> 	| ------------------------------------ | ------ | ------ |
>> 	| linuxppc/next                        | 118693 | 133296 |
>> 	| linuxppc/next w/o KUAP+KUEP          | 228911 | 228654 |
>> 	| unsafe-signal64                      | 200480 | 234067 |
>> 	| unsafe-signal64 (__copy_tofrom_user) | 192467 | 225119 |
>>
>> To put this into perspective, prior to KUAP and uaccess flush, signal
>> performance in this benchmark was ~290K on hash.
> 
> If I'm doing the math right 8K is ~4% of the best number.
> 
> It seems like 4% is worth a few lines of code to handle these constant
> sizes. It's not like we have performance to throw away.
> 
> Or, we should chase down where the call sites are that are doing small
> constant copies with copy_to/from_user() and change them to use
> get/put_user().
> 

I have built pmac32_defconfig and ppc64_defconfig with a BUILD_BUG_ON(__builtin_constant_p(n) && (n 
== 1 || n == 2 || n == 4 || n == 8)) in raw_copy_from_user() and raw_copy_to_user():

On pmac32_defconfig, no hit.

On ppc64_defconfig, two hits:
- copies of sigset_t in signal64. This problem is only on linux/next. On next-test we don't have 
this problem anymore thanks to the series from Christopher.
- in pkey_set() in arch/powerpc/kernel/ptrace/ptrace-view.c, in the copy of new_amr. This is not a 
hot path I think so we can live with it.

Christophe

^ permalink raw reply

* [PATCH] powerpc/64: Fix stack trace not displaying final frame
From: Michael Ellerman @ 2021-02-09 14:16 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: npiggin

In commit bf13718bc57a ("powerpc: show registers when unwinding
interrupt frames") we changed our stack dumping logic to show the full
registers whenever we find an interrupt frame on the stack.

However we didn't notice that on 64-bit this doesn't show the final
frame, ie. the interrupt that brought us in from userspace, whereas on
32-bit it does.

That is due to confusion about the size of that last frame. The code
in show_stack() calls validate_sp(), passing it STACK_INT_FRAME_SIZE
to check the sp is at least that far below the top of the stack.

However on 64-bit that size is too large for the final frame, because
it includes the red zone, but we don't allocate a red zone for the
first frame.

So add a new define that encodes the correct size for 32-bit and
64-bit, and use it in show_stack().

This results in the full trace being shown on 64-bit, eg:

  sysrq: Trigger a crash
  Kernel panic - not syncing: sysrq triggered crash
  CPU: 0 PID: 83 Comm: sh Not tainted 5.11.0-rc2-gcc-8.2.0-00188-g571abcb96b10-dirty #649
  Call Trace:
  [c00000000a1c3ac0] [c000000000897b70] dump_stack+0xc4/0x114 (unreliable)
  [c00000000a1c3b00] [c00000000014334c] panic+0x178/0x41c
  [c00000000a1c3ba0] [c00000000094e600] sysrq_handle_crash+0x40/0x50
  [c00000000a1c3c00] [c00000000094ef98] __handle_sysrq+0xd8/0x210
  [c00000000a1c3ca0] [c00000000094f820] write_sysrq_trigger+0x100/0x188
  [c00000000a1c3ce0] [c0000000005559dc] proc_reg_write+0x10c/0x1b0
  [c00000000a1c3d10] [c000000000479950] vfs_write+0xf0/0x360
  [c00000000a1c3d60] [c000000000479d9c] ksys_write+0x7c/0x140
  [c00000000a1c3db0] [c00000000002bf5c] system_call_exception+0x19c/0x2c0
  [c00000000a1c3e10] [c00000000000d35c] system_call_common+0xec/0x278
  --- interrupt: c00 at 0x7fff9fbab428
  NIP:  00007fff9fbab428 LR: 000000001000b724 CTR: 0000000000000000
  REGS: c00000000a1c3e80 TRAP: 0c00   Not tainted  (5.11.0-rc2-gcc-8.2.0-00188-g571abcb96b10-dirty)
  MSR:  900000000280f033 <SF,HV,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 22002884  XER: 00000000
  IRQMASK: 0
  GPR00: 0000000000000004 00007fffc3cb8960 00007fff9fc59900 0000000000000001
  GPR04: 000000002a4b32d0 0000000000000002 0000000000000063 0000000000000063
  GPR08: 000000002a4b32d0 0000000000000000 0000000000000000 0000000000000000
  GPR12: 0000000000000000 00007fff9fcca9a0 0000000000000000 0000000000000000
  GPR16: 0000000000000000 0000000000000000 0000000000000000 00000000100b8fd0
  GPR20: 000000002a4b3485 00000000100b8f90 0000000000000000 0000000000000000
  GPR24: 000000002a4b0440 00000000100e77b8 0000000000000020 000000002a4b32d0
  GPR28: 0000000000000001 0000000000000002 000000002a4b32d0 0000000000000001
  NIP [00007fff9fbab428] 0x7fff9fbab428
  LR [000000001000b724] 0x1000b724
  --- interrupt: c00

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/ptrace.h | 3 +++
 arch/powerpc/kernel/asm-offsets.c | 2 +-
 arch/powerpc/kernel/process.c     | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 58f9dc060a7b..8236c5e749e4 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -70,6 +70,9 @@ struct pt_regs
 };
 #endif
 
+
+#define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs))
+
 #ifdef __powerpc64__
 
 /*
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 31edd9bbce75..6109496e5fdf 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -308,7 +308,7 @@ int main(void)
 
 	/* Interrupt register frame */
 	DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
-	DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
+	DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS);
 	STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
 	STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
 	STACK_PT_REGS_OFFSET(GPR2, gpr[2]);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index e296440e9d16..924d023dad0a 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -2179,7 +2179,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack,
 		 * See if this is an exception frame.
 		 * We look for the "regshere" marker in the current frame.
 		 */
-		if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE)
+		if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS)
 		    && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
 			struct pt_regs *regs = (struct pt_regs *)
 				(sp + STACK_FRAME_OVERHEAD);
-- 
2.25.1


^ permalink raw reply related

* Re: [PATCH v5 18/22] powerpc/syscall: Remove FULL_REGS verification in system_call_exception
From: Christophe Leroy @ 2021-02-09 14:31 UTC (permalink / raw)
  To: Nicholas Piggin, Benjamin Herrenschmidt, Michael Ellerman,
	msuchanek, Paul Mackerras
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <1612836023.l122pe2n2b.astroid@bobo.none>



Le 09/02/2021 à 03:02, Nicholas Piggin a écrit :
> Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
>> For book3s/64, FULL_REGS() is 'true' at all times, so the test is void.
>> For others, non volatile registers are saved unconditionally.
>>
>> So the verification is pointless.
>>
>> Should one fail to do it, it would anyway be caught by the
>> CHECK_FULL_REGS() in copy_thread() as we have removed the
>> special versions ppc_fork() and friends.
>>
>> null_syscall benchmark reduction 4 cycles (332 => 328 cycles)
> 
> I wonder if we rather make a CONFIG option for a bunch of these simpler
> debug checks here (and also in interrupt exit, wrappers, etc) rather
> than remove them entirely.

We can drop this patch if you prefer. Anyway, like book3s/64, once ppc32 also do interrupt 
entry/exit in C, FULL_REGS() will already return true.

Christophe


> 
> Thanks,
> Nick
> 
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> ---
>>   arch/powerpc/kernel/interrupt.c | 1 -
>>   1 file changed, 1 deletion(-)
>>
>> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
>> index 8fafca727b8b..55e1aa18cdb9 100644
>> --- a/arch/powerpc/kernel/interrupt.c
>> +++ b/arch/powerpc/kernel/interrupt.c
>> @@ -42,7 +42,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
>>   	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
>>   		BUG_ON(!(regs->msr & MSR_RI));
>>   	BUG_ON(!(regs->msr & MSR_PR));
>> -	BUG_ON(!FULL_REGS(regs));
>>   	BUG_ON(arch_irq_disabled_regs(regs));
>>   
>>   #ifdef CONFIG_PPC_PKEY
>> -- 
>> 2.25.0
>>
>>

^ permalink raw reply

* RE: [PATCH v5 20/22] powerpc/syscall: Avoid storing 'current' in another pointer
From: David Laight @ 2021-02-09 14:31 UTC (permalink / raw)
  To: 'Segher Boessenkool', Nicholas Piggin
  Cc: linuxppc-dev@lists.ozlabs.org, msuchanek@suse.de, Paul Mackerras,
	linux-kernel@vger.kernel.org
In-Reply-To: <20210209135053.GD27854@gate.crashing.org>

From: Segher Boessenkool
> Sent: 09 February 2021 13:51
> 
> On Tue, Feb 09, 2021 at 12:36:20PM +1000, Nicholas Piggin wrote:
> > What if you did this?
> 
> > +static inline struct task_struct *get_current(void)
> > +{
> > +	register struct task_struct *task asm ("r2");
> > +
> > +	return task;
> > +}
> 
> Local register asm variables are *only* guaranteed to live in that
> register as operands to an asm.  See
>   https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
> ("The only supported use" etc.)
> 
> You can do something like
> 
> static inline struct task_struct *get_current(void)
> {
> 	register struct task_struct *task asm ("r2");
> 
> 	asm("" : "+r"(task));
> 
> 	return task;
> }
> 
> which makes sure that "task" actually is in r2 at the point of that asm.

If "r2" always contains current (and is never assigned by the compiler)
why not use a global register variable for it?

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


^ permalink raw reply

* Re: [PATCH v5 19/22] powerpc/syscall: Optimise checks in beginning of system_call_exception()
From: Christophe Leroy @ 2021-02-09 14:32 UTC (permalink / raw)
  To: Nicholas Piggin, Benjamin Herrenschmidt, Michael Ellerman,
	msuchanek, Paul Mackerras
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <1612836170.502t0sssvi.astroid@bobo.none>



Le 09/02/2021 à 03:06, Nicholas Piggin a écrit :
> Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
>> Combine all tests of regs->msr into a single logical one.
> 
> Okay by me unless we choose to do the config option and put these all
> under it. I think I would prefer that because sometimes the registers
> are in a state you can't easily see what the values in the expression
> were. In this case it doesn't matter so much because they should be in
> regs in the interrupt frame.

Yes indeed. I'll reword the commit log to say that.

> 
> Thanks,
> Nick
> 
>>
>> Before the patch:
>>
>>     0:	81 6a 00 84 	lwz     r11,132(r10)
>>     4:	90 6a 00 88 	stw     r3,136(r10)
>>     8:	69 60 00 02 	xori    r0,r11,2
>>     c:	54 00 ff fe 	rlwinm  r0,r0,31,31,31
>>    10:	0f 00 00 00 	twnei   r0,0
>>    14:	69 63 40 00 	xori    r3,r11,16384
>>    18:	54 63 97 fe 	rlwinm  r3,r3,18,31,31
>>    1c:	0f 03 00 00 	twnei   r3,0
>>    20:	69 6b 80 00 	xori    r11,r11,32768
>>    24:	55 6b 8f fe 	rlwinm  r11,r11,17,31,31
>>    28:	0f 0b 00 00 	twnei   r11,0
>>
>> After the patch:
>>
>>     0:	81 6a 00 84 	lwz     r11,132(r10)
>>     4:	90 6a 00 88 	stw     r3,136(r10)
>>     8:	7d 6b 58 f8 	not     r11,r11
>>     c:	71 6b c0 02 	andi.   r11,r11,49154
>>    10:	0f 0b 00 00 	twnei   r11,0
>>
>> 6 cycles less on powerpc 8xx (328 => 322 cycles).
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> ---
>>   arch/powerpc/kernel/interrupt.c | 10 +++++++---
>>   1 file changed, 7 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
>> index 55e1aa18cdb9..8c38e8c95be2 100644
>> --- a/arch/powerpc/kernel/interrupt.c
>> +++ b/arch/powerpc/kernel/interrupt.c
>> @@ -28,6 +28,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
>>   				   unsigned long r0, struct pt_regs *regs)
>>   {
>>   	syscall_fn f;
>> +	unsigned long expected_msr;
>>   
>>   	regs->orig_gpr3 = r3;
>>   
>> @@ -39,10 +40,13 @@ notrace long system_call_exception(long r3, long r4, long r5,
>>   
>>   	trace_hardirqs_off(); /* finish reconciling */
>>   
>> +	expected_msr = MSR_PR;
>>   	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
>> -		BUG_ON(!(regs->msr & MSR_RI));
>> -	BUG_ON(!(regs->msr & MSR_PR));
>> -	BUG_ON(arch_irq_disabled_regs(regs));
>> +		expected_msr |= MSR_RI;
>> +	if (IS_ENABLED(CONFIG_PPC32))
>> +		expected_msr |= MSR_EE;
>> +	BUG_ON((regs->msr & expected_msr) ^ expected_msr);
>> +	BUG_ON(IS_ENABLED(CONFIG_PPC64) && arch_irq_disabled_regs(regs));
>>   
>>   #ifdef CONFIG_PPC_PKEY
>>   	if (mmu_has_feature(MMU_FTR_PKEY)) {
>> -- 
>> 2.25.0
>>
>>

^ permalink raw reply

* Re: [PATCH v5 16/22] powerpc/syscall: Avoid stack frame in likely part of system_call_exception()
From: Christophe Leroy @ 2021-02-09 16:13 UTC (permalink / raw)
  To: Nicholas Piggin, Benjamin Herrenschmidt, Michael Ellerman,
	msuchanek, Paul Mackerras
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <1612834634.qle1lc7n6y.astroid@bobo.none>



Le 09/02/2021 à 02:55, Nicholas Piggin a écrit :
> Excerpts from Christophe Leroy's message of February 9, 2021 1:10 am:
>> When r3 is not modified, reload it from regs->orig_r3 to free
>> volatile registers. This avoids a stack frame for the likely part
>> of system_call_exception()
> 
> This doesn't on my 64s build, but it does reduce one non volatile
> register save/restore. With quite a bit more register pressure
> reduction 64s can avoid the stack frame as well.

The stack frame is not due to the registers because on PPC64 you have the redzone that you don't 
have on PPC32.

As far as I can see, this is due to a call to .arch_local_irq_restore().

On ppc32 arch_local_irq_restore() is just a write to MSR.


> 
> It's a cool trick but quite code and compiler specific so I don't know
> how worthwhile it is to keep considering we're calling out into random
> kernel C code after this.
> 
> Maybe just keep it PPC32 specific for the moment, will have to do more
> tuning for 64 and we have other stuff to do there first.
> 
> If you are happy to make it 32-bit only then

I think we can live without this, that's only one or two cycles won.

> 
> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
> 

^ permalink raw reply

* [PATCH 2/5] powerpc/xive: Introduce an IPI interrupt domain
From: Cédric Le Goater @ 2021-02-09 16:19 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20210209161936.377760-1-clg@kaod.org>

The IPI interrupt is a special case of the XIVE IRQ domain. When
mapping and unmapping the interrupts in the Linux interrupt number
space, the HW interrupt number 0 (XIVE_IPI_HW_IRQ) is checked to
distinguish the IPI interrupt from other interrupts of the system.

Simplify the XIVE interrupt domain by introducing a specific domain
for the IPI.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/sysdev/xive/common.c | 51 +++++++++++++------------------
 1 file changed, 22 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 776871274b69..4aceac0f3046 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -63,6 +63,8 @@ static const struct xive_ops *xive_ops;
 static struct irq_domain *xive_irq_domain;
 
 #ifdef CONFIG_SMP
+static struct irq_domain *xive_ipi_irq_domain;
+
 /* The IPIs all use the same logical irq number */
 static u32 xive_ipi_irq;
 #endif
@@ -1068,20 +1070,32 @@ static struct irq_chip xive_ipi_chip = {
 	.irq_unmask = xive_ipi_do_nothing,
 };
 
+/*
+ * IPIs are marked per-cpu. We use separate HW interrupts under the
+ * hood but associated with the same "linux" interrupt
+ */
+static int xive_ipi_irq_domain_map(struct irq_domain *h, unsigned int virq,
+				   irq_hw_number_t hw)
+{
+	irq_set_chip_and_handler(virq, &xive_ipi_chip, handle_percpu_irq);
+	return 0;
+}
+
+static const struct irq_domain_ops xive_ipi_irq_domain_ops = {
+	.map = xive_ipi_irq_domain_map,
+};
+
 static void __init xive_request_ipi(void)
 {
 	unsigned int virq;
 
-	/*
-	 * Initialization failed, move on, we might manage to
-	 * reach the point where we display our errors before
-	 * the system falls appart
-	 */
-	if (!xive_irq_domain)
+	xive_ipi_irq_domain = irq_domain_add_linear(NULL, 1,
+						    &xive_ipi_irq_domain_ops, NULL);
+	if (WARN_ON(xive_ipi_irq_domain == NULL))
 		return;
 
 	/* Initialize it */
-	virq = irq_create_mapping(xive_irq_domain, XIVE_IPI_HW_IRQ);
+	virq = irq_create_mapping(xive_ipi_irq_domain, XIVE_IPI_HW_IRQ);
 	xive_ipi_irq = virq;
 
 	WARN_ON(request_irq(virq, xive_muxed_ipi_action,
@@ -1179,19 +1193,6 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq,
 	 */
 	irq_clear_status_flags(virq, IRQ_LEVEL);
 
-#ifdef CONFIG_SMP
-	/* IPIs are special and come up with HW number 0 */
-	if (hw == XIVE_IPI_HW_IRQ) {
-		/*
-		 * IPIs are marked per-cpu. We use separate HW interrupts under
-		 * the hood but associated with the same "linux" interrupt
-		 */
-		irq_set_chip_and_handler(virq, &xive_ipi_chip,
-					 handle_percpu_irq);
-		return 0;
-	}
-#endif
-
 	rc = xive_irq_alloc_data(virq, hw);
 	if (rc)
 		return rc;
@@ -1203,15 +1204,7 @@ static int xive_irq_domain_map(struct irq_domain *h, unsigned int virq,
 
 static void xive_irq_domain_unmap(struct irq_domain *d, unsigned int virq)
 {
-	struct irq_data *data = irq_get_irq_data(virq);
-	unsigned int hw_irq;
-
-	/* XXX Assign BAD number */
-	if (!data)
-		return;
-	hw_irq = (unsigned int)irqd_to_hwirq(data);
-	if (hw_irq != XIVE_IPI_HW_IRQ)
-		xive_irq_free_data(virq);
+	xive_irq_free_data(virq);
 }
 
 static int xive_irq_domain_xlate(struct irq_domain *h, struct device_node *ct,
-- 
2.26.2


^ permalink raw reply related

* [PATCH 0/5] powerpc/xive: Map one IPI interrupt per node
From: Cédric Le Goater @ 2021-02-09 16:19 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Cédric Le Goater

Hello,

ipistorm [*] can be used to benchmark the raw interrupt rate of an
interrupt controller by measuring the number of IPIs a system can
sustain. When applied to the XIVE interrupt controller of POWER9 and
POWER10 systems, a significant drop of the interrupt rate can be
observed when crossing the second node boundary.

This is due to the fact that a single IPI interrupt is used for all
CPUs of the system. The structure is shared and the cache line updates
greatly impact the traffic between nodes and the overall IPI
performance.

As a workaround, the impact can be reduced by deactivating the IRQ
lockup detector ("noirqdebug") which does a lot of accounting in the
Linux IRQ descriptor structure and is responsible for most of the
performance penalty.

As a fix, this proposal allocates an IPI interrupt per node, to be
shared by all CPUs of that node. It solves the scaling issue: the IRQ
lockup detector still has an impact, but the XIVE interrupt rate scales
linearly. It also improves the "noirqdebug" case, as shown in the
tables below.

 * P9 DD2.2 - 2s * 64 threads

                                               "noirqdebug"
                        Mint/s                    Mint/s   
 chips  cpus      IPI/sys   IPI/chip       IPI/chip    IPI/sys     
 --------------------------------------------------------------
 1      0-15     4.984023   4.875405       4.996536   5.048892
        0-31    10.879164  10.544040      10.757632  11.037859
        0-47    15.345301  14.688764      14.926520  15.310053
        0-63    17.064907  17.066812      17.613416  17.874511
 2      0-79    11.768764  21.650749      22.689120  22.566508
        0-95    10.616812  26.878789      28.434703  28.320324
        0-111   10.151693  31.397803      31.771773  32.388122
        0-127    9.948502  33.139336      34.875716  35.224548


 * P10 DD1 - 4s (not homogeneous) 352 threads

                                               "noirqdebug"
                        Mint/s                    Mint/s   
 chips  cpus      IPI/sys   IPI/chip       IPI/chip    IPI/sys     
 --------------------------------------------------------------
 1      0-15     2.409402   2.364108       2.383303   2.395091
        0-31     6.028325   6.046075       6.089999   6.073750
        0-47     8.655178   8.644531       8.712830   8.724702
        0-63    11.629652  11.735953      12.088203  12.055979
        0-79    14.392321  14.729959      14.986701  14.973073
        0-95    12.604158  13.004034      17.528748  17.568095
 2      0-111    9.767753  13.719831      19.968606  20.024218
        0-127    6.744566  16.418854      22.898066  22.995110
        0-143    6.005699  19.174421      25.425622  25.417541
        0-159    5.649719  21.938836      27.952662  28.059603
        0-175    5.441410  24.109484      31.133915  31.127996
 3      0-191    5.318341  24.405322      33.999221  33.775354
        0-207    5.191382  26.449769      36.050161  35.867307
        0-223    5.102790  29.356943      39.544135  39.508169
        0-239    5.035295  31.933051      42.135075  42.071975
        0-255    4.969209  34.477367      44.655395  44.757074
 4      0-271    4.907652  35.887016      47.080545  47.318537
        0-287    4.839581  38.076137      50.464307  50.636219
        0-303    4.786031  40.881319      53.478684  53.310759
        0-319    4.743750  43.448424      56.388102  55.973969
        0-335    4.709936  45.623532      59.400930  58.926857
        0-351    4.681413  45.646151      62.035804  61.830057

[*] https://github.com/antonblanchard/ipistorm

Thanks,

C.

Cédric Le Goater (5):
  powerpc/xive: Use cpu_to_node() instead of ibm,chip-id property
  powerpc/xive: Introduce an IPI interrupt domain
  powerpc/xive: Remove useless check on XIVE_IPI_HW_IRQ
  powerpc/xive: Simplify xive_core_debug_show()
  powerpc/xive: Map one IPI interrupt per node

 arch/powerpc/sysdev/xive/xive-internal.h |   2 -
 arch/powerpc/sysdev/xive/common.c        | 114 +++++++++++------------
 2 files changed, 56 insertions(+), 60 deletions(-)

-- 
2.26.2


^ permalink raw reply

* [PATCH 5/5] powerpc/xive: Map one IPI interrupt per node
From: Cédric Le Goater @ 2021-02-09 16:19 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Cédric Le Goater
In-Reply-To: <20210209161936.377760-1-clg@kaod.org>

ipistorm [*] can be used to benchmark the raw interrupt rate of an
interrupt controller by measuring the number of IPIs a system can
sustain. When applied to the XIVE interrupt controller of POWER9 and
POWER10 systems, a significant drop of the interrupt rate can be
observed when crossing the second node boundary.

This is due to the fact that a single IPI interrupt is used for all
CPUs of the system. The structure is shared and the cache line updates
greatly impact the traffic between nodes and the overall IPI
performance.

As a workaround, the impact can be reduced by deactivating the IRQ
lockup detector ("noirqdebug") which does a lot of accounting in the
Linux IRQ descriptor structure and is responsible for most of the
performance penalty.

As a fix, this proposal allocates an IPI interrupt per node, to be
shared by all CPUs of that node. It solves the scaling issue: the IRQ
lockup detector still has an impact, but the XIVE interrupt rate scales
linearly. It also improves the "noirqdebug" case, as shown in the
tables below.

 * P9 DD2.2 - 2s * 64 threads

                                               "noirqdebug"
                        Mint/s                    Mint/s
 chips  cpus      IPI/sys   IPI/chip       IPI/chip    IPI/sys
 --------------------------------------------------------------
 1      0-15     4.984023   4.875405       4.996536   5.048892
        0-31    10.879164  10.544040      10.757632  11.037859
        0-47    15.345301  14.688764      14.926520  15.310053
        0-63    17.064907  17.066812      17.613416  17.874511
 2      0-79    11.768764  21.650749      22.689120  22.566508
        0-95    10.616812  26.878789      28.434703  28.320324
        0-111   10.151693  31.397803      31.771773  32.388122
        0-127    9.948502  33.139336      34.875716  35.224548

 * P10 DD1 - 4s (not homogeneous) 352 threads

                                               "noirqdebug"
                        Mint/s                    Mint/s
 chips  cpus      IPI/sys   IPI/chip       IPI/chip    IPI/sys
 --------------------------------------------------------------
 1      0-15     2.409402   2.364108       2.383303   2.395091
        0-31     6.028325   6.046075       6.089999   6.073750
        0-47     8.655178   8.644531       8.712830   8.724702
        0-63    11.629652  11.735953      12.088203  12.055979
        0-79    14.392321  14.729959      14.986701  14.973073
        0-95    12.604158  13.004034      17.528748  17.568095
 2      0-111    9.767753  13.719831      19.968606  20.024218
        0-127    6.744566  16.418854      22.898066  22.995110
        0-143    6.005699  19.174421      25.425622  25.417541
        0-159    5.649719  21.938836      27.952662  28.059603
        0-175    5.441410  24.109484      31.133915  31.127996
 3      0-191    5.318341  24.405322      33.999221  33.775354
        0-207    5.191382  26.449769      36.050161  35.867307
        0-223    5.102790  29.356943      39.544135  39.508169
        0-239    5.035295  31.933051      42.135075  42.071975
        0-255    4.969209  34.477367      44.655395  44.757074
 4      0-271    4.907652  35.887016      47.080545  47.318537
        0-287    4.839581  38.076137      50.464307  50.636219
        0-303    4.786031  40.881319      53.478684  53.310759
        0-319    4.743750  43.448424      56.388102  55.973969
        0-335    4.709936  45.623532      59.400930  58.926857
        0-351    4.681413  45.646151      62.035804  61.830057

[*] https://github.com/antonblanchard/ipistorm

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/sysdev/xive/xive-internal.h |  2 --
 arch/powerpc/sysdev/xive/common.c        | 39 ++++++++++++++++++------
 2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h
index 9cf57c722faa..b3a456fdd3a5 100644
--- a/arch/powerpc/sysdev/xive/xive-internal.h
+++ b/arch/powerpc/sysdev/xive/xive-internal.h
@@ -5,8 +5,6 @@
 #ifndef __XIVE_INTERNAL_H
 #define __XIVE_INTERNAL_H
 
-#define XIVE_IPI_HW_IRQ		0 /* interrupt source # for IPIs */
-
 /*
  * A "disabled" interrupt should never fire, to catch problems
  * we set its logical number to this
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 30034af4462b..a1e61a5cf927 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -65,8 +65,16 @@ static struct irq_domain *xive_irq_domain;
 #ifdef CONFIG_SMP
 static struct irq_domain *xive_ipi_irq_domain;
 
-/* The IPIs all use the same logical irq number */
-static u32 xive_ipi_irq;
+/* The IPIs use the same logical irq number when on the same chip */
+static struct xive_ipi_desc {
+	unsigned int irq;
+	char name[8]; /* enough bytes to fit IPI-XXX */
+} *xive_ipis;
+
+static unsigned int xive_ipi_cpu_to_irq(unsigned int cpu)
+{
+	return xive_ipis[cpu_to_node(cpu)].irq;
+}
 #endif
 
 /* Xive state for each CPU */
@@ -1087,25 +1095,36 @@ static const struct irq_domain_ops xive_ipi_irq_domain_ops = {
 
 static void __init xive_request_ipi(void)
 {
-	unsigned int virq;
+	unsigned int node;
 
-	xive_ipi_irq_domain = irq_domain_add_linear(NULL, 1,
+	xive_ipi_irq_domain = irq_domain_add_linear(NULL, nr_node_ids,
 						    &xive_ipi_irq_domain_ops, NULL);
 	if (WARN_ON(xive_ipi_irq_domain == NULL))
 		return;
 
-	/* Initialize it */
-	virq = irq_create_mapping(xive_ipi_irq_domain, XIVE_IPI_HW_IRQ);
-	xive_ipi_irq = virq;
+	xive_ipis = kcalloc(nr_node_ids, sizeof(*xive_ipis), GFP_KERNEL | __GFP_NOFAIL);
+	for_each_node(node) {
+		struct xive_ipi_desc *xid = &xive_ipis[node];
+		irq_hw_number_t node_ipi_hwirq = node;
+
+		/*
+		 * Map one IPI interrupt per node for all cpus of that node.
+		 * Since the HW interrupt number doesn't have any meaning,
+		 * simply use the node number.
+		 */
+		xid->irq = irq_create_mapping(xive_ipi_irq_domain, node_ipi_hwirq);
+		snprintf(xid->name, sizeof(xid->name), "IPI-%d", node);
 
-	WARN_ON(request_irq(virq, xive_muxed_ipi_action,
-			    IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL));
+		WARN_ON(request_irq(xid->irq, xive_muxed_ipi_action,
+				    IRQF_PERCPU | IRQF_NO_THREAD, xid->name, NULL));
+	}
 }
 
 static int xive_setup_cpu_ipi(unsigned int cpu)
 {
 	struct xive_cpu *xc;
 	int rc;
+	unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
 
 	pr_debug("Setting up IPI for CPU %d\n", cpu);
 
@@ -1146,6 +1165,8 @@ static int xive_setup_cpu_ipi(unsigned int cpu)
 
 static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc)
 {
+	unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu);
+
 	/* Disable the IPI and free the IRQ data */
 
 	/* Already cleaned up ? */
-- 
2.26.2


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox