linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/2] powerpc: add ISA v3.0 / v3.1 wait opcode macro
@ 2022-07-20 11:03 Nicholas Piggin
  2022-07-20 11:03 ` [PATCH v2 2/2] powerpc/64s: Make POWER10 and later use pause_short in cpu_relax loops Nicholas Piggin
  0 siblings, 1 reply; 2+ messages in thread
From: Nicholas Piggin @ 2022-07-20 11:03 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

The wait instruction encoding changed between ISA v2.07 and ISA v3.0.
In v3.1 the instruction gained a new field.

Update the PPC_WAIT macro to the current encoding. Rename the older
incompatible one with a _v203 suffix as it was introduced in v2.03
(the WC field was introduced in v2.07 but the kernel only uses WC=0).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
v2: Update naming, patch changelog and title.

 arch/powerpc/include/asm/ppc-opcode.h | 7 +++++--
 arch/powerpc/kernel/idle_book3e.S     | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 89beabf5325c..856555d69d93 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -331,6 +331,7 @@
 #define __PPC_XSP(s)	((((s) & 0x1e) | (((s) >> 5) & 0x1)) << 21)
 #define __PPC_XTP(s)	__PPC_XSP(s)
 #define __PPC_T_TLB(t)	(((t) & 0x3) << 21)
+#define __PPC_PL(p)	(((p) & 0x3) << 16)
 #define __PPC_WC(w)	(((w) & 0x3) << 21)
 #define __PPC_WS(w)	(((w) & 0x1f) << 11)
 #define __PPC_SH(s)	__PPC_WS(s)
@@ -398,7 +399,8 @@
 #define PPC_RAW_RFDI			(0x4c00004e)
 #define PPC_RAW_RFMCI			(0x4c00004c)
 #define PPC_RAW_TLBILX(t, a, b)		(0x7c000024 | __PPC_T_TLB(t) | 	__PPC_RA0(a) | __PPC_RB(b))
-#define PPC_RAW_WAIT(w)			(0x7c00007c | __PPC_WC(w))
+#define PPC_RAW_WAIT_v203		(0x7c00007c)
+#define PPC_RAW_WAIT(w, p)		(0x7c00003c | __PPC_WC(w) | __PPC_PL(p))
 #define PPC_RAW_TLBIE(lp, a)		(0x7c000264 | ___PPC_RB(a) | ___PPC_RS(lp))
 #define PPC_RAW_TLBIE_5(rb, rs, ric, prs, r) \
 	(0x7c000264 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r))
@@ -613,7 +615,8 @@
 #define PPC_TLBILX_ALL(a, b)	PPC_TLBILX(0, a, b)
 #define PPC_TLBILX_PID(a, b)	PPC_TLBILX(1, a, b)
 #define PPC_TLBILX_VA(a, b)	PPC_TLBILX(3, a, b)
-#define PPC_WAIT(w)		stringify_in_c(.long PPC_RAW_WAIT(w))
+#define PPC_WAIT_v203(w)	stringify_in_c(.long PPC_RAW_WAIT_v203)
+#define PPC_WAIT(w, p)		stringify_in_c(.long PPC_RAW_WAIT(w, p))
 #define PPC_TLBIE(lp, a) 	stringify_in_c(.long PPC_RAW_TLBIE(lp, a))
 #define	PPC_TLBIE_5(rb, rs, ric, prs, r) \
 				stringify_in_c(.long PPC_RAW_TLBIE_5(rb, rs, ric, prs, r))
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
index cc008de58b05..6447de51ea71 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -77,7 +77,7 @@ _GLOBAL(\name)
 
 .macro BOOK3E_IDLE_LOOP
 1:
-	PPC_WAIT(0)
+	PPC_WAIT_v203
 	b	1b
 .endm
 
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH v2 2/2] powerpc/64s: Make POWER10 and later use pause_short in cpu_relax loops
  2022-07-20 11:03 [PATCH v2 1/2] powerpc: add ISA v3.0 / v3.1 wait opcode macro Nicholas Piggin
@ 2022-07-20 11:03 ` Nicholas Piggin
  0 siblings, 0 replies; 2+ messages in thread
From: Nicholas Piggin @ 2022-07-20 11:03 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

We want to move away from using SMT prioroty updates for cpu_relax, and
use a 'wait' instruction which is similar to x86. As well as being a
much better fit for what everybody else uses and tests with, priority
nops are stateful which is nasty (interrupts have to consider they might
be taken at a different priority), and they're expensive to execute,
similar to a mtSPR which can effect other threads in the pipe.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
Unfortunately qemu TCG does not emulate pause_short properly and will
cause hangs. I have a patch for it but not merged yet. But if we tune
qspinlock code it would be best to do it with this patch.

Thanks,
Nick

 arch/powerpc/include/asm/processor.h      | 30 +++++++++++++++++++----
 arch/powerpc/include/asm/vdso/processor.h | 10 +++++++-
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index fdfaae194ddd..61f16515cbe0 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -355,11 +355,31 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
 
 #ifdef CONFIG_PPC64
 
-#define spin_begin()	HMT_low()
-
-#define spin_cpu_relax()	barrier()
-
-#define spin_end()	HMT_medium()
+#define spin_begin()							\
+do {									\
+	asm volatile(ASM_FTR_IFCLR(					\
+		"or 1,1,1", /* HMT_LOW */				\
+		"nop",/* POWER10 onward uses pause_short (wait 2,0) */	\
+				%0) :: "i" (CPU_FTR_ARCH_31) : "memory"); \
+} while (0)
+
+#define spin_cpu_relax()						\
+do {									\
+	asm volatile(ASM_FTR_IFCLR(					\
+		/* Pre-POWER10 uses low ; medium priority nops */	\
+		"nop",							\
+		/* POWER10 onward uses pause_short (wait 2,0) */	\
+		PPC_WAIT_BOOKS(2, 0),					\
+				%0) :: "i" (CPU_FTR_ARCH_31) : "memory"); \
+} while (0)
+
+#define spin_end()							\
+do {									\
+	asm volatile(ASM_FTR_IFCLR(					\
+		"or 2,2,2", /* HMT_MEDIUM */				\
+		"nop",/* POWER10 onward uses pause_short (wait 2,0) */	\
+				%0) :: "i" (CPU_FTR_ARCH_31) : "memory"); \
+} while (0)
 
 #endif
 
diff --git a/arch/powerpc/include/asm/vdso/processor.h b/arch/powerpc/include/asm/vdso/processor.h
index 8d79f994b4aa..1116230ebb08 100644
--- a/arch/powerpc/include/asm/vdso/processor.h
+++ b/arch/powerpc/include/asm/vdso/processor.h
@@ -22,7 +22,15 @@
 #endif
 
 #ifdef CONFIG_PPC64
-#define cpu_relax()	do { HMT_low(); HMT_medium(); barrier(); } while (0)
+#define cpu_relax()							\
+do {									\
+	asm volatile(ASM_FTR_IFCLR(					\
+		/* Pre-POWER10 uses low ; medium priority nops */	\
+		"or 1,1,1 ; or 2,2,2",					\
+		/* POWER10 onward uses pause_short (wait 2,0) */	\
+		PPC_WAIT_BOOKS(2, 0),					\
+				%0) :: "i" (CPU_FTR_ARCH_31) : "memory"); \
+} while (0)
 #else
 #define cpu_relax()	barrier()
 #endif
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-07-20 11:04 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-07-20 11:03 [PATCH v2 1/2] powerpc: add ISA v3.0 / v3.1 wait opcode macro Nicholas Piggin
2022-07-20 11:03 ` [PATCH v2 2/2] powerpc/64s: Make POWER10 and later use pause_short in cpu_relax loops Nicholas Piggin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).