* [PATCH v3 13/16] powerpc/32: implement fast entry for syscalls on non BOOKE
From: Christophe Leroy @ 2019-04-30 12:39 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Nicholas Piggin
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556627571.git.christophe.leroy@c-s.fr>
This patch implements a fast entry for syscalls.
Syscalls don't have to preserve non volatile registers except LR.
This patch then implement a fast entry for syscalls, where
volatile registers get clobbered.
As this entry is dedicated to syscall it always sets MSR_EE
and warns in case MSR_EE was previously off
It also assumes that the call is always from user, system calls are
unexpected from kernel.
The overall series improves null_syscall selftest by 12,5% on an 83xx
and by 17% on a 8xx.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/kernel/entry_32.S | 32 ++++++++++++++++
arch/powerpc/kernel/head_32.S | 3 +-
arch/powerpc/kernel/head_32.h | 85 ++++++++++++++++++++++++++++++++++++++++--
arch/powerpc/kernel/head_40x.S | 3 +-
arch/powerpc/kernel/head_8xx.S | 3 +-
5 files changed, 116 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 0c555f9f1543..184cc1de2f37 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -342,6 +342,35 @@ stack_ovf:
SYNC
RFI
+#ifndef CONFIG_BOOKE /* to be removed once BOOKE uses fast syscall entry */
+#ifdef CONFIG_TRACE_IRQFLAGS
+trace_syscall_entry_irq_off:
+ /*
+ * Syscall shouldn't happen while interrupts are disabled,
+ * so let's do a warning here.
+ */
+0: trap
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+ bl trace_hardirqs_on
+
+ /* Now enable for real */
+ LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+ mtmsr r10
+
+ REST_GPR(0, r1)
+ REST_4GPRS(3, r1)
+ REST_2GPRS(7, r1)
+ b DoSyscall
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+ .globl transfer_to_syscall
+transfer_to_syscall:
+#ifdef CONFIG_TRACE_IRQFLAGS
+ andi. r12,r9,MSR_EE
+ beq- trace_syscall_entry_irq_off
+#endif /* CONFIG_TRACE_IRQFLAGS */
+#endif /* !CONFIG_BOOKE */
+
/*
* Handle a system call.
*/
@@ -353,9 +382,11 @@ _GLOBAL(DoSyscall)
stw r3,ORIG_GPR3(r1)
li r12,0
stw r12,RESULT(r1)
+#ifdef CONFIG_BOOKE /* to be removed once BOOKE uses fast syscall entry */
lwz r11,_CCR(r1) /* Clear SO bit in CR */
rlwinm r11,r11,0,4,2
stw r11,_CCR(r1)
+#endif
#ifdef CONFIG_TRACE_IRQFLAGS
/* Make sure interrupts are enabled */
mfmsr r11
@@ -1219,6 +1250,7 @@ load_dbcr0:
.section .bss
.align 4
+ .global global_dbcr0
global_dbcr0:
.space 8*NR_CPUS
.previous
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 2404c39373d3..f1da8fef726a 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -370,8 +370,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
. = 0xc00
DO_KVM 0xc00
SystemCall:
- EXCEPTION_PROLOG
- EXC_XFER_SYS(0xc00, DoSyscall)
+ SYSCALL_ENTRY 0xc00
/* Single step - not used on 601 */
EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 14cb0af2f494..4a692553651f 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -73,6 +73,87 @@
SAVE_2GPRS(7, r11)
.endm
+.macro SYSCALL_ENTRY trapno
+ mfspr r12,SPRN_SPRG_THREAD
+ mfcr r10
+ lwz r11,TASK_STACK-THREAD(r12)
+ mflr r9
+ addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE
+ rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */
+ tophys(r11,r11)
+ stw r10,_CCR(r11) /* save registers */
+ mfspr r10,SPRN_SRR0
+ stw r9,_LINK(r11)
+ mfspr r9,SPRN_SRR1
+ stw r1,GPR1(r11)
+ stw r1,0(r11)
+ tovirt(r1,r11) /* set new kernel sp */
+ stw r10,_NIP(r11)
+#ifdef CONFIG_40x
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+#else
+ LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
+ MTMSRD(r10) /* (except for mach check in rtas) */
+#endif
+ lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ stw r2,GPR2(r11)
+ addi r10,r10,STACK_FRAME_REGS_MARKER@l
+ stw r9,_MSR(r11)
+ li r2, \trapno + 1
+ stw r10,8(r11)
+ stw r2,_TRAP(r11)
+ SAVE_GPR(0, r11)
+ SAVE_4GPRS(3, r11)
+ SAVE_2GPRS(7, r11)
+ addi r11,r1,STACK_FRAME_OVERHEAD
+ addi r2,r12,-THREAD
+ stw r11,PT_REGS(r12)
+#if defined(CONFIG_40x)
+ /* Check to see if the dbcr0 register is set up to debug. Use the
+ internal debug mode bit to do this. */
+ lwz r12,THREAD_DBCR0(r12)
+ andis. r12,r12,DBCR0_IDM@h
+#endif
+ ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+#if defined(CONFIG_40x)
+ beq+ 3f
+ /* From user and task is ptraced - load up global dbcr0 */
+ li r12,-1 /* clear all pending debug events */
+ mtspr SPRN_DBSR,r12
+ lis r11,global_dbcr0@ha
+ tophys(r11,r11)
+ addi r11,r11,global_dbcr0@l
+ lwz r12,0(r11)
+ mtspr SPRN_DBCR0,r12
+ lwz r12,4(r11)
+ addi r12,r12,-1
+ stw r12,4(r11)
+#endif
+
+3:
+ tovirt(r2, r2) /* set r2 to current */
+ lis r11, transfer_to_syscall@h
+ ori r11, r11, transfer_to_syscall@l
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /*
+ * If MSR is changing we need to keep interrupts disabled at this point
+ * otherwise we might risk taking an interrupt before we tell lockdep
+ * they are enabled.
+ */
+ LOAD_MSR_KERNEL(r10, MSR_KERNEL)
+ rlwimi r10, r9, 0, MSR_EE
+#else
+ LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+#endif
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
+ mtspr SPRN_NRI, r0
+#endif
+ mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR0,r11
+ SYNC
+ RFI /* jump to handler, enable MMU */
+.endm
+
/*
* Note: code which follows this uses cr0.eq (set if from kernel),
* r11, r12 (SRR0), and r9 (SRR1).
@@ -119,8 +200,4 @@
EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
ret_from_except)
-#define EXC_XFER_SYS(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL | MSR_EE, transfer_to_handler, \
- ret_from_except)
-
#endif /* __HEAD_32_H__ */
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index b68de183faf1..e115248edda1 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -348,8 +348,7 @@ _ENTRY(saved_ksp_limit)
/* 0x0C00 - System Call Exception */
START_EXCEPTION(0x0C00, SystemCall)
- EXCEPTION_PROLOG
- EXC_XFER_SYS(0xc00, DoSyscall)
+ SYSCALL_ENTRY 0xc00
EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD)
EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 4ebcdfdae920..a5826defad1f 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -185,8 +185,7 @@ Alignment:
/* System call */
. = 0xc00
SystemCall:
- EXCEPTION_PROLOG
- EXC_XFER_SYS(0xc00, DoSyscall)
+ SYSCALL_ENTRY 0xc00
/* Single step - not used on 601 */
EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
--
2.13.3
^ permalink raw reply related
* [PATCH v3 12/16] powerpc: Fix 32-bit handling of MSR_EE on exceptions
From: Christophe Leroy @ 2019-04-30 12:39 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Nicholas Piggin
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556627571.git.christophe.leroy@c-s.fr>
[text mostly copied from benh's RFC/WIP]
ppc32 are still doing something rather gothic and wrong on 32-bit
which we stopped doing on 64-bit a while ago.
We have that thing where some handlers "copy" the EE value from the
original stack frame into the new MSR before transferring to the
handler.
Thus for a number of exceptions, we enter the handlers with interrupts
enabled.
This is rather fishy, some of the stuff that handlers might do early
on such as irq_enter/exit or user_exit, context tracking, etc...
should be run with interrupts off afaik.
Generally our handlers know when to re-enable interrupts if needed.
The problem we were having is that we assumed these interrupts would
return with interrupts enabled. However that isn't the case.
Instead, this patch changes things so that we always enter exception
handlers with interrupts *off* with the notable exception of syscalls
which are special (and get a fast path).
Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/kernel/entry_32.S | 116 ++++++++++++++++++++++++-----------------
1 file changed, 67 insertions(+), 49 deletions(-)
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index d0cea3deb86c..0c555f9f1543 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -37,6 +37,7 @@
#include <asm/feature-fixups.h>
#include <asm/barrier.h>
#include <asm/kup.h>
+#include <asm/bug.h>
#include "head_32.h"
@@ -206,19 +207,42 @@ transfer_to_handler_cont:
mtspr SPRN_NRI, r0
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
+ /*
+ * When tracing IRQ state (lockdep) we enable the MMU before we call
+ * the IRQ tracing functions as they might access vmalloc space or
+ * perform IOs for console output.
+ *
+ * To speed up the syscall path where interrupts stay on, let's check
+ * first if we are changing the MSR value at all.
+ */
+ tophys(r12, r1)
+ lwz r12,_MSR(r12)
+ xor r12,r10,r12
+ andi. r12,r12,MSR_EE
+ bne 1f
+
+ /* MSR isn't changing, just transition directly */
+#endif
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r10
+ mtlr r9
+ SYNC
+ RFI /* jump to handler, enable MMU */
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to
+ * keep interrupts disabled at this point otherwise we might risk
+ * taking an interrupt before we tell lockdep they are enabled.
+ */
lis r12,reenable_mmu@h
ori r12,r12,reenable_mmu@l
+ LOAD_MSR_KERNEL(r0, MSR_KERNEL)
mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR1,r0
SYNC
RFI
-reenable_mmu: /* re-enable mmu so we can */
- mfmsr r10
- lwz r12,_MSR(r1)
- xor r10,r10,r12
- andi. r10,r10,MSR_EE /* Did EE change? */
- beq 1f
+reenable_mmu:
/*
* The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1.
* If from user mode there is only one stack frame on the stack, and
@@ -233,14 +257,24 @@ reenable_mmu: /* re-enable mmu so we can */
* they aren't useful past this point (aren't syscall arguments),
* the rest is restored from the exception frame.
*/
+
+ /* Are we enabling or disabling interrupts ? */
+ andi. r0,r10,MSR_EE
+
stwu r1,-32(r1)
stw r9,8(r1)
stw r11,12(r1)
stw r3,16(r1)
stw r4,20(r1)
stw r5,24(r1)
- bl trace_hardirqs_off
- lwz r5,24(r1)
+
+ bne- 0f
+
+ /* If we are disabling interrupts (normal case), simply log it with
+ * lockdep
+ */
+1: bl trace_hardirqs_off
+2: lwz r5,24(r1)
lwz r4,20(r1)
lwz r3,16(r1)
lwz r11,12(r1)
@@ -250,15 +284,22 @@ reenable_mmu: /* re-enable mmu so we can */
lwz r6,GPR6(r1)
lwz r7,GPR7(r1)
lwz r8,GPR8(r1)
-1: mtctr r11
+ mtctr r11
mtlr r9
bctr /* jump to handler */
-#else /* CONFIG_TRACE_IRQFLAGS */
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r10
- mtlr r9
- SYNC
- RFI /* jump to handler, enable MMU */
+
+ /* If we are enabling interrupt, this is a syscall. They shouldn't
+ * happen while interrupts are disabled, so let's do a warning here.
+ */
+0: trap
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+ bl trace_hardirqs_on
+
+ /* Now enable for real */
+ mfmsr r10
+ ori r10,r10,MSR_EE
+ mtmsr r10
+ b 2b
#endif /* CONFIG_TRACE_IRQFLAGS */
#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
@@ -316,29 +357,13 @@ _GLOBAL(DoSyscall)
rlwinm r11,r11,0,4,2
stw r11,_CCR(r1)
#ifdef CONFIG_TRACE_IRQFLAGS
- /* Return from syscalls can (and generally will) hard enable
- * interrupts. You aren't supposed to call a syscall with
- * interrupts disabled in the first place. However, to ensure
- * that we get it right vs. lockdep if it happens, we force
- * that hard enable here with appropriate tracing if we see
- * that we have been called with interrupts off
- */
+ /* Make sure interrupts are enabled */
mfmsr r11
andi. r12,r11,MSR_EE
- bne+ 1f
- /* We came in with interrupts disabled, we enable them now */
- bl trace_hardirqs_on
- mfmsr r11
- lwz r0,GPR0(r1)
- lwz r3,GPR3(r1)
- lwz r4,GPR4(r1)
- ori r11,r11,MSR_EE
- lwz r5,GPR5(r1)
- lwz r6,GPR6(r1)
- lwz r7,GPR7(r1)
- lwz r8,GPR8(r1)
- mtmsr r11
-1:
+ /* We came in with interrupts disabled, we WARN and mark them enabled
+ * for lockdep now */
+0: tweqi r12, 0
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
#endif /* CONFIG_TRACE_IRQFLAGS */
lwz r11,TI_FLAGS(r2)
andi. r11,r11,_TIF_SYSCALL_DOTRACE
@@ -392,8 +417,7 @@ syscall_exit_cont:
lwz r8,_MSR(r1)
#ifdef CONFIG_TRACE_IRQFLAGS
/* If we are going to return from the syscall with interrupts
- * off, we trace that here. It shouldn't happen though but we
- * want to catch the bugger if it does right ?
+ * off, we trace that here. It shouldn't normally happen.
*/
andi. r10,r8,MSR_EE
bne+ 1f
@@ -918,13 +942,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
* off in this assembly code while peeking at TI_FLAGS() and such. However
* we need to inform it if the exception turned interrupts off, and we
* are about to trun them back on.
- *
- * The problem here sadly is that we don't know whether the exceptions was
- * one that turned interrupts off or not. So we always tell lockdep about
- * turning them on here when we go back to wherever we came from with EE
- * on, even if that may meen some redudant calls being tracked. Maybe later
- * we could encode what the exception did somewhere or test the exception
- * type in the pt_regs but that sounds overkill
*/
andi. r10,r9,MSR_EE
beq 1f
@@ -1212,9 +1229,10 @@ do_work: /* r10 contains MSR_KERNEL here */
beq do_user_signal
do_resched: /* r10 contains MSR_KERNEL here */
- /* Note: We don't need to inform lockdep that we are enabling
- * interrupts here. As far as it knows, they are already enabled
- */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ bl trace_hardirqs_on
+ mfmsr r10
+#endif
ori r10,r10,MSR_EE
SYNC
MTMSRD(r10) /* hard-enable interrupts */
--
2.13.3
^ permalink raw reply related
* [PATCH v3 15/16] powerpc/32: don't do syscall stuff in transfer_to_handler
From: Christophe Leroy @ 2019-04-30 12:39 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Nicholas Piggin
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556627571.git.christophe.leroy@c-s.fr>
As syscalls are now handled via a fast entry path, syscall related
actions can be removed from the generic transfer_to_handler path.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/kernel/entry_32.S | 19 -------------------
1 file changed, 19 deletions(-)
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index dc58fec51ed6..e65c3e70c648 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -217,7 +217,6 @@ transfer_to_handler_cont:
*/
tophys(r12, r1)
lwz r12,_MSR(r12)
- xor r12,r10,r12
andi. r12,r12,MSR_EE
bne 1f
@@ -258,9 +257,6 @@ reenable_mmu:
* the rest is restored from the exception frame.
*/
- /* Are we enabling or disabling interrupts ? */
- andi. r0,r10,MSR_EE
-
stwu r1,-32(r1)
stw r9,8(r1)
stw r11,12(r1)
@@ -268,8 +264,6 @@ reenable_mmu:
stw r4,20(r1)
stw r5,24(r1)
- bne- 0f
-
/* If we are disabling interrupts (normal case), simply log it with
* lockdep
*/
@@ -287,19 +281,6 @@ reenable_mmu:
mtctr r11
mtlr r9
bctr /* jump to handler */
-
- /* If we are enabling interrupt, this is a syscall. They shouldn't
- * happen while interrupts are disabled, so let's do a warning here.
- */
-0: trap
- EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
- bl trace_hardirqs_on
-
- /* Now enable for real */
- mfmsr r10
- ori r10,r10,MSR_EE
- mtmsr r10
- b 2b
#endif /* CONFIG_TRACE_IRQFLAGS */
#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
--
2.13.3
^ permalink raw reply related
* [PATCH v3 14/16] powerpc/32: implement fast entry for syscalls on BOOKE
From: Christophe Leroy @ 2019-04-30 12:39 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Nicholas Piggin
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556627571.git.christophe.leroy@c-s.fr>
This patch implements a fast entry for syscalls.
Syscalls don't have to preserve non volatile registers except LR.
This patch then implement a fast entry for syscalls, where
volatile registers get clobbered.
As this entry is dedicated to syscall it always sets MSR_EE
and warns in case MSR_EE was previously off
It also assumes that the call is always from user, system calls are
unexpected from kernel.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/kernel/entry_32.S | 7 ---
arch/powerpc/kernel/head_44x.S | 3 +-
arch/powerpc/kernel/head_booke.h | 103 +++++++++++++++++++++++++++++++++--
arch/powerpc/kernel/head_fsl_booke.S | 3 +-
4 files changed, 100 insertions(+), 16 deletions(-)
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 184cc1de2f37..dc58fec51ed6 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -342,7 +342,6 @@ stack_ovf:
SYNC
RFI
-#ifndef CONFIG_BOOKE /* to be removed once BOOKE uses fast syscall entry */
#ifdef CONFIG_TRACE_IRQFLAGS
trace_syscall_entry_irq_off:
/*
@@ -369,7 +368,6 @@ transfer_to_syscall:
andi. r12,r9,MSR_EE
beq- trace_syscall_entry_irq_off
#endif /* CONFIG_TRACE_IRQFLAGS */
-#endif /* !CONFIG_BOOKE */
/*
* Handle a system call.
@@ -382,11 +380,6 @@ _GLOBAL(DoSyscall)
stw r3,ORIG_GPR3(r1)
li r12,0
stw r12,RESULT(r1)
-#ifdef CONFIG_BOOKE /* to be removed once BOOKE uses fast syscall entry */
- lwz r11,_CCR(r1) /* Clear SO bit in CR */
- rlwinm r11,r11,0,4,2
- stw r11,_CCR(r1)
-#endif
#ifdef CONFIG_TRACE_IRQFLAGS
/* Make sure interrupts are enabled */
mfmsr r11
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index e06cb1c84951..7d73c7e39afe 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -282,8 +282,7 @@ interrupt_base:
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL)
- EXC_XFER_SYS(0x0c00, DoSyscall)
+ SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL
/* Auxiliary Processor Unavailable Interrupt */
EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 56dd1341eb3d..bfeb469e8106 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -6,6 +6,8 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
+#ifdef __ASSEMBLY__
+
/*
* Macros used for common Book-e exception handling
*/
@@ -81,6 +83,101 @@ END_BTB_FLUSH_SECTION
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
+.macro SYSCALL_ENTRY trapno intno
+ mfspr r10, SPRN_SPRG_THREAD
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+ mtspr SPRN_SPRG_WSCRATCH0, r10
+ stw r11, THREAD_NORMSAVE(0)(r10)
+ stw r13, THREAD_NORMSAVE(2)(r10)
+ mfcr r13 /* save CR in r13 for now */
+ mfspr r11, SPRN_SRR1
+ mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */
+ bf 3, 1975f
+ b kvmppc_handler_BOOKE_INTERRUPT_\intno\()_SPRN_SRR1
+1975:
+ mr r12, r13
+ lwz r13, THREAD_NORMSAVE(2)(r10)
+FTR_SECTION_ELSE
+#endif
+ mfcr r12
+#ifdef CONFIG_KVM_BOOKE_HV
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
+#endif
+ BOOKE_CLEAR_BTB(r11)
+ lwz r11, TASK_STACK - THREAD(r10)
+ rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
+ ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE)
+ stw r12, _CCR(r11) /* save various registers */
+ mflr r12
+ stw r12,_LINK(r11)
+ mfspr r12,SPRN_SRR0
+ stw r1, GPR1(r11)
+ mfspr r9,SPRN_SRR1
+ stw r1, 0(r11)
+ mr r1, r11
+ stw r12,_NIP(r11)
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+ lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ stw r2,GPR2(r11)
+ addi r12, r12, STACK_FRAME_REGS_MARKER@l
+ stw r9,_MSR(r11)
+ li r2, \trapno + 1
+ stw r12, 8(r11)
+ stw r2,_TRAP(r11)
+ SAVE_GPR(0, r11)
+ SAVE_4GPRS(3, r11)
+ SAVE_2GPRS(7, r11)
+
+ addi r11,r1,STACK_FRAME_OVERHEAD
+ addi r2,r10,-THREAD
+ stw r11,PT_REGS(r10)
+ /* Check to see if the dbcr0 register is set up to debug. Use the
+ internal debug mode bit to do this. */
+ lwz r12,THREAD_DBCR0(r10)
+ andis. r12,r12,DBCR0_IDM@h
+ ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+ beq+ 3f
+ /* From user and task is ptraced - load up global dbcr0 */
+ li r12,-1 /* clear all pending debug events */
+ mtspr SPRN_DBSR,r12
+ lis r11,global_dbcr0@ha
+ tophys(r11,r11)
+ addi r11,r11,global_dbcr0@l
+#ifdef CONFIG_SMP
+ lwz r9,TASK_CPU(r2)
+ slwi r9,r9,3
+ add r11,r11,r9
+#endif
+ lwz r12,0(r11)
+ mtspr SPRN_DBCR0,r12
+ lwz r12,4(r11)
+ addi r12,r12,-1
+ stw r12,4(r11)
+
+3:
+ tovirt(r2, r2) /* set r2 to current */
+ lis r11, transfer_to_syscall@h
+ ori r11, r11, transfer_to_syscall@l
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /*
+ * If MSR is changing we need to keep interrupts disabled at this point
+ * otherwise we might risk taking an interrupt before we tell lockdep
+ * they are enabled.
+ */
+ lis r10, MSR_KERNEL@h
+ ori r10, r10, MSR_KERNEL@l
+ rlwimi r10, r9, 0, MSR_EE
+#else
+ lis r10, (MSR_KERNEL | MSR_EE)@h
+ ori r10, r10, (MSR_KERNEL | MSR_EE)@l
+#endif
+ mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR0,r11
+ SYNC
+ RFI /* jump to handler, enable MMU */
+.endm
+
/* To handle the additional exception priority levels on 40x and Book-E
* processors we allocate a stack per additional priority level.
*
@@ -245,10 +342,6 @@ END_BTB_FLUSH_SECTION
EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
ret_from_except)
-#define EXC_XFER_SYS(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL | MSR_EE, transfer_to_handler, \
- ret_from_except)
-
/* Check for a single step debug exception while in an exception
* handler before state has been saved. This is to catch the case
* where an instruction that we are trying to single step causes
@@ -418,7 +511,7 @@ END_BTB_FLUSH_SECTION
1: addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_STD(0x800, kernel_fp_unavailable_exception)
-#ifndef __ASSEMBLY__
+#else /* __ASSEMBLY__ */
struct exception_regs {
unsigned long mas0;
unsigned long mas1;
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index b351851dc73d..a757be4f1cb5 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -410,8 +410,7 @@ interrupt_base:
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG(SYSCALL)
- EXC_XFER_SYS(0x0c00, DoSyscall)
+ SYSCALL_ENTRY 0xc00 SYSCALL
/* Auxiliary Processor Unavailable Interrupt */
EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
--
2.13.3
^ permalink raw reply related
* [PATCH v3 16/16] powerpc/32: Don't add dummy frames when calling trace_hardirqs_on/off
From: Christophe Leroy @ 2019-04-30 12:39 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Nicholas Piggin
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1556627571.git.christophe.leroy@c-s.fr>
No need to add dummy frames when calling trace_hardirqs_on or
trace_hardirqs_off. GCC properly handles empty stacks.
In addition, powerpc doesn't set CONFIG_FRAME_POINTER, therefore
__builtin_return_address(1..) returns NULL at all time. So the
dummy frames are definitely unneeded here.
In the meantime, avoid reading memory for loading r1 with a value
we already know.
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
arch/powerpc/kernel/entry_32.S | 16 ++--------------
1 file changed, 2 insertions(+), 14 deletions(-)
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index e65c3e70c648..235a01d34b6d 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -243,12 +243,7 @@ transfer_to_handler_cont:
reenable_mmu:
/*
- * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1.
- * If from user mode there is only one stack frame on the stack, and
- * accessing CALLER_ADDR1 will cause oops. So we need create a dummy
- * stack frame to make trace_hardirqs_off happy.
- *
- * This is handy because we also need to save a bunch of GPRs,
+ * We save a bunch of GPRs,
* r3 can be different from GPR3(r1) at this point, r9 and r11
* contains the old MSR and handler address respectively,
* r4 & r5 can contain page fault arguments that need to be passed
@@ -950,18 +945,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
*/
andi. r10,r9,MSR_EE
beq 1f
- /*
- * Since the ftrace irqsoff latency trace checks CALLER_ADDR1,
- * which is the stack frame here, we need to force a stack frame
- * in case we came from user space.
- */
stwu r1,-32(r1)
mflr r0
stw r0,4(r1)
- stwu r1,-32(r1)
bl trace_hardirqs_on
- lwz r1,0(r1)
- lwz r1,0(r1)
+ addi r1, r1, 32
lwz r9,_MSR(r1)
1:
#endif /* CONFIG_TRACE_IRQFLAGS */
--
2.13.3
^ permalink raw reply related
* [PATCH v2 stable v4.4 1/2] powerpc/fsl: Add FSL_PPC_BOOK3E as supported arch for nospectre_v2 boot arg
From: Diana Craciun @ 2019-04-30 12:42 UTC (permalink / raw)
To: stable, gregkh; +Cc: Diana Craciun, linuxppc-dev
commit e59f5bd759b7dee57593c5b6c0441609bda5d530 upstream.
Signed-off-by: Diana Craciun <diana.craciun@nxp.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
Documentation/kernel-parameters.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index da515c535e62..f0bdf78420a0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2450,7 +2450,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
- nospectre_v2 [X86] Disable all mitigations for the Spectre variant 2
+ nospectre_v2 [X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2
(indirect branch prediction) vulnerability. System may
allow data leaks with this option, which is equivalent
to spectre_v2=off.
--
2.17.1
^ permalink raw reply related
* [PATCH v2 stable v4.4 2/2] Documentation: Add nospectre_v1 parameter
From: Diana Craciun @ 2019-04-30 12:42 UTC (permalink / raw)
To: stable, gregkh; +Cc: Diana Craciun, linuxppc-dev
In-Reply-To: <1556628147-15687-1-git-send-email-diana.craciun@nxp.com>
commit 26cb1f36c43ee6e89d2a9f48a5a7500d5248f836 upstream.
Currently only supported on powerpc.
Signed-off-by: Diana Craciun <diana.craciun@nxp.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
Documentation/kernel-parameters.txt | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index f0bdf78420a0..3ff87d5d6fea 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2449,6 +2449,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
legacy floating-point registers on task switch.
nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
+
+ nospectre_v1 [PPC] Disable mitigations for Spectre Variant 1 (bounds
+ check bypass). With this option data leaks are possible
+ in the system.
nospectre_v2 [X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2
(indirect branch prediction) vulnerability. System may
--
2.17.1
^ permalink raw reply related
* Re: [PATCH kernel v3] powerpc/powernv: Isolate NVLinks between GV100GL on Witherspoon
From: Alex Williamson @ 2019-04-30 13:20 UTC (permalink / raw)
To: Alexey Kardashevskiy
Cc: Reza Arbab, Jose Ricardo Ziviani, kvm, Sam Bobroff,
Alistair Popple, Daniel Henrique Barboza, kvm-ppc,
Piotr Jaroszynski, Leonardo Augusto Guimarães Garcia,
linuxppc-dev, David Gibson
In-Reply-To: <cf35d5be-d0a3-aef3-d5eb-c2318ef7d92b@ozlabs.ru>
On Tue, 30 Apr 2019 16:14:35 +1000
Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
> On 30/04/2019 15:45, Alistair Popple wrote:
> > Alexey,
> >
> >>>>> +void pnv_try_isolate_nvidia_v100(struct pci_dev *bridge)
> >>>>> +{
> >>>>> + u32 mask, val;
> >>>>> + void __iomem *bar0_0, *bar0_120000, *bar0_a00000;
> >>>>> + struct pci_dev *pdev;
> >>>>> + u16 cmd = 0, cmdmask = PCI_COMMAND_MEMORY;
> >>>>> +
> >>>>> + if (!bridge->subordinate)
> >>>>> + return;
> >>>>> +
> >>>>> + pdev = list_first_entry_or_null(&bridge->subordinate->devices,
> >>>>> + struct pci_dev, bus_list);
> >>>>> + if (!pdev)
> >>>>> + return;
> >>>>> +
> >>>>> + if (pdev->vendor != PCI_VENDOR_ID_NVIDIA)
> >
> > Don't you also need to check the PCIe devid to match only [PV]100 devices as
> > well? I doubt there's any guarantee these registers will remain the same for
> > all future (or older) NVIDIA devices.
>
>
> I do not have the complete list of IDs and I already saw 3 different
> device ids and this only works for machines with ibm,npu/gpu/nvlinks
> properties so for now it works and for the future we are hoping to
> either have an open source nvidia driver or some small minidriver (also
> from nvidia, or may be a spec allowing us to write one) to allow
> topology discovery on the host so we would not depend on the skiboot's
> powernv DT.
>
> > IMHO this should really be done in the device driver in the guest. A malcious
> > guest could load a modified driver that doesn't do this, but that should not
> > compromise other guests which presumably load a non-compromised driver that
> > disables the links on that guests GPU. However I guess in practice what you
> > have here should work equally well.
>
> Doing it in the guest means a good guest needs to have an updated
> driver, we do not really want to depend on this. The idea of IOMMU
> groups is that the hypervisor provides isolation irrespective to what
> the guest does.
+1 It's not the user/guest driver's responsibility to maintain the
isolation of the device. Thanks,
Alex
> Also vfio+qemu+slof needs to convey the nvlink topology to the guest,
> seems like an unnecessary complication.
>
>
>
> > - Alistair
> >
> >>>>> + return;
> >>>>> +
> >>>>> + mask = nvlinkgpu_get_disable_mask(&pdev->dev);
> >>>>> + if (!mask)
> >>>>> + return;
> >>>>> +
> >>>>> + bar0_0 = pci_iomap_range(pdev, 0, 0, 0x10000);
> >>>>> + if (!bar0_0) {
> >>>>> + pci_err(pdev, "Error mapping BAR0 @0\n");
> >>>>> + return;
> >>>>> + }
> >>>>> + bar0_120000 = pci_iomap_range(pdev, 0, 0x120000, 0x10000);
> >>>>> + if (!bar0_120000) {
> >>>>> + pci_err(pdev, "Error mapping BAR0 @120000\n");
> >>>>> + goto bar0_0_unmap;
> >>>>> + }
> >>>>> + bar0_a00000 = pci_iomap_range(pdev, 0, 0xA00000, 0x10000);
> >>>>> + if (!bar0_a00000) {
> >>>>> + pci_err(pdev, "Error mapping BAR0 @A00000\n");
> >>>>> + goto bar0_120000_unmap;
> >>>>> + }
> >>>>
> >>>> Is it really necessary to do three separate ioremaps vs one that would
> >>>> cover them all here? I suspect you're just sneaking in PAGE_SIZE with
> >>>> the 0x10000 size mappings anyway. Seems like it would simplify setup,
> >>>> error reporting, and cleanup to to ioremap to the PAGE_ALIGN'd range
> >>>> of the highest register accessed. Thanks,
> >>>
> >>> Sure I can map it once, I just do not see the point in mapping/unmapping
> >>> all 0xa10000>>16=161 system pages for a very short period of time while
> >>> we know precisely that we need just 3 pages.
> >>>
> >>> Repost?
> >>
> >> Ping?
> >>
> >> Can this go in as it is (i.e. should I ping Michael) or this needs
> >> another round? It would be nice to get some formal acks. Thanks,
> >>
> >>>> Alex
> >>>>
> >>>>> +
> >>>>> + pci_restore_state(pdev);
> >>>>> + pci_read_config_word(pdev, PCI_COMMAND, &cmd);
> >>>>> + if ((cmd & cmdmask) != cmdmask)
> >>>>> + pci_write_config_word(pdev, PCI_COMMAND, cmd | cmdmask);
> >>>>> +
> >>>>> + /*
> >>>>> + * The sequence is from "Tesla P100 and V100 SXM2 NVLink Isolation on
> >>>>> + * Multi-Tenant Systems".
> >>>>> + * The register names are not provided there either, hence raw values.
> >>>>> + */
> >>>>> + iowrite32(0x4, bar0_120000 + 0x4C);
> >>>>> + iowrite32(0x2, bar0_120000 + 0x2204);
> >>>>> + val = ioread32(bar0_0 + 0x200);
> >>>>> + val |= 0x02000000;
> >>>>> + iowrite32(val, bar0_0 + 0x200);
> >>>>> + val = ioread32(bar0_a00000 + 0x148);
> >>>>> + val |= mask;
> >>>>> + iowrite32(val, bar0_a00000 + 0x148);
> >>>>> +
> >>>>> + if ((cmd | cmdmask) != cmd)
> >>>>> + pci_write_config_word(pdev, PCI_COMMAND, cmd);
> >>>>> +
> >>>>> + pci_iounmap(pdev, bar0_a00000);
> >>>>> +bar0_120000_unmap:
> >>>>> + pci_iounmap(pdev, bar0_120000);
> >>>>> +bar0_0_unmap:
> >>>>> + pci_iounmap(pdev, bar0_0);
> >>>>> +}
> >
> >
>
^ permalink raw reply
* Re: [PATCH v2 stable v4.4 2/2] Documentation: Add nospectre_v1 parameter
From: Greg KH @ 2019-04-30 13:23 UTC (permalink / raw)
To: Diana Craciun; +Cc: linuxppc-dev, stable
In-Reply-To: <1556628147-15687-2-git-send-email-diana.craciun@nxp.com>
On Tue, Apr 30, 2019 at 03:42:27PM +0300, Diana Craciun wrote:
> commit 26cb1f36c43ee6e89d2a9f48a5a7500d5248f836 upstream.
>
> Currently only supported on powerpc.
>
> Signed-off-by: Diana Craciun <diana.craciun@nxp.com>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
> Documentation/kernel-parameters.txt | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index f0bdf78420a0..3ff87d5d6fea 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -2449,6 +2449,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
> legacy floating-point registers on task switch.
>
> nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
> +
Trailing whitespace :(
Fix up your editor to flag this as RED or something. I'll go fix it
up...
^ permalink raw reply
* Re: [PATCH v2 stable v4.4 2/2] Documentation: Add nospectre_v1 parameter
From: Greg KH @ 2019-04-30 13:25 UTC (permalink / raw)
To: Diana Craciun; +Cc: linuxppc-dev, stable
In-Reply-To: <1556628147-15687-2-git-send-email-diana.craciun@nxp.com>
On Tue, Apr 30, 2019 at 03:42:27PM +0300, Diana Craciun wrote:
> commit 26cb1f36c43ee6e89d2a9f48a5a7500d5248f836 upstream.
>
> Currently only supported on powerpc.
>
> Signed-off-by: Diana Craciun <diana.craciun@nxp.com>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
> Documentation/kernel-parameters.txt | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
> index f0bdf78420a0..3ff87d5d6fea 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -2449,6 +2449,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
> legacy floating-point registers on task switch.
>
> nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
> +
> + nospectre_v1 [PPC] Disable mitigations for Spectre Variant 1 (bounds
> + check bypass). With this option data leaks are possible
> + in the system.
>
> nospectre_v2 [X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2
> (indirect branch prediction) vulnerability. System may
> --
> 2.17.1
>
Both of these patches needed to be added to a bunch of the stable trees,
so I've now done that.
thanks,
greg k-h
^ permalink raw reply
* Re: [PATCH 22/41] drivers: tty: serial: cpm_uart: fix logging calls
From: Andy Shevchenko @ 2019-04-30 14:10 UTC (permalink / raw)
To: Christophe Leroy
Cc: lorenzo.pieralisi, linux-ia64, macro, linuxppc-dev, andrew,
gregkh, slemieux.tyco, liviu.dudau, linux-kernel, linux-mips,
linux, matthias.bgg, khilman, linux-serial, sudeep.holla,
sparclinux, jacmet, linux-amlogic, vz,
Enrico Weigelt, metux IT consult, davem
In-Reply-To: <a00ba23b-e73e-c964-a6d0-347cb605b8c8@c-s.fr>
On Mon, Apr 29, 2019 at 05:59:04PM +0200, Christophe Leroy wrote:
> Le 27/04/2019 à 14:52, Enrico Weigelt, metux IT consult a écrit :
> > Fix checkpatch warnings by using pr_err():
> >
> > WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then dev_err(dev, ... then pr_err(... to printk(KERN_ERR ...
> > #109: FILE: drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c:109:
> > + printk(KERN_ERR
> >
> > WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then dev_err(dev, ... then pr_err(... to printk(KERN_ERR ...
> > #128: FILE: drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c:128:
> > + printk(KERN_ERR
> >
> > WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then dev_err(dev, ... then pr_err(... to printk(KERN_ERR ...
> > + printk(KERN_ERR
> >
> > WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then dev_err(dev, ... then pr_err(... to printk(KERN_ERR ...
> > + printk(KERN_ERR
> >
> > Signed-off-by: Enrico Weigelt <info@metux.net>
>
> Reviewed-by: Christophe Leroy <christophe.leroy@c-s.fr>
>
> But is that really worth doing those changes ?
>
> If we want to do something useful, wouldn't it make more sense to introduce
> the use of dev_err() in order to identify the faulting device in the message
> ?
+1 for switching to dev_*().
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply
* [PATCH v2] powerpc/32s: fix BATs setting with CONFIG_STRICT_KERNEL_RWX
From: Christophe Leroy @ 2019-04-30 16:11 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
Serge Belyshev, Segher Boessenkool
Cc: linuxppc-dev, linux-kernel
Serge reported some crashes with CONFIG_STRICT_KERNEL_RWX enabled
on a book3s32 machine.
Analysis shows two issues:
- BATs addresses and sizes are not properly aligned.
- There is a gap between the last address covered by BATs and the
first address covered by pages.
Memory mapped with DBATs:
0: 0xc0000000-0xc07fffff 0x00000000 Kernel RO coherent
1: 0xc0800000-0xc0bfffff 0x00800000 Kernel RO coherent
2: 0xc0c00000-0xc13fffff 0x00c00000 Kernel RW coherent
3: 0xc1400000-0xc23fffff 0x01400000 Kernel RW coherent
4: 0xc2400000-0xc43fffff 0x02400000 Kernel RW coherent
5: 0xc4400000-0xc83fffff 0x04400000 Kernel RW coherent
6: 0xc8400000-0xd03fffff 0x08400000 Kernel RW coherent
7: 0xd0400000-0xe03fffff 0x10400000 Kernel RW coherent
Memory mapped with pages:
0xe1000000-0xefffffff 0x21000000 240M rw present dirty accessed
This patch fixes both issues. With the patch, we get the following
which is as expected:
Memory mapped with DBATs:
0: 0xc0000000-0xc07fffff 0x00000000 Kernel RO coherent
1: 0xc0800000-0xc0bfffff 0x00800000 Kernel RO coherent
2: 0xc0c00000-0xc0ffffff 0x00c00000 Kernel RW coherent
3: 0xc1000000-0xc1ffffff 0x01000000 Kernel RW coherent
4: 0xc2000000-0xc3ffffff 0x02000000 Kernel RW coherent
5: 0xc4000000-0xc7ffffff 0x04000000 Kernel RW coherent
6: 0xc8000000-0xcfffffff 0x08000000 Kernel RW coherent
7: 0xd0000000-0xdfffffff 0x10000000 Kernel RW coherent
Memory mapped with pages:
0xe0000000-0xefffffff 0x20000000 256M rw present dirty accessed
Reported-by: Serge Belyshev <belyshev@depni.sinp.msu.ru>
Fixes: 63b2bc619565 ("powerpc/mm/32s: Use BATs for STRICT_KERNEL_RWX")
Cc: stable@vger.kernel.org
Acked-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
v2: Added comment to explain block_size() function as recommended by Segher.
arch/powerpc/mm/ppc_mmu_32.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index bf1de3ca39bc..afd8dcb11432 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -98,10 +98,20 @@ static int find_free_bat(void)
return -1;
}
+/*
+ * This function calculates the size of the larger block usable to map the
+ * beginning of an area based on the start address and size of that area:
+ * - max block size is 8M on 601 and 256 on other 6xx.
+ * - base address must be aligned to the block size. So the maximum block size
+ * is identified by the lowest bit set to 1 in the base address (for instance
+ * if base is 0x16000000, max size is 0x02000000).
+ * - block size has to be a power of two. This is calculated by finding the
+ * highest bit set to 1.
+ */
static unsigned int block_size(unsigned long base, unsigned long top)
{
unsigned int max_size = (cpu_has_feature(CPU_FTR_601) ? 8 : 256) << 20;
- unsigned int base_shift = (fls(base) - 1) & 31;
+ unsigned int base_shift = (ffs(base) - 1) & 31;
unsigned int block_shift = (fls(top - base) - 1) & 31;
return min3(max_size, 1U << base_shift, 1U << block_shift);
@@ -157,7 +167,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to
unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
- int done;
+ unsigned long done;
unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
if (__map_without_bats) {
@@ -169,10 +179,10 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
return __mmu_mapin_ram(base, top);
done = __mmu_mapin_ram(base, border);
- if (done != border - base)
+ if (done != border)
return done;
- return done + __mmu_mapin_ram(border, top);
+ return __mmu_mapin_ram(border, top);
}
void mmu_mark_initmem_nx(void)
--
2.13.3
^ permalink raw reply related
* Re: [PATCH v4] powerpc/pseries: Remove limit in wait for dying CPU
From: Nathan Lynch @ 2019-04-30 16:34 UTC (permalink / raw)
To: Thiago Jung Bauermann, linuxppc-dev
Cc: Gautham R Shenoy, linux-kernel, Nicholas Piggin,
Michael Bringmann, Tyrel Datwyler, Vaidyanathan Srinivasan,
Thiago Jung Bauermann
In-Reply-To: <20190423223914.3882-1-bauerman@linux.ibm.com>
Thiago Jung Bauermann <bauerman@linux.ibm.com> writes:
> This can be a problem because if the busy loop finishes too early, then the
> kernel may offline another CPU before the previous one finished dying,
> which would lead to two concurrent calls to rtas-stop-self, which is
> prohibited by the PAPR.
>
> Since the hotplug machinery already assumes that cpu_die() is going to
> work, we can simply loop until the CPU stops.
>
> Also change the loop to wait 100 µs between each call to
> smp_query_cpu_stopped() to avoid querying RTAS too often.
[...]
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index 97feb6e79f1a..d75cee60644c 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -214,13 +214,17 @@ static void pseries_cpu_die(unsigned int cpu)
> msleep(1);
> }
> } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
> -
> - for (tries = 0; tries < 25; tries++) {
> + /*
> + * rtas_stop_self() panics if the CPU fails to stop and our
> + * callers already assume that we are going to succeed, so we
> + * can just loop until the CPU stops.
> + */
> + while (true) {
> cpu_status = smp_query_cpu_stopped(pcpu);
> if (cpu_status == QCSS_STOPPED ||
> cpu_status == QCSS_HARDWARE_ERROR)
> break;
> - cpu_relax();
> + udelay(100);
> }
> }
I agree with looping indefinitely but doesn't it need a cond_resched()
or similar check?
^ permalink raw reply
* Re: [PATCH] powerpc/mm/radix: Fix kernel crash when running subpage protect test
From: Sachin Sant @ 2019-04-30 8:51 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: paulus, linuxppc-dev, npiggin
In-Reply-To: <20190430075907.7701-1-aneesh.kumar@linux.ibm.com>
[-- Attachment #1: Type: text/plain, Size: 1385 bytes --]
> On 30-Apr-2019, at 1:29 PM, Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> wrote:
>
> This patch fixes the below crash by making sure we touch the subpage protection
> related structures only if we know they are allocated on the platform. With
> radix translation we don't allocate hash context at all and trying to access
> subpage_prot_table results in
>
> Faulting instruction address: 0xc00000000008bdb4
> Oops: Kernel access of bad area, sig: 11 [#1]
> LE PAGE_SIZE=64K MMU=Radix MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
> ....
> NIP [c00000000008bdb4] sys_subpage_prot+0x74/0x590
> LR [c00000000000b688] system_call+0x5c/0x70
> Call Trace:
> [c00020002c6b7d30] [c00020002c6b7d90] 0xc00020002c6b7d90 (unreliable)
> [c00020002c6b7e20] [c00000000000b688] system_call+0x5c/0x70
> Instruction dump:
> fb61ffd8 fb81ffe0 fba1ffe8 fbc1fff0 fbe1fff8 f821ff11 e92d1178 f9210068
> 39200000 e92d0968 ebe90630 e93f03e8 <eb891038> 60000000 3860fffe e9410068
>
> We also move the subpage_prot_table with mmp_sem held to avoid racec
> between two parallel subpage_prot syscall.
>
> Reported-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> —
Thanks for the patch. Fixes the kernel crash.
Tested-by: Sachin Sant <sachinp@linux.vnet.ibm.com <mailto:sachinp@linux.vnet.ibm.com>>
Thanks
-Sachin
[-- Attachment #2: Type: text/html, Size: 2528 bytes --]
^ permalink raw reply
* [PATCH RESEND 0/5] soc/fsl/qe: cleanups and new DT binding
From: Rasmus Villemoes @ 2019-04-30 13:36 UTC (permalink / raw)
To: Qiang Zhao, Li Yang
Cc: Valentin Longchamp, linux-kernel@vger.kernel.org, Scott Wood,
Rasmus Villemoes, Rob Herring, linuxppc-dev@lists.ozlabs.org,
linux-arm-kernel@lists.infradead.org
This small series consists of some small cleanups and simplifications
of the QUICC engine driver, and introduces a new DT binding that makes
it much easier to support other variants of the QUICC engine IP block
that appears in the wild: There's no reason to expect in general that
the number of valid SNUMs uniquely determines the set of such, so it's
better to simply let the device tree specify the values (and,
implicitly via the array length, also the count).
I sent these two months ago, but mostly as POC inside another
thread. Resending as proper patch series.
Rasmus Villemoes (5):
soc/fsl/qe: qe.c: drop useless static qualifier
soc/fsl/qe: qe.c: reduce static memory footprint by 1.7K
soc/fsl/qe: qe.c: introduce qe_get_device_node helper
soc/fsl/qe: qe.c: support fsl,qe-snums property
soc/fsl/qe: qe.c: fold qe_get_num_of_snums into qe_snums_init
.../devicetree/bindings/soc/fsl/cpm_qe/qe.txt | 8 +-
drivers/net/ethernet/freescale/ucc_geth.c | 2 +-
drivers/soc/fsl/qe/qe.c | 162 +++++++-----------
include/soc/fsl/qe/qe.h | 2 +-
4 files changed, 73 insertions(+), 101 deletions(-)
--
2.20.1
^ permalink raw reply
* [PATCH 3/5] soc/fsl/qe: qe.c: introduce qe_get_device_node helper
From: Rasmus Villemoes @ 2019-04-30 13:36 UTC (permalink / raw)
To: Qiang Zhao, Li Yang
Cc: Valentin Longchamp, linux-kernel@vger.kernel.org, Scott Wood,
Rasmus Villemoes, Rob Herring, linuxppc-dev@lists.ozlabs.org,
linux-arm-kernel@lists.infradead.org
In-Reply-To: <20190430133615.25721-1-rasmus.villemoes@prevas.dk>
The 'try of_find_compatible_node(NULL, NULL, "fsl,qe"), fall back to
of_find_node_by_type(NULL, "qe")' pattern is repeated five
times. Factor it into a common helper.
Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
---
drivers/soc/fsl/qe/qe.c | 71 +++++++++++++++++------------------------
1 file changed, 29 insertions(+), 42 deletions(-)
diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index d0393f83145c..aff9d1373529 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -56,6 +56,20 @@ static unsigned int qe_num_of_snum;
static phys_addr_t qebase = -1;
+static struct device_node *qe_get_device_node(void)
+{
+ struct device_node *qe;
+
+ /*
+ * Newer device trees have an "fsl,qe" compatible property for the QE
+ * node, but we still need to support older device trees.
+ */
+ qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
+ if (qe)
+ return qe;
+ return of_find_node_by_type(NULL, "qe");
+}
+
static phys_addr_t get_qe_base(void)
{
struct device_node *qe;
@@ -65,12 +79,9 @@ static phys_addr_t get_qe_base(void)
if (qebase != -1)
return qebase;
- qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
- if (!qe) {
- qe = of_find_node_by_type(NULL, "qe");
- if (!qe)
- return qebase;
- }
+ qe = qe_get_device_node();
+ if (!qe)
+ return qebase;
ret = of_address_to_resource(qe, 0, &res);
if (!ret)
@@ -164,12 +175,9 @@ unsigned int qe_get_brg_clk(void)
if (brg_clk)
return brg_clk;
- qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
- if (!qe) {
- qe = of_find_node_by_type(NULL, "qe");
- if (!qe)
- return brg_clk;
- }
+ qe = qe_get_device_node();
+ if (!qe)
+ return brg_clk;
prop = of_get_property(qe, "brg-frequency", &size);
if (prop && size == sizeof(*prop))
@@ -563,16 +571,9 @@ struct qe_firmware_info *qe_get_firmware_info(void)
initialized = 1;
- /*
- * Newer device trees have an "fsl,qe" compatible property for the QE
- * node, but we still need to support older device trees.
- */
- qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
- if (!qe) {
- qe = of_find_node_by_type(NULL, "qe");
- if (!qe)
- return NULL;
- }
+ qe = qe_get_device_node();
+ if (!qe)
+ return NULL;
/* Find the 'firmware' child node */
fw = of_get_child_by_name(qe, "firmware");
@@ -618,16 +619,9 @@ unsigned int qe_get_num_of_risc(void)
unsigned int num_of_risc = 0;
const u32 *prop;
- qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
- if (!qe) {
- /* Older devices trees did not have an "fsl,qe"
- * compatible property, so we need to look for
- * the QE node by name.
- */
- qe = of_find_node_by_type(NULL, "qe");
- if (!qe)
- return num_of_risc;
- }
+ qe = qe_get_device_node();
+ if (!qe)
+ return num_of_risc;
prop = of_get_property(qe, "fsl,qe-num-riscs", &size);
if (prop && size == sizeof(*prop))
@@ -647,16 +641,9 @@ unsigned int qe_get_num_of_snums(void)
const u32 *prop;
num_of_snums = 28; /* The default number of snum for threads is 28 */
- qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
- if (!qe) {
- /* Older devices trees did not have an "fsl,qe"
- * compatible property, so we need to look for
- * the QE node by name.
- */
- qe = of_find_node_by_type(NULL, "qe");
- if (!qe)
- return num_of_snums;
- }
+ qe = qe_get_device_node();
+ if (!qe)
+ return num_of_snums;
prop = of_get_property(qe, "fsl,qe-num-snums", &size);
if (prop && size == sizeof(*prop)) {
--
2.20.1
^ permalink raw reply related
* [PATCH 2/5] soc/fsl/qe: qe.c: reduce static memory footprint by 1.7K
From: Rasmus Villemoes @ 2019-04-30 13:36 UTC (permalink / raw)
To: Qiang Zhao, Li Yang
Cc: Valentin Longchamp, linux-kernel@vger.kernel.org, Scott Wood,
Rasmus Villemoes, Rob Herring, linuxppc-dev@lists.ozlabs.org,
linux-arm-kernel@lists.infradead.org
In-Reply-To: <20190430133615.25721-1-rasmus.villemoes@prevas.dk>
The current array of struct qe_snum use 256*4 bytes for just keeping
track of the free/used state of each index, and the struct layout
means there's another 768 bytes of padding. If we just unzip that
structure, the array of snum values just use 256 bytes, while the
free/inuse state can be tracked in a 32 byte bitmap.
So this reduces the .data footprint by 1760 bytes. It also serves as
preparation for introducing another DT binding for specifying the snum
values.
Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
---
drivers/soc/fsl/qe/qe.c | 37 ++++++++++++-------------------------
1 file changed, 12 insertions(+), 25 deletions(-)
diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index 855373deb746..d0393f83145c 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -14,6 +14,7 @@
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*/
+#include <linux/bitmap.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -43,25 +44,14 @@ static DEFINE_SPINLOCK(qe_lock);
DEFINE_SPINLOCK(cmxgcr_lock);
EXPORT_SYMBOL(cmxgcr_lock);
-/* QE snum state */
-enum qe_snum_state {
- QE_SNUM_STATE_USED,
- QE_SNUM_STATE_FREE
-};
-
-/* QE snum */
-struct qe_snum {
- u8 num;
- enum qe_snum_state state;
-};
-
/* We allocate this here because it is used almost exclusively for
* the communication processor devices.
*/
struct qe_immap __iomem *qe_immr;
EXPORT_SYMBOL(qe_immr);
-static struct qe_snum snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */
+static u8 snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */
+static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM);
static unsigned int qe_num_of_snum;
static phys_addr_t qebase = -1;
@@ -308,6 +298,7 @@ static void qe_snums_init(void)
};
const u8 *snum_init;
+ bitmap_zero(snum_state, QE_NUM_OF_SNUM);
qe_num_of_snum = qe_get_num_of_snums();
if (qe_num_of_snum == 76)
@@ -315,10 +306,8 @@ static void qe_snums_init(void)
else
snum_init = snum_init_46;
- for (i = 0; i < qe_num_of_snum; i++) {
- snums[i].num = snum_init[i];
- snums[i].state = QE_SNUM_STATE_FREE;
- }
+ for (i = 0; i < qe_num_of_snum; i++)
+ snums[i] = snum_init[i];
}
int qe_get_snum(void)
@@ -328,12 +317,10 @@ int qe_get_snum(void)
int i;
spin_lock_irqsave(&qe_lock, flags);
- for (i = 0; i < qe_num_of_snum; i++) {
- if (snums[i].state == QE_SNUM_STATE_FREE) {
- snums[i].state = QE_SNUM_STATE_USED;
- snum = snums[i].num;
- break;
- }
+ i = find_first_zero_bit(snum_state, qe_num_of_snum);
+ if (i < qe_num_of_snum) {
+ set_bit(i, snum_state);
+ snum = snums[i];
}
spin_unlock_irqrestore(&qe_lock, flags);
@@ -346,8 +333,8 @@ void qe_put_snum(u8 snum)
int i;
for (i = 0; i < qe_num_of_snum; i++) {
- if (snums[i].num == snum) {
- snums[i].state = QE_SNUM_STATE_FREE;
+ if (snums[i] == snum) {
+ clear_bit(i, snum_state);
break;
}
}
--
2.20.1
^ permalink raw reply related
* [PATCH 1/5] soc/fsl/qe: qe.c: drop useless static qualifier
From: Rasmus Villemoes @ 2019-04-30 13:36 UTC (permalink / raw)
To: Qiang Zhao, Li Yang
Cc: Valentin Longchamp, linux-kernel@vger.kernel.org, Scott Wood,
Rasmus Villemoes, Rob Herring, linuxppc-dev@lists.ozlabs.org,
linux-arm-kernel@lists.infradead.org
In-Reply-To: <20190430133615.25721-1-rasmus.villemoes@prevas.dk>
The local variable snum_init has no reason to have static storage duration.
Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
---
drivers/soc/fsl/qe/qe.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index 612d9c551be5..855373deb746 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -306,7 +306,7 @@ static void qe_snums_init(void)
0x28, 0x29, 0x38, 0x39, 0x48, 0x49, 0x58, 0x59,
0x68, 0x69, 0x78, 0x79, 0x80, 0x81,
};
- static const u8 *snum_init;
+ const u8 *snum_init;
qe_num_of_snum = qe_get_num_of_snums();
--
2.20.1
^ permalink raw reply related
* [PATCH 4/5] soc/fsl/qe: qe.c: support fsl,qe-snums property
From: Rasmus Villemoes @ 2019-04-30 13:36 UTC (permalink / raw)
To: Qiang Zhao, Li Yang
Cc: Valentin Longchamp, linux-kernel@vger.kernel.org, Scott Wood,
Rasmus Villemoes, Rob Herring, linuxppc-dev@lists.ozlabs.org,
linux-arm-kernel@lists.infradead.org
In-Reply-To: <20190430133615.25721-1-rasmus.villemoes@prevas.dk>
The current code assumes that the set of snum _values_ to populate the
snums[] array with is a function of the _number_ of snums
alone. However, reading table 4-30, and its footnotes, of the QUICC
Engine Block Reference Manual shows that that is a bit too naive.
As an alternative, this introduces a new binding fsl,qe-snums, which
automatically encodes both the number of snums and the actual values to
use. Conveniently, of_property_read_variable_u8_array does exactly
what we need.
For example, for the MPC8309, one would specify the property as
fsl,qe-snums = /bits/ 8 <
0x88 0x89 0x98 0x99 0xa8 0xa9 0xb8 0xb9
0xc8 0xc9 0xd8 0xd9 0xe8 0xe9>;
Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
---
.../devicetree/bindings/soc/fsl/cpm_qe/qe.txt | 8 +++++++-
drivers/soc/fsl/qe/qe.c | 14 +++++++++++++-
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt
index d7afaff5faff..05f5f485562a 100644
--- a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt
+++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt
@@ -18,7 +18,8 @@ Required properties:
- reg : offset and length of the device registers.
- bus-frequency : the clock frequency for QUICC Engine.
- fsl,qe-num-riscs: define how many RISC engines the QE has.
-- fsl,qe-num-snums: define how many serial number(SNUM) the QE can use for the
+- fsl,qe-snums: This property has to be specified as '/bits/ 8' value,
+ defining the array of serial number (SNUM) values for the virtual
threads.
Optional properties:
@@ -34,6 +35,11 @@ Recommended properties
- brg-frequency : the internal clock source frequency for baud-rate
generators in Hz.
+Deprecated properties
+- fsl,qe-num-snums: define how many serial number(SNUM) the QE can use
+ for the threads. Use fsl,qe-snums instead to not only specify the
+ number of snums, but also their values.
+
Example:
qe@e0100000 {
#address-cells = <1>;
diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index aff9d1373529..af3c2b2b268f 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -283,7 +283,6 @@ EXPORT_SYMBOL(qe_clock_source);
*/
static void qe_snums_init(void)
{
- int i;
static const u8 snum_init_76[] = {
0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D,
0x24, 0x25, 0x2C, 0x2D, 0x34, 0x35, 0x88, 0x89,
@@ -304,9 +303,22 @@ static void qe_snums_init(void)
0x28, 0x29, 0x38, 0x39, 0x48, 0x49, 0x58, 0x59,
0x68, 0x69, 0x78, 0x79, 0x80, 0x81,
};
+ struct device_node *qe;
const u8 *snum_init;
+ int i;
bitmap_zero(snum_state, QE_NUM_OF_SNUM);
+ qe = qe_get_device_node();
+ if (qe) {
+ i = of_property_read_variable_u8_array(qe, "fsl,qe-snums",
+ snums, 1, QE_NUM_OF_SNUM);
+ of_node_put(qe);
+ if (i > 0) {
+ qe_num_of_snum = i;
+ return;
+ }
+ }
+
qe_num_of_snum = qe_get_num_of_snums();
if (qe_num_of_snum == 76)
--
2.20.1
^ permalink raw reply related
* [PATCH 5/5] soc/fsl/qe: qe.c: fold qe_get_num_of_snums into qe_snums_init
From: Rasmus Villemoes @ 2019-04-30 13:36 UTC (permalink / raw)
To: Qiang Zhao, Li Yang
Cc: Valentin Longchamp, linux-kernel@vger.kernel.org, Scott Wood,
Rasmus Villemoes, Rob Herring, linuxppc-dev@lists.ozlabs.org,
linux-arm-kernel@lists.infradead.org
In-Reply-To: <20190430133615.25721-1-rasmus.villemoes@prevas.dk>
The comment "No QE ever has fewer than 28 SNUMs" is false; e.g. the
MPC8309 has 14. The code path returning -EINVAL is also a recipe for
instant disaster, since the caller (qe_snums_init) uncritically
assigns the return value to the unsigned qe_num_of_snum, and would
thus proceed to attempt to copy 4GB from snum_init_46[] to the snum[]
array.
So fold the handling of the legacy fsl,qe-num-snums into
qe_snums_init, and make sure we do not end up using the snum_init_46
array in cases other than the two where we know it makes sense.
Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
---
drivers/net/ethernet/freescale/ucc_geth.c | 2 +-
drivers/soc/fsl/qe/qe.c | 54 +++++++----------------
include/soc/fsl/qe/qe.h | 2 +-
3 files changed, 19 insertions(+), 39 deletions(-)
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index eb3e65e8868f..5748eb8464d0 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3837,7 +3837,7 @@ static int ucc_geth_probe(struct platform_device* ofdev)
}
if (max_speed == SPEED_1000) {
- unsigned int snums = qe_get_num_of_snums();
+ unsigned int snums = qe_num_of_snum;
/* configure muram FIFOs for gigabit operation */
ug_info->uf_info.urfs = UCC_GETH_URFS_GIGA_INIT;
diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index af3c2b2b268f..8c3b3c62d81b 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -52,7 +52,8 @@ EXPORT_SYMBOL(qe_immr);
static u8 snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */
static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM);
-static unsigned int qe_num_of_snum;
+unsigned int qe_num_of_snum;
+EXPORT_SYMBOL(qe_num_of_snum);
static phys_addr_t qebase = -1;
@@ -308,26 +309,34 @@ static void qe_snums_init(void)
int i;
bitmap_zero(snum_state, QE_NUM_OF_SNUM);
+ qe_num_of_snum = 28; /* The default number of snum for threads is 28 */
qe = qe_get_device_node();
if (qe) {
i = of_property_read_variable_u8_array(qe, "fsl,qe-snums",
snums, 1, QE_NUM_OF_SNUM);
- of_node_put(qe);
if (i > 0) {
+ of_node_put(qe);
qe_num_of_snum = i;
return;
}
+ /*
+ * Fall back to legacy binding of using the value of
+ * fsl,qe-num-snums to choose one of the static arrays
+ * above.
+ */
+ of_property_read_u32(qe, "fsl,qe-num-snums", &qe_num_of_snum);
+ of_node_put(qe);
}
- qe_num_of_snum = qe_get_num_of_snums();
-
if (qe_num_of_snum == 76)
snum_init = snum_init_76;
- else
+ else if (qe_num_of_snum == 28 || qe_num_of_snum == 46)
snum_init = snum_init_46;
-
- for (i = 0; i < qe_num_of_snum; i++)
- snums[i] = snum_init[i];
+ else {
+ pr_err("QE: unsupported value of fsl,qe-num-snums: %u\n", qe_num_of_snum);
+ return;
+ }
+ memcpy(snums, snum_init, qe_num_of_snum);
}
int qe_get_snum(void)
@@ -645,35 +654,6 @@ unsigned int qe_get_num_of_risc(void)
}
EXPORT_SYMBOL(qe_get_num_of_risc);
-unsigned int qe_get_num_of_snums(void)
-{
- struct device_node *qe;
- int size;
- unsigned int num_of_snums;
- const u32 *prop;
-
- num_of_snums = 28; /* The default number of snum for threads is 28 */
- qe = qe_get_device_node();
- if (!qe)
- return num_of_snums;
-
- prop = of_get_property(qe, "fsl,qe-num-snums", &size);
- if (prop && size == sizeof(*prop)) {
- num_of_snums = *prop;
- if ((num_of_snums < 28) || (num_of_snums > QE_NUM_OF_SNUM)) {
- /* No QE ever has fewer than 28 SNUMs */
- pr_err("QE: number of snum is invalid\n");
- of_node_put(qe);
- return -EINVAL;
- }
- }
-
- of_node_put(qe);
-
- return num_of_snums;
-}
-EXPORT_SYMBOL(qe_get_num_of_snums);
-
static int __init qe_init(void)
{
struct device_node *np;
diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h
index b3d1aff5e8ad..af5739850bf4 100644
--- a/include/soc/fsl/qe/qe.h
+++ b/include/soc/fsl/qe/qe.h
@@ -212,7 +212,7 @@ int qe_setbrg(enum qe_clock brg, unsigned int rate, unsigned int multiplier);
int qe_get_snum(void);
void qe_put_snum(u8 snum);
unsigned int qe_get_num_of_risc(void);
-unsigned int qe_get_num_of_snums(void);
+extern unsigned int qe_num_of_snum;
static inline int qe_alive_during_sleep(void)
{
--
2.20.1
^ permalink raw reply related
* Re: [PATCH v2 3/6] x86: clean up _TIF_SYSCALL_EMU handling using ptrace_syscall_enter hook
From: Sudeep Holla @ 2019-04-30 16:44 UTC (permalink / raw)
To: Oleg Nesterov
Cc: Sudeep Holla, Haibo Xu, Steve Capper, Catalin Marinas, jdike, x86,
Will Deacon, linux-kernel, Bin Lu, Richard Weinberger,
Ingo Molnar, Paul Mackerras, Andy Lutomirski, Borislav Petkov,
Thomas Gleixner, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20190318153321.GA23521@redhat.com>
On Mon, Mar 18, 2019 at 04:33:22PM +0100, Oleg Nesterov wrote:
> On 03/18, Sudeep Holla wrote:
> >
> > --- a/arch/x86/entry/common.c
> > +++ b/arch/x86/entry/common.c
> > @@ -70,22 +70,16 @@ static long syscall_trace_enter(struct pt_regs *regs)
> >
> > struct thread_info *ti = current_thread_info();
> > unsigned long ret = 0;
> > - bool emulated = false;
> > u32 work;
> >
> > if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
> > BUG_ON(regs != task_pt_regs(current));
> >
> > - work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
> > -
> > - if (unlikely(work & _TIF_SYSCALL_EMU))
> > - emulated = true;
> > -
> > - if ((emulated || (work & _TIF_SYSCALL_TRACE)) &&
> > - tracehook_report_syscall_entry(regs))
> > + if (unlikely(ptrace_syscall_enter(regs)))
> > return -1L;
> >
> > - if (emulated)
> > + work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
> > + if ((work & _TIF_SYSCALL_TRACE) && tracehook_report_syscall_entry(regs))
> > return -1L;
>
[...]
>
> And it seems that _TIF_WORK_SYSCALL_ENTRY needs some cleanups too... We don't need
> "& _TIF_WORK_SYSCALL_ENTRY" in syscall_trace_enter, and _TIF_WORK_SYSCALL_ENTRY
> should not include _TIF_NOHZ?
>
I was about to post the updated version and checked this to make sure I have
covered everything or not. I had missed the above comment. All architectures
have _TIF_NOHZ in their mask that they check to do work. And from x86, I read
"...syscall_trace_enter(). Also includes TIF_NOHZ for enter_from_user_mode()"
So I don't understand why _TIF_NOHZ needs to be dropped.
Also if we need to drop, we can address that separately examining all archs.
I will post the cleanup as you suggested for now.
--
Regards,
Sudeep
^ permalink raw reply
* Re: [PATCH v2 3/6] x86: clean up _TIF_SYSCALL_EMU handling using ptrace_syscall_enter hook
From: Andy Lutomirski @ 2019-04-30 16:46 UTC (permalink / raw)
To: Sudeep Holla
Cc: Haibo Xu, Steve Capper, Catalin Marinas, Jeff Dike, X86 ML,
Will Deacon, LKML, Oleg Nesterov, Richard Weinberger, Ingo Molnar,
Paul Mackerras, Andy Lutomirski, Borislav Petkov, Thomas Gleixner,
Bin Lu, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20190318104925.16600-4-sudeep.holla@arm.com>
On Mon, Mar 18, 2019 at 3:49 AM Sudeep Holla <sudeep.holla@arm.com> wrote:
>
> Now that we have a new hook ptrace_syscall_enter that can be called from
> syscall entry code and it handles PTRACE_SYSEMU in generic code, we
> can do some cleanup using the same in syscall_trace_enter.
>
> Further the extra logic to find single stepping PTRACE_SYSEMU_SINGLESTEP
> in syscall_slow_exit_work seems unnecessary. Let's remove the same.
>
Unless the patch set contains a selftest that exercises all the
interesting cases here, NAK. To be clear, there needs to be a test
that passes on an unmodified kernel and still passes on a patched
kernel. And that test case needs to *fail* if, for example, you force
"emulated" to either true or false rather than reading out the actual
value.
--Andy
^ permalink raw reply
* Re: [PATCH 1/5] soc/fsl/qe: qe.c: drop useless static qualifier
From: Christophe Leroy @ 2019-04-30 17:03 UTC (permalink / raw)
To: Rasmus Villemoes, Qiang Zhao, Li Yang
Cc: Valentin Longchamp, linux-kernel@vger.kernel.org, Scott Wood,
Rasmus Villemoes, Rob Herring, linuxppc-dev@lists.ozlabs.org,
linux-arm-kernel@lists.infradead.org
In-Reply-To: <20190430133615.25721-2-rasmus.villemoes@prevas.dk>
Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit :
> The local variable snum_init has no reason to have static storage duration.
>
> Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Reviewed-by: Christophe Leroy <christophe.leroy@c-s.fr>
> ---
> drivers/soc/fsl/qe/qe.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
> index 612d9c551be5..855373deb746 100644
> --- a/drivers/soc/fsl/qe/qe.c
> +++ b/drivers/soc/fsl/qe/qe.c
> @@ -306,7 +306,7 @@ static void qe_snums_init(void)
> 0x28, 0x29, 0x38, 0x39, 0x48, 0x49, 0x58, 0x59,
> 0x68, 0x69, 0x78, 0x79, 0x80, 0x81,
> };
> - static const u8 *snum_init;
> + const u8 *snum_init;
>
> qe_num_of_snum = qe_get_num_of_snums();
>
>
^ permalink raw reply
* Re: [PATCH v2 3/6] x86: clean up _TIF_SYSCALL_EMU handling using ptrace_syscall_enter hook
From: Sudeep Holla @ 2019-04-30 17:09 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Haibo Xu, Steve Capper, Catalin Marinas, Jeff Dike, X86 ML,
Will Deacon, LKML, Oleg Nesterov, Richard Weinberger, Ingo Molnar,
Paul Mackerras, Sudeep Holla, Borislav Petkov, Thomas Gleixner,
Bin Lu, linuxppc-dev, linux-arm-kernel
In-Reply-To: <CALCETrXEebRqX0W8MuS0SeaMDpEO5KdS3k7id279hZgHrmc8yA@mail.gmail.com>
On 30/04/2019 17:46, Andy Lutomirski wrote:
> On Mon, Mar 18, 2019 at 3:49 AM Sudeep Holla <sudeep.holla@arm.com> wrote:
>>
>> Now that we have a new hook ptrace_syscall_enter that can be called from
>> syscall entry code and it handles PTRACE_SYSEMU in generic code, we
>> can do some cleanup using the same in syscall_trace_enter.
>>
>> Further the extra logic to find single stepping PTRACE_SYSEMU_SINGLESTEP
>> in syscall_slow_exit_work seems unnecessary. Let's remove the same.
>>
>
> Unless the patch set contains a selftest that exercises all the
> interesting cases here, NAK. To be clear, there needs to be a test
> that passes on an unmodified kernel and still passes on a patched
> kernel. And that test case needs to *fail* if, for example, you force
> "emulated" to either true or false rather than reading out the actual
> value.
>
Tested using tools/testing/selftests/x86/ptrace_syscall.c
Also v3 doesn't change any logic or additional call to new function as
in v2. It's just simple cleanup as suggested by Oleg.
--
Regards,
Sudeep
^ permalink raw reply
* Re: [PATCH 2/5] soc/fsl/qe: qe.c: reduce static memory footprint by 1.7K
From: Christophe Leroy @ 2019-04-30 17:12 UTC (permalink / raw)
To: Rasmus Villemoes, Qiang Zhao, Li Yang
Cc: Valentin Longchamp, linux-kernel@vger.kernel.org, Scott Wood,
Rasmus Villemoes, Rob Herring, linuxppc-dev@lists.ozlabs.org,
linux-arm-kernel@lists.infradead.org
In-Reply-To: <20190430133615.25721-3-rasmus.villemoes@prevas.dk>
Le 30/04/2019 à 15:36, Rasmus Villemoes a écrit :
> The current array of struct qe_snum use 256*4 bytes for just keeping
> track of the free/used state of each index, and the struct layout
> means there's another 768 bytes of padding. If we just unzip that
> structure, the array of snum values just use 256 bytes, while the
> free/inuse state can be tracked in a 32 byte bitmap.
>
> So this reduces the .data footprint by 1760 bytes. It also serves as
> preparation for introducing another DT binding for specifying the snum
> values.
>
> Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
> ---
> drivers/soc/fsl/qe/qe.c | 37 ++++++++++++-------------------------
> 1 file changed, 12 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
> index 855373deb746..d0393f83145c 100644
> --- a/drivers/soc/fsl/qe/qe.c
> +++ b/drivers/soc/fsl/qe/qe.c
> @@ -14,6 +14,7 @@
> * Free Software Foundation; either version 2 of the License, or (at your
> * option) any later version.
> */
> +#include <linux/bitmap.h>
> #include <linux/errno.h>
> #include <linux/sched.h>
> #include <linux/kernel.h>
> @@ -43,25 +44,14 @@ static DEFINE_SPINLOCK(qe_lock);
> DEFINE_SPINLOCK(cmxgcr_lock);
> EXPORT_SYMBOL(cmxgcr_lock);
>
> -/* QE snum state */
> -enum qe_snum_state {
> - QE_SNUM_STATE_USED,
> - QE_SNUM_STATE_FREE
> -};
> -
> -/* QE snum */
> -struct qe_snum {
> - u8 num;
> - enum qe_snum_state state;
> -};
> -
> /* We allocate this here because it is used almost exclusively for
> * the communication processor devices.
> */
> struct qe_immap __iomem *qe_immr;
> EXPORT_SYMBOL(qe_immr);
>
> -static struct qe_snum snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */
> +static u8 snums[QE_NUM_OF_SNUM]; /* Dynamically allocated SNUMs */
> +static DECLARE_BITMAP(snum_state, QE_NUM_OF_SNUM);
> static unsigned int qe_num_of_snum;
>
> static phys_addr_t qebase = -1;
> @@ -308,6 +298,7 @@ static void qe_snums_init(void)
> };
> const u8 *snum_init;
>
> + bitmap_zero(snum_state, QE_NUM_OF_SNUM);
Doesn't make much importance, but wouldn't it be more logical to add
this line where the setting of .state = QE_SNUM_STATE_FREE was done
previously, ie around the for() loop below ?
> qe_num_of_snum = qe_get_num_of_snums();
>
> if (qe_num_of_snum == 76)
> @@ -315,10 +306,8 @@ static void qe_snums_init(void)
> else
> snum_init = snum_init_46;
>
> - for (i = 0; i < qe_num_of_snum; i++) {
> - snums[i].num = snum_init[i];
> - snums[i].state = QE_SNUM_STATE_FREE;
> - }
> + for (i = 0; i < qe_num_of_snum; i++)
> + snums[i] = snum_init[i];
Could use memcpy() instead ?
> }
>
> int qe_get_snum(void)
> @@ -328,12 +317,10 @@ int qe_get_snum(void)
> int i;
>
> spin_lock_irqsave(&qe_lock, flags);
> - for (i = 0; i < qe_num_of_snum; i++) {
> - if (snums[i].state == QE_SNUM_STATE_FREE) {
> - snums[i].state = QE_SNUM_STATE_USED;
> - snum = snums[i].num;
> - break;
> - }
> + i = find_first_zero_bit(snum_state, qe_num_of_snum);
> + if (i < qe_num_of_snum) {
> + set_bit(i, snum_state);
> + snum = snums[i];
> }
> spin_unlock_irqrestore(&qe_lock, flags);
>
> @@ -346,8 +333,8 @@ void qe_put_snum(u8 snum)
> int i;
>
> for (i = 0; i < qe_num_of_snum; i++) {
> - if (snums[i].num == snum) {
> - snums[i].state = QE_SNUM_STATE_FREE;
> + if (snums[i] == snum) {
> + clear_bit(i, snum_state);
> break;
> }
> }
Can we replace this loop by memchr() ?
Christophe
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox