linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] powerpc/tm: Disable IRQ in tm_recheckpoint
@ 2014-03-28  5:40 Michael Neuling
  2014-03-28  5:40 ` [PATCH 2/3] powerpc/tm: Remove unnecessary r1 save Michael Neuling
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Michael Neuling @ 2014-03-28  5:40 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Michael Neuling, Paul Mackerras

We can't take an IRQ when we're about to do a trechkpt as our GPR state is set
to user GPR values.

We've hit this when running some IBM Java stress tests in the lab resulting in
the following dump:

  cpu 0x3f: Vector: 700 (Program Check) at [c000000007eb3d40]
      pc: c000000000050074: restore_gprs+0xc0/0x148
      lr: 00000000b52a8184
      sp: ac57d360
     msr: 8000000100201030
    current = 0xc00000002c500000
    paca    = 0xc000000007dbfc00     softe: 0     irq_happened: 0x00
      pid   = 34535, comm = Pooled Thread #
  R00 = 00000000b52a8184   R16 = 00000000b3e48fda
  R01 = 00000000ac57d360   R17 = 00000000ade79bd8
  R02 = 00000000ac586930   R18 = 000000000fac9bcc
  R03 = 00000000ade60000   R19 = 00000000ac57f930
  R04 = 00000000f6624918   R20 = 00000000ade79be8
  R05 = 00000000f663f238   R21 = 00000000ac218a54
  R06 = 0000000000000002   R22 = 000000000f956280
  R07 = 0000000000000008   R23 = 000000000000007e
  R08 = 000000000000000a   R24 = 000000000000000c
  R09 = 00000000b6e69160   R25 = 00000000b424cf00
  R10 = 0000000000000181   R26 = 00000000f66256d4
  R11 = 000000000f365ec0   R27 = 00000000b6fdcdd0
  R12 = 00000000f66400f0   R28 = 0000000000000001
  R13 = 00000000ada71900   R29 = 00000000ade5a300
  R14 = 00000000ac2185a8   R30 = 00000000f663f238
  R15 = 0000000000000004   R31 = 00000000f6624918
  pc  = c000000000050074 restore_gprs+0xc0/0x148
  cfar= c00000000004fe28 dont_restore_vec+0x1c/0x1a4
  lr  = 00000000b52a8184
  msr = 8000000100201030   cr  = 24804888
  ctr = 0000000000000000   xer = 0000000000000000   trap =  700

This moves tm_recheckpoint to a C function and moves the tm_restore_sprs into
that function.  It then adds IRQ disabling over the trechkpt critical section.
It also sets the TEXASR FS in the signals code to ensure this is never set now
that we explictly write the TM sprs in tm_recheckpoint.

Signed-off-by: Michael Neuling <mikey@neuling.org>
cc: stable@vger.kernel.org
---
 arch/powerpc/kernel/process.c   | 30 +++++++++++++++++++++++++-----
 arch/powerpc/kernel/signal_32.c |  2 ++
 arch/powerpc/kernel/signal_64.c |  2 ++
 arch/powerpc/kernel/tm.S        |  2 +-
 4 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index af064d2..913334c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -610,6 +610,31 @@ out_and_saveregs:
 	tm_save_sprs(thr);
 }
 
+extern void __tm_recheckpoint(struct thread_struct *thread,
+			      unsigned long orig_msr);
+
+void tm_recheckpoint(struct thread_struct *thread,
+		     unsigned long orig_msr)
+{
+	unsigned long flags;
+
+	/* We really can't be interrupted here as the TEXASR registers can't
+	 * change and later in the trecheckpoint code, we have a userspace R1.
+	 * So let's hard disable over this region.
+	 */
+	local_irq_save(flags);
+	hard_irq_disable();
+
+	/* The TM SPRs are restored here, so that TEXASR.FS can be set
+	 * before the trecheckpoint and no explosion occurs.
+	 */
+	tm_restore_sprs(thread);
+
+	__tm_recheckpoint(thread, orig_msr);
+
+	local_irq_restore(flags);
+}
+
 static inline void tm_recheckpoint_new_task(struct task_struct *new)
 {
 	unsigned long msr;
@@ -628,11 +653,6 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new)
 	if (!new->thread.regs)
 		return;
 
-	/* The TM SPRs are restored here, so that TEXASR.FS can be set
-	 * before the trecheckpoint and no explosion occurs.
-	 */
-	tm_restore_sprs(&new->thread);
-
 	if (!MSR_TM_ACTIVE(new->thread.regs->msr))
 		return;
 	msr = new->thread.tm_orig_msr;
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index a67e00a..4e47db6 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -881,6 +881,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 	 * transactional versions should be loaded.
 	 */
 	tm_enable();
+	/* Make sure the transaction is marked as failed */
+	current->thread.tm_texasr |= TEXASR_FS;
 	/* This loads the checkpointed FP/VEC state, if used */
 	tm_recheckpoint(&current->thread, msr);
 	/* Get the top half of the MSR */
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 8d253c2..d501dc4 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -527,6 +527,8 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
 	}
 #endif
 	tm_enable();
+	/* Make sure the transaction is marked as failed */
+	current->thread.tm_texasr |= TEXASR_FS;
 	/* This loads the checkpointed FP/VEC state, if used */
 	tm_recheckpoint(&current->thread, msr);
 
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index ef47bcb..03567c0 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -307,7 +307,7 @@ dont_backup_fp:
 	 *	Call with IRQs off, stacks get all out of sync for
 	 *	some periods in here!
 	 */
-_GLOBAL(tm_recheckpoint)
+_GLOBAL(__tm_recheckpoint)
 	mfcr	r5
 	mflr	r0
 	stw	r5, 8(r1)
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/3] powerpc/tm: Remove unnecessary r1 save
  2014-03-28  5:40 [PATCH 1/3] powerpc/tm: Disable IRQ in tm_recheckpoint Michael Neuling
@ 2014-03-28  5:40 ` Michael Neuling
  2014-03-28  5:40 ` [PATCH 3/3] powerpc/tm: Add checking to treclaim/trechkpt Michael Neuling
  2014-04-04  9:19 ` [PATCH v2] powerpc/tm: Disable IRQ in tm_recheckpoint Michael Neuling
  2 siblings, 0 replies; 4+ messages in thread
From: Michael Neuling @ 2014-03-28  5:40 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Michael Neuling, Paul Mackerras

We save r1 to the scratch SPR and restore it from there after the trechkpt so
saving r1 to the paca is not needed.

Signed-off-by: Michael Neuling <mikey@neuling.org>
---
 arch/powerpc/kernel/tm.S | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 03567c0..0535c7f 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -320,8 +320,6 @@ _GLOBAL(__tm_recheckpoint)
 	 */
 	SAVE_NVGPRS(r1)
 
-	std	r1, PACAR1(r13)
-
 	/* Load complete register state from ts_ckpt* registers */
 
 	addi	r7, r3, PT_CKPT_REGS		/* Thread's ckpt_regs */
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/3] powerpc/tm: Add checking to treclaim/trechkpt
  2014-03-28  5:40 [PATCH 1/3] powerpc/tm: Disable IRQ in tm_recheckpoint Michael Neuling
  2014-03-28  5:40 ` [PATCH 2/3] powerpc/tm: Remove unnecessary r1 save Michael Neuling
@ 2014-03-28  5:40 ` Michael Neuling
  2014-04-04  9:19 ` [PATCH v2] powerpc/tm: Disable IRQ in tm_recheckpoint Michael Neuling
  2 siblings, 0 replies; 4+ messages in thread
From: Michael Neuling @ 2014-03-28  5:40 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Michael Neuling, Paul Mackerras

If we do a treclaim and we are not in TM suspend mode, it results in a TM bad
thing (ie. a 0x700 program check).  Similarly if we do a trechkpt and we have
an active transaction or TEXASR Failure Summary (FS) is not set, we also take a
TM bad thing.

This should never happen, but if it does (ie. a kernel bug), the cause is
almost impossible to debug as the GPR state is mostly userspace and hence we
don't get a call chain.

This adds some checks in these cases case a BUG_ON() (in asm) in case we ever
hit these cases.  It moves the register saving around to preserve r1 till later
also.

Signed-off-by: Michael Neuling <mikey@neuling.org>
---
 arch/powerpc/include/asm/reg.h |  1 +
 arch/powerpc/kernel/tm.S       | 38 +++++++++++++++++++++++++++++++++++---
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1a36b8e..c09627d 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -214,6 +214,7 @@
 #define SPRN_TFIAR	0x81	/* Transaction Failure Inst Addr   */
 #define SPRN_TEXASR	0x82	/* Transaction EXception & Summary */
 #define SPRN_TEXASRU	0x83	/* ''	   ''	   ''	 Upper 32  */
+#define   TEXASR_FS     __MASK(63-36) /* TEXASR Failure Summary */
 #define SPRN_TFHAR	0x80	/* Transaction Failure Handler Addr */
 #define SPRN_CTRLF	0x088
 #define SPRN_CTRLT	0x098
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 0535c7f..508c54b 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -10,6 +10,7 @@
 #include <asm/ppc-opcode.h>
 #include <asm/ptrace.h>
 #include <asm/reg.h>
+#include <asm/bug.h>
 
 #ifdef CONFIG_VSX
 /* See fpu.S, this is borrowed from there */
@@ -175,6 +176,13 @@ dont_backup_vec:
 	stfd    fr0,FPSTATE_FPSCR(r7)
 
 dont_backup_fp:
+	/* Do sanity check on MSR to make sure we are suspended */
+	li	r7, (MSR_TS_S)@higher
+	srdi	r6, r14, 32
+	and	r6, r6, r7
+1:	tdeqi   r6, 0
+	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+
 	/* The moment we treclaim, ALL of our GPRs will switch
 	 * to user register state.  (FPRs, CCR etc. also!)
 	 * Use an sprg and a tm_scratch in the PACA to shuffle.
@@ -383,12 +391,10 @@ restore_gprs:
 	/* ******************** CR,LR,CCR,MSR ********** */
 	ld	r4, _CTR(r7)
 	ld	r5, _LINK(r7)
-	ld	r6, _CCR(r7)
 	ld	r8, _XER(r7)
 
 	mtctr	r4
 	mtlr	r5
-	mtcr	r6
 	mtxer	r8
 
 	/* ******************** TAR ******************** */
@@ -404,7 +410,8 @@ restore_gprs:
 	li	r4, 0
 	mtmsrd	r4, 1
 
-	REST_4GPRS(0, r7)			/* GPR0-3 */
+	REST_GPR(0, r7)				/* GPR0 */
+	REST_2GPRS(2, r7)			/* GPR2-3 */
 	REST_GPR(4, r7)				/* GPR4 */
 	REST_4GPRS(8, r7)			/* GPR8-11 */
 	REST_2GPRS(12, r7)			/* GPR12-13 */
@@ -416,6 +423,31 @@ restore_gprs:
 	mtspr	SPRN_DSCR, r5
 	mtspr	SPRN_PPR, r6
 
+	/* Do final sanity check on TEXASR to make sure FS is set.  Do this
+	 * here before we load up the userspace r1 so any bugs we hit will get
+	 * a call chain */
+	mfspr	r5, SPRN_TEXASR
+	srdi	r5, r5, 16
+	li	r6, (TEXASR_FS)@h
+	and	r6, r6, r5
+1:	tdeqi	r6, 0
+	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+
+	/* Do final sanity check on MSR to make sure we are not transactional
+	 * or suspended
+	 */
+	mfmsr   r6
+	li	r5, (MSR_TS_MASK)@higher
+	srdi	r6, r6, 32
+	and	r6, r6, r5
+1:	tdnei   r6, 0
+	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
+
+	/* Restore CR */
+	ld	r6, _CCR(r7)
+	mtcr    r6
+
+	REST_GPR(1, r7)				/* GPR1 */
 	REST_GPR(5, r7)				/* GPR5-7 */
 	REST_GPR(6, r7)
 	ld	r7, GPR7(r7)
-- 
1.8.3.2

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v2] powerpc/tm: Disable IRQ in tm_recheckpoint
  2014-03-28  5:40 [PATCH 1/3] powerpc/tm: Disable IRQ in tm_recheckpoint Michael Neuling
  2014-03-28  5:40 ` [PATCH 2/3] powerpc/tm: Remove unnecessary r1 save Michael Neuling
  2014-03-28  5:40 ` [PATCH 3/3] powerpc/tm: Add checking to treclaim/trechkpt Michael Neuling
@ 2014-04-04  9:19 ` Michael Neuling
  2 siblings, 0 replies; 4+ messages in thread
From: Michael Neuling @ 2014-04-04  9:19 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Paul Mackerras

We can't take an IRQ when we're about to do a trechkpt as our GPR state is set
to user GPR values.

We've hit this when running some IBM Java stress tests in the lab resulting in
the following dump:

  cpu 0x3f: Vector: 700 (Program Check) at [c000000007eb3d40]
      pc: c000000000050074: restore_gprs+0xc0/0x148
      lr: 00000000b52a8184
      sp: ac57d360
     msr: 8000000100201030
    current = 0xc00000002c500000
    paca    = 0xc000000007dbfc00     softe: 0     irq_happened: 0x00
      pid   = 34535, comm = Pooled Thread #
  R00 = 00000000b52a8184   R16 = 00000000b3e48fda
  R01 = 00000000ac57d360   R17 = 00000000ade79bd8
  R02 = 00000000ac586930   R18 = 000000000fac9bcc
  R03 = 00000000ade60000   R19 = 00000000ac57f930
  R04 = 00000000f6624918   R20 = 00000000ade79be8
  R05 = 00000000f663f238   R21 = 00000000ac218a54
  R06 = 0000000000000002   R22 = 000000000f956280
  R07 = 0000000000000008   R23 = 000000000000007e
  R08 = 000000000000000a   R24 = 000000000000000c
  R09 = 00000000b6e69160   R25 = 00000000b424cf00
  R10 = 0000000000000181   R26 = 00000000f66256d4
  R11 = 000000000f365ec0   R27 = 00000000b6fdcdd0
  R12 = 00000000f66400f0   R28 = 0000000000000001
  R13 = 00000000ada71900   R29 = 00000000ade5a300
  R14 = 00000000ac2185a8   R30 = 00000000f663f238
  R15 = 0000000000000004   R31 = 00000000f6624918
  pc  = c000000000050074 restore_gprs+0xc0/0x148
  cfar= c00000000004fe28 dont_restore_vec+0x1c/0x1a4
  lr  = 00000000b52a8184
  msr = 8000000100201030   cr  = 24804888
  ctr = 0000000000000000   xer = 0000000000000000   trap =  700

This moves tm_recheckpoint to a C function and moves the tm_restore_sprs into
that function.  It then adds IRQ disabling over the trechkpt critical section.
It also sets the TEXASR FS in the signals code to ensure this is never set now
that we explictly write the TM sprs in tm_recheckpoint.

Signed-off-by: Michael Neuling <mikey@neuling.org>
cc: stable@vger.kernel.org

---
v2:
  Fix bug where TM SPRs are not saved when outside a transaction.

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index af064d2..31d0215 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -610,6 +610,31 @@ out_and_saveregs:
 	tm_save_sprs(thr);
 }
 
+extern void __tm_recheckpoint(struct thread_struct *thread,
+			      unsigned long orig_msr);
+
+void tm_recheckpoint(struct thread_struct *thread,
+		     unsigned long orig_msr)
+{
+	unsigned long flags;
+
+	/* We really can't be interrupted here as the TEXASR registers can't
+	 * change and later in the trecheckpoint code, we have a userspace R1.
+	 * So let's hard disable over this region.
+	 */
+	local_irq_save(flags);
+	hard_irq_disable();
+
+	/* The TM SPRs are restored here, so that TEXASR.FS can be set
+	 * before the trecheckpoint and no explosion occurs.
+	 */
+	tm_restore_sprs(thread);
+
+	__tm_recheckpoint(thread, orig_msr);
+
+	local_irq_restore(flags);
+}
+
 static inline void tm_recheckpoint_new_task(struct task_struct *new)
 {
 	unsigned long msr;
@@ -628,13 +653,10 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new)
 	if (!new->thread.regs)
 		return;
 
-	/* The TM SPRs are restored here, so that TEXASR.FS can be set
-	 * before the trecheckpoint and no explosion occurs.
-	 */
-	tm_restore_sprs(&new->thread);
-
-	if (!MSR_TM_ACTIVE(new->thread.regs->msr))
+	if (!MSR_TM_ACTIVE(new->thread.regs->msr)){
+		tm_restore_sprs(&new->thread);
 		return;
+	}
 	msr = new->thread.tm_orig_msr;
 	/* Recheckpoint to restore original checkpointed register state. */
 	TM_DEBUG("*** tm_recheckpoint of pid %d "
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index a67e00a..4e47db6 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -881,6 +881,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 	 * transactional versions should be loaded.
 	 */
 	tm_enable();
+	/* Make sure the transaction is marked as failed */
+	current->thread.tm_texasr |= TEXASR_FS;
 	/* This loads the checkpointed FP/VEC state, if used */
 	tm_recheckpoint(&current->thread, msr);
 	/* Get the top half of the MSR */
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 8d253c2..d501dc4 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -527,6 +527,8 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
 	}
 #endif
 	tm_enable();
+	/* Make sure the transaction is marked as failed */
+	current->thread.tm_texasr |= TEXASR_FS;
 	/* This loads the checkpointed FP/VEC state, if used */
 	tm_recheckpoint(&current->thread, msr);
 
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index ef47bcb..03567c0 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -307,7 +307,7 @@ dont_backup_fp:
 	 *	Call with IRQs off, stacks get all out of sync for
 	 *	some periods in here!
 	 */
-_GLOBAL(tm_recheckpoint)
+_GLOBAL(__tm_recheckpoint)
 	mfcr	r5
 	mflr	r0
 	stw	r5, 8(r1)

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2014-04-04  9:19 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-03-28  5:40 [PATCH 1/3] powerpc/tm: Disable IRQ in tm_recheckpoint Michael Neuling
2014-03-28  5:40 ` [PATCH 2/3] powerpc/tm: Remove unnecessary r1 save Michael Neuling
2014-03-28  5:40 ` [PATCH 3/3] powerpc/tm: Add checking to treclaim/trechkpt Michael Neuling
2014-04-04  9:19 ` [PATCH v2] powerpc/tm: Disable IRQ in tm_recheckpoint Michael Neuling

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).