LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v3 01/12] powerpc/book3s: Split the common exception prolog logic into two section.
From: Mahesh J Salgaonkar @ 2013-08-26 19:31 UTC (permalink / raw)
  To: linuxppc-dev, Benjamin Herrenschmidt
  Cc: Jeremy Kerr, Paul Mackerras, Anton Blanchard
In-Reply-To: <20130826192616.2855.18749.stgit@mars>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

This patch splits the common exception prolog logic into two parts to
facilitate reuse of existing code in the next patch. The second part will
be reused in the machine check exception routine in the next patch.

Please note that this patch does not introduce or change existing code
logic. Instead it is just a code movement.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/exception-64s.h |   67 ++++++++++++++++--------------
 1 file changed, 35 insertions(+), 32 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 07ca627..2386d40 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -248,6 +248,40 @@ do_kvm_##n:								\
 
 #define NOTEST(n)
 
+#define EXCEPTION_PROLOG_COMMON_2(n, area)				   \
+	std	r2,GPR2(r1);		/* save r2 in stackframe	*/ \
+	SAVE_4GPRS(3, r1);		/* save r3 - r6 in stackframe	*/ \
+	SAVE_2GPRS(7, r1);		/* save r7, r8 in stackframe	*/ \
+	ld	r9,area+EX_R9(r13);	/* move r9, r10 to stackframe	*/ \
+	ld	r10,area+EX_R10(r13);					   \
+	std	r9,GPR9(r1);						   \
+	std	r10,GPR10(r1);						   \
+	ld	r9,area+EX_R11(r13);	/* move r11 - r13 to stackframe	*/ \
+	ld	r10,area+EX_R12(r13);					   \
+	ld	r11,area+EX_R13(r13);					   \
+	std	r9,GPR11(r1);						   \
+	std	r10,GPR12(r1);						   \
+	std	r11,GPR13(r1);						   \
+	BEGIN_FTR_SECTION_NESTED(66);					   \
+	ld	r10,area+EX_CFAR(r13);					   \
+	std	r10,ORIG_GPR3(r1);					   \
+	END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);		   \
+	GET_LR(r9,area);		/* Get LR, later save to stack	*/ \
+	ld	r2,PACATOC(r13);	/* get kernel TOC into r2	*/ \
+	std	r9,_LINK(r1);						   \
+	mfctr	r10;			/* save CTR in stackframe	*/ \
+	std	r10,_CTR(r1);						   \
+	lbz	r10,PACASOFTIRQEN(r13);				   \
+	mfspr	r11,SPRN_XER;		/* save XER in stackframe	*/ \
+	std	r10,SOFTE(r1);						   \
+	std	r11,_XER(r1);						   \
+	li	r9,(n)+1;						   \
+	std	r9,_TRAP(r1);		/* set trap number		*/ \
+	li	r10,0;							   \
+	ld	r11,exception_marker@toc(r2);				   \
+	std	r10,RESULT(r1);		/* clear regs->result		*/ \
+	std	r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame	*/
+
 /*
  * The common exception prolog is used for all except a few exceptions
  * such as a segment miss on a kernel address.  We have to be prepared
@@ -281,38 +315,7 @@ do_kvm_##n:								\
 	beq	4f;			/* if from kernel mode		*/ \
 	ACCOUNT_CPU_USER_ENTRY(r9, r10);				   \
 	SAVE_PPR(area, r9, r10);					   \
-4:	std	r2,GPR2(r1);		/* save r2 in stackframe	*/ \
-	SAVE_4GPRS(3, r1);		/* save r3 - r6 in stackframe	*/ \
-	SAVE_2GPRS(7, r1);		/* save r7, r8 in stackframe	*/ \
-	ld	r9,area+EX_R9(r13);	/* move r9, r10 to stackframe	*/ \
-	ld	r10,area+EX_R10(r13);					   \
-	std	r9,GPR9(r1);						   \
-	std	r10,GPR10(r1);						   \
-	ld	r9,area+EX_R11(r13);	/* move r11 - r13 to stackframe	*/ \
-	ld	r10,area+EX_R12(r13);					   \
-	ld	r11,area+EX_R13(r13);					   \
-	std	r9,GPR11(r1);						   \
-	std	r10,GPR12(r1);						   \
-	std	r11,GPR13(r1);						   \
-	BEGIN_FTR_SECTION_NESTED(66);					   \
-	ld	r10,area+EX_CFAR(r13);					   \
-	std	r10,ORIG_GPR3(r1);					   \
-	END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);		   \
-	GET_LR(r9,area);		/* Get LR, later save to stack	*/ \
-	ld	r2,PACATOC(r13);	/* get kernel TOC into r2	*/ \
-	std	r9,_LINK(r1);						   \
-	mfctr	r10;			/* save CTR in stackframe	*/ \
-	std	r10,_CTR(r1);						   \
-	lbz	r10,PACASOFTIRQEN(r13);				   \
-	mfspr	r11,SPRN_XER;		/* save XER in stackframe	*/ \
-	std	r10,SOFTE(r1);						   \
-	std	r11,_XER(r1);						   \
-	li	r9,(n)+1;						   \
-	std	r9,_TRAP(r1);		/* set trap number		*/ \
-	li	r10,0;							   \
-	ld	r11,exception_marker@toc(r2);				   \
-	std	r10,RESULT(r1);		/* clear regs->result		*/ \
-	std	r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame	*/ \
+4:	EXCEPTION_PROLOG_COMMON_2(n, area)				   \
 	ACCOUNT_STOLEN_TIME
 
 /*

^ permalink raw reply related

* [RFC PATCH v3 02/12] powerpc/book3s: Introduce exclusive emergency stack for machine check exception.
From: Mahesh J Salgaonkar @ 2013-08-26 19:31 UTC (permalink / raw)
  To: linuxppc-dev, Benjamin Herrenschmidt
  Cc: Jeremy Kerr, Paul Mackerras, Anton Blanchard
In-Reply-To: <20130826192616.2855.18749.stgit@mars>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

This patch introduces an exclusive emergency stack for the machine check
exception. We use this emergency stack to handle machine check exceptions so
that we can save the MCE information (srr1, srr0, dar and dsisr) before
turning on the ME bit and be ready for re-entrancy. This helps us prevent
clobbering of MCE information in case of nested machine checks.

The reason for using an emergency stack over the normal kernel stack is that a
machine check might occur in the middle of setting up a stack frame, which may
result in improper use of the kernel stack.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/paca.h |    9 +++++++++
 arch/powerpc/kernel/setup_64.c  |   10 +++++++++-
 arch/powerpc/xmon/xmon.c        |    4 ++++
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 77c91e7..b4ca4e9 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -147,6 +147,15 @@ struct paca_struct {
 	 */
 	struct opal_machine_check_event *opal_mc_evt;
 #endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Exclusive emergency stack pointer for machine check exception. */
+	void *mc_emergency_sp;
+	/*
+	 * Flag to check whether we are in machine check early handler
+	 * and already using emergency stack.
+	 */
+	u16 in_mce;
+#endif
 
 	/* Stuff for accurate time accounting */
 	u64 user_time;			/* accumulated usermode TB ticks */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 389fb807..6f96af0 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -529,7 +529,8 @@ static void __init exc_lvl_early_init(void)
 
 /*
  * Stack space used when we detect a bad kernel stack pointer, and
- * early in SMP boots before relocation is enabled.
+ * early in SMP boots before relocation is enabled. Exclusive emergency
+ * stack for machine checks.
  */
 static void __init emergency_stack_init(void)
 {
@@ -552,6 +553,13 @@ static void __init emergency_stack_init(void)
 		sp  = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
 		sp += THREAD_SIZE;
 		paca[i].emergency_sp = __va(sp);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+		/* emergency stack for machine check exception handling. */
+		sp  = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
+		sp += THREAD_SIZE;
+		paca[i].mc_emergency_sp = __va(sp);
+#endif
 	}
 }
 
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 96bf5bd..5f17adb 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2044,6 +2044,10 @@ static void dump_one_paca(int cpu)
 	DUMP(p, stab_addr, "lx");
 #endif
 	DUMP(p, emergency_sp, "p");
+#ifdef CONFIG_PPC_BOOK3S_64
+	DUMP(p, mc_emergency_sp, "p");
+	DUMP(p, in_mce, "x");
+#endif
 	DUMP(p, data_offset, "lx");
 	DUMP(p, hw_cpu_id, "x");
 	DUMP(p, cpu_start, "x");

^ permalink raw reply related

* [RFC PATCH v3 03/12] powerpc/book3s: handle machine check in Linux host.
From: Mahesh J Salgaonkar @ 2013-08-26 19:31 UTC (permalink / raw)
  To: linuxppc-dev, Benjamin Herrenschmidt
  Cc: Jeremy Kerr, Paul Mackerras, Anton Blanchard
In-Reply-To: <20130826192616.2855.18749.stgit@mars>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Move the machine check entry point into Linux. So far we were dependent on
firmware to decode MCE error details and hand over the high-level info to the OS.

This patch introduces early machine check routine that saves the MCE
information (srr1, srr0, dar and dsisr) to the emergency stack. We allocate
stack frame on emergency stack and set the r1 accordingly. This allows us to be
prepared to take another exception without losing context. One thing to note
here is that if we get another machine check while the ME bit is off, we risk a
checkstop. Hence we restrict ourselves to saving only MCE information and turn
the ME bit on. We use paca->in_mce flag to differentiate between first entry
and nested machine check entry which helps proper use of emergency stack. We
increment paca->in_mce every time we enter in early machine check handler and
decrement it while leaving. When we enter machine check early handler first
time (paca->in_mce == 0), we are sure nobody is using MC emergency stack and
allocate a stack frame at the start of the emergency stack. During subsequent
entry (paca->in_mce > 0), we know that r1 points inside emergency stack and we
allocate separate stack frame accordingly. This prevents us from clobbering MCE
information during nested machine checks.

The early machine check handler changes are placed under CPU_FTR_HVMODE
section. This makes sure that the early machine check handler will get executed
only in hypervisor kernel.

This is the code flow:

		Machine Check Interrupt
			|
			V
		   0x200 vector				  ME=0, IR=0, DR=0
			|
			V
	+-----------------------------------------------+
	|machine_check_pSeries_early:			| ME=0, IR=0, DR=0
	|	Alloc frame on emergency stack		|
	|	Save srr1, srr0, dar and dsisr on stack |
	+-----------------------------------------------+
			|
		(ME=1, IR=0, DR=0, RFID)
			|
			V
		machine_check_handle_early		  ME=1, IR=0, DR=0
			|
			V
	+-----------------------------------------------+
	|	machine_check_early (r3=pt_regs)	| ME=1, IR=0, DR=0
	|	Things to do: (in next patches)		|
	|		Flush SLB for SLB errors	|
	|		Flush TLB for TLB errors	|
	|		Decode and save MCE info	|
	+-----------------------------------------------+
			|
	(Fall through existing exception handler routine.)
			|
			V
		machine_check_pSeries			  ME=1, IR=0, DR=0
			|
		(ME=1, IR=1, DR=1, RFID)
			|
			V
		machine_check_common			  ME=1, IR=1, DR=1
			.
			.
			.


Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/asm-offsets.c    |    4 +
 arch/powerpc/kernel/exceptions-64s.S |  109 ++++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/traps.c          |   12 ++++
 3 files changed, 125 insertions(+)

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8207459..e0e8ebb 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -238,6 +238,10 @@ int main(void)
 	DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx));
 #endif /* CONFIG_PPC_STD_MMU_64 */
 	DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
+#ifdef CONFIG_PPC_BOOK3S_64
+	DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp));
+	DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce));
+#endif
 	DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
 	DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
 	DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 902ca3c..651a213 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -156,7 +156,11 @@ machine_check_pSeries_1:
 	HMT_MEDIUM_PPR_DISCARD
 	SET_SCRATCH0(r13)		/* save r13 */
 	EXCEPTION_PROLOG_0(PACA_EXMC)
+BEGIN_FTR_SECTION
+	b	machine_check_pSeries_early
+FTR_SECTION_ELSE
 	b	machine_check_pSeries_0
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
 
 	. = 0x300
 	.globl data_access_pSeries
@@ -404,6 +408,61 @@ denorm_exception_hv:
 
 	.align	7
 	/* moved from 0x200 */
+machine_check_pSeries_early:
+BEGIN_FTR_SECTION
+	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
+	/*
+	 * Register contents:
+	 * R12		= interrupt vector
+	 * R13		= PACA
+	 * R9		= CR
+	 * R11 & R12 is saved on PACA_EXMC
+	 *
+	 * Switch to mc_emergency stack and handle re-entrancy (though we
+	 * currently don't test for overflow). Save MCE registers srr1,
+	 * srr0, dar and dsisr and then set ME=1
+	 *
+	 * We use paca->in_mce to check whether this is the first entry or
+	 * nested machine check. We increment paca->in_mce to track nested
+	 * machine checks.
+	 *
+	 * If this is the first entry then set stack pointer to
+	 * paca->mc_emergency_sp, otherwise r1 is already pointing to
+	 * stack frame on mc_emergency stack.
+	 *
+	 * NOTE: We are here with MSR_ME=0 (off), which means we risk a
+	 * checkstop if we get another machine check exception before we do
+	 * rfid with MSR_ME=1.
+	 */
+	mr	r11,r1			/* Save r1 */
+	lhz	r10,PACA_IN_MCE(r13)
+	cmpwi	r10,0			/* Are we in nested machine check */
+	bne	0f			/* Yes, we are. */
+	/* First machine check entry */
+	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency stack */
+0:	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack frame */
+	addi	r10,r10,1		/* increment paca->in_mce */
+	sth	r10,PACA_IN_MCE(r13)
+	std	r11,GPR1(r1)		/* Save r1 on the stack. */
+	std	r11,0(r1)		/* make stack chain pointer */
+	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
+	std	r11,_NIP(r1)
+	mfspr	r11,SPRN_SRR1		/* Save SRR1 */
+	std	r11,_MSR(r1)
+	mfspr	r11,SPRN_DAR		/* Save DAR */
+	std	r11,_DAR(r1)
+	mfspr	r11,SPRN_DSISR		/* Save DSISR */
+	std	r11,_DSISR(r1)
+	mfmsr	r11			/* get MSR value */
+	ori	r11,r11,MSR_ME		/* turn on ME bit */
+	ld	r12,PACAKBASE(r13)	/* get high part of &label */
+	LOAD_HANDLER(r12, machine_check_handle_early)
+	mtspr	SPRN_SRR0,r12
+	mtspr	SPRN_SRR1,r11
+	rfid
+	b	.	/* prevent speculative execution */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+
 machine_check_pSeries:
 	.globl machine_check_fwnmi
 machine_check_fwnmi:
@@ -681,6 +740,56 @@ machine_check_common:
 	bl	.machine_check_exception
 	b	.ret_from_except
 
+#define MACHINE_CHECK_HANDLER_WINDUP			\
+	/* Move original SRR0 and SRR1 into the respective regs */	\
+	ld	r9,_MSR(r1);				\
+	mtspr	SPRN_SRR1,r9;				\
+	ld	r3,_NIP(r1);				\
+	mtspr	SPRN_SRR0,r3;				\
+	REST_NVGPRS(r1);				\
+	ld	r9,_CTR(r1);				\
+	mtctr	r9;					\
+	ld	r9,_XER(r1);				\
+	mtxer	r9;					\
+BEGIN_FTR_SECTION_NESTED(66);				\
+	ld	r9,ORIG_GPR3(r1);			\
+	mtspr	SPRN_CFAR,r9;				\
+END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);	\
+	ld	r9,_LINK(r1);				\
+	mtlr	r9;					\
+	REST_GPR(0, r1);				\
+	REST_8GPRS(2, r1);				\
+	REST_GPR(10, r1);				\
+	ld	r11,_CCR(r1);				\
+	mtcr	r11;					\
+	/* Decrement paca->in_mce. */			\
+	lhz	r12,PACA_IN_MCE(r13);			\
+	subi	r12,r12,1;				\
+	sth	r12,PACA_IN_MCE(r13);			\
+	REST_GPR(11, r1);				\
+	REST_2GPRS(12, r1);				\
+	/* restore original r1. */			\
+	ld	r1,GPR1(r1)
+
+	/*
+	 * Handle machine check early in real mode. We come here with
+	 * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
+	 */
+	.align	7
+	.globl machine_check_handle_early
+machine_check_handle_early:
+BEGIN_FTR_SECTION
+	std	r9,_CCR(r1)	/* Save CR in stackframe */
+	std	r0,GPR0(r1)	/* Save r0 */
+	EXCEPTION_PROLOG_COMMON_2(0x200, PACA_EXMC)
+	bl	.save_nvgprs
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.machine_check_early
+	/* Deliver the machine check to host kernel in V mode. */
+	MACHINE_CHECK_HANDLER_WINDUP
+	b	machine_check_pSeries
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+
 	STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
 	STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt)
 	STD_EXCEPTION_COMMON(0x980, hdecrementer, .hdec_interrupt)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index e435bc0..e8d6bf1 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -284,6 +284,18 @@ void system_reset_exception(struct pt_regs *regs)
 
 	/* What should we do here? We could issue a shutdown or hard reset. */
 }
+
+/*
+ * This function is called in real mode. Strictly no printk's please.
+ *
+ * regs->nip and regs->msr contain srr0 and srr1.
+ */
+long machine_check_early(struct pt_regs *regs)
+{
+	/* TODO: handle/decode machine check reason */
+	return 0;
+}
+
 #endif
 
 /*

^ permalink raw reply related

* [RFC PATCH v3 04/12] Validate r1 value before going to host kernel in virtual mode.
From: Mahesh J Salgaonkar @ 2013-08-26 19:31 UTC (permalink / raw)
  To: linuxppc-dev, Benjamin Herrenschmidt
  Cc: Jeremy Kerr, Paul Mackerras, Anton Blanchard
In-Reply-To: <20130826192616.2855.18749.stgit@mars>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

We can get machine checks from any context. We need to make sure that
we handle all of them correctly. Once we decode MCE reason and generate
MCE event, we continue in host kernel in virtual mode so that we can
log/display it later. But before going to virtual mode we need to make
sure that r1 points to host kernel stack. But machine check can occur
in any context and r1 may not always point to host kernel stack. In cases
where we can not trust r1 value, we should queue up the MCE event and return
from interrupt. This patch implements the additional checks that help to
decide whether to deliver the machine check event to the host kernel right away
or queue it up and return.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/exceptions-64s.S |   72 ++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 651a213..d82ebac 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -785,8 +785,78 @@ BEGIN_FTR_SECTION
 	bl	.save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.machine_check_early
+
+	/*
+	 * We are now going to host kernel in V mode. We need to make sure
+	 * that r1 points to host kernel stack.
+	 *
+	 * If we are coming from userspace then we can continue in host kernel
+	 * in V mode.
+	 * But if we are coming from kernel and r1 does not point to kernel
+	 * stack then we can not continue, instead we return from here.
+	 */
+
+	ld	r12,_MSR(r1)
+	andi.	r11,r12,MSR_PR		/* See if coming from user. */
+	bne	3f			/* continue if we are. */
+
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+	/*
+	 * We are coming from kernel context. Check if we are coming from
+	 * guest. If yes, then we can continue. We will fall through
+	 * do_kvm_200->kvmppc_interrupt which will setup r1 correctly.
+	 */
+	lbz	r11,HSTATE_IN_GUEST(r13)
+	cmpwi	r11,0			/* Check if coming from guest */
+	bne	3f			/* continue if we are. */
+
+	/*
+	 * So, we did not come from guest. That leaves three possibilities:
+	 * a. We come from secondary thread which just came out of nap and
+	 *    about to call kvm_start_guest.
+	 * b. We come from secondary thread which is about to go to nap
+	 *    state (see kvm_no_guest()).
+	 * c. We come from opal context and r1 may be pointing to opal
+	 *    kernel stack.
+	 */
+
+	lbz	r11,HSTATE_HWTHREAD_STATE(r13)
+	cmpwi	r11,KVM_HWTHREAD_IN_NAP	/* Was it nap-ing? or about to */
+	beq	0f		/* Queue up event and return from interrupt */
+#endif
+
+	/*
+	 * So far we checked all possible situations where we can not
+	 * trust r1. Now we can trust r1.
+	 *	r1 < 0		r1 points to host kernel stack
+	 *	r1 > 0		r1 points to opal stack
+	 */
+	ld	r11,GPR1(r1)
+	cmpdi	r11,0			/* check if r1 is in kernel. */
+	blt+	3f			/* Continue if yes. */
+
+	/*
+	 * r1 points to opal stack. Queue up the MCE event and return
+	 * from the interrupt. But before that, check if this is an
+	 * un-recoverable exception. If yes, then stay on emergency
+	 * stack and panic.
+	 */
+0:	andi.	r11,r12,MSR_RI
+	bne	2f
+
+1:	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.unrecoverable_exception
+	b	1b
+
+	/*
+	 * Return from MC interrupt.
+	 * TODO: Queue up the MCE event so that we can log it later.
+	 */
+2:	MACHINE_CHECK_HANDLER_WINDUP
+	rfid
+
 	/* Deliver the machine check to host kernel in V mode. */
-	MACHINE_CHECK_HANDLER_WINDUP
+3:	MACHINE_CHECK_HANDLER_WINDUP
 	b	machine_check_pSeries
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 

^ permalink raw reply related

* [RFC PATCH v3 05/12] powerpc/book3s: Introduce a early machine check hook in cpu_spec.
From: Mahesh J Salgaonkar @ 2013-08-26 19:31 UTC (permalink / raw)
  To: linuxppc-dev, Benjamin Herrenschmidt
  Cc: Jeremy Kerr, Paul Mackerras, Anton Blanchard
In-Reply-To: <20130826192616.2855.18749.stgit@mars>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

This patch adds the early machine check function pointer in cputable for
CPU specific early machine check handling. The early machine check handler
routine will be called in real mode to handle SLB and TLB errors. This patch
just sets up a mechanism to invoke the CPU specific handler. The subsequent
patches will populate the function pointer.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/cputable.h |    7 +++++++
 arch/powerpc/kernel/traps.c         |    7 +++++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 6f3887d..d8c098e 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -90,6 +90,13 @@ struct cpu_spec {
 	 * if the error is fatal, 1 if it was fully recovered and 0 to
 	 * pass up (not CPU originated) */
 	int		(*machine_check)(struct pt_regs *regs);
+
+	/*
+	 * Processor specific early machine check handler which is
+	 * called in real mode to handle SLB and TLB errors.
+	 */
+	long		(*machine_check_early)(struct pt_regs *regs);
+
 };
 
 extern struct cpu_spec		*cur_cpu_spec;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index e8d6bf1..8b0a946 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -292,8 +292,11 @@ void system_reset_exception(struct pt_regs *regs)
  */
 long machine_check_early(struct pt_regs *regs)
 {
-	/* TODO: handle/decode machine check reason */
-	return 0;
+	long handled = 0;
+
+	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+		handled = cur_cpu_spec->machine_check_early(regs);
+	return handled;
 }
 
 #endif

^ permalink raw reply related

* [RFC PATCH v3 06/12] powerpc/book3s: Add flush_tlb operation in cpu_spec.
From: Mahesh J Salgaonkar @ 2013-08-26 19:32 UTC (permalink / raw)
  To: linuxppc-dev, Benjamin Herrenschmidt
  Cc: Jeremy Kerr, Paul Mackerras, Anton Blanchard
In-Reply-To: <20130826192616.2855.18749.stgit@mars>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

This patch introduces a flush_tlb operation in the cpu_spec structure. This
will help us to invoke the appropriate CPU-side TLB flush routine. This patch
adds the foundation to invoke the CPU specific flush routine for the respective
architectures. Currently this patch introduces flush_tlb for p7 and p8.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/cputable.h   |    5 ++++
 arch/powerpc/kernel/cpu_setup_power.S |   38 +++++++++++++++++++++++----------
 arch/powerpc/kernel/cputable.c        |    8 +++++++
 arch/powerpc/kvm/book3s_hv_ras.c      |   18 +++-------------
 4 files changed, 44 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index d8c098e..d76e47b 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -97,6 +97,11 @@ struct cpu_spec {
 	 */
 	long		(*machine_check_early)(struct pt_regs *regs);
 
+	/*
+	 * Processor specific routine to flush tlbs.
+	 */
+	void		(*flush_tlb)(unsigned long inval_selector);
+
 };
 
 extern struct cpu_spec		*cur_cpu_spec;
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 18b5b9c..37d1bb0 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -29,7 +29,7 @@ _GLOBAL(__setup_cpu_power7)
 	mtspr	SPRN_LPID,r0
 	mfspr	r3,SPRN_LPCR
 	bl	__init_LPCR
-	bl	__init_TLB
+	bl	__init_tlb_power7
 	mtlr	r11
 	blr
 
@@ -42,7 +42,7 @@ _GLOBAL(__restore_cpu_power7)
 	mtspr	SPRN_LPID,r0
 	mfspr	r3,SPRN_LPCR
 	bl	__init_LPCR
-	bl	__init_TLB
+	bl	__init_tlb_power7
 	mtlr	r11
 	blr
 
@@ -59,7 +59,7 @@ _GLOBAL(__setup_cpu_power8)
 	oris	r3, r3, LPCR_AIL_3@h
 	bl	__init_LPCR
 	bl	__init_HFSCR
-	bl	__init_TLB
+	bl	__init_tlb_power8
 	bl	__init_PMU_HV
 	mtlr	r11
 	blr
@@ -78,7 +78,7 @@ _GLOBAL(__restore_cpu_power8)
 	oris	r3, r3, LPCR_AIL_3@h
 	bl	__init_LPCR
 	bl	__init_HFSCR
-	bl	__init_TLB
+	bl	__init_tlb_power8
 	bl	__init_PMU_HV
 	mtlr	r11
 	blr
@@ -134,15 +134,31 @@ __init_HFSCR:
 	mtspr	SPRN_HFSCR,r3
 	blr
 
-__init_TLB:
-	/*
-	 * Clear the TLB using the "IS 3" form of tlbiel instruction
-	 * (invalidate by congruence class). P7 has 128 CCs, P8 has 512
-	 * so we just always do 512
-	 */
+/*
+ * Clear the TLB using the specified IS form of tlbiel instruction
+ * (invalidate by congruence class). P7 has 128 CCs, P8 has 512.
+ *
+ * r3 = IS field
+ */
+__init_tlb_power7:
+	li	r3,0xc00	/* IS field = 0b11 */
+_GLOBAL(__flush_tlb_power7)
+	li	r6,128
+	mtctr	r6
+	mr	r7,r3		/* IS field */
+	ptesync
+2:	tlbiel	r7
+	addi	r7,r7,0x1000
+	bdnz	2b
+	ptesync
+1:	blr
+
+__init_tlb_power8:
+	li	r3,0xc00	/* IS field = 0b11 */
+_GLOBAL(__flush_tlb_power8)
 	li	r6,512
 	mtctr	r6
-	li	r7,0xc00	/* IS field = 0b11 */
+	mr	r7,r3		/* IS field */
 	ptesync
 2:	tlbiel	r7
 	addi	r7,r7,0x1000
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 22973a7..cdbe115 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -71,6 +71,8 @@ extern void __restore_cpu_power7(void);
 extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
 extern void __restore_cpu_power8(void);
 extern void __restore_cpu_a2(void);
+extern void __flush_tlb_power7(unsigned long inval_selector);
+extern void __flush_tlb_power8(unsigned long inval_selector);
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_E500)
 extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -440,6 +442,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.oprofile_cpu_type	= "ppc64/ibm-compat-v1",
 		.cpu_setup		= __setup_cpu_power7,
 		.cpu_restore		= __restore_cpu_power7,
+		.flush_tlb		= __flush_tlb_power7,
 		.platform		= "power7",
 	},
 	{	/* 2.07-compliant processor, i.e. Power8 "architected" mode */
@@ -456,6 +459,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.oprofile_cpu_type	= "ppc64/ibm-compat-v1",
 		.cpu_setup		= __setup_cpu_power8,
 		.cpu_restore		= __restore_cpu_power8,
+		.flush_tlb		= __flush_tlb_power8,
 		.platform		= "power8",
 	},
 	{	/* Power7 */
@@ -474,6 +478,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.oprofile_type		= PPC_OPROFILE_POWER4,
 		.cpu_setup		= __setup_cpu_power7,
 		.cpu_restore		= __restore_cpu_power7,
+		.flush_tlb		= __flush_tlb_power7,
 		.platform		= "power7",
 	},
 	{	/* Power7+ */
@@ -492,6 +497,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.oprofile_type		= PPC_OPROFILE_POWER4,
 		.cpu_setup		= __setup_cpu_power7,
 		.cpu_restore		= __restore_cpu_power7,
+		.flush_tlb		= __flush_tlb_power7,
 		.platform		= "power7+",
 	},
 	{	/* Power8E */
@@ -510,6 +516,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.oprofile_type		= PPC_OPROFILE_INVALID,
 		.cpu_setup		= __setup_cpu_power8,
 		.cpu_restore		= __restore_cpu_power8,
+		.flush_tlb		= __flush_tlb_power8,
 		.platform		= "power8",
 	},
 	{	/* Power8 */
@@ -528,6 +535,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.oprofile_type		= PPC_OPROFILE_INVALID,
 		.cpu_setup		= __setup_cpu_power8,
 		.cpu_restore		= __restore_cpu_power8,
+		.flush_tlb		= __flush_tlb_power8,
 		.platform		= "power8",
 	},
 	{	/* Cell Broadband Engine */
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index a353c48..5c427b4 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -58,18 +58,6 @@ static void reload_slb(struct kvm_vcpu *vcpu)
 	}
 }
 
-/* POWER7 TLB flush */
-static void flush_tlb_power7(struct kvm_vcpu *vcpu)
-{
-	unsigned long i, rb;
-
-	rb = TLBIEL_INVAL_SET_LPID;
-	for (i = 0; i < POWER7_TLB_SETS; ++i) {
-		asm volatile("tlbiel %0" : : "r" (rb));
-		rb += 1 << TLBIEL_INVAL_SET_SHIFT;
-	}
-}
-
 /*
  * On POWER7, see if we can handle a machine check that occurred inside
  * the guest in real mode, without switching to the host partition.
@@ -96,7 +84,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
 				   DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
 		}
 		if (dsisr & DSISR_MC_TLB_MULTI) {
-			flush_tlb_power7(vcpu);
+			if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+				cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);
 			dsisr &= ~DSISR_MC_TLB_MULTI;
 		}
 		/* Any other errors we don't understand? */
@@ -113,7 +102,8 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
 		reload_slb(vcpu);
 		break;
 	case SRR1_MC_IFETCH_TLBMULTI:
-		flush_tlb_power7(vcpu);
+		if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+			cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);
 		break;
 	default:
 		handled = 0;

^ permalink raw reply related

* [RFC PATCH v3 07/12] powerpc/book3s: Flush SLB/TLBs if we get SLB/TLB machine check errors on power7.
From: Mahesh J Salgaonkar @ 2013-08-26 19:32 UTC (permalink / raw)
  To: linuxppc-dev, Benjamin Herrenschmidt
  Cc: Jeremy Kerr, Paul Mackerras, Anton Blanchard
In-Reply-To: <20130826192616.2855.18749.stgit@mars>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

If we get a machine check exception due to SLB or TLB errors, then flush
SLBs/TLBs and reload SLBs to recover. We do this in real mode before turning
on MMU. Otherwise we would run into nested machine checks.

If we get a machine check when we are in guest, then just flush the
SLBs and continue. This patch handles errors for power7. The next
patch will handle errors for power8.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/bitops.h |    5 +
 arch/powerpc/include/asm/mce.h    |   67 ++++++++++++++++
 arch/powerpc/kernel/Makefile      |    1 
 arch/powerpc/kernel/cputable.c    |    4 +
 arch/powerpc/kernel/mce_power.c   |  153 +++++++++++++++++++++++++++++++++++++
 5 files changed, 230 insertions(+)
 create mode 100644 arch/powerpc/include/asm/mce.h
 create mode 100644 arch/powerpc/kernel/mce_power.c

diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 910194e..a57f4bc 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -46,6 +46,11 @@
 #include <asm/asm-compat.h>
 #include <asm/synch.h>
 
+/* PPC bit number conversion */
+#define PPC_BIT(bit)		(0x8000000000000000UL >> (bit))
+#define PPC_BITLSHIFT(be)	(63 - (be))
+#define PPC_BITMASK(bs, be)	((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
+
 /*
  * clear_bit doesn't imply a memory barrier
  */
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
new file mode 100644
index 0000000..ba19073
--- /dev/null
+++ b/arch/powerpc/include/asm/mce.h
@@ -0,0 +1,67 @@
+/*
+ * Machine check exception header file.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_PPC64_MCE_H__
+#define __ASM_PPC64_MCE_H__
+
+#include <linux/bitops.h>
+
+/*
+ * Machine Check bits on power7 and power8
+ */
+#define P7_SRR1_MC_LOADSTORE(srr1)	((srr1) & PPC_BIT(42)) /* P8 too */
+
+/* SRR1 bits for machine check (On Power7 and Power8) */
+#define P7_SRR1_MC_IFETCH(srr1)	((srr1) & PPC_BITMASK(43, 45)) /* P8 too */
+
+#define P7_SRR1_MC_IFETCH_UE		(0x1 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_SLB_PARITY	(0x2 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_SLB_MULTIHIT	(0x3 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_SLB_BOTH	(0x4 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_TLB_MULTIHIT	(0x5 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_UE_TLB_RELOAD	(0x6 << PPC_BITLSHIFT(45)) /* P8 too */
+#define P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL	(0x7 << PPC_BITLSHIFT(45))
+
+/* SRR1 bits for machine check (On Power8) */
+#define P8_SRR1_MC_IFETCH_ERAT_MULTIHIT	(0x4 << PPC_BITLSHIFT(45))
+
+/* DSISR bits for machine check (On Power7 and Power8) */
+#define P7_DSISR_MC_UE			(PPC_BIT(48))	/* P8 too */
+#define P7_DSISR_MC_UE_TABLEWALK	(PPC_BIT(49))	/* P8 too */
+#define P7_DSISR_MC_ERAT_MULTIHIT	(PPC_BIT(52))	/* P8 too */
+#define P7_DSISR_MC_TLB_MULTIHIT_MFTLB	(PPC_BIT(53))	/* P8 too */
+#define P7_DSISR_MC_SLB_PARITY_MFSLB	(PPC_BIT(55))	/* P8 too */
+#define P7_DSISR_MC_SLB_MULTIHIT	(PPC_BIT(56))	/* P8 too */
+#define P7_DSISR_MC_SLB_MULTIHIT_PARITY	(PPC_BIT(57))	/* P8 too */
+
+/*
+ * DSISR bits for machine check (Power8) in addition to above.
+ * Secondary DERAT Multihit
+ */
+#define P8_DSISR_MC_ERAT_MULTIHIT_SEC	(PPC_BIT(54))
+
+/* SLB error bits */
+#define P7_DSISR_MC_SLB_ERRORS		(P7_DSISR_MC_ERAT_MULTIHIT | \
+					 P7_DSISR_MC_SLB_PARITY_MFSLB | \
+					 P7_DSISR_MC_SLB_MULTIHIT | \
+					 P7_DSISR_MC_SLB_MULTIHIT_PARITY)
+
+#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index a8619bf..a1aba53 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= mce_power.o
 obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
 obj-$(CONFIG_PPC_A2)		+= cpu_setup_a2.o
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index cdbe115..c28cc2c 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -73,6 +73,7 @@ extern void __restore_cpu_power8(void);
 extern void __restore_cpu_a2(void);
 extern void __flush_tlb_power7(unsigned long inval_selector);
 extern void __flush_tlb_power8(unsigned long inval_selector);
+extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_E500)
 extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -443,6 +444,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_setup		= __setup_cpu_power7,
 		.cpu_restore		= __restore_cpu_power7,
 		.flush_tlb		= __flush_tlb_power7,
+		.machine_check_early	= __machine_check_early_realmode_p7,
 		.platform		= "power7",
 	},
 	{	/* 2.07-compliant processor, i.e. Power8 "architected" mode */
@@ -479,6 +481,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_setup		= __setup_cpu_power7,
 		.cpu_restore		= __restore_cpu_power7,
 		.flush_tlb		= __flush_tlb_power7,
+		.machine_check_early	= __machine_check_early_realmode_p7,
 		.platform		= "power7",
 	},
 	{	/* Power7+ */
@@ -498,6 +501,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_setup		= __setup_cpu_power7,
 		.cpu_restore		= __restore_cpu_power7,
 		.flush_tlb		= __flush_tlb_power7,
+		.machine_check_early	= __machine_check_early_realmode_p7,
 		.platform		= "power7+",
 	},
 	{	/* Power8E */
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
new file mode 100644
index 0000000..645d722
--- /dev/null
+++ b/arch/powerpc/kernel/mce_power.c
@@ -0,0 +1,153 @@
+/*
+ * Machine check exception handling CPU-side for power7 and power8
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "mce_power: " fmt
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <asm/mmu.h>
+#include <asm/mce.h>
+
+/* flush SLBs and reload */
+static void flush_and_reload_slb(void)
+{
+	struct slb_shadow *slb;
+	unsigned long i, n;
+
+	if (!mmu_has_feature(MMU_FTR_SLB))
+		return;
+
+	/* Invalidate all SLBs */
+	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
+
+#ifdef CONFIG_KVM_BOOK3S_HANDLER
+	/*
+	 * If machine check is hit when in guest or in transition, we will
+	 * only flush the SLBs and continue.
+	 */
+	if (get_paca()->kvm_hstate.in_guest)
+		return;
+#endif
+
+	/* For host kernel, reload the SLBs from shadow SLB buffer. */
+	slb = get_slb_shadow();
+	if (!slb)
+		return;
+
+	n = min_t(u32, slb->persistent, SLB_MIN_SIZE);
+
+	/* Load up the SLB entries from shadow SLB */
+	for (i = 0; i < n; i++) {
+		unsigned long rb = slb->save_area[i].esid;
+		unsigned long rs = slb->save_area[i].vsid;
+
+		rb = (rb & ~0xFFFul) | i;
+		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
+	}
+}
+
+static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
+{
+	long handled = 1;
+
+	/*
+	 * flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
+	 * reset the error bits whenever we handle them so that at the end
+	 * we can check whether we handled all of them or not.
+	 * */
+	if (dsisr & slb_error_bits) {
+		flush_and_reload_slb();
+		/* reset error bits */
+		dsisr &= ~(slb_error_bits);
+	}
+	if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+		if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+			cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
+		/* reset error bits */
+		dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
+	}
+	/* Any other errors we don't understand? */
+	if (dsisr & 0xffffffffUL)
+		handled = 0;
+
+	return handled;
+}
+
+static long mce_handle_derror_p7(uint64_t dsisr)
+{
+	return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
+}
+
+static long mce_handle_common_ierror(uint64_t srr1)
+{
+	long handled = 0;
+
+	switch (P7_SRR1_MC_IFETCH(srr1)) {
+	case 0:
+		break;
+	case P7_SRR1_MC_IFETCH_SLB_PARITY:
+	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
+		/* flush and reload SLBs for SLB errors. */
+		flush_and_reload_slb();
+		handled = 1;
+		break;
+	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
+		if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
+			cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
+			handled = 1;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return handled;
+}
+
+static long mce_handle_ierror_p7(uint64_t srr1)
+{
+	long handled = 0;
+
+	handled = mce_handle_common_ierror(srr1);
+
+	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
+		flush_and_reload_slb();
+		handled = 1;
+	}
+	return handled;
+}
+
+long __machine_check_early_realmode_p7(struct pt_regs *regs)
+{
+	uint64_t srr1;
+	long handled = 1;
+
+	srr1 = regs->msr;
+
+	if (P7_SRR1_MC_LOADSTORE(srr1))
+		handled = mce_handle_derror_p7(regs->dsisr);
+	else
+		handled = mce_handle_ierror_p7(srr1);
+
+	/* TODO: Decode machine check reason. */
+	return handled;
+}

^ permalink raw reply related

* [RFC PATCH v3 08/12] powerpc/book3s: Flush SLB/TLBs if we get SLB/TLB machine check errors on power8.
From: Mahesh J Salgaonkar @ 2013-08-26 19:32 UTC (permalink / raw)
  To: linuxppc-dev, Benjamin Herrenschmidt
  Cc: Jeremy Kerr, Paul Mackerras, Anton Blanchard
In-Reply-To: <20130826192616.2855.18749.stgit@mars>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

This patch handles the memory errors on power8. If we get a machine check
exception due to SLB or TLB errors, then flush SLBs/TLBs and reload SLBs to
recover.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mce.h  |    3 +++
 arch/powerpc/kernel/cputable.c  |    4 ++++
 arch/powerpc/kernel/mce_power.c |   34 ++++++++++++++++++++++++++++++++++
 3 files changed, 41 insertions(+)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index ba19073..6866062 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -64,4 +64,7 @@
 					 P7_DSISR_MC_SLB_MULTIHIT | \
 					 P7_DSISR_MC_SLB_MULTIHIT_PARITY)
 
+#define P8_DSISR_MC_SLB_ERRORS		(P7_DSISR_MC_SLB_ERRORS | \
+					 P8_DSISR_MC_ERAT_MULTIHIT_SEC)
+
 #endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index c28cc2c..0195358 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -74,6 +74,7 @@ extern void __restore_cpu_a2(void);
 extern void __flush_tlb_power7(unsigned long inval_selector);
 extern void __flush_tlb_power8(unsigned long inval_selector);
 extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
 #endif /* CONFIG_PPC64 */
 #if defined(CONFIG_E500)
 extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -462,6 +463,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_setup		= __setup_cpu_power8,
 		.cpu_restore		= __restore_cpu_power8,
 		.flush_tlb		= __flush_tlb_power8,
+		.machine_check_early	= __machine_check_early_realmode_p8,
 		.platform		= "power8",
 	},
 	{	/* Power7 */
@@ -521,6 +523,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_setup		= __setup_cpu_power8,
 		.cpu_restore		= __restore_cpu_power8,
 		.flush_tlb		= __flush_tlb_power8,
+		.machine_check_early	= __machine_check_early_realmode_p8,
 		.platform		= "power8",
 	},
 	{	/* Power8 */
@@ -540,6 +543,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.cpu_setup		= __setup_cpu_power8,
 		.cpu_restore		= __restore_cpu_power8,
 		.flush_tlb		= __flush_tlb_power8,
+		.machine_check_early	= __machine_check_early_realmode_p8,
 		.platform		= "power8",
 	},
 	{	/* Cell Broadband Engine */
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 645d722..949d102 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -151,3 +151,37 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs)
 	/* TODO: Decode machine check reason. */
 	return handled;
 }
+
+static long mce_handle_ierror_p8(uint64_t srr1)
+{
+	long handled = 0;
+
+	handled = mce_handle_common_ierror(srr1);
+
+	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
+		flush_and_reload_slb();
+		handled = 1;
+	}
+	return handled;
+}
+
+static long mce_handle_derror_p8(uint64_t dsisr)
+{
+	return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
+}
+
+long __machine_check_early_realmode_p8(struct pt_regs *regs)
+{
+	uint64_t srr1;
+	long handled = 1;
+
+	srr1 = regs->msr;
+
+	if (P7_SRR1_MC_LOADSTORE(srr1))
+		handled = mce_handle_derror_p8(regs->dsisr);
+	else
+		handled = mce_handle_ierror_p8(srr1);
+
+	/* TODO: Decode machine check reason. */
+	return handled;
+}

^ permalink raw reply related

* [RFC PATCH v3 09/12] powerpc/book3s: Decode and save machine check event.
From: Mahesh J Salgaonkar @ 2013-08-26 19:32 UTC (permalink / raw)
  To: linuxppc-dev, Benjamin Herrenschmidt
  Cc: Jeremy Kerr, Paul Mackerras, Anton Blanchard
In-Reply-To: <20130826192616.2855.18749.stgit@mars>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Now that we handle machine check in linux, the MCE decoding should also
take place in linux host. This info is crucial to log before we go down
in case we can not handle the machine check errors. This patch decodes
and populates a machine check event which contains high-level, meaningful
MCE information.

We do this in real mode C code with ME bit on. The MCE information is still
available on emergency stack (in pt_regs structure format). Even if we take
another exception at this point the MCE early handler will allocate a new
stack frame on top of current one. So when we return back here we still have
our MCE information safe on current stack.

We use a per cpu buffer to save high level MCE information. Each per cpu buffer
is an array of machine check event structures indexed by the per cpu counter
mce_nest_count. The mce_nest_count is incremented every time we enter
machine check early handler in real mode to get the current free slot
(index = mce_nest_count - 1). The mce_nest_count is decremented once the
MCE info is consumed by virtual mode machine check exception handler.

This patch provides save_mce_event(), get_mce_event() and release_mce_event()
generic routines that can be used by machine check handlers to populate and
retrieve the event. The routine release_mce_event() will free the event slot so
that it can be reused. Caller can invoke get_mce_event() with a release flag
either to release the event slot immediately OR keep it so that it can be
fetched again. The event slot can be also released anytime by invoking
release_mce_event().

This patch also updates kvm code to invoke get_mce_event to retrieve generic
mce event rather than paca->opal_mce_evt.

The KVM code always calls get_mce_event() with the release flag set to false
so that the event remains available for the linux host to consume.

If machine check occurs while we are in guest, KVM tries to handle the error.
If KVM is able to handle MC error successfully, it enters the guest and
delivers the machine check to guest. If KVM is not able to handle MC error, it
exits the guest and passes the control to linux host machine check handler
which then logs MC event and decides how to handle it in linux host. In failure
case, KVM needs to make sure that the MC event is available for linux host to
consume. Hence KVM always calls get_mce_event() with release flags set to false
and later it invokes release_mce_event() only if it succeeds in handling the error.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mce.h        |  124 +++++++++++++++++++++++++
 arch/powerpc/kernel/Makefile          |    2 
 arch/powerpc/kernel/mce.c             |  164 +++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/mce_power.c       |  116 ++++++++++++++++++++++-
 arch/powerpc/kvm/book3s_hv_ras.c      |   32 ++++--
 arch/powerpc/platforms/powernv/opal.c |   35 +++----
 6 files changed, 434 insertions(+), 39 deletions(-)
 create mode 100644 arch/powerpc/kernel/mce.c

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 6866062..d319161 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -66,5 +66,129 @@
 
 #define P8_DSISR_MC_SLB_ERRORS		(P7_DSISR_MC_SLB_ERRORS | \
 					 P8_DSISR_MC_ERAT_MULTIHIT_SEC)
+enum MCE_Version {
+	MCE_V1 = 1,
+};
+
+enum MCE_Severity {
+	MCE_SEV_NO_ERROR = 0,
+	MCE_SEV_WARNING = 1,
+	MCE_SEV_ERROR_SYNC = 2,
+	MCE_SEV_FATAL = 3,
+};
+
+enum MCE_Disposition {
+	MCE_DISPOSITION_RECOVERED = 0,
+	MCE_DISPOSITION_NOT_RECOVERED = 1,
+};
+
+enum MCE_Initiator {
+	MCE_INITIATOR_UNKNOWN = 0,
+	MCE_INITIATOR_CPU = 1,
+};
+
+enum MCE_ErrorType {
+	MCE_ERROR_TYPE_UNKNOWN = 0,
+	MCE_ERROR_TYPE_UE = 1,
+	MCE_ERROR_TYPE_SLB = 2,
+	MCE_ERROR_TYPE_ERAT = 3,
+	MCE_ERROR_TYPE_TLB = 4,
+};
+
+enum MCE_UeErrorType {
+	MCE_UE_ERROR_INDETERMINATE = 0,
+	MCE_UE_ERROR_IFETCH = 1,
+	MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH = 2,
+	MCE_UE_ERROR_LOAD_STORE = 3,
+	MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 4,
+};
+
+enum MCE_SlbErrorType {
+	MCE_SLB_ERROR_INDETERMINATE = 0,
+	MCE_SLB_ERROR_PARITY = 1,
+	MCE_SLB_ERROR_MULTIHIT = 2,
+};
+
+enum MCE_EratErrorType {
+	MCE_ERAT_ERROR_INDETERMINATE = 0,
+	MCE_ERAT_ERROR_PARITY = 1,
+	MCE_ERAT_ERROR_MULTIHIT = 2,
+};
+
+enum MCE_TlbErrorType {
+	MCE_TLB_ERROR_INDETERMINATE = 0,
+	MCE_TLB_ERROR_PARITY = 1,
+	MCE_TLB_ERROR_MULTIHIT = 2,
+};
+
+struct machine_check_event {
+	enum MCE_Version	version:8;	/* 0x00 */
+	uint8_t			in_use;		/* 0x01 */
+	enum MCE_Severity	severity:8;	/* 0x02 */
+	enum MCE_Initiator	initiator:8;	/* 0x03 */
+	enum MCE_ErrorType	error_type:8;	/* 0x04 */
+	enum MCE_Disposition	disposition:8;	/* 0x05 */
+	uint8_t			reserved_1[2];	/* 0x06 */
+	uint64_t		gpr3;		/* 0x08 */
+	uint64_t		srr0;		/* 0x10 */
+	uint64_t		srr1;		/* 0x18 */
+	union {					/* 0x20 */
+		struct {
+			enum MCE_UeErrorType ue_error_type:8;
+			uint8_t		effective_address_provided;
+			uint8_t		physical_address_provided;
+			uint8_t		reserved_1[5];
+			uint64_t	effective_address;
+			uint64_t	physical_address;
+			uint8_t		reserved_2[8];
+		} ue_error;
+
+		struct {
+			enum MCE_SlbErrorType slb_error_type:8;
+			uint8_t		effective_address_provided;
+			uint8_t		reserved_1[6];
+			uint64_t	effective_address;
+			uint8_t		reserved_2[16];
+		} slb_error;
+
+		struct {
+			enum MCE_EratErrorType erat_error_type:8;
+			uint8_t		effective_address_provided;
+			uint8_t		reserved_1[6];
+			uint64_t	effective_address;
+			uint8_t		reserved_2[16];
+		} erat_error;
+
+		struct {
+			enum MCE_TlbErrorType tlb_error_type:8;
+			uint8_t		effective_address_provided;
+			uint8_t		reserved_1[6];
+			uint64_t	effective_address;
+			uint8_t		reserved_2[16];
+		} tlb_error;
+	} u;
+};
+
+struct mce_error_info {
+	enum MCE_ErrorType error_type:8;
+	union {
+		enum MCE_UeErrorType ue_error_type:8;
+		enum MCE_SlbErrorType slb_error_type:8;
+		enum MCE_EratErrorType erat_error_type:8;
+		enum MCE_TlbErrorType tlb_error_type:8;
+	} u;
+	uint8_t		reserved[2];
+};
+
+#define MAX_MC_EVT	100
+
+/* Release flags for get_mce_event() */
+#define MCE_EVENT_RELEASE	true
+#define MCE_EVENT_DONTRELEASE	false
+
+extern void save_mce_event(struct pt_regs *regs, long handled,
+			   struct mce_error_info *mce_err, uint64_t addr);
+extern int get_mce_event(struct machine_check_event *mce, bool release);
+extern void release_mce_event(void);
 
 #endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index a1aba53..c3ae108 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o
-obj-$(CONFIG_PPC_BOOK3S_64)	+= mce_power.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o
 obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
 obj-$(CONFIG_PPC_A2)		+= cpu_setup_a2.o
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
new file mode 100644
index 0000000..aeecdf1
--- /dev/null
+++ b/arch/powerpc/kernel/mce.c
@@ -0,0 +1,164 @@
+/*
+ * Machine check exception handling.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "mce: " fmt
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+#include <linux/export.h>
+#include <asm/mce.h>
+
+static DEFINE_PER_CPU(int, mce_nest_count);
+static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
+
+static void mce_set_error_info(struct machine_check_event *mce,
+			       struct mce_error_info *mce_err)
+{
+	mce->error_type = mce_err->error_type;
+	switch (mce_err->error_type) {
+	case MCE_ERROR_TYPE_UE:
+		mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
+		break;
+	case MCE_ERROR_TYPE_SLB:
+		mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
+		break;
+	case MCE_ERROR_TYPE_ERAT:
+		mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
+		break;
+	case MCE_ERROR_TYPE_TLB:
+		mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
+		break;
+	case MCE_ERROR_TYPE_UNKNOWN:
+	default:
+		break;
+	}
+}
+
+/*
+ * Decode and save high level MCE information into per cpu buffer which
+ * is an array of machine_check_event structure.
+ */
+void save_mce_event(struct pt_regs *regs, long handled,
+		    struct mce_error_info *mce_err,
+		    uint64_t addr)
+{
+	uint64_t srr1;
+	int index = __get_cpu_var(mce_nest_count)++;
+	struct machine_check_event *mce = &__get_cpu_var(mce_event[index]);
+
+	/*
+	 * Return if we don't have enough space to log mce event.
+	 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
+	 * the check below will stop buffer overrun.
+	 */
+	if (index >= MAX_MC_EVT)
+		return;
+
+	/* Populate generic machine check info */
+	mce->version = MCE_V1;
+	mce->srr0 = regs->nip;
+	mce->srr1 = regs->msr;
+	mce->gpr3 = regs->gpr[3];
+	mce->in_use = 1;
+
+	mce->initiator = MCE_INITIATOR_CPU;
+	if (handled)
+		mce->disposition = MCE_DISPOSITION_RECOVERED;
+	else
+		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
+	mce->severity = MCE_SEV_ERROR_SYNC;
+
+	srr1 = regs->msr;
+
+	/*
+	 * Populate the mce error_type and type-specific error_type.
+	 */
+	mce_set_error_info(mce, mce_err);
+
+	if (!addr)
+		return;
+
+	if (mce->error_type == MCE_ERROR_TYPE_TLB) {
+		mce->u.tlb_error.effective_address_provided = true;
+		mce->u.tlb_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
+		mce->u.slb_error.effective_address_provided = true;
+		mce->u.slb_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
+		mce->u.erat_error.effective_address_provided = true;
+		mce->u.erat_error.effective_address = addr;
+	} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
+		mce->u.ue_error.effective_address_provided = true;
+		mce->u.ue_error.effective_address = addr;
+	}
+	return;
+}
+
+/*
+ * get_mce_event:
+ *	mce	Pointer to machine_check_event structure to be filled.
+ *	release Flag to indicate whether to free the event slot or not.
+ *		0 <= do not release the mce event. Caller will invoke
+ *		     release_mce_event() once event has been consumed.
+ *		1 <= release the slot.
+ *
+ *	return	1 = success
+ *		0 = failure
+ *
+ * get_mce_event() will be called by platform specific machine check
+ * handle routine and in KVM.
+ * When we call get_mce_event(), we are still in interrupt context and
+ * preemption will not be scheduled until ret_from_except() routine
+ * is called.
+ */
+int get_mce_event(struct machine_check_event *mce, bool release)
+{
+	int index = __get_cpu_var(mce_nest_count) - 1;
+	struct machine_check_event *mc_evt;
+	int ret = 0;
+
+	/* Sanity check */
+	if (index < 0)
+		return ret;
+
+	/* Check if we have MCE info to process. */
+	if (index < MAX_MC_EVT) {
+		mc_evt = &__get_cpu_var(mce_event[index]);
+		/* Copy the event structure and release the original */
+		if (mce)
+			*mce = *mc_evt;
+		if (release)
+			mc_evt->in_use = 0;
+		ret = 1;
+	}
+	/* Decrement the count to free the slot. */
+	if (release)
+		__get_cpu_var(mce_nest_count)--;
+
+	return ret;
+}
+
+void release_mce_event(void)
+{
+	get_mce_event(NULL, true);
+}
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 949d102..c153d9c 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -136,22 +136,116 @@ static long mce_handle_ierror_p7(uint64_t srr1)
 	return handled;
 }
 
+static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
+{
+	switch (P7_SRR1_MC_IFETCH(srr1)) {
+	case P7_SRR1_MC_IFETCH_SLB_PARITY:
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+		break;
+	case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+		break;
+	case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
+		mce_err->error_type = MCE_ERROR_TYPE_TLB;
+		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+		break;
+	case P7_SRR1_MC_IFETCH_UE:
+	case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
+		break;
+	case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type =
+				MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+		break;
+	}
+}
+
+static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
+{
+	mce_get_common_ierror(mce_err, srr1);
+	if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+	}
+}
+
+static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
+{
+	if (dsisr & P7_DSISR_MC_UE) {
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+	} else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
+		mce_err->error_type = MCE_ERROR_TYPE_UE;
+		mce_err->u.ue_error_type =
+				MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+	} else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
+		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+	} else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+	} else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+		mce_err->error_type = MCE_ERROR_TYPE_TLB;
+		mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+	} else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
+		mce_err->error_type = MCE_ERROR_TYPE_SLB;
+		mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+	}
+}
+
 long __machine_check_early_realmode_p7(struct pt_regs *regs)
 {
-	uint64_t srr1;
+	uint64_t srr1, addr;
 	long handled = 1;
+	struct mce_error_info mce_error_info = { 0 };
 
 	srr1 = regs->msr;
 
-	if (P7_SRR1_MC_LOADSTORE(srr1))
+	/*
+	 * Handle memory errors depending whether this was a load/store or
+	 * ifetch exception. Also, populate the mce error_type and
+	 * type-specific error_type from either SRR1 or DSISR, depending
+	 * whether this was a load/store or ifetch exception
+	 */
+	if (P7_SRR1_MC_LOADSTORE(srr1)) {
 		handled = mce_handle_derror_p7(regs->dsisr);
-	else
+		mce_get_derror_p7(&mce_error_info, regs->dsisr);
+		addr = regs->dar;
+	} else {
 		handled = mce_handle_ierror_p7(srr1);
+		mce_get_ierror_p7(&mce_error_info, srr1);
+		addr = regs->nip;
+	}
 
-	/* TODO: Decode machine check reason. */
+	save_mce_event(regs, handled, &mce_error_info, addr);
 	return handled;
 }
 
+static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
+{
+	mce_get_common_ierror(mce_err, srr1);
+	if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
+		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+	}
+}
+
+static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
+{
+	mce_get_derror_p7(mce_err, dsisr);
+	if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
+		mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+		mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+	}
+}
+
 static long mce_handle_ierror_p8(uint64_t srr1)
 {
 	long handled = 0;
@@ -172,16 +266,22 @@ static long mce_handle_derror_p8(uint64_t dsisr)
 
 long __machine_check_early_realmode_p8(struct pt_regs *regs)
 {
-	uint64_t srr1;
+	uint64_t srr1, addr;
 	long handled = 1;
+	struct mce_error_info mce_error_info = { 0 };
 
 	srr1 = regs->msr;
 
-	if (P7_SRR1_MC_LOADSTORE(srr1))
+	if (P7_SRR1_MC_LOADSTORE(srr1)) {
 		handled = mce_handle_derror_p8(regs->dsisr);
-	else
+		mce_get_derror_p8(&mce_error_info, regs->dsisr);
+		addr = regs->dar;
+	} else {
 		handled = mce_handle_ierror_p8(srr1);
+		mce_get_ierror_p8(&mce_error_info, srr1);
+		addr = regs->nip;
+	}
 
-	/* TODO: Decode machine check reason. */
+	save_mce_event(regs, handled, &mce_error_info, addr);
 	return handled;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 5c427b4..768a9f9 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -12,6 +12,7 @@
 #include <linux/kvm_host.h>
 #include <linux/kernel.h>
 #include <asm/opal.h>
+#include <asm/mce.h>
 
 /* SRR1 bits for machine check on POWER7 */
 #define SRR1_MC_LDSTERR		(1ul << (63-42))
@@ -67,9 +68,7 @@ static void reload_slb(struct kvm_vcpu *vcpu)
 static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
 {
 	unsigned long srr1 = vcpu->arch.shregs.msr;
-#ifdef CONFIG_PPC_POWERNV
-	struct opal_machine_check_event *opal_evt;
-#endif
+	struct machine_check_event mce_evt;
 	long handled = 1;
 
 	if (srr1 & SRR1_MC_LDSTERR) {
@@ -109,22 +108,31 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
 		handled = 0;
 	}
 
-#ifdef CONFIG_PPC_POWERNV
 	/*
-	 * See if OPAL has already handled the condition.
-	 * We assume that if the condition is recovered then OPAL
+	 * See if we have already handled the condition in the linux host.
+	 * We assume that if the condition is recovered then linux host
 	 * will have generated an error log event that we will pick
 	 * up and log later.
+	 * Don't release mce event now. If the condition is not
+	 * recovered we do guest exit and go back to linux host machine
+	 * check handler. Hence we need to make sure that current mce event
+	 * is available for linux host to consume.
 	 */
-	opal_evt = local_paca->opal_mc_evt;
-	if (opal_evt->version == OpalMCE_V1 &&
-	    (opal_evt->severity == OpalMCE_SEV_NO_ERROR ||
-	     opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED))
+	if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
+		goto out;
+
+	if (mce_evt.version == MCE_V1 &&
+	    (mce_evt.severity == MCE_SEV_NO_ERROR ||
+	     mce_evt.disposition == MCE_DISPOSITION_RECOVERED))
 		handled = 1;
 
+out:
+	/*
+	 * If we have handled the error, then release the mce event because
+	 * we will be delivering machine check to guest.
+	 */
 	if (handled)
-		opal_evt->in_use = 0;
-#endif
+		release_mce_event();
 
 	return handled;
 }
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 106301f..bcbbcdc 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -19,6 +19,7 @@
 #include <linux/slab.h>
 #include <asm/opal.h>
 #include <asm/firmware.h>
+#include <asm/mce.h>
 
 #include "powernv.h"
 
@@ -245,8 +246,7 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 
 int opal_machine_check(struct pt_regs *regs)
 {
-	struct opal_machine_check_event *opal_evt = get_paca()->opal_mc_evt;
-	struct opal_machine_check_event evt;
+	struct machine_check_event evt;
 	const char *level, *sevstr, *subtype;
 	static const char *opal_mc_ue_types[] = {
 		"Indeterminate",
@@ -271,30 +271,29 @@ int opal_machine_check(struct pt_regs *regs)
 		"Multihit",
 	};
 
-	/* Copy the event structure and release the original */
-	evt = *opal_evt;
-	opal_evt->in_use = 0;
+	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+		return 0;
 
 	/* Print things out */
-	if (evt.version != OpalMCE_V1) {
+	if (evt.version != MCE_V1) {
 		pr_err("Machine Check Exception, Unknown event version %d !\n",
 		       evt.version);
 		return 0;
 	}
 	switch(evt.severity) {
-	case OpalMCE_SEV_NO_ERROR:
+	case MCE_SEV_NO_ERROR:
 		level = KERN_INFO;
 		sevstr = "Harmless";
 		break;
-	case OpalMCE_SEV_WARNING:
+	case MCE_SEV_WARNING:
 		level = KERN_WARNING;
 		sevstr = "";
 		break;
-	case OpalMCE_SEV_ERROR_SYNC:
+	case MCE_SEV_ERROR_SYNC:
 		level = KERN_ERR;
 		sevstr = "Severe";
 		break;
-	case OpalMCE_SEV_FATAL:
+	case MCE_SEV_FATAL:
 	default:
 		level = KERN_ERR;
 		sevstr = "Fatal";
@@ -302,12 +301,12 @@ int opal_machine_check(struct pt_regs *regs)
 	}
 
 	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
-	       evt.disposition == OpalMCE_DISPOSITION_RECOVERED ?
+	       evt.disposition == MCE_DISPOSITION_RECOVERED ?
 	       "Recovered" : "[Not recovered");
 	printk("%s  Initiator: %s\n", level,
-	       evt.initiator == OpalMCE_INITIATOR_CPU ? "CPU" : "Unknown");
+	       evt.initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
 	switch(evt.error_type) {
-	case OpalMCE_ERROR_TYPE_UE:
+	case MCE_ERROR_TYPE_UE:
 		subtype = evt.u.ue_error.ue_error_type <
 			ARRAY_SIZE(opal_mc_ue_types) ?
 			opal_mc_ue_types[evt.u.ue_error.ue_error_type]
@@ -320,7 +319,7 @@ int opal_machine_check(struct pt_regs *regs)
 			printk("%s      Physial address: %016llx\n",
 			       level, evt.u.ue_error.physical_address);
 		break;
-	case OpalMCE_ERROR_TYPE_SLB:
+	case MCE_ERROR_TYPE_SLB:
 		subtype = evt.u.slb_error.slb_error_type <
 			ARRAY_SIZE(opal_mc_slb_types) ?
 			opal_mc_slb_types[evt.u.slb_error.slb_error_type]
@@ -330,7 +329,7 @@ int opal_machine_check(struct pt_regs *regs)
 			printk("%s    Effective address: %016llx\n",
 			       level, evt.u.slb_error.effective_address);
 		break;
-	case OpalMCE_ERROR_TYPE_ERAT:
+	case MCE_ERROR_TYPE_ERAT:
 		subtype = evt.u.erat_error.erat_error_type <
 			ARRAY_SIZE(opal_mc_erat_types) ?
 			opal_mc_erat_types[evt.u.erat_error.erat_error_type]
@@ -340,7 +339,7 @@ int opal_machine_check(struct pt_regs *regs)
 			printk("%s    Effective address: %016llx\n",
 			       level, evt.u.erat_error.effective_address);
 		break;
-	case OpalMCE_ERROR_TYPE_TLB:
+	case MCE_ERROR_TYPE_TLB:
 		subtype = evt.u.tlb_error.tlb_error_type <
 			ARRAY_SIZE(opal_mc_tlb_types) ?
 			opal_mc_tlb_types[evt.u.tlb_error.tlb_error_type]
@@ -351,11 +350,11 @@ int opal_machine_check(struct pt_regs *regs)
 			       level, evt.u.tlb_error.effective_address);
 		break;
 	default:
-	case OpalMCE_ERROR_TYPE_UNKNOWN:
+	case MCE_ERROR_TYPE_UNKNOWN:
 		printk("%s  Error type: Unknown\n", level);
 		break;
 	}
-	return evt.severity == OpalMCE_SEV_FATAL ? 0 : 1;
+	return evt.severity == MCE_SEV_FATAL ? 0 : 1;
 }
 
 static irqreturn_t opal_interrupt(int irq, void *data)

^ permalink raw reply related

* [RFC PATCH v3 10/12] Queue up and process delayed MCE events.
From: Mahesh J Salgaonkar @ 2013-08-26 19:32 UTC (permalink / raw)
  To: linuxppc-dev, Benjamin Herrenschmidt
  Cc: Jeremy Kerr, Paul Mackerras, Anton Blanchard
In-Reply-To: <20130826192616.2855.18749.stgit@mars>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

When machine check real mode handler can not continue into host kernel
in V mode, it returns from the interrupt and we lose the MCE event, which
never gets logged. In such a situation queue up the MCE event so that
we can log it later when we get back into host kernel with r1 pointing to
kernel stack e.g. during syscall exit.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mce.h        |    3 +
 arch/powerpc/kernel/entry_64.S        |    5 +
 arch/powerpc/kernel/exceptions-64s.S  |    6 +
 arch/powerpc/kernel/mce.c             |  154 +++++++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/opal.c |   97 ---------------------
 5 files changed, 167 insertions(+), 98 deletions(-)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index d319161..1c20731 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -190,5 +190,8 @@ extern void save_mce_event(struct pt_regs *regs, long handled,
 			   struct mce_error_info *mce_err, uint64_t addr);
 extern int get_mce_event(struct machine_check_event *mce, bool release);
 extern void release_mce_event(void);
+extern void machine_check_queue_event(void);
+extern void machine_check_process_queued_event(void);
+extern void machine_check_print_event_info(struct machine_check_event *evt);
 
 #endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2bd0b88..71bcd41 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -183,6 +183,11 @@ syscall_exit:
 	bl	.do_show_syscall_exit
 	ld	r3,RESULT(r1)
 #endif
+#ifdef CONFIG_PPC_BOOK3S_64
+BEGIN_FTR_SECTION
+	bl	.machine_check_process_queued_event
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+#endif
 	CURRENT_THREAD_INFO(r12, r1)
 
 	ld	r8,_MSR(r1)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index d82ebac..d20a456 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -850,9 +850,11 @@ BEGIN_FTR_SECTION
 
 	/*
 	 * Return from MC interrupt.
-	 * TODO: Queue up the MCE event so that we can log it later.
+	 * Queue up the MCE event so that we can log it later, while
+	 * returning from kernel or opal call.
 	 */
-2:	MACHINE_CHECK_HANDLER_WINDUP
+2:	bl	.machine_check_queue_event
+	MACHINE_CHECK_HANDLER_WINDUP
 	rfid
 
 	/* Deliver the machine check to host kernel in V mode. */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index aeecdf1..1cca4b6 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -31,6 +31,10 @@
 static DEFINE_PER_CPU(int, mce_nest_count);
 static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
 
+/* Queue for delayed MCE events. */
+static DEFINE_PER_CPU(int, mce_queue_count);
+static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
+
 static void mce_set_error_info(struct machine_check_event *mce,
 			       struct mce_error_info *mce_err)
 {
@@ -162,3 +166,153 @@ void release_mce_event(void)
 {
 	get_mce_event(NULL, true);
 }
+
+/*
+ * Queue up the MCE event which then can be handled later.
+ */
+void machine_check_queue_event(void)
+{
+	int index;
+	struct machine_check_event evt;
+
+	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+		return;
+
+	index = __get_cpu_var(mce_queue_count)++;
+	/* If queue is full, just return for now. */
+	if (index >= MAX_MC_EVT) {
+		__get_cpu_var(mce_queue_count)--;
+		return;
+	}
+	__get_cpu_var(mce_event_queue[index]) = evt;
+}
+
+/*
+ * process pending MCE event from the mce event queue. This function will be
+ * called during syscall exit.
+ */
+void machine_check_process_queued_event(void)
+{
+	int index;
+
+	preempt_disable();
+	/*
+	 * For now just print it to console.
+	 * TODO: log this error event to FSP or nvram.
+	 */
+	while (__get_cpu_var(mce_queue_count) > 0) {
+		index = __get_cpu_var(mce_queue_count) - 1;
+		machine_check_print_event_info(
+				&__get_cpu_var(mce_event_queue[index]));
+		__get_cpu_var(mce_queue_count)--;
+	}
+	preempt_enable();
+}
+
+void machine_check_print_event_info(struct machine_check_event *evt)
+{
+	const char *level, *sevstr, *subtype;
+	static const char *mc_ue_types[] = {
+		"Indeterminate",
+		"Instruction fetch",
+		"Page table walk ifetch",
+		"Load/Store",
+		"Page table walk Load/Store",
+	};
+	static const char *mc_slb_types[] = {
+		"Indeterminate",
+		"Parity",
+		"Multihit",
+	};
+	static const char *mc_erat_types[] = {
+		"Indeterminate",
+		"Parity",
+		"Multihit",
+	};
+	static const char *mc_tlb_types[] = {
+		"Indeterminate",
+		"Parity",
+		"Multihit",
+	};
+
+	/* Print things out */
+	if (evt->version != MCE_V1) {
+		pr_err("Machine Check Exception, Unknown event version %d !\n",
+		       evt->version);
+		return;
+	}
+	switch(evt->severity) {
+	case MCE_SEV_NO_ERROR:
+		level = KERN_INFO;
+		sevstr = "Harmless";
+		break;
+	case MCE_SEV_WARNING:
+		level = KERN_WARNING;
+		sevstr = "";
+		break;
+	case MCE_SEV_ERROR_SYNC:
+		level = KERN_ERR;
+		sevstr = "Severe";
+		break;
+	case MCE_SEV_FATAL:
+	default:
+		level = KERN_ERR;
+		sevstr = "Fatal";
+		break;
+	}
+
+	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
+	       evt->disposition == MCE_DISPOSITION_RECOVERED ?
+	       "Recovered" : "[Not recovered");
+	printk("%s  Initiator: %s\n", level,
+	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
+	switch(evt->error_type) {
+	case MCE_ERROR_TYPE_UE:
+		subtype = evt->u.ue_error.ue_error_type <
+			ARRAY_SIZE(mc_ue_types) ?
+			mc_ue_types[evt->u.ue_error.ue_error_type]
+			: "Unknown";
+		printk("%s  Error type: UE [%s]\n", level, subtype);
+		if (evt->u.ue_error.effective_address_provided)
+			printk("%s    Effective address: %016llx\n",
+			       level, evt->u.ue_error.effective_address);
+		if (evt->u.ue_error.physical_address_provided)
+			printk("%s      Physial address: %016llx\n",
+			       level, evt->u.ue_error.physical_address);
+		break;
+	case MCE_ERROR_TYPE_SLB:
+		subtype = evt->u.slb_error.slb_error_type <
+			ARRAY_SIZE(mc_slb_types) ?
+			mc_slb_types[evt->u.slb_error.slb_error_type]
+			: "Unknown";
+		printk("%s  Error type: SLB [%s]\n", level, subtype);
+		if (evt->u.slb_error.effective_address_provided)
+			printk("%s    Effective address: %016llx\n",
+			       level, evt->u.slb_error.effective_address);
+		break;
+	case MCE_ERROR_TYPE_ERAT:
+		subtype = evt->u.erat_error.erat_error_type <
+			ARRAY_SIZE(mc_erat_types) ?
+			mc_erat_types[evt->u.erat_error.erat_error_type]
+			: "Unknown";
+		printk("%s  Error type: ERAT [%s]\n", level, subtype);
+		if (evt->u.erat_error.effective_address_provided)
+			printk("%s    Effective address: %016llx\n",
+			       level, evt->u.erat_error.effective_address);
+		break;
+	case MCE_ERROR_TYPE_TLB:
+		subtype = evt->u.tlb_error.tlb_error_type <
+			ARRAY_SIZE(mc_tlb_types) ?
+			mc_tlb_types[evt->u.tlb_error.tlb_error_type]
+			: "Unknown";
+		printk("%s  Error type: TLB [%s]\n", level, subtype);
+		if (evt->u.tlb_error.effective_address_provided)
+			printk("%s    Effective address: %016llx\n",
+			       level, evt->u.tlb_error.effective_address);
+		break;
+	default:
+	case MCE_ERROR_TYPE_UNKNOWN:
+		printk("%s  Error type: Unknown\n", level);
+		break;
+	}
+}
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index bcbbcdc..f789514 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -247,29 +247,6 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 int opal_machine_check(struct pt_regs *regs)
 {
 	struct machine_check_event evt;
-	const char *level, *sevstr, *subtype;
-	static const char *opal_mc_ue_types[] = {
-		"Indeterminate",
-		"Instruction fetch",
-		"Page table walk ifetch",
-		"Load/Store",
-		"Page table walk Load/Store",
-	};
-	static const char *opal_mc_slb_types[] = {
-		"Indeterminate",
-		"Parity",
-		"Multihit",
-	};
-	static const char *opal_mc_erat_types[] = {
-		"Indeterminate",
-		"Parity",
-		"Multihit",
-	};
-	static const char *opal_mc_tlb_types[] = {
-		"Indeterminate",
-		"Parity",
-		"Multihit",
-	};
 
 	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
 		return 0;
@@ -280,80 +257,8 @@ int opal_machine_check(struct pt_regs *regs)
 		       evt.version);
 		return 0;
 	}
-	switch(evt.severity) {
-	case MCE_SEV_NO_ERROR:
-		level = KERN_INFO;
-		sevstr = "Harmless";
-		break;
-	case MCE_SEV_WARNING:
-		level = KERN_WARNING;
-		sevstr = "";
-		break;
-	case MCE_SEV_ERROR_SYNC:
-		level = KERN_ERR;
-		sevstr = "Severe";
-		break;
-	case MCE_SEV_FATAL:
-	default:
-		level = KERN_ERR;
-		sevstr = "Fatal";
-		break;
-	}
+	machine_check_print_event_info(&evt);
 
-	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
-	       evt.disposition == MCE_DISPOSITION_RECOVERED ?
-	       "Recovered" : "[Not recovered");
-	printk("%s  Initiator: %s\n", level,
-	       evt.initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
-	switch(evt.error_type) {
-	case MCE_ERROR_TYPE_UE:
-		subtype = evt.u.ue_error.ue_error_type <
-			ARRAY_SIZE(opal_mc_ue_types) ?
-			opal_mc_ue_types[evt.u.ue_error.ue_error_type]
-			: "Unknown";
-		printk("%s  Error type: UE [%s]\n", level, subtype);
-		if (evt.u.ue_error.effective_address_provided)
-			printk("%s    Effective address: %016llx\n",
-			       level, evt.u.ue_error.effective_address);
-		if (evt.u.ue_error.physical_address_provided)
-			printk("%s      Physial address: %016llx\n",
-			       level, evt.u.ue_error.physical_address);
-		break;
-	case MCE_ERROR_TYPE_SLB:
-		subtype = evt.u.slb_error.slb_error_type <
-			ARRAY_SIZE(opal_mc_slb_types) ?
-			opal_mc_slb_types[evt.u.slb_error.slb_error_type]
-			: "Unknown";
-		printk("%s  Error type: SLB [%s]\n", level, subtype);
-		if (evt.u.slb_error.effective_address_provided)
-			printk("%s    Effective address: %016llx\n",
-			       level, evt.u.slb_error.effective_address);
-		break;
-	case MCE_ERROR_TYPE_ERAT:
-		subtype = evt.u.erat_error.erat_error_type <
-			ARRAY_SIZE(opal_mc_erat_types) ?
-			opal_mc_erat_types[evt.u.erat_error.erat_error_type]
-			: "Unknown";
-		printk("%s  Error type: ERAT [%s]\n", level, subtype);
-		if (evt.u.erat_error.effective_address_provided)
-			printk("%s    Effective address: %016llx\n",
-			       level, evt.u.erat_error.effective_address);
-		break;
-	case MCE_ERROR_TYPE_TLB:
-		subtype = evt.u.tlb_error.tlb_error_type <
-			ARRAY_SIZE(opal_mc_tlb_types) ?
-			opal_mc_tlb_types[evt.u.tlb_error.tlb_error_type]
-			: "Unknown";
-		printk("%s  Error type: TLB [%s]\n", level, subtype);
-		if (evt.u.tlb_error.effective_address_provided)
-			printk("%s    Effective address: %016llx\n",
-			       level, evt.u.tlb_error.effective_address);
-		break;
-	default:
-	case MCE_ERROR_TYPE_UNKNOWN:
-		printk("%s  Error type: Unknown\n", level);
-		break;
-	}
 	return evt.severity == MCE_SEV_FATAL ? 0 : 1;
 }
 

^ permalink raw reply related

* [RFC PATCH v3 11/12] powerpc/powernv: Remove machine check handling in OPAL.
From: Mahesh J Salgaonkar @ 2013-08-26 19:32 UTC (permalink / raw)
  To: linuxppc-dev, Benjamin Herrenschmidt
  Cc: Jeremy Kerr, Paul Mackerras, Anton Blanchard
In-Reply-To: <20130826192616.2855.18749.stgit@mars>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Now that we are ready to handle machine check directly in linux, do not
register with firmware to handle machine check exception.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/opal.c |    8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index f789514..0170d19 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -83,14 +83,10 @@ static int __init opal_register_exception_handlers(void)
 	if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
 		return -ENODEV;
 
-	/* Hookup some exception handlers. We use the fwnmi area at 0x7000
-	 * to provide the glue space to OPAL
+	/* Hookup some exception handlers except machine check. We use the
+	 * fwnmi area at 0x7000 to provide the glue space to OPAL
 	 */
 	glue = 0x7000;
-	opal_register_exception_handler(OPAL_MACHINE_CHECK_HANDLER,
-					__pa(opal_mc_secondary_handler[0]),
-					glue);
-	glue += 128;
 	opal_register_exception_handler(OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
 					0, glue);
 	glue += 128;

^ permalink raw reply related

* [RFC PATCH v3 12/12] powerpc/powernv: Machine check exception handling.
From: Mahesh J Salgaonkar @ 2013-08-26 19:32 UTC (permalink / raw)
  To: linuxppc-dev, Benjamin Herrenschmidt
  Cc: Jeremy Kerr, Paul Mackerras, Anton Blanchard
In-Reply-To: <20130826192616.2855.18749.stgit@mars>

From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>

Add basic error handling in machine check exception handler.

- If MSR_RI isn't set, we can not recover.
- Check if disposition is set to MCE_DISPOSITION_RECOVERED.
- Check if address at fault is inside kernel address space, if not then send
  SIGBUS to process if we hit exception when in userspace.
- If address at fault is not provided and we get a synchronous machine
  check while in userspace, then kill the task.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mce.h        |    1 +
 arch/powerpc/kernel/mce.c             |   27 +++++++++++++++++++++
 arch/powerpc/platforms/powernv/opal.c |   43 ++++++++++++++++++++++++++++++++-
 3 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 1c20731..f72ea4c 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -193,5 +193,6 @@ extern void release_mce_event(void);
 extern void machine_check_queue_event(void);
 extern void machine_check_process_queued_event(void);
 extern void machine_check_print_event_info(struct machine_check_event *evt);
+extern uint64_t get_mce_fault_addr(struct machine_check_event *evt);
 
 #endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 1cca4b6..3100509 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -316,3 +316,30 @@ void machine_check_print_event_info(struct machine_check_event *evt)
 		break;
 	}
 }
+
+uint64_t get_mce_fault_addr(struct machine_check_event *evt)
+{
+	switch (evt->error_type) {
+	case MCE_ERROR_TYPE_UE:
+		if (evt->u.ue_error.effective_address_provided)
+			return evt->u.ue_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_SLB:
+		if (evt->u.slb_error.effective_address_provided)
+			return evt->u.slb_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_ERAT:
+		if (evt->u.erat_error.effective_address_provided)
+			return evt->u.erat_error.effective_address;
+		break;
+	case MCE_ERROR_TYPE_TLB:
+		if (evt->u.tlb_error.effective_address_provided)
+			return evt->u.tlb_error.effective_address;
+		break;
+	default:
+	case MCE_ERROR_TYPE_UNKNOWN:
+		break;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(get_mce_fault_addr);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 0170d19..2070970 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
 #include <asm/opal.h>
 #include <asm/firmware.h>
 #include <asm/mce.h>
@@ -240,6 +241,44 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 	return written;
 }
 
+static int opal_recover_mce(struct pt_regs *regs,
+					struct machine_check_event *evt)
+{
+	int recovered = 0;
+	uint64_t ea = get_mce_fault_addr(evt);
+
+	if (!(regs->msr & MSR_RI)) {
+		/* If MSR_RI isn't set, we cannot recover */
+		recovered = 0;
+	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
+		/* Platform corrected itself */
+		recovered = 1;
+	} else if (ea && !is_kernel_addr(ea)) {
+		/*
+		 * Faulting address is not in kernel text. We should be fine.
+		 * We need to find which process uses this address.
+		 * For now, kill the task if we have received exception when
+		 * in userspace.
+		 *
+		 * TODO: Queue up this address for hwpoisioning later.
+		 */
+		if (user_mode(regs) && !is_global_init(current)) {
+			_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+			recovered = 1;
+		} else
+			recovered = 0;
+	} else if (user_mode(regs) && !is_global_init(current) &&
+		evt->severity == MCE_SEV_ERROR_SYNC) {
+		/*
+		 * If we have received a synchronous error when in userspace
+		 * kill the task.
+		 */
+		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+		recovered = 1;
+	}
+	return recovered;
+}
+
 int opal_machine_check(struct pt_regs *regs)
 {
 	struct machine_check_event evt;
@@ -255,7 +294,9 @@ int opal_machine_check(struct pt_regs *regs)
 	}
 	machine_check_print_event_info(&evt);
 
-	return evt.severity == MCE_SEV_FATAL ? 0 : 1;
+	if (opal_recover_mce(regs, &evt))
+		return 1;
+	return 0;
 }
 
 static irqreturn_t opal_interrupt(int irq, void *data)

^ permalink raw reply related

* Re: Loading kernel on MPC86x
From: Scott Wood @ 2013-08-27  0:39 UTC (permalink / raw)
  To: Martin Hinner; +Cc: linuxppc-dev
In-Reply-To: <CAPVwjkzT6dyv-T_8fF-zm8VLKvGFgLkrEeBC4ib-p0mkFJ7=fA@mail.gmail.com>

On Mon, 2013-08-26 at 20:29 +0200, Martin Hinner wrote:
> On Mon, Aug 26, 2013 at 7:14 PM, Scott Wood <scottwood@freescale.com> wrote:
> >> that kernel must be at location 0. Another problem was that interrupts
> >> got re-enabled during execution of my bootloader (I am doing some
> >> syscalls -> goes  to Cisco rom),
> > Do you mean you're calling into the rom after Linux has already started
> > executing?  That's not normal for 8xx.
> 
> No, in bootloader. I have disabled interrupts and then later did a
> syscall which probably enabled them again. As I have overwritten some
> of Cisco ROM data it crashed (at random place).
> 
> >> I am also curious why CONFIG_PPC_EARLY_DEBUG_CPM uses
> >> CONFIG_PPC_EARLY_DEBUG_CPM_ADDR as pointer to transmit SMC buffer and
> >> not address of CPM/SCM parameter RAM ? TX buffer address can be read
> >> from SMC parameter RAM. Wouldn't this solution be more portable? At
> >> least this way I do it when I take over console from Cisco
> >> startup/rommon.
> >
> > The point was to keep things as simple as possible (e.g. for use in
> > temporary handcoded asm as needed).  This is a hacky debugging feature
> > that assumes you know what you're doing and can set the address to match
> > what the loader does (and that the loader's choice of address is
> > static).  If you have an improvement that keeps it simple, feel free to
> > send a patch.
> 
> How about making CONFIG_PPC_EARLY_DEBUG_CPM_PARRAM

PARAM

> that would carry address of SMCx parameter RAM (IMMR+0x04180 on MPC866) and this value
> would be used in case CONFIG_PPC_EARLY_DEBUG_CPM_ADDR is zero ? This
> would allow kernel hackers to still use
> CONFIG_PPC_EARLY_DEBUG_CPM_ADDR for assembly debugging (+legacy use)
> and everyone else can use it as a more reliable option that does not
> rely on particular bootloader behavior. Early debug is good even for
> end-users so as they can send debug output if anything goes wrong at
> early stage.

If it only works with CPM1 SMC, then that should be in the name of the
symbol.

> Anyway, difference between _PARRAM and _ADDR is only one lwz
> instruction, so I guess it is possible to completely discard _ADDR if
> there is no legacy use for it. I am also not sure if this works with
> SCC UART ports or only CPM SMC UART.

Given that testing can be a challenge on this old hardware (I don't have
easy access anymore, except maybe one 8xx board), I'd rather leave the
existing mechanism in place if you don't have the ability to test all
these cases.  The ability to not care about what type of CPM serial port
it is, is an important simplification.

-Scott

^ permalink raw reply

* Re: [PATCH] powerpc/hvsi: increase handshake timeout from 200ms to 400ms.
From: Benjamin Herrenschmidt @ 2013-08-27  0:52 UTC (permalink / raw)
  To: Eugene Surovegin; +Cc: linuxppc-dev
In-Reply-To: <1377543212-20432-1-git-send-email-ebs@ebshome.net>

On Mon, 2013-08-26 at 11:53 -0700, Eugene Surovegin wrote:
> This solves a problem observed in kexec'ed kernel where 200ms timeout is
> too short and bootconsole fails to initialize. Console did eventually
> become workable but much later into the boot process.
> 
> Observed timeout was around 260ms, but I decided to make it a little bigger
> for more reliability.
> 
> This has been tested on Power7 machine with Petitboot as a primary
> bootloader and PowerNV firmware.

Thanks !

I've been carrying a patch like that in my test stuff, the only reason I
hadn't merged it yet was that I thought there might be a problem under
the hood, such as us maybe not actually polling the FSP fast enough or
something, but so far it looks like it's just slow to respond.

Cheers,
Ben.

> Signed-off-by: Eugene Surovegin <surovegin@google.com>
> ---
>  drivers/tty/hvc/hvsi_lib.c |    4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/tty/hvc/hvsi_lib.c b/drivers/tty/hvc/hvsi_lib.c
> index 3396eb9..ac27671 100644
> --- a/drivers/tty/hvc/hvsi_lib.c
> +++ b/drivers/tty/hvc/hvsi_lib.c
> @@ -341,8 +341,8 @@ void hvsilib_establish(struct hvsi_priv *pv)
>  
>  	pr_devel("HVSI@%x:   ... waiting handshake\n", pv->termno);
>  
> -	/* Try for up to 200s */
> -	for (timeout = 0; timeout < 20; timeout++) {
> +	/* Try for up to 400ms */
> +	for (timeout = 0; timeout < 40; timeout++) {
>  		if (pv->established)
>  			goto established;
>  		if (!hvsi_get_packet(pv))

^ permalink raw reply

* Re: [alsa-devel] [PATCH v11] ASoC: fsl: Add S/PDIF machine driver
From: Nicolin Chen @ 2013-08-27  2:01 UTC (permalink / raw)
  To: Mark Brown
  Cc: mark.rutland, devicetree, alsa-devel, lars, Stephen Warren,
	s.hauer, tomasz.figa, rob.herring, p.zabel, shawn.guo,
	linuxppc-dev
In-Reply-To: <20130823191353.GK25263@sirena.org.uk>

On Fri, Aug 23, 2013 at 08:13:53PM +0100, Mark Brown wrote:
> On Fri, Aug 23, 2013 at 01:08:28PM -0600, Stephen Warren wrote:
> > On 08/23/2013 02:04 AM, Nicolin Chen wrote:
> > > This patch implements a device-tree-only machine driver for Freescale
> > > i.MX series Soc. It works with spdif_transmitter/spdif_receiver and
> > > fsl_spdif.c drivers.
> 
> > The binding looks reasonable to me now. Thanks.
> 
> Is that a Reviewed-by?

Sir,
I think this patch hasn't been applied yet, already been acked though.
Is there any problem in it?

Thank you.
Nicolin

^ permalink raw reply

* RE: [v3] powerpc/mpc85xx: Update the clock device tree nodes
From: Tang Yuantian-B29983 @ 2013-08-27  2:49 UTC (permalink / raw)
  To: Wood Scott-B07421
  Cc: devicetree@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	Mike Turquette
In-Reply-To: <1377536416.3033.18.camel@snotra.buserror.net>

PiA+ID4gPiArCQl9Ow0KPiA+ID4gPiArCQlwbGwxOiBwbGwxQDgyMCB7DQo+ID4gPiA+ICsJCQkj
Y2xvY2stY2VsbHMgPSA8MT47DQo+ID4gPiA+ICsJCQlyZWcgPSA8MHg4MjA+Ow0KPiA+ID4gPiAr
CQkJY29tcGF0aWJsZSA9ICJmc2wsY29yZS1wbGwtY2xvY2siOw0KPiA+ID4gPiArCQkJY2xvY2tz
ID0gPCZjbG9ja2dlbj47DQo+ID4gPiA+ICsJCQljbG9jay1vdXRwdXQtbmFtZXMgPSAicGxsMSIs
ICJwbGwxLWRpdjIiLCAicGxsMS0NCj4gZGl2NCI7DQo+ID4gPiA+ICsJCX07DQo+ID4gPg0KPiA+
ID4gUGxlYXNlIGxlYXZlIGEgYmxhbmsgbGluZSBiZXR3ZWVuIHByb3BlcnRpZXMgYW5kIG5vZGVz
LCBhbmQgYmV0d2Vlbg0KPiBub2Rlcy4NCj4gPiA+DQo+ID4gT0ssIHdpbGwgYWRkLg0KPiA+DQo+
ID4gPiBXaGF0IGRvZXMgcmVnIHJlcHJlc2VudD8gIFdoZXJlIGlzIHRoZSBiaW5kaW5nIGZvciB0
aGlzPw0KPiA+ID4NCj4gPiA+IFRoZSBjb21wYXRpYmxlIGlzIHRvbyB2YWd1ZS4NCj4gPiBSZWcg
aXMgcmVnaXN0ZXIgb2Zmc2V0Lg0KPiANCj4gV2l0aCBubyBzaXplPw0KDQpObyBzaXplIGlzIG5l
ZWRlZC4NCg0KPiANCj4gPiBJIHNob3VsZCBoYXZlIGhhZCBhIGJpbmRpbmcgZG9jdW1lbnQuDQo+
ID4gQWJvdXQgdGhlIGNvbXBhdGlibGUsIHlvdSBzaG91bGQgcG9pbnRlZCBpdCBvdXQgZWFybGll
ciBpbiBTREsgcmV2aWV3Lg0KPiANCj4gU29ycnksIGl0IGRvZXNuJ3Qgd29yayB0aGF0IHdheS4g
IEkgZG9uJ3Qga25vdyB3aHkgSSBkaWRuJ3Qgbm90aWNlIHRoaXMNCj4gc3R1ZmYgdGhlcmUgLS0g
dGhlIFNESyByZXZpZXcgd2FzIHByb2JhYmx5IHJ1c2hlZCwgd2l0aCBzb21lb25lIHNob3V0aW5n
DQo+ICJ1cmdlbnQiLiAgVGhlIFNESyBkb2VzIG5vdCBkaWN0YXRlIHdoYXQgZ29lcyB1cHN0cmVh
bS4gIERldmljZSB0cmVlDQo+IGJpbmRpbmdzIHNob3VsZCBnbyB1cHN0cmVhbSBmaXJzdC4NCj4g
DQpXaGVuIEkgc2VudCB0aGUgcGF0Y2ggdjEsIHRoZXJlIGlzIGEgYmluZGluZyBkb2N1bWVudCB3
aXRoIGl0LiBCdXQgSSBtaXNzZWQNCkl0IGluIHRoZSBwYXRjaCB2Mywgc28gd2hlbiBwYXRjaCB2
MyBnb3QgbWVyZ2VkLCB0aGUgYmluZGluZyBkb2N1bWVudCBkaWRuJ3QgZ2V0IG1lcmdlZC4NCkkg
d2lsbCBtYWtlIHRoZSBiaW5kaW5nIGdvIHVwc3RyZWFtIGZpcnN0IG5leHQgdGltZS4NCg0KPiA+
IEl0IGlzIHRvbyBsYXRlciB0byBjaGFuZ2Ugc2luY2UgdGhlIGNsb2NrIGRyaXZlciBpcyBtZXJn
ZWQgZm9yIG1vbnRocw0KPiA+IGFsdGhvdWdoIEkgc2VudCB0aGlzIHBhdGNoIGZpcnN0Lg0KPiAN
Cj4gSXQgc2hvdWxkIG5vdCBoYXZlIGdvbmUgaW4gd2l0aG91dCBhbiBhcHByb3ZlZCBiaW5kaW5n
LiAgSXQgc2VlbXMgaXQgd2VudA0KPiBpbiB2aWEgTWlrZSBUdXJxdWV0dGUgKHdoeSBpcyBhIG5v
bi1BUk0tc3BlY2lmaWMgdHJlZSB1c2luZyBsaW51eC1hcm0tDQo+IGtlcm5lbCBhcyBpdHMgbGlz
dCwgQlRXPykuICBObyBhY2sgZnJvbSBCZW4sIEt1bWFyLCBvciBtZSBpcyBzaG93biBpbiB0aGUN
Cj4gY29tbWl0Lg0KVGhlIExpbnV4IGNvbW1vbiBjbG9jayBmcmFtZXdvcmsgaXMgbm90IEFSTSBz
cGVjaWZpYy4gQW55IG90aGVyIGFyY2ggY2FuIHVzZSBpdC4NCkluIGZhY3QsIHRoaXMgY2xvY2sg
ZHJpdmVyIGlzIHRoZSBmaXJzdCBvbmUgdGhhdCB1c2UgY29tbW9uIGNsayBvbiBQUEMgYXJjaC4N
Ckkgd2lsbCBnZXQgdGhlIGFjayBmcm9tIHlvdSBndXlzIG5leHQgdGltZS4gSSBob3BlIGl0IGRv
ZXNuJ3QgbWFrZSBtZSB3YWl0IHRvbyBsb25nLg0KIA0KPiANCj4gSW4gYW55IGNhc2UsIHlvdSBj
YW4gcHJlc2VydmUgY29tcGF0aWJpbGl0eSB3aXRoIGV4aXN0aW5nIHRyZWVzIHdpdGhvdXQNCj4g
dXNpbmcgdGhpcyBjb21wYXRpYmxlIGluIG5ldyB0cmVlcy4gIFRoZSBkcml2ZXIgY2FuIGNoZWNr
IGZvciBib3RoDQo+IGNvbXBhdGlibGVzLCB3aXRoIGEgY29tbWVudCBpbmRpY2F0aW5nIHRoYXQg
ImZzbCxjb3JlLW11eC1jbG9jayIgaXMNCj4gZGVwcmVjYXRlZCBhbmQgZm9yIGNvbXBhdGliaWxp
dHkgb25seS4NCkl0IGlzIHN1Yi1jbG9jayBub2RlLCBpcyBpdCByZWFsbHkgbmVjZXNzYXJ5IHRv
IHRoaW5rIGFib3V0IGNvbXBhdGliaWxpdHk/DQpJIHRoaW5rIHRoYXQncyB0aGUgbm9kZSBjbG9j
a2dlbidzIHJlc3BvbnNpYmlsaXR5Lg0KDQo+IA0KPiA+IEJlc2lkZXMsIGl0IGlzIG5vdCB0b28g
YmFkIGJlY2F1c2Ugb3RoZXIgYXJjaCB1c2UgdGhlIHNpbWlsYXIgbmFtZS4NCj4gDQo+IEkgZG9u
J3QgZm9sbG93LiAgVGhpcyBpcyBhIHNwZWNpZmljIEZyZWVzY2FsZSByZWdpc3RlciBpbnRlcmZh
Y2UsIG5vdCBhDQo+IGdlbmVyYWwgY29uY2VwdC4NCj4gDQo+IEluIGFueSBjYXNlLCB3aGljaCAi
c2ltaWxhciBuYW1lcyIgYXJlIHlvdSByZWZlcnJpbmcgdG8/ICBBIHNlYXJjaCBpbg0KPiBhcmNo
L2FybS9ib290L2R0cyBmb3IgIm11eCIgd2l0aCAiY2xrIiBvciAiY2xvY2siIHR1cm5zIHVwDQo+
ICJhbGx3aW5uZXIsc3VuNGktYXBiMS1tdXgtY2xrIiB3aGljaCBpcyBtdWNoIG1vcmUgc3BlY2lm
aWMgdGhhbg0KPiAiZnNsLGNvcmUtbXV4LWNsb2NrIi4NCk9rLCBJIHdpbGwgY2hhbmdlIHRoZSBj
b21wYXRpYmxlIHN0cmluZy4NCkRvIHlvdSB0aGluayAiZnNsLHBwYy1jb3JlLSoiIGlzIG9rPw0K
DQpSZWdhcmRzLA0KWXVhbnRpYW4NCj4gDQo+IC1TY290dA0KPiANCg0K

^ permalink raw reply

* linux-next: manual merge of the pm tree with the powerpc tree
From: Stephen Rothwell @ 2013-08-27  3:05 UTC (permalink / raw)
  To: Rafael J. Wysocki, Rafael J. Wysocki
  Cc: Sudeep KarkadaNagesha, linux-kernel, Rob Herring, linux-next,
	Ian Munsie, Grant Likely, linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 749 bytes --]

Hi Rafael,

Today's linux-next merge of the pm tree got a conflict in
arch/powerpc/kernel/prom.c between commit dc0e643afc50 ("powerpc: Make
prom.c device tree accesses endian safe") from the powerpc tree and
commits 819d596568d8 ("powerpc: refactor of_get_cpu_node to support other
architectures") and 183912d352a2 ("of: move of_get_cpu_node
implementation to DT core library") from the pm tree.

I think that the latter superseded the conflicting fixes in the former,
so I just used the latter and can carry the fix as necessary (no action
is required).

P.S. Rafael, I used both your addresses because I have had a bounce from
the sisk.pl one in the past ...
-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH v2 2/2] Register bootmem pages
From: Benjamin Herrenschmidt @ 2013-08-27  3:44 UTC (permalink / raw)
  To: Nathan Fontenot; +Cc: linuxppc-dev
In-Reply-To: <5212DA31.2060105@linux.vnet.ibm.com>

On Mon, 2013-08-19 at 21:53 -0500, Nathan Fontenot wrote:
> Previous commit 46723bfa540... introduced a new config option
> HAVE_BOOTMEM_INFO_NODE that ended up breaking memory hot-remove for ppc
> when sparse vmemmap is not defined.
> 
> This patch defines HAVE_BOOTMEM_INFO_NODE for ppc and adds the call to
> register_page_bootmem_info_node. Without this we get a BUG_ON for memory
> hot remove in put_page_bootmem().
> 
> This also adds a stub for register_page_bootmem_memmap to allow ppc to build
> with sparse vmemmap defined.
> 
> Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
> ---

So I still feel very uncomfortable with that stuff ....

For example, x86 calls register_page_bootmem_info_node() at boot time,
which does that strange "get_page_bootmem" on the NODE_DATA itself at
boot time, we don't. Should we ?

Since we don't, what does that mean ? We don't remove the node info pages
on unplug ? Is that ok ?

There's a whole pile of totally undocumented / uncommented generic code
with horrible function names in there whose semantics are very very
unclear.

Now, if we call that thing, are we expected to have
register_page_bootmem_memmap() actually do something, right? I assume
that means actually calling get_page_bootmem() on the various struct
pages that comprise the vmemmap.

Well, we can probably implement that since we maintain a list of all the
vmemmap pages... However, we don't implement vmemmap_free(). Should we ?

This all confuses me...

Cheers,
Ben.

> 
> ---
>  arch/powerpc/mm/init_64.c |    4 ++++
>  arch/powerpc/mm/mem.c     |    9 +++++++++
>  mm/Kconfig                |    2 +-
>  3 files changed, 14 insertions(+), 1 deletion(-)
> 
> Index: linux/arch/powerpc/mm/init_64.c
> ===================================================================
> --- linux.orig/arch/powerpc/mm/init_64.c
> +++ linux/arch/powerpc/mm/init_64.c
> @@ -300,5 +300,9 @@ void vmemmap_free(unsigned long start, u
>  {
>  }
> 
> +void register_page_bootmem_memmap(unsigned long section_nr,
> +				  struct page *start_page, unsigned long size)
> +{
> +}
>  #endif /* CONFIG_SPARSEMEM_VMEMMAP */
> 
> Index: linux/arch/powerpc/mm/mem.c
> ===================================================================
> --- linux.orig/arch/powerpc/mm/mem.c
> +++ linux/arch/powerpc/mm/mem.c
> @@ -297,12 +297,21 @@ void __init paging_init(void)
>  }
>  #endif /* ! CONFIG_NEED_MULTIPLE_NODES */
> 
> +static void __init register_page_bootmem_info(void)
> +{
> +	int i;
> +
> +	for_each_online_node(i)
> +		register_page_bootmem_info_node(NODE_DATA(i));
> +}
> +
>  void __init mem_init(void)
>  {
>  #ifdef CONFIG_SWIOTLB
>  	swiotlb_init(0);
>  #endif
> 
> +	register_page_bootmem_info();
>  	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
>  	set_max_mapnr(max_pfn);
>  	free_all_bootmem();
> Index: linux/mm/Kconfig
> ===================================================================
> --- linux.orig/mm/Kconfig
> +++ linux/mm/Kconfig
> @@ -183,7 +183,7 @@ config MEMORY_HOTPLUG_SPARSE
>  config MEMORY_HOTREMOVE
>  	bool "Allow for memory hot remove"
>  	select MEMORY_ISOLATION
> -	select HAVE_BOOTMEM_INFO_NODE if X86_64
> +	select HAVE_BOOTMEM_INFO_NODE if (X86_64 || PPC64)
>  	depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE
>  	depends on MIGRATION
> 
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* Re: [PATCH 02/10] KVM: PPC: reserve a capability number for multitce support
From: Benjamin Herrenschmidt @ 2013-08-27  4:19 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: kvm, linux-doc, Alexey Kardashevskiy, linux-kernel, kvm-ppc,
	Alexander Graf, Paul Mackerras, linuxppc-dev
In-Reply-To: <20130826123753.GA22977@redhat.com>

On Mon, 2013-08-26 at 15:37 +0300, Gleb Natapov wrote:
> > Gleb, any chance you can put this (and the next one) into a tree to
> > "lock in" the numbers ?
> > 
> Applied it. Sorry for slow response, was on vocation and still go
> through the email backlog.

Thanks. Since it's not in a topic branch that I can pull, I'm going to
just cherry-pick them. However, they are in your "queue" branch, not
"next" branch. Should I still assume this is a stable branch and that
the numbers aren't going to change ?

Cheers,
Ben.

^ permalink raw reply

* Re: [PATCH 02/10] KVM: PPC: reserve a capability number for multitce support
From: Benjamin Herrenschmidt @ 2013-08-27  4:22 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: kvm, linux-doc, Alexey Kardashevskiy, linux-kernel, kvm-ppc,
	Alexander Graf, Paul Mackerras, linuxppc-dev
In-Reply-To: <1377577198.3819.102.camel@pasglop>

On Tue, 2013-08-27 at 14:19 +1000, Benjamin Herrenschmidt wrote:
> On Mon, 2013-08-26 at 15:37 +0300, Gleb Natapov wrote:
> > > Gleb, any chance you can put this (and the next one) into a tree to
> > > "lock in" the numbers ?
> > > 
> > Applied it. Sorry for slow response, was on vocation and still go
> > through the email backlog.
> 
> Thanks. Since it's not in a topic branch that I can pull, I'm going to
> just cherry-pick them. However, they are in your "queue" branch, not
> "next" branch. Should I still assume this is a stable branch and that
> the numbers aren't going to change ?

Oh and Alexey mentions that there are two capabilities and you only
applied one :-)

Cheers,
Ben.

^ permalink raw reply

* Re: [RFC PATCH v2 04/11] pstore: Add compression support to pstore
From: Aruna Balakrishnaiah @ 2013-08-27  5:19 UTC (permalink / raw)
  To: Luck, Tony, Seiji Aguchi
  Cc: jkenisto@linux.vnet.ibm.com, keescook@chromium.org,
	mahesh@linux.vnet.ibm.com, ccross@android.com,
	linux-kernel@vger.kernel.org, linuxppc-dev@ozlabs.org,
	cbouatmailru@gmail.com
In-Reply-To: <3908561D78D1C84285E8C5FCA982C28F31CC7AC0@ORSMSX106.amr.corp.intel.com>

On Friday 23 August 2013 04:47 AM, Luck, Tony wrote:
> <1>[  383.209057] RIP  [<ffffffff813d3946>] sysrq_handle_crash+0x16/0x20
> <4>[  383.209057]  RSP <ffff88006f551e80>
> <4>[  383.209057] CR2: 0000000000000000
> <4>[  383.209057] ---[ end trace 04a1cddad37b4b33 ]---
> <3>[  383.209057] pstore: compression failed for Part 2 returned -5
> <3>[  383.209057] pstore: Capture uncompressed oops/panic report of Part 2
> <3>[  383.209057] pstore: compression failed for Part 5 returned -5
>
> Interesting.  With ERST backend I didn't see these messages.  Traces in
> pstore recovered files go as far as the line before the "---[ end trace 04a1cddad37b4b33 ]---"
>
> Why the difference depending on which back end is in use?
>
> But I agree that we shouldn't have these messages.  They use up space
> in the persistent store that could be better used saving some more lines
> from earlier in the console log.

Yeah. We can remove these messages as they will add to the space consumed.
But it would be good to know why the compression failed in the efivars case.

Seiji,

Could you let us know the efivars buffer size with which pstore was
registered when the failure occurred?

Regards,
Aruna


>
> -Tony
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
>

^ permalink raw reply

* [PATCH] powerpc/btext: Fix CONFIG_PPC_EARLY_DEBUG_BOOTX on ppc32
From: Benjamin Herrenschmidt @ 2013-08-27  6:03 UTC (permalink / raw)
  To: linuxppc-dev

The "rmci" stuff only exists on 64-bit

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/kernel/btext.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 0428992..41c011c 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -52,7 +52,7 @@ extern void rmci_off(void);
 
 static inline void rmci_maybe_on(void)
 {
-#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
+#if defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) && defined(CONFIG_PPC64)
 	if (!(mfmsr() & MSR_DR))
 		rmci_on();
 #endif
@@ -60,7 +60,7 @@ static inline void rmci_maybe_on(void)
 
 static inline void rmci_maybe_off(void)
 {
-#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
+#if defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) && defined(CONFIG_PPC64)
 	if (!(mfmsr() & MSR_DR))
 		rmci_off();
 #endif

^ permalink raw reply related

* [PATCH] powerpc: Work around gcc miscompilation of __pa() on 64-bit
From: Paul Mackerras @ 2013-08-27  6:07 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Alan Modra

On 64-bit, __pa(&static_var) gets miscompiled by recent versions of
gcc as something like:

        addis 3,2,.LANCHOR1+4611686018427387904@toc@ha
        addi 3,3,.LANCHOR1+4611686018427387904@toc@l

This ends up effectively ignoring the offset, since its bottom 32 bits
are zero, and means that the result of __pa() still has 0xC in the top
nibble.  This happens with gcc 4.8.1, at least.

To work around this, for 64-bit we make __pa() use an AND operator,
and for symmetry, we make __va() use an OR operator.  Using an AND
operator rather than a subtraction ends up with slightly shorter code
since it can be done with a single clrldi instruction, whereas it
takes three instructions to form the constant (-PAGE_OFFSET) and add
it on.  (Note that MEMORY_START is always 0 on 64-bit.)

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/Kconfig            |  1 +
 arch/powerpc/include/asm/page.h | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index dbd9d3c..9cf59816d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -979,6 +979,7 @@ config RELOCATABLE
 	  must live at a different physical address than the primary
 	  kernel.
 
+# This value must have zeroes in the bottom 60 bits otherwise lots will break
 config PAGE_OFFSET
 	hex
 	default "0xc000000000000000"
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 988c812..b9f4262 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -211,9 +211,19 @@ extern long long virt_phys_offset;
 #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))
 #define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET)
 #else
+#ifdef CONFIG_PPC64
+/*
+ * gcc miscompiles (unsigned long)(&static_var) - PAGE_OFFSET
+ * with -mcmodel=medium, so we use & and | instead of - and + on 64-bit.
+ */
+#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET))
+#define __pa(x) ((unsigned long)(x) & 0x0fffffffffffffffUL)
+
+#else /* 32-bit, non book E */
 #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + PAGE_OFFSET - MEMORY_START))
 #define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + MEMORY_START)
 #endif
+#endif
 
 /*
  * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI,
-- 
1.8.4.rc3

^ permalink raw reply related

* Re: [PATCH 02/10] KVM: PPC: reserve a capability number for multitce support
From: Gleb Natapov @ 2013-08-27  6:40 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: kvm, linux-doc, Alexey Kardashevskiy, linux-kernel, kvm-ppc,
	Alexander Graf, Paul Mackerras, linuxppc-dev
In-Reply-To: <1377577198.3819.102.camel@pasglop>

On Tue, Aug 27, 2013 at 02:19:58PM +1000, Benjamin Herrenschmidt wrote:
> On Mon, 2013-08-26 at 15:37 +0300, Gleb Natapov wrote:
> > > Gleb, any chance you can put this (and the next one) into a tree to
> > > "lock in" the numbers ?
> > > 
> > Applied it. Sorry for slow response, was on vocation and still go
> > through the email backlog.
> 
> Thanks. Since it's not in a topic branch that I can pull, I'm going to
> just cherry-pick them. However, they are in your "queue" branch, not
> "next" branch. Should I still assume this is a stable branch and that
> the numbers aren't going to change ?
> 
Queue will become next after I test it, and if the test fails the
commit hash may change, but since you are going to cherry-pick, which
does not preserve the commit hash, it does not matter.

--
			Gleb.

^ permalink raw reply

* [PATCH] powerpc: Don't Oops when accessing /proc/powerpc/lparcfg without hypervisor
From: Benjamin Herrenschmidt @ 2013-08-27  6:41 UTC (permalink / raw)
  To: linuxppc-dev

/proc/powerpc/lparcfg is an ancient facility (though still actively used)
which allows access to some information about the partition when
running underneath a PAPR compliant hypervisor.

It makes no sense on non-pseries machines. However, currently, not only
can it be created on these if the kernel has pseries support, but accessing
it on such a machine will crash due to trying to do hypervisor calls.

In fact, it should also not do HV calls on older pseries that didn't have
a hypervisor either.

Finally, it has the plumbing to be a module but is a "bool" Kconfig option.

This fixes the whole lot by turning it into a machine_device_initcall
that is only created on pseries, and adding the necessary hypervisor
check before calling the H_GET_EM_PARMS hypercall.

CC: <stable@vger.kernel.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---

Next I'll move it to arch/powerpc/platforms/pseries but in a separate
patch.

 arch/powerpc/kernel/lparcfg.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index d92f387..e2a0a16 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -35,7 +35,13 @@
 #include <asm/vdso_datapage.h>
 #include <asm/vio.h>
 #include <asm/mmu.h>
+#include <asm/machdep.h>
 
+
+/*
+ * This isn't a module but we expose that to userspace
+ * via /proc so leave the definitions here
+ */
 #define MODULE_VERS "1.9"
 #define MODULE_NAME "lparcfg"
 
@@ -418,7 +424,8 @@ static void parse_em_data(struct seq_file *m)
 {
 	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
 
-	if (plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS)
+	if (firmware_has_feature(FW_FEATURE_LPAR) &&
+	    plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS)
 		seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]);
 }
 
@@ -677,7 +684,6 @@ static int lparcfg_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations lparcfg_fops = {
-	.owner		= THIS_MODULE,
 	.read		= seq_read,
 	.write		= lparcfg_write,
 	.open		= lparcfg_open,
@@ -699,14 +705,4 @@ static int __init lparcfg_init(void)
 	}
 	return 0;
 }
-
-static void __exit lparcfg_cleanup(void)
-{
-	remove_proc_subtree("powerpc/lparcfg", NULL);
-}
-
-module_init(lparcfg_init);
-module_exit(lparcfg_cleanup);
-MODULE_DESCRIPTION("Interface for LPAR configuration data");
-MODULE_AUTHOR("Dave Engebretsen");
-MODULE_LICENSE("GPL");
+machine_device_initcall(pseries, lparcfg_init);

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox