linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
From: james.morse@arm.com (James Morse)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH v3 12/20] arm64: kernel: Survive corrected RAS errors notified by SError
Date: Thu,  5 Oct 2017 20:18:04 +0100	[thread overview]
Message-ID: <20171005191812.5678-13-james.morse@arm.com> (raw)
In-Reply-To: <20171005191812.5678-1-james.morse@arm.com>

Prior to v8.2, SError is an uncontainable fatal exception. The v8.2 RAS
extensions use SError to notify software about RAS errors, these can be
contained by the ESB instruction.

An ACPI system with firmware-first may use SError as its 'SEI'
notification. Future patches may add code to 'claim' this SError as a
notification.

Other systems can distinguish these RAS errors from the SError ESR and
use the AET bits and additional data from RAS-Error registers to handle
the error. Future patches may add this kernel-first handling.

Without support for either of these we will panic(), even if we received
a corrected error. Add code to decode the severity of RAS errors. We can
safely ignore contained errors where the CPU can continue to make
progress. For all other errors we continue to panic().

Signed-off-by: James Morse <james.morse@arm.com>

==
I couldn't come up with a concise way to capture 'can continue to make
progress', so opted for 'blocking' instead.
---
 arch/arm64/include/asm/esr.h   | 10 ++++++++
 arch/arm64/include/asm/traps.h | 36 ++++++++++++++++++++++++++
 arch/arm64/kernel/traps.c      | 58 ++++++++++++++++++++++++++++++++++++++----
 3 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 66ed8b6b9976..8ea52f15bf1c 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -85,6 +85,15 @@
 #define ESR_ELx_WNR_SHIFT	(6)
 #define ESR_ELx_WNR		(UL(1) << ESR_ELx_WNR_SHIFT)
 
+/* Asynchronous Error Type */
+#define ESR_ELx_AET		(UL(0x7) << 10)
+
+#define ESR_ELx_AET_UC		(UL(0) << 10)	/* Uncontainable */
+#define ESR_ELx_AET_UEU		(UL(1) << 10)	/* Uncorrected Unrecoverable */
+#define ESR_ELx_AET_UEO		(UL(2) << 10)	/* Uncorrected Restartable */
+#define ESR_ELx_AET_UER		(UL(3) << 10)	/* Uncorrected Recoverable */
+#define ESR_ELx_AET_CE		(UL(6) << 10)	/* Corrected */
+
 /* Shared ISS field definitions for Data/Instruction aborts */
 #define ESR_ELx_SET_SHIFT	(11)
 #define ESR_ELx_SET_MASK	(UL(3) << ESR_ELx_SET_SHIFT)
@@ -99,6 +108,7 @@
 #define ESR_ELx_FSC		(0x3F)
 #define ESR_ELx_FSC_TYPE	(0x3C)
 #define ESR_ELx_FSC_EXTABT	(0x10)
+#define ESR_ELx_FSC_SERROR	(0x11)
 #define ESR_ELx_FSC_ACCESS	(0x08)
 #define ESR_ELx_FSC_FAULT	(0x04)
 #define ESR_ELx_FSC_PERM	(0x0C)
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index d131501c6222..8d2a1fff5c6b 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -19,6 +19,7 @@
 #define __ASM_TRAP_H
 
 #include <linux/list.h>
+#include <asm/esr.h>
 #include <asm/sections.h>
 
 struct pt_regs;
@@ -58,4 +59,39 @@ static inline int in_entry_text(unsigned long ptr)
 	return ptr >= (unsigned long)&__entry_text_start &&
 	       ptr < (unsigned long)&__entry_text_end;
 }
+
+static inline bool arm64_is_ras_serror(u32 esr)
+{
+	bool impdef = esr & ESR_ELx_ISV; /* aka IDS */
+
+	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
+		return !impdef;
+
+	return false;
+}
+
+/* Return the AET bits of an SError ESR, or 0/uncontainable/uncategorized */
+static inline u32 arm64_ras_serror_get_severity(u32 esr)
+{
+	u32 aet = esr & ESR_ELx_AET;
+
+	if (!arm64_is_ras_serror(esr)) {
+		/* Not a RAS error, we can't interpret the ESR */
+		return 0;
+	}
+
+	/*
+	 * AET is RES0 if 'the value returned in the DFSC field is not
+	 * [ESR_ELx_FSC_SERROR]'
+	 */
+	if ((esr & ESR_ELx_FSC) != ESR_ELx_FSC_SERROR) {
+		/* No severity information */
+		return 0;
+	}
+
+	return aet;
+}
+
+bool arm64_blocking_ras_serror(struct pt_regs *regs, unsigned int esr);
+void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr);
 #endif
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 210647d33bea..ab0f61d98fd6 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -709,17 +709,65 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
 }
 #endif
 
-asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
+void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr)
 {
-	nmi_enter();
-
 	console_verbose();
 
 	pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n",
 		smp_processor_id(), esr, esr_get_class_string(esr));
-	__show_regs(regs);
+	if (regs)
+		__show_regs(regs);
+
+	/* KVM may call this this from a preemptible context */
+	preempt_disable();
+
+	/*
+	 * panic() unmasks interrupts, which unmasks SError. Use nmi_panic()
+	 * to avoid re-entering panic.
+	 */
+	nmi_panic(regs, "Asynchronous SError Interrupt");
+
+	cpu_park_loop();
+	unreachable();
+}
+
+bool arm64_blocking_ras_serror(struct pt_regs *regs, unsigned int esr)
+{
+	u32 aet = arm64_ras_serror_get_severity(esr);
+
+	switch (aet) {
+	case ESR_ELx_AET_CE:	/* corrected error */
+	case ESR_ELx_AET_UEO:	/* restartable, not yet consumed */
+		/*
+		 * The CPU can make progress. We may take UEO again as
+		 * a more severe error.
+		 */
+		return false;
+
+	case ESR_ELx_AET_UEU:	/* Uncorrected Unrecoverable */
+	case ESR_ELx_AET_UER:	/* Uncorrected Recoverable */
+		/*
+		 * The CPU can't make progress. The exception may have
+		 * been imprecise.
+		 */
+		return true;
+
+	case ESR_ELx_AET_UC:	/* Uncontainable error */
+	default:
+		/* Error has been silently propagated */
+		arm64_serror_panic(regs, esr);
+	}
+}
+
+asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
+{
+	nmi_enter();
+
+	/* non-RAS errors are not containable */
+	if (!arm64_is_ras_serror(esr) || arm64_blocking_ras_serror(regs, esr))
+		arm64_serror_panic(regs, esr);
 
-	panic("Asynchronous SError Interrupt");
+	nmi_exit();
 }
 
 void __pte_error(const char *file, int line, unsigned long val)
-- 
2.13.3

  parent reply	other threads:[~2017-10-05 19:18 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-05 19:17 [PATCH v3 00/20] SError rework + RAS&IESB for firmware first support James Morse
2017-10-05 19:17 ` [PATCH v3 01/20] arm64: explicitly mask all exceptions James Morse
2017-10-11 16:30   ` Julien Thierry
2017-10-12 12:26     ` James Morse
2017-10-18 14:23   ` Catalin Marinas
2017-10-18 14:25     ` Catalin Marinas
2017-10-05 19:17 ` [PATCH v3 02/20] arm64: introduce an order for exceptions James Morse
2017-10-11 17:11   ` Julien Thierry
2017-10-05 19:17 ` [PATCH v3 03/20] arm64: Move the async/fiq helpers to explicitly set process context flags James Morse
2017-10-05 19:17 ` [PATCH v3 04/20] arm64: Mask all exceptions during kernel_exit James Morse
2017-10-05 19:17 ` [PATCH v3 05/20] arm64: entry.S: Remove disable_dbg James Morse
2017-10-05 19:17 ` [PATCH v3 06/20] arm64: entry.S: convert el1_sync James Morse
2017-10-05 19:17 ` [PATCH v3 07/20] arm64: entry.S convert el0_sync James Morse
2017-10-05 19:18 ` [PATCH v3 08/20] arm64: entry.S: convert elX_irq James Morse
2017-10-11 17:13   ` Julien Thierry
2017-10-12 12:26     ` James Morse
2017-10-05 19:18 ` [PATCH v3 09/20] KVM: arm/arm64: mask/unmask daif around VHE guests James Morse
2017-10-11  9:01   ` Marc Zyngier
2017-10-11 15:40     ` James Morse
2017-10-05 19:18 ` [PATCH v3 10/20] arm64: entry.S: move SError handling into a C function for future expansion James Morse
2017-10-05 19:18 ` [PATCH v3 11/20] arm64: cpufeature: Detect CPU RAS Extentions James Morse
2017-10-05 19:18 ` James Morse [this message]
2017-10-05 19:18 ` [PATCH v3 13/20] arm64: cpufeature: Enable IESB on exception entry/return for firmware-first James Morse
2017-10-18 16:43   ` Catalin Marinas
2017-10-18 17:14     ` James Morse
2017-10-05 19:18 ` [PATCH v3 14/20] arm64: kernel: Prepare for a DISR user James Morse
2017-10-05 19:18 ` [PATCH v3 15/20] KVM: arm64: Set an impdef ESR for Virtual-SError using VSESR_EL2 James Morse
2017-10-13  9:25   ` gengdongjiu
2017-10-13 16:53     ` James Morse
2017-10-05 19:18 ` [PATCH v3 16/20] KVM: arm64: Save/Restore guest DISR_EL1 James Morse
2017-10-05 19:18 ` [PATCH v3 17/20] KVM: arm64: Save ESR_EL2 on guest SError James Morse
2017-10-05 19:18 ` [PATCH v3 18/20] KVM: arm64: Handle RAS SErrors from EL1 on guest exit James Morse
2017-10-05 19:18 ` [PATCH v3 19/20] KVM: arm64: Handle RAS SErrors from EL2 " James Morse
2017-10-11 10:37   ` Marc Zyngier
2017-10-12 12:28     ` James Morse
2017-10-05 19:18 ` [PATCH v3 20/20] KVM: arm64: Take any host SError before entering the guest James Morse
2017-10-18 16:55 ` [PATCH v3 00/20] SError rework + RAS&IESB for firmware first support Catalin Marinas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171005191812.5678-13-james.morse@arm.com \
    --to=james.morse@arm.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).