linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86, mce, severities: Add AMD severities function
@ 2015-03-16 17:16 Aravind Gopalakrishnan
  2015-03-17  7:42 ` Ingo Molnar
  2015-03-17 10:20 ` Borislav Petkov
  0 siblings, 2 replies; 16+ messages in thread
From: Aravind Gopalakrishnan @ 2015-03-16 17:16 UTC (permalink / raw)
  To: tglx, mingo, hpa, tony.luck, bp, slaoub, luto, x86, linux-kernel,
	linux-edac
  Cc: Aravind Gopalakrishnan, Aravind Gopalakrishnan

Add a severities function that caters to AMD processors.
This allows us to do some vendor specific work within the
function if necessary.

Also, introduce a vendor flag bitfield which contains vendor
specific flags. The severities code uses this to define error
scope based on the presence of the flags field.

This is based off of work by Boris Petkov.

Testing details:
Tested the patch for any regressions on F15hM0h-0fh (Orochi)
and F15hM60h-6fh (Carrizo) and it works fine.

Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@amd.com>
---
 arch/x86/include/asm/mce.h                |  6 ++++
 arch/x86/kernel/cpu/mcheck/mce-severity.c | 52 +++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/mcheck/mce.c          |  9 ++++++
 3 files changed, 67 insertions(+)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index fd38a23..b574fbf 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -116,6 +116,12 @@ struct mca_config {
 	u32 rip_msr;
 };
 
+struct mce_vendor_flags {
+	__u64		overflow_recov	: 1, /* cpuid_ebx(80000007) */
+			__reserved_0	: 63;
+};
+extern struct mce_vendor_flags mce_flags;
+
 extern struct mca_config mca_cfg;
 extern void mce_register_decode_chain(struct notifier_block *nb);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 8bb4330..2af82b5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -186,12 +186,64 @@ static int error_context(struct mce *m)
 	return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
 }
 
+/* keeping amd_mce_severity in sync with AMD error scope hierarchy table */
+static int amd_mce_severity(struct mce *m, enum context ctx)
+{
+	/* Processor Context Corrupt, no need to fumble too much, die! */
+	if (m->status & MCI_STATUS_PCC)
+		return MCE_PANIC_SEVERITY;
+
+	if (m->status & MCI_STATUS_UC) {
+		/*
+		 * On older systems, where overflow_recov flag is not
+		 * present, we should simply PANIC if Overflow occurs.
+		 * If overflow_recov flag set, then SW can try
+		 * to at least kill process to salvage system operation.
+		 */
+
+		/* at least one error was not logged */
+		if (m->status & MCI_STATUS_OVER && !mce_flags.overflow_recov)
+				return MCE_PANIC_SEVERITY;
+
+		/* software can try to contain */
+		if (!(m->mcgstatus & MCG_STATUS_RIPV) &&
+		      mce_flags.overflow_recov) {
+			if (ctx == IN_KERNEL)
+				return MCE_PANIC_SEVERITY;
+
+			/* kill current process */
+			return MCE_AR_SEVERITY;
+		}
+		/*
+		 * any other case, return MCE_UC_SEVERITY so that
+		 * we log the error and exit #MC handler.
+		 */
+		return MCE_UC_SEVERITY;
+	}
+
+	/*
+	 * deferred error: poll handler catches these and adds to mce_ring
+	 * so memory-failure can take recovery actions.
+	 */
+	if (m->status & MCI_STATUS_DEFERRED)
+		return MCE_DEFERRED_SEVERITY;
+
+	/*
+	 * corrected error: poll handler catches these and passes
+	 * responsibility of decoding the error to EDAC
+	 */
+	return MCE_KEEP_SEVERITY;
+}
+
 int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp)
 {
 	enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
 	enum context ctx = error_context(m);
 	struct severity *s;
 
+	if (m->cpuvendor == X86_VENDOR_AMD)
+		return amd_mce_severity(m, ctx);
+
 	for (s = severities;; s++) {
 		if ((m->status & s->mask) != s->result)
 			continue;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 17ad025..680cfb2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -65,6 +65,7 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
 DEFINE_PER_CPU(unsigned, mce_exception_count);
 
 struct mce_bank *mce_banks __read_mostly;
+struct mce_vendor_flags mce_flags __read_mostly;
 
 struct mca_config mca_cfg __read_mostly = {
 	.bootlog  = -1,
@@ -1532,6 +1533,13 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 		 if (c->x86 == 6 && cfg->banks > 0)
 			mce_banks[0].ctl = 0;
 
+		/*
+		 * overflow_recov is supported for F15h Models 00h-0fh
+		 * even though we don't have cpuid bit for this
+		 */
+		if (c->x86 == 0x15 && c->x86_model <= 0xf)
+			mce_flags.overflow_recov = 1;
+
 		 /*
 		  * Turn off MC4_MISC thresholding banks on those models since
 		  * they're not supported there.
@@ -1637,6 +1645,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 		break;
 	case X86_VENDOR_AMD:
 		mce_amd_feature_init(c);
+		mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1;
 		break;
 	default:
 		break;
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2015-03-20 17:50 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-03-16 17:16 [PATCH] x86, mce, severities: Add AMD severities function Aravind Gopalakrishnan
2015-03-17  7:42 ` Ingo Molnar
2015-03-17  9:04   ` Borislav Petkov
2015-03-17 10:11     ` Ingo Molnar
2015-03-17 10:20 ` Borislav Petkov
2015-03-17 18:41   ` Aravind Gopalakrishnan
2015-03-17 18:44     ` Borislav Petkov
2015-03-19  0:01       ` Luck, Tony
2015-03-19  9:29         ` Borislav Petkov
2015-03-19 14:41           ` Aravind Gopalakrishnan
2015-03-19 15:53             ` Borislav Petkov
2015-03-19 16:20               ` Aravind Gopalakrishnan
2015-03-19 17:15                 ` Luck, Tony
2015-03-20 15:59                   ` Aravind Gopalakrishnan
2015-03-20 16:03                     ` Borislav Petkov
2015-03-20 17:49                       ` Luck, Tony

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).