All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] 3/3: MCA/MCE correctable error handling
@ 2007-08-21 13:31 Christoph Egger
  2007-08-21 16:02 ` Jan Beulich
  0 siblings, 1 reply; 10+ messages in thread
From: Christoph Egger @ 2007-08-21 13:31 UTC (permalink / raw)
  To: xen-devel; +Cc: Gavin.Maltby, Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 511 bytes --]


This is patch 3/3.

Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>



-- 
AMD Saxony, Dresden, Germany
Operating System Research Center

Legal Information:
AMD Saxony Limited Liability Company & Co. KG
Sitz (Geschäftsanschrift):
   Wilschdorfer Landstr. 101, 01109 Dresden, Deutschland
Registergericht Dresden: HRA 4896
vertretungsberechtigter Komplementär:
   AMD Saxony LLC (Sitz Wilmington, Delaware, USA)
Geschäftsführer der AMD Saxony LLC:
   Dr. Hans-R. Deppe, Thomas McCoy

[-- Attachment #2: mca_diff3_event.diff --]
[-- Type: text/x-diff, Size: 16287 bytes --]

diff -r a5209d79d241 -r e18773b9584c xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c	Fri Aug 17 13:21:40 2007 +0200
+++ b/xen/arch/x86/cpu/mcheck/mce.c	Tue Aug 21 14:13:19 2007 +0200
@@ -13,6 +13,7 @@
 #include <asm/system.h>
 
 #include "mce.h"
+#include "x86_mca.h"
 
 int mce_disabled = 0;
 unsigned int nr_mce_banks;
@@ -89,3 +90,94 @@ static int __init mcheck_enable(char *st
 
 custom_param("nomce", mcheck_disable);
 custom_param("mce", mcheck_enable);
+
+/* Reset the machine check info kept in @si: zero the whole
+ * si->arch.mc_info area (sizeof(struct arch_mc_info) bytes),
+ * then set the entry count to 0. */
+void x86_mcinfo_clear(struct shared_info *si)
+{
+	memset(&si->arch.mc_info, 0, sizeof(struct arch_mc_info));
+	x86_mcinfo_nentries(si) = 0;
+}
+
+
+/* Append the record at @mcinfo (it starts with a struct mcinfo_common
+ * whose size field gives the bytes to copy) after the entries already
+ * counted in @si.  Returns 0 on success, -1 if it does not fit. */
+int x86_mcinfo_add(struct shared_info *si, void *mcinfo)
+{
+	struct mcinfo_common *entry = (struct mcinfo_common *)mcinfo;
+	struct mcinfo_common *area = x86_mcinfo_first(si);
+	struct mcinfo_common *slot = area;
+	unsigned long limit, needed;
+	int n = 0;
+
+	/* skip the entries already stored */
+	while (n < x86_mcinfo_nentries(si)) {
+		slot = x86_mcinfo_next(slot);
+		n++;
+	}
+
+	/* the new entry must end within sizeof(struct arch_mc_info) */
+	limit = (unsigned long)((uint8_t *)area + sizeof(struct arch_mc_info));
+	needed = (unsigned long)((uint8_t *)slot + entry->size);
+	if (needed > limit)
+		return -1;
+
+	memcpy(slot, entry, entry->size);
+	x86_mcinfo_nentries(si)++;
+	return 0;
+}
+
+/* Dump machine check information in a format mcelog can parse.
+ * This is used only when Dom0 does not take the notification.
+ * Returns without printing if the MC_TYPE_GLOBAL lookup yields NULL. */
+void x86_mcinfo_dump(struct shared_info *si)
+{
+	struct mcinfo_common *mic;
+	struct mcinfo_global *mc_global;
+	struct mcinfo_bank *mc_bank;
+
+	/* first print the global info */
+	x86_mcinfo_lookup(mic, si, MC_TYPE_GLOBAL);
+	if (mic == NULL)
+		return;
+	mc_global = (struct mcinfo_global *)mic;
+	if (mc_global->mc_flags & MC_FLAG_UNCORRECTABLE) {
+		printk(KERN_EMERG
+		       "CPU%d: Machine Check Exception: %16"PRIx64"\n",
+		       mc_global->mc_coreid, mc_global->mc_gstatus);
+	} else {
+		printk(KERN_INFO "MCE: The hardware reports a non "
+		       "fatal, correctable incident occured on "
+		       "CPU %d.\n",
+		       mc_global->mc_coreid);
+	}
+
+	/* then the bank information */
+	x86_mcinfo_lookup(mic, si, MC_TYPE_BANK); /* finds the first entry */
+	/* Advance in the loop header so that skipping a non-bank entry
+	 * still moves on: in a do/while body, "continue" re-tests the
+	 * condition without advancing and would never terminate.
+	 * NOTE(review): relies on x86_mcinfo_next() ending in NULL. */
+	for (; mic != NULL; mic = x86_mcinfo_next(mic)) {
+		if (mic->type != MC_TYPE_BANK)
+			continue;
+
+		mc_bank = (struct mcinfo_bank *)mic;
+		if (mc_global->mc_flags & MC_FLAG_UNCORRECTABLE)
+			printk(KERN_EMERG);
+		else
+			printk(KERN_INFO);
+
+		printk("Bank %d: %16"PRIx64,
+		       mc_bank->mc_bank,
+		       mc_bank->mc_status);
+		if (mc_bank->mc_status & MCi_STATUS_MISCV)
+			printk("[%16"PRIx64"]", mc_bank->mc_misc);
+		if (mc_bank->mc_status & MCi_STATUS_ADDRV)
+			printk(" at %16"PRIx64, mc_bank->mc_addr);
+
+		printk("\n");
+	}
+}
diff -r a5209d79d241 -r e18773b9584c xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h	Fri Aug 17 13:21:40 2007 +0200
+++ b/xen/arch/x86/cpu/mcheck/mce.h	Tue Aug 21 14:13:19 2007 +0200
@@ -6,6 +6,10 @@ void intel_p6_mcheck_init(struct cpuinfo
 void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
 void winchip_mcheck_init(struct cpuinfo_x86 *c);
 
+void x86_mcinfo_clear(struct shared_info *si);
+int x86_mcinfo_add(struct shared_info *si, void *mcinfo);
+void x86_mcinfo_dump(struct shared_info *si);
+
 /* Call the installed machine check handler for this CPU setup. */
 extern fastcall void (*machine_check_vector)(struct cpu_user_regs *, long error_code);
 
diff -r a5209d79d241 -r e18773b9584c xen/arch/x86/cpu/mcheck/non-fatal.c
--- a/xen/arch/x86/cpu/mcheck/non-fatal.c	Fri Aug 17 13:21:40 2007 +0200
+++ b/xen/arch/x86/cpu/mcheck/non-fatal.c	Tue Aug 21 14:13:19 2007 +0200
@@ -13,19 +13,22 @@
 #include <xen/kernel.h>
 #include <xen/smp.h>
 #include <xen/timer.h>
-#include <xen/errno.h>
+#include <xen/event.h>
 #include <asm/processor.h> 
 #include <asm/system.h>
 #include <asm/msr.h>
 
 #include "mce.h"
 
-static int firstbank;
+static int firstbank = 0;
 static struct timer mce_timer;
 
 #define MCE_PERIOD MILLISECS(15000)
-
-static void mce_checkregs (void *info)
+#define MCE_MIN    MILLISECS(2000)
+#define MCE_MAX    MILLISECS(30000)
+
+
+static void mce_intel_checkregs (void *info)
 {
 	u32 low, high;
 	int i;
@@ -50,10 +53,170 @@ static void mce_checkregs (void *info)
 	}
 }
 
-static void mce_work_fn(void *data)
+static void mce_intel_work_fn(void *data)
 { 
-	on_each_cpu(mce_checkregs, NULL, 1, 1);
+	on_each_cpu(mce_intel_checkregs, NULL, 1, 1);
 	set_timer(&mce_timer, NOW() + MCE_PERIOD);
+}
+
+
+
+/* The MCi_STATUS_* #defines are needed here */
+#include "x86_mca.h"
+
+static s_time_t period = MCE_PERIOD;
+static int hw_threshold = 0;
+static int adjust = 0;
+
+
+/* The polling service routine, run on every CPU by mce_amd_work_fn():
+ * records each valid MC bank in Dom0's shared info, clears the bank's
+ * status and notifies Dom0 via VIRQ_MCA, or dumps if Dom0 is not bound. */
+void mce_amd_checkregs(void *info)
+{
+	struct shared_info *si;
+	struct vcpu  *vcpu = current;
+	struct mcinfo_global mc_global;
+	struct mcinfo_bank mc_info;
+	uint64_t status, addrv, miscv;
+	unsigned int i;
+	unsigned int event_enabled;
+	unsigned int found = 0; /* this CPU saw at least one valid bank */
+
+	event_enabled = guest_enabled_event(dom0->vcpu[0], VIRQ_MCA);
+	si = (struct shared_info *)dom0->shared_info; /* cast silences gcc4 */
+	memset(&mc_global, 0, sizeof(mc_global));
+	mc_global.common.type = MC_TYPE_GLOBAL;
+	mc_global.common.size = sizeof(mc_global);
+
+	mc_global.mc_domid = vcpu->domain->domain_id; /* impacted domain */
+	mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
+	mc_global.mc_vcpu_id = vcpu->vcpu_id; /* impacted vcpu */
+	/* todo: mc_socketid -- on which socket is this physical core? */
+	mc_global.mc_flags |= MC_FLAG_CORRECTABLE;
+	rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
+	/* NOTE(review): every CPU clears and refills the same Dom0 area
+	 * here without visible locking -- confirm this cannot interleave. */
+	x86_mcinfo_clear(si);
+	x86_mcinfo_add(si, &mc_global);
+
+	for (i = 0; i < nr_mce_banks; i++) {
+		rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status);
+
+		if (!(status & MCi_STATUS_VAL))
+			continue;
+
+		memset(&mc_info, 0, sizeof(mc_info));
+		mc_info.common.type = MC_TYPE_BANK;
+		mc_info.common.size = sizeof(mc_info);
+		mc_info.mc_bank = i;
+		mc_info.mc_status = status;
+
+		/* Remember the hit; it is published to 'adjust' below */
+		found = 1;
+
+		addrv = 0;
+		if (status & MCi_STATUS_ADDRV)
+			rdmsrl(MSR_IA32_MC0_ADDR + i * 4, addrv);
+
+		miscv = 0;
+		if (status & MCi_STATUS_MISCV)
+			rdmsrl(MSR_IA32_MC0_MISC + i * 4, miscv);
+
+		mc_info.mc_addr = addrv;
+		mc_info.mc_misc = miscv;
+		x86_mcinfo_add(si, &mc_info);
+
+		/* clear status */
+		wrmsrl(MSR_IA32_MC0_STATUS + i * 4, 0x0ULL);
+		wmb();
+	}
+
+	if (found) {
+		/* Only ever set the shared flag: a CPU that found nothing
+		 * must not erase a hit reported by another CPU. */
+		adjust = 1;
+		/* If Dom0 enabled the VIRQ_MCA event, then ... */
+		if (event_enabled)
+			/* ... notify it. */
+			send_guest_global_virq(dom0, VIRQ_MCA);
+		else
+			/* ... or dump it */
+			x86_mcinfo_dump(si);
+	}
+}
+
+/* polling service routine invoker:
+ * Adjust poll frequency at runtime. No error means slow polling frequency,
+ * an error means higher polling frequency.
+ * It uses hw threshold register introduced in AMD K8 RevF to detect
+ * multiple correctable errors between two polls. In that case,
+ * increase polling frequency higher than normal.
+ */
+static void mce_amd_work_fn(void *data)
+{
+	/* Reset the shared flag once, before any CPU polls; the per-CPU
+	 * routine only ever sets it. */
+	adjust = 0;
+	on_each_cpu(mce_amd_checkregs, data, 1, 1);
+
+	if (adjust > 0) {
+		if ( !guest_enabled_event(dom0->vcpu[0], VIRQ_MCA) ) {
+			/* Dom0 did not enable VIRQ_MCA, so Xen is reporting. */
+			printk("MCE: polling routine found correctable error\n");
+		}
+	}
+
+	if (hw_threshold) {
+		uint64_t value;
+		uint32_t counter;
+
+		rdmsrl(MSR_K8_MC4_MISC, value);
+		/* Only the error counter field is of interest. Bit field is
+		 * described in AMD K8 BKDG chapter 6.4.5.5 */
+		counter = (value & 0xFFF00000000ULL) >> 32U;
+
+		/* HW does not count *all* kinds of correctable errors.
+		 * Thus it is possible, that the polling routine finds a
+		 * correctable error even if the HW reports nothing.
+		 */
+		if (counter > 0) {
+			/* The counter may also be ahead of the poll (e.g. an
+			 * error counted after the banks were scanned), so
+			 * adjust == 0 is tolerated instead of BUG()ing.
+			 * Otherwise subtract 1 to not double count the error
+			 * from the polling service routine. */
+			adjust += (adjust > 0) ? (counter - 1) : counter;
+
+			/* Restart counter: no interrupt, reset counter value */
+			value &= ~(0x60FFF00000000ULL);
+			/* Counter enable */
+			value |= (1ULL << 51);
+			wrmsrl(MSR_K8_MC4_MISC, value);
+			wmb();
+		}
+	}
+
+	if (adjust > 0) {
+		/* Increase polling frequency */
+		adjust++; /* adjust == 1 must have an effect */
+		period /= adjust;
+	} else {
+		/* Decrease polling frequency */
+		period *= 2;
+	}
+	if (period > MCE_MAX) {
+		/* limit: Poll at least every 30s */
+		period = MCE_MAX;
+	}
+	if (period < MCE_MIN) {
+		/* limit: Poll every 2s.
+		 * When this is reached an uncorrectable error
+		 * is expected to happen, if Dom0 does nothing. */
+		period = MCE_MIN;
+	}
+
+	set_timer(&mce_timer, NOW() + period);
 }
 
 static int __init init_nonfatal_mce_checker(void)
@@ -68,19 +231,63 @@ static int __init init_nonfatal_mce_chec
 	if (!cpu_has(c, X86_FEATURE_MCA))
 		return -ENODEV;
 
-	/* Some Athlons misbehave when we frob bank 0 */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-		boot_cpu_data.x86 == 6)
-			firstbank = 1;
-	else
-			firstbank = 0;
-
 	/*
 	 * Check for non-fatal errors every MCE_RATE s
 	 */
-	init_timer(&mce_timer, mce_work_fn, NULL, 0);
-	set_timer(&mce_timer, NOW() + MCE_PERIOD);
-	printk(KERN_INFO "Machine check exception polling timer started.\n");
+	switch (c->x86_vendor) {
+	case X86_VENDOR_AMD:
+		if (c->x86 == 6) { /* K7 */
+			firstbank = 1;
+			init_timer(&mce_timer, mce_intel_work_fn, NULL, 0);
+			set_timer(&mce_timer, NOW() + MCE_PERIOD);
+			break;
+		}
+
+		/* Assume we are on K8 or newer AMD CPU here */
+		if (cpu_has(c, X86_FEATURE_SVME)) {
+			uint64_t value;
+
+			/* hw threshold registers present */
+			hw_threshold = 1;
+			rdmsrl(MSR_K8_MC4_MISC, value);
+
+			if (value & (1ULL << 61)) { /* Locked bit */
+				/* Locked by BIOS. Not available for use */
+				hw_threshold = 0;
+			}
+			if (!(value & (1ULL << 63))) { /* Valid bit */
+				/* No CtrP present */
+				hw_threshold = 0;
+			} else {
+			       if (!(value & (1ULL << 62))) { /* Counter Bit */
+					/* No counter field present */
+					hw_threshold = 0;
+				}
+			}
+
+			if (hw_threshold) {
+				/* No interrupt, reset counter value */
+				value &= ~(0x60FFF00000000ULL);
+				/* Counter enable */
+				value |= (1ULL << 51);
+				wrmsrl(MSR_K8_MC4_MISC, value);
+				/* serialize */
+				wmb();
+				printk(KERN_INFO "MCA: Use hw thresholding to adjust polling frequency\n");
+			}
+		}
+
+		init_timer(&mce_timer, mce_amd_work_fn, NULL, 0);
+		set_timer(&mce_timer, NOW() + period);
+		break;
+
+	case X86_VENDOR_INTEL:
+		init_timer(&mce_timer, mce_intel_work_fn, NULL, 0);
+		set_timer(&mce_timer, NOW() + MCE_PERIOD);
+		break;
+	}
+
+	printk(KERN_INFO "MCA: Machine check polling timer started.\n");
 	return 0;
 }
 __initcall(init_nonfatal_mce_checker);
diff -r a5209d79d241 -r e18773b9584c xen/common/event_channel.c
--- a/xen/common/event_channel.c	Fri Aug 17 13:21:40 2007 +0200
+++ b/xen/common/event_channel.c	Tue Aug 21 14:13:19 2007 +0200
@@ -539,6 +539,21 @@ void evtchn_set_pending(struct vcpu *v, 
 }
 
 
+/* Query whether @virq is in use on @v: returns 0 when @v is NULL or
+ * the event channel port recorded for @virq is 0, and 1 otherwise. */
+int guest_enabled_event(struct vcpu *v, int virq)
+{
+    int evtchn;
+
+    if ( unlikely(v == NULL) )
+        return 0;
+
+    /* a non-zero port means the virq is in use */
+    evtchn = v->virq_to_evtchn[virq];
+
+    return (evtchn != 0) ? 1 : 0;
+}
+
 void send_guest_vcpu_virq(struct vcpu *v, int virq)
 {
     int port;
diff -r a5209d79d241 -r e18773b9584c xen/include/asm-x86/event.h
--- a/xen/include/asm-x86/event.h	Fri Aug 17 13:21:40 2007 +0200
+++ b/xen/include/asm-x86/event.h	Tue Aug 21 14:13:19 2007 +0200
@@ -61,7 +61,12 @@ static inline void local_event_delivery_
 /* No arch specific virq definition now. Default to global. */
 static inline int arch_virq_is_global(int virq)
 {
-    return 1;
+    switch (virq) {
+    case VIRQ_MCA:
+        return 1;
+    default:
+        return 1;
+    }
 }
 
 #endif
diff -r a5209d79d241 -r e18773b9584c xen/include/public/arch-x86/xen.h
--- a/xen/include/public/arch-x86/xen.h	Fri Aug 17 13:21:40 2007 +0200
+++ b/xen/include/public/arch-x86/xen.h	Tue Aug 21 14:13:19 2007 +0200
@@ -82,6 +82,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
 
 #ifndef __ASSEMBLY__
 
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
 /*
  * Machine Check Architecure:
  * structs are read-only and used to report all kinds of
diff -r a5209d79d241 -r e18773b9584c xen/include/xen/event.h
--- a/xen/include/xen/event.h	Fri Aug 17 13:21:40 2007 +0200
+++ b/xen/include/xen/event.h	Tue Aug 21 14:13:19 2007 +0200
@@ -51,6 +51,9 @@ void free_xen_event_channel(
 void free_xen_event_channel(
     struct vcpu *local_vcpu, int port);
 
+/* Query if event channel is in use by the guest */
+int guest_enabled_event(struct vcpu *v, int virq);
+
 /* Notify remote end of a Xen-attached event channel.*/
 void notify_via_xen_event_channel(int lport);
 
diff -r a5209d79d241 -r e18773b9584c xen/arch/x86/cpu/mcheck/x86_mca.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h	Tue Aug 21 14:13:19 2007 +0200
@@ -0,0 +1,72 @@
+/*
+ * MCA implementation for AMD K7/K8 CPUs
+ * Copyright (c) 2007 Advanced Micro Devices, Inc. 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+/* The MCA/MCE MSRs should not be used anywhere else.
+ * They are cpu family/model specific and are only for use
+ * in terms of machine check handling.
+ * So we define them here rather in <asm/msr.h>.
+ */
+
+
+/* Bitfield of the MSR_IA32_MCG_CAP register */
+#define MCG_CAP_COUNT           0x00000000000000ffULL
+#define MCG_CTL_P               0x0000000000000100ULL
+/* Bits 9-63 are reserved */
+
+/* Bitfield of the MSR_IA32_MCG_STATUS register */
+#define MCG_STATUS_RIPV         0x0000000000000001ULL
+#define MCG_STATUS_EIPV         0x0000000000000002ULL
+#define MCG_STATUS_MCIP         0x0000000000000004ULL
+/* Bits 3-63 are reserved */
+
+/* Bitfield of MSR_K8_MCi_STATUS registers */
+/* MCA error code */
+#define MCi_STATUS_MCA          0x000000000000ffffULL
+/* model-specific error code */
+#define MCi_STATUS_MSEC         0x00000000ffff0000ULL
+/* Other information */
+#define MCi_STATUS_OTHER        0x01ffffff00000000ULL
+/* processor context corrupt */
+#define MCi_STATUS_PCC          0x0200000000000000ULL
+/* MSR_K8_MCi_ADDR register valid */
+#define MCi_STATUS_ADDRV        0x0400000000000000ULL
+/* MSR_K8_MCi_MISC register valid */
+#define MCi_STATUS_MISCV        0x0800000000000000ULL
+/* error condition enabled */
+#define MCi_STATUS_EN           0x1000000000000000ULL
+/* uncorrected error */
+#define MCi_STATUS_UC           0x2000000000000000ULL
+/* status register overflow */
+#define MCi_STATUS_OVER         0x4000000000000000ULL
+/* valid */
+#define MCi_STATUS_VAL          0x8000000000000000ULL
+
+/* Bitfield of MCi_STATUS_OTHER field */
+/* reserved bits (bits 32-44) */
+#define MCi_STATUS_OTHER_RESERVED1      0x00001fff00000000ULL
+/* uncorrectable ECC error (bit 45) */
+#define MCi_STATUS_OTHER_UC_ECC         0x0000200000000000ULL
+/* correctable ECC error (bit 46) */
+#define MCi_STATUS_OTHER_C_ECC          0x0000400000000000ULL
+/* ECC syndrome of an ECC error (bits 47-54) */
+#define MCi_STATUS_OTHER_ECC_SYNDROME   0x007f800000000000ULL
+/* reserved bits (bits 55-56) */
+#define MCi_STATUS_OTHER_RESERVED2      0x0180000000000000ULL
+

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2007-08-23 14:07 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-08-21 13:31 [PATCH] 3/3: MCA/MCE correctable error handling Christoph Egger
2007-08-21 16:02 ` Jan Beulich
2007-08-22  9:00   ` Christoph Egger
2007-08-22 10:09     ` Jan Beulich
2007-08-22 15:56       ` Christoph Egger
2007-08-22 16:05         ` Keir Fraser
2007-08-22 16:10           ` Keir Fraser
2007-08-23  6:57             ` Christoph Egger
2007-08-23  9:27               ` [PATCH] resend " Christoph Egger
2007-08-23 14:07               ` [PATCH] " Keir Fraser

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.