From: "Christoph Egger" <Christoph.Egger@amd.com>
To: xen-devel@lists.xensource.com
Cc: Gavin.Maltby@sun.com, Keir Fraser <keir@xensource.com>
Subject: [PATCH] 3/3: MCA/MCE correctable error handling
Date: Tue, 21 Aug 2007 15:31:44 +0200 [thread overview]
Message-ID: <200708211531.44997.Christoph.Egger@amd.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 511 bytes --]
This is patch 3/3.
Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
--
AMD Saxony, Dresden, Germany
Operating System Research Center
Legal Information:
AMD Saxony Limited Liability Company & Co. KG
Sitz (Geschäftsanschrift):
Wilschdorfer Landstr. 101, 01109 Dresden, Deutschland
Registergericht Dresden: HRA 4896
vertretungsberechtigter Komplementär:
AMD Saxony LLC (Sitz Wilmington, Delaware, USA)
Geschäftsführer der AMD Saxony LLC:
Dr. Hans-R. Deppe, Thomas McCoy
[-- Attachment #2: mca_diff3_event.diff --]
[-- Type: text/x-diff, Size: 16287 bytes --]
diff -r a5209d79d241 -r e18773b9584c xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c Fri Aug 17 13:21:40 2007 +0200
+++ b/xen/arch/x86/cpu/mcheck/mce.c Tue Aug 21 14:13:19 2007 +0200
@@ -13,6 +13,7 @@
#include <asm/system.h>
#include "mce.h"
+#include "x86_mca.h"
int mce_disabled = 0;
unsigned int nr_mce_banks;
@@ -89,3 +90,94 @@ static int __init mcheck_enable(char *st
custom_param("nomce", mcheck_disable);
custom_param("mce", mcheck_enable);
+
+
+
+
+/* Wipe the machine check info area of @si: zero
+ * sizeof(struct arch_mc_info) bytes starting at si->arch.mc_info
+ * and reset x86_mcinfo_nentries(si) to 0. */
+void x86_mcinfo_clear(struct shared_info *si)
+{
+    void *mc_area = &si->arch.mc_info;
+
+    memset(mc_area, 0, sizeof(struct arch_mc_info));
+    x86_mcinfo_nentries(si) = 0;
+}
+
+
+/* Append one machine check record to the info area of @si.
+ *
+ * @mcinfo points to a record that begins with a struct mcinfo_common
+ * header; the header's 'size' field gives the number of bytes copied.
+ * The record is stored behind the x86_mcinfo_nentries(si) entries
+ * already present and the entry count is incremented.
+ *
+ * Returns 0 on success, -1 if the record does not fit. */
+int x86_mcinfo_add(struct shared_info *si, void *mcinfo)
+{
+    struct mcinfo_common *entry = (struct mcinfo_common *)mcinfo;
+    struct mcinfo_common *first = x86_mcinfo_first(si);
+    struct mcinfo_common *slot = first;
+    unsigned long area_end, entry_end;
+    int n = 0;
+
+    /* walk past the entries that are already in use */
+    while (n < x86_mcinfo_nentries(si)) {
+        slot = x86_mcinfo_next(slot);
+        n++;
+    }
+
+    /* the area is treated as ending sizeof(struct arch_mc_info)
+     * bytes behind the first entry */
+    area_end = (unsigned long)((uint8_t *)first + sizeof(struct arch_mc_info));
+    entry_end = (unsigned long)((uint8_t *)slot + entry->size);
+
+    if (entry_end > area_end)
+        return -1; /* No space. Can't add entry. */
+
+    /* the record fits: store it and account for it */
+    memcpy(slot, entry, entry->size);
+    x86_mcinfo_nentries(si)++;
+
+    return 0;
+}
+
+/* Dump machine check information in a format that
+ * mcelog can parse. This is used only when
+ * Dom0 does not take the notification.
+ *
+ * Prints the MC_TYPE_GLOBAL record of @si first, then one line per
+ * MC_TYPE_BANK record. Returns silently if no global record is found. */
+void x86_mcinfo_dump(struct shared_info *si)
+{
+    struct mcinfo_common *mic;
+    struct mcinfo_global *mc_global;
+    struct mcinfo_bank *mc_bank;
+
+    /* first print the global info */
+    x86_mcinfo_lookup(mic, si, MC_TYPE_GLOBAL);
+    if (mic == NULL)
+        return;
+    mc_global = (struct mcinfo_global *)mic;
+    if (mc_global->mc_flags & MC_FLAG_UNCORRECTABLE) {
+        printk(KERN_EMERG
+               "CPU%d: Machine Check Exception: %16"PRIx64"\n",
+               mc_global->mc_coreid, mc_global->mc_gstatus);
+    } else {
+        printk(KERN_INFO "MCE: The hardware reports a non "
+               "fatal, correctable incident occured on "
+               "CPU %d.\n",
+               mc_global->mc_coreid);
+    }
+
+    /* then the bank information */
+    x86_mcinfo_lookup(mic, si, MC_TYPE_BANK); /* finds the first entry */
+
+    /* The step expression advances to the next entry on every
+     * iteration, including the 'continue' path. Skipping a non-bank
+     * entry without advancing would loop forever on that entry. */
+    for (; mic != NULL; mic = x86_mcinfo_next(mic)) {
+        if (mic->type != MC_TYPE_BANK)
+            continue;
+
+        mc_bank = (struct mcinfo_bank *)mic;
+
+        /* same log level as the global line printed above */
+        if (mc_global->mc_flags & MC_FLAG_UNCORRECTABLE)
+            printk(KERN_EMERG);
+        else
+            printk(KERN_INFO);
+
+        printk("Bank %d: %16"PRIx64,
+               mc_bank->mc_bank,
+               mc_bank->mc_status);
+        /* MISC and ADDR are only printed when STATUS marks them valid */
+        if (mc_bank->mc_status & MCi_STATUS_MISCV)
+            printk("[%16"PRIx64"]", mc_bank->mc_misc);
+        if (mc_bank->mc_status & MCi_STATUS_ADDRV)
+            printk(" at %16"PRIx64, mc_bank->mc_addr);
+
+        printk("\n");
+    }
+}
diff -r a5209d79d241 -r e18773b9584c xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h Fri Aug 17 13:21:40 2007 +0200
+++ b/xen/arch/x86/cpu/mcheck/mce.h Tue Aug 21 14:13:19 2007 +0200
@@ -6,6 +6,10 @@ void intel_p6_mcheck_init(struct cpuinfo
void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
void winchip_mcheck_init(struct cpuinfo_x86 *c);
+void x86_mcinfo_clear(struct shared_info *si);
+int x86_mcinfo_add(struct shared_info *si, void *mcinfo);
+void x86_mcinfo_dump(struct shared_info *si);
+
/* Call the installed machine check handler for this CPU setup. */
extern fastcall void (*machine_check_vector)(struct cpu_user_regs *, long error_code);
diff -r a5209d79d241 -r e18773b9584c xen/arch/x86/cpu/mcheck/non-fatal.c
--- a/xen/arch/x86/cpu/mcheck/non-fatal.c Fri Aug 17 13:21:40 2007 +0200
+++ b/xen/arch/x86/cpu/mcheck/non-fatal.c Tue Aug 21 14:13:19 2007 +0200
@@ -13,19 +13,22 @@
#include <xen/kernel.h>
#include <xen/smp.h>
#include <xen/timer.h>
-#include <xen/errno.h>
+#include <xen/event.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>
#include "mce.h"
-static int firstbank;
+static int firstbank = 0;
static struct timer mce_timer;
#define MCE_PERIOD MILLISECS(15000)
-
-static void mce_checkregs (void *info)
+#define MCE_MIN MILLISECS(2000)
+#define MCE_MAX MILLISECS(30000)
+
+
+static void mce_intel_checkregs (void *info)
{
u32 low, high;
int i;
@@ -50,10 +53,170 @@ static void mce_checkregs (void *info)
}
}
-static void mce_work_fn(void *data)
+static void mce_intel_work_fn(void *data)
{
- on_each_cpu(mce_checkregs, NULL, 1, 1);
+ on_each_cpu(mce_intel_checkregs, NULL, 1, 1);
set_timer(&mce_timer, NOW() + MCE_PERIOD);
+}
+
+
+
+/* The MCi_STATUS_* #defines are needed here */
+#include "x86_mca.h"
+
+static s_time_t period = MCE_PERIOD;
+static int hw_threshold = 0;
+static int adjust = 0;
+
+
+/* The polling service routine:
+ * Collects information of correctable errors and notifies
+ * Dom0 via an event.
+ *
+ * mce_amd_work_fn() runs this routine on every CPU through
+ * on_each_cpu(), so the file-scope 'adjust' flag is shared by all
+ * invocations. It is only ever raised here, never reset: a CPU that
+ * found nothing must not erase the result of a CPU that found an
+ * error. Whether *this* CPU found something is tracked in the local
+ * 'found' flag.
+ *
+ * @info is unused.
+ */
+void mce_amd_checkregs(void *info)
+{
+    struct shared_info *si;
+    struct vcpu *vcpu = current;
+    struct mcinfo_global mc_global;
+    struct mcinfo_bank mc_info;
+    uint64_t status, addrv, miscv;
+    unsigned int i;
+    unsigned int event_enabled;
+    int found = 0; /* this CPU saw at least one valid bank */
+
+    event_enabled = guest_enabled_event(dom0->vcpu[0], VIRQ_MCA);
+    si = (struct shared_info *)dom0->shared_info; /* cast silences gcc4 */
+    memset(&mc_global, 0, sizeof(mc_global));
+    mc_global.common.type = MC_TYPE_GLOBAL;
+    mc_global.common.size = sizeof(mc_global);
+
+    mc_global.mc_domid = vcpu->domain->domain_id; /* impacted domain */
+    mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
+    mc_global.mc_vcpu_id = vcpu->vcpu_id; /* impacted vcpu */
+#if 0 /* todo: on which socket is this physical core? */
+    mc_global.mc_socketid = ???;
+#endif
+    mc_global.mc_flags |= MC_FLAG_CORRECTABLE;
+    rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
+    x86_mcinfo_clear(si);
+    x86_mcinfo_add(si, &mc_global);
+
+    for (i = 0; i < nr_mce_banks; i++) {
+        rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status);
+
+        if (!(status & MCi_STATUS_VAL))
+            continue;
+
+        memset(&mc_info, 0, sizeof(mc_info));
+        mc_info.common.type = MC_TYPE_BANK;
+        mc_info.common.size = sizeof(mc_info);
+        mc_info.mc_bank = i;
+        mc_info.mc_status = status;
+
+        /* Increase polling frequency. Several CPUs may store 1 here
+         * concurrently; they all write the same value. */
+        found = 1;
+        adjust = 1;
+
+        /* ADDR and MISC are only read when STATUS marks them valid */
+        addrv = 0;
+        if (status & MCi_STATUS_ADDRV)
+            rdmsrl(MSR_IA32_MC0_ADDR + i * 4, addrv);
+
+        miscv = 0;
+        if (status & MCi_STATUS_MISCV)
+            rdmsrl(MSR_IA32_MC0_MISC + i * 4, miscv);
+
+        mc_info.mc_addr = addrv;
+        mc_info.mc_misc = miscv;
+        x86_mcinfo_add(si, &mc_info);
+
+        /* clear status */
+        wrmsrl(MSR_IA32_MC0_STATUS + i * 4, 0x0ULL);
+        wmb();
+    }
+
+    if (found) {
+        /* If Dom0 enabled the VIRQ_MCA event, then ... */
+        if (event_enabled)
+            /* ... notify it. */
+            send_guest_global_virq(dom0, VIRQ_MCA);
+        else
+            /* ... or dump it */
+            x86_mcinfo_dump(si);
+    }
+}
+
+/* polling service routine invoker:
+ * Adjust poll frequency at runtime. No error means slow polling frequency,
+ * an error means higher polling frequency.
+ * It uses hw threshold register introduced in AMD K8 RevF to detect
+ * multiple correctable errors between two polls. In that case,
+ * increase polling frequency higher than normal.
+ *
+ * @data is passed through to mce_amd_checkregs().
+ */
+static void mce_amd_work_fn(void *data)
+{
+    /* Start every polling round with a clean flag. The per-CPU
+     * routine only ever sets it, so after on_each_cpu() returns it
+     * tells whether any CPU found an error.
+     * NOTE(review): assumes the last on_each_cpu() argument makes the
+     * call wait until all CPUs have finished - confirm. */
+    adjust = 0;
+    on_each_cpu(mce_amd_checkregs, data, 1, 1);
+
+    if (adjust > 0) {
+        if ( !guest_enabled_event(dom0->vcpu[0], VIRQ_MCA) ) {
+            /* Dom0 did not enable VIRQ_MCA, so Xen is reporting. */
+            printk("MCE: polling routine found correctable error\n");
+        }
+    }
+
+    if (hw_threshold) {
+        uint64_t value;
+        uint32_t counter;
+
+        rdmsrl(MSR_K8_MC4_MISC, value);
+        /* Only the error counter field is of interest
+         * Bit field is described in AMD K8 BKDG chapter 6.4.5.5
+         */
+        counter = (value & 0xFFF00000000ULL) >> 32U;
+
+        /* HW does not count *all* kinds of correctable errors.
+         * Thus it is possible, that the polling routine finds a
+         * correctable error even if the HW reports nothing.
+         * However, the other way around is not possible (= BUG).
+         */
+        if (counter > 0) {
+            /* HW reported correctable errors,
+             * the polling routine did not find...
+             */
+            BUG_ON(adjust == 0);
+            /* subtract 1 to not double count the error
+             * from the polling service routine */
+            adjust += (counter - 1);
+
+            /* Restart counter */
+            /* No interrupt, reset counter value */
+            value &= ~(0x60FFF00000000ULL);
+            /* Counter enable */
+            value |= (1ULL << 51);
+            wrmsrl(MSR_K8_MC4_MISC, value);
+            wmb();
+        }
+    }
+
+    if (adjust > 0) {
+        /* Increase polling frequency */
+        adjust++; /* adjust == 1 must have an effect */
+        period /= adjust;
+    } else {
+        /* Decrease polling frequency */
+        period *= 2;
+    }
+    if (period > MCE_MAX) {
+        /* limit: Poll at least every 30s */
+        period = MCE_MAX;
+    }
+    if (period < MCE_MIN) {
+        /* limit: Poll every 2s.
+         * When this is reached an uncorrectable error
+         * is expected to happen, if Dom0 does nothing.
+         */
+        period = MCE_MIN;
+    }
+
+    set_timer(&mce_timer, NOW() + period);
}
static int __init init_nonfatal_mce_checker(void)
@@ -68,19 +231,63 @@ static int __init init_nonfatal_mce_chec
if (!cpu_has(c, X86_FEATURE_MCA))
return -ENODEV;
- /* Some Athlons misbehave when we frob bank 0 */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
- boot_cpu_data.x86 == 6)
- firstbank = 1;
- else
- firstbank = 0;
-
/*
* Check for non-fatal errors every MCE_RATE s
*/
- init_timer(&mce_timer, mce_work_fn, NULL, 0);
- set_timer(&mce_timer, NOW() + MCE_PERIOD);
- printk(KERN_INFO "Machine check exception polling timer started.\n");
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (c->x86 == 6) { /* K7 */
+ firstbank = 1;
+ init_timer(&mce_timer, mce_intel_work_fn, NULL, 0);
+ set_timer(&mce_timer, NOW() + MCE_PERIOD);
+ break;
+ }
+
+ /* Assume we are on K8 or newer AMD CPU here */
+ if (cpu_has(c, X86_FEATURE_SVME)) {
+ uint64_t value;
+
+ /* hw threshold registers present */
+ hw_threshold = 1;
+ rdmsrl(MSR_K8_MC4_MISC, value);
+
+ if (value & (1ULL << 61)) { /* Locked bit */
+ /* Locked by BIOS. Not available for use */
+ hw_threshold = 0;
+ }
+ if (!(value & (1ULL << 63))) { /* Valid bit */
+ /* No CtrP present */
+ hw_threshold = 0;
+ } else {
+ if (!(value & (1ULL << 62))) { /* Counter Bit */
+ /* No counter field present */
+ hw_threshold = 0;
+ }
+ }
+
+ if (hw_threshold) {
+ /* No interrupt, reset counter value */
+ value &= ~(0x60FFF00000000ULL);
+ /* Counter enable */
+ value |= (1ULL << 51);
+ wrmsrl(MSR_K8_MC4_MISC, value);
+ /* serialize */
+ wmb();
+ printk(KERN_INFO "MCA: Use hw thresholding to adjust polling frequency\n");
+ }
+ }
+
+ init_timer(&mce_timer, mce_amd_work_fn, NULL, 0);
+ set_timer(&mce_timer, NOW() + period);
+ break;
+
+ case X86_VENDOR_INTEL:
+ init_timer(&mce_timer, mce_intel_work_fn, NULL, 0);
+ set_timer(&mce_timer, NOW() + MCE_PERIOD);
+ break;
+ }
+
+ printk(KERN_INFO "MCA: Machine check polling timer started.\n");
return 0;
}
__initcall(init_nonfatal_mce_checker);
diff -r a5209d79d241 -r e18773b9584c xen/common/event_channel.c
--- a/xen/common/event_channel.c Fri Aug 17 13:21:40 2007 +0200
+++ b/xen/common/event_channel.c Tue Aug 21 14:13:19 2007 +0200
@@ -539,6 +539,21 @@ void evtchn_set_pending(struct vcpu *v,
}
+/* Query whether @virq has an event channel port recorded on vcpu @v.
+ * Returns 1 if v->virq_to_evtchn[virq] is non-zero, and 0 if it is
+ * zero or @v is NULL. */
+int guest_enabled_event(struct vcpu *v, int virq)
+{
+    /* a missing vcpu is treated like an unbound virq */
+    int port = (v != NULL) ? v->virq_to_evtchn[virq] : 0;
+
+    /* a non-zero port means the virq is in use */
+    return port != 0;
+}
+
void send_guest_vcpu_virq(struct vcpu *v, int virq)
{
int port;
diff -r a5209d79d241 -r e18773b9584c xen/include/asm-x86/event.h
--- a/xen/include/asm-x86/event.h Fri Aug 17 13:21:40 2007 +0200
+++ b/xen/include/asm-x86/event.h Tue Aug 21 14:13:19 2007 +0200
@@ -61,7 +61,12 @@ static inline void local_event_delivery_
/* No arch specific virq definition now. Default to global. */
static inline int arch_virq_is_global(int virq)
{
- return 1;
+ switch (virq) {
+ case VIRQ_MCA: /* machine check VIRQ is reported as global */
+ return 1;
+ default: /* every other virq is reported as global as well */
+ return 1;
+ }
}
#endif
diff -r a5209d79d241 -r e18773b9584c xen/include/public/arch-x86/xen.h
--- a/xen/include/public/arch-x86/xen.h Fri Aug 17 13:21:40 2007 +0200
+++ b/xen/include/public/arch-x86/xen.h Tue Aug 21 14:13:19 2007 +0200
@@ -82,6 +82,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
#ifndef __ASSEMBLY__
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
/*
* Machine Check Architecure:
* structs are read-only and used to report all kinds of
diff -r a5209d79d241 -r e18773b9584c xen/include/xen/event.h
--- a/xen/include/xen/event.h Fri Aug 17 13:21:40 2007 +0200
+++ b/xen/include/xen/event.h Tue Aug 21 14:13:19 2007 +0200
@@ -51,6 +51,9 @@ void free_xen_event_channel(
void free_xen_event_channel(
struct vcpu *local_vcpu, int port);
+/* Query if event channel is in use by the guest */
+int guest_enabled_event(struct vcpu *v, int virq);
+
/* Notify remote end of a Xen-attached event channel.*/
void notify_via_xen_event_channel(int lport);
diff -r a5209d79d241 -r e18773b9584c xen/arch/x86/cpu/mcheck/x86_mca.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Tue Aug 21 14:13:19 2007 +0200
@@ -0,0 +1,72 @@
+/*
+ * MCA implementation for AMD K7/K8 CPUs
+ * Copyright (c) 2007 Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+
+/* The MCA/MCE MSRs should not be used anywhere else.
+ * They are cpu family/model specific and are only for use
+ * in terms of machine check handling.
+ * So we define them here rather than in <asm/msr.h>.
+ */
+
+
+/* Bitfield of the MSR_IA32_MCG_CAP register */
+#define MCG_CAP_COUNT 0x00000000000000ffULL
+#define MCG_CTL_P 0x0000000000000100ULL
+/* Bits 9-63 are reserved */
+
+/* Bitfield of the MSR_IA32_MCG_STATUS register */
+#define MCG_STATUS_RIPV 0x0000000000000001ULL
+#define MCG_STATUS_EIPV 0x0000000000000002ULL
+#define MCG_STATUS_MCIP 0x0000000000000004ULL
+/* Bits 3-63 are reserved */
+
+/* Bitfield of MSR_K8_MCi_STATUS registers */
+/* MCA error code */
+#define MCi_STATUS_MCA 0x000000000000ffffULL
+/* model-specific error code */
+#define MCi_STATUS_MSEC 0x00000000ffff0000ULL
+/* Other information */
+#define MCi_STATUS_OTHER 0x01ffffff00000000ULL
+/* processor context corrupt */
+#define MCi_STATUS_PCC 0x0200000000000000ULL
+/* MSR_K8_MCi_ADDR register valid */
+#define MCi_STATUS_ADDRV 0x0400000000000000ULL
+/* MSR_K8_MCi_MISC register valid */
+#define MCi_STATUS_MISCV 0x0800000000000000ULL
+/* error condition enabled */
+#define MCi_STATUS_EN 0x1000000000000000ULL
+/* uncorrected error */
+#define MCi_STATUS_UC 0x2000000000000000ULL
+/* status register overflow */
+#define MCi_STATUS_OVER 0x4000000000000000ULL
+/* valid */
+#define MCi_STATUS_VAL 0x8000000000000000ULL
+
+/* Bitfield of the MCi_STATUS_OTHER field */
+/* reserved bits */
+#define MCi_STATUS_OTHER_RESERVED1 0x00001fff00000000ULL
+/* uncorrectable ECC error */
+#define MCi_STATUS_OTHER_UC_ECC 0x0000200000000000ULL
+/* misspelled name, kept as an alias so existing users keep building */
+#define MCi_STATUS_OTEHR_UC_ECC MCi_STATUS_OTHER_UC_ECC
+/* correctable ECC error */
+#define MCi_STATUS_OTHER_C_ECC 0x0000400000000000ULL
+/* ECC syndrome of an ECC error */
+#define MCi_STATUS_OTHER_ECC_SYNDROME 0x007f800000000000ULL
+/* reserved bits */
+#define MCi_STATUS_OTHER_RESERVED2 0x0180000000000000ULL
+
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
next reply other threads:[~2007-08-21 13:31 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-08-21 13:31 Christoph Egger [this message]
2007-08-21 16:02 ` [PATCH] 3/3: MCA/MCE correctable error handling Jan Beulich
2007-08-22 9:00 ` Christoph Egger
2007-08-22 10:09 ` Jan Beulich
2007-08-22 15:56 ` Christoph Egger
2007-08-22 16:05 ` Keir Fraser
2007-08-22 16:10 ` Keir Fraser
2007-08-23 6:57 ` Christoph Egger
2007-08-23 9:27 ` [PATCH] resend " Christoph Egger
2007-08-23 14:07 ` [PATCH] " Keir Fraser
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200708211531.44997.Christoph.Egger@amd.com \
--to=christoph.egger@amd.com \
--cc=Gavin.Maltby@sun.com \
--cc=keir@xensource.com \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.